Skip to content

Commit

Permalink
a number of reasonable normalisations is added
Browse files Browse the repository at this point in the history
  • Loading branch information
grammarware committed Dec 23, 2012
1 parent a0ff89e commit dffe614
Show file tree
Hide file tree
Showing 3 changed files with 49 additions and 10 deletions.
8 changes: 8 additions & 0 deletions rascal/src/io/bibtex/Access.rsc
Expand Up @@ -2,6 +2,8 @@
module io::bibtex::Access

import io::bibtex::Parser;
import util::Math;
import String;
import Rascalware;

// Renders the value stored under `key` in the entry's attribute map as a plain string.
// The lookup itself is not guarded here: the caller is expected to pass a key that is present.
public str gimmeStr(BibEntry e, str key)
{
	return gimmeStr(e.attrs[key]);
}
Expand All @@ -10,3 +12,9 @@ public str gimmeStr(bracketed(BibString s)) = gimmeStr(s);
// A raw string is returned unchanged.
public str gimmeStr(raw(str s))
{
	return s;
}

// A sequence of BibStrings is combined by handing gimmeStr and the parts to folds.
public str gimmeStr(bibseq(list[BibString] ss))
{
	return folds(gimmeStr,ss);
}

// Fallback for any BibString not matched by a more specific overload:
// the value is rendered through string interpolation.
public default str gimmeStr(BibString s)
{
	return "<s>";
}

// Reads the attribute `key` of the entry as text and parses it as a numeric range
// (see the str overload of gimmeRange for the accepted format and the <0,0> fallback).
public tuple[int,int] gimmeRange(BibEntry e, str key)
{
	str text = gimmeStr(e,key);
	return gimmeRange(text);
}
// Extracts the first "<digits> - <digits>" range found anywhere in `s`.
// The two numbers may be separated by one or two hyphens with optional whitespace around them.
// Returns <0,0> when no such range occurs in the string.
public tuple[int,int] gimmeRange(str s)
{
	if (/<i1:\d+>\s*\-\-?\s*<i2:\d+>/ := s)
		return <toInt(i1),toInt(i2)>;
	return <0,0>;
}

// Reads the attribute `key` of the entry as text and converts that text with toInt.
public int gimmeInt(BibEntry e, str key)
{
	str text = gimmeStr(e,key);
	return toInt(text);
}

46 changes: 41 additions & 5 deletions rascal/src/io/bibtex/Normal.rsc
Expand Up @@ -11,12 +11,8 @@ import IO;
// Lower-case function words: articles, conjunctions and prepositions.
// NOTE(review): presumably the words that title capitalisation (capTitle) leaves in lower case;
// the code that consumes this list is not visible here -- confirm.
list[str] dontcap = ["a","an","and","as","at","but","by","for","from","in","into","nor","of","on","or","over",
"per","the","to","upon","vs.","vs","with"];

// Manual driver: loads a fixed BibTeX file, runs normalise over the first component
// of the loc2bib result and prints the outcome of bib2str.
public void main()
{
	loc source = |home:///workspace/bibtex/icse2010.bib|;
	println(bib2str([normalise(e) | e := loc2bib(source)[0]]));
}

BibEntry normalise(BibEntry e) = runall(e,[locVSaddr,capTitle,doiVSurl]);
// Entry-level normalisation: hands the entry to runall together with the list of
// individual normalisation steps (location/address, title capitalisation, doi/url, pages).
BibEntry normalise(BibEntry e)
{
	return runall(e,[locVSaddr,capTitle,doiVSurl,normPages]);
}

// The idea is that "address" usually refers to the publisher, while "location" refers to the event.
// Since some BibTeX processors do not accept both, location is preferred here.
Expand Down Expand Up @@ -62,3 +58,43 @@ BibEntry doiVSurl(BibEntry e)
}
return e;
}

// The properties pages and numpages must be synchronised.
//
// Given an entry with a "pages" attribute of the form "<first> - <last>" (one or two hyphens):
//   * "numpages" is added, or corrected, to be last-first+1;
//   * "pages" is rewritten to the canonical "<first>--<last>" form.
// Both fixes are applied in the same call, so a single pass leaves the entry fully normalised.
//
// The entry is returned unchanged (with a "[!]" diagnostic) when it has no "pages" attribute,
// when no range can be recognised in it, or when the range is descending (e.g. an abbreviated
// "123-8"), since a page count derived from such a range would be zero or negative.
// Every applied fix is reported with a "[x]" diagnostic.
BibEntry normPages(BibEntry e)
{
	if ("pages" notin e.attrs)
	{
		println("[!] Cannot fix: no information about pages.");
		return e;
	}
	// Render the attribute once; it is needed for parsing, diagnostics and the format check.
	str pages = gimmeStr(e,"pages");
	int i1,i2;
	<i1,i2> = gimmeRange(pages);
	// <0,0> is what gimmeRange yields when it finds no range; a descending range cannot be
	// turned into a sensible page count either, so neither case is touched.
	if (i1+i2 == 0 || i2 < i1)
	{
		println("[!] Cannot understand \"<pages>\" pages.");
		return e;
	}
	int expected = i2-i1+1;
	if ("numpages" notin e.attrs)
	{
		println("[x] Added numpages based on pages.");
		e.attrs += ("numpages":bracketed(raw("<expected>")));
	}
	else
	{
		// A numpages value that is not a number must not abort the normalisation:
		// it is treated as wrong and replaced. Since expected >= 1, -1 never matches it.
		int stored = -1;
		try
		{
			stored = gimmeInt(e,"numpages");
		}
		catch:
		{
			stored = -1;
		}
		if (expected != stored)
		{
			println("[x] Fixed wrong numpages based on pages.");
			e.attrs = e.attrs
				- ("numpages":e.attrs["numpages"])
				+ ("numpages":bracketed(raw("<expected>")));
		}
	}
	// Independently of numpages, bring the pages value itself into canonical form.
	if ("<i1>--<i2>" != pages)
	{
		println("[x] Fixed slightly deviant format of pages.");
		e.attrs = e.attrs
			- ("pages":e.attrs["pages"])
			+ ("pages":bracketed(raw("<i1>--<i2>")));
	}
	return e;
}
5 changes: 0 additions & 5 deletions rascal/src/io/bibtex/Parser.rsc
Expand Up @@ -55,11 +55,6 @@ BibString normalise(list[BibString] bs)
}


// Manual driver: parses a fixed BibTeX file and pretty-prints the first component
// of the loc2bib result.
public void main()
{
	loc source = |home:///workspace/bibtex/icse2010.bib|;
	iprintln(loc2bib(source)[0]);
	//loc2bib(|home:///workspace/zaytsev.bib|);
}

// TODO will be useful later for checking unparser completeness
public void do()
Expand Down

0 comments on commit dffe614

Please sign in to comment.