Skip to content

Commit

Permalink
preliminary parsing facility of BibTeX files: for now strings remain …
Browse files Browse the repository at this point in the history
…unstructured
  • Loading branch information
grammarware committed Dec 23, 2012
1 parent dab70ef commit 010f9c8
Show file tree
Hide file tree
Showing 2 changed files with 49 additions and 0 deletions.
24 changes: 24 additions & 0 deletions rascal/src/io/bibtex/Parser.rsc
@@ -0,0 +1,24 @@
@contributor{Vadim Zaytsev - vadim@grammarware.net - SWAT, CWI}
module io::bibtex::Parser

import io::bibtex::Syntax;
import ParseTree;
import String;
import IO;

// A bibliography as produced by this module: a list of entries.
alias BibLib = list[BibEntry];
// One entry as a tuple. mapEntry fills kind from the entry type's letters
// (the text after "@"), name from the entry name, and attrs with one
// key -> value string pair per BibPair of the entry.
alias BibEntry = tuple[str kind, str name, map[str,str] attrs];

// Reads the complete contents of the file at location l and converts the
// text to a BibLib by delegating to str2bib.
public BibLib loc2bib(loc l) {
    str contents = readFile(l);
    return str2bib(contents);
}
// Parses BibTeX source text into a BibLib.
//
// Every no-break space in s is replaced by a plain space, the result is
// trimmed, parsed as a BibLibrary and converted with library2list.
BibLib str2bib(str s) {
    // The no-break space is the code point U+00A0. 0xC2 0xA0 is merely its
    // UTF-8 byte encoding; the escape \uC2A0 would denote an unrelated
    // Hangul syllable and never match a no-break space in decoded text.
    str normalised = replaceAll(s, "\u00A0", " ");
    return library2list(parse(#BibLibrary, trim(normalised)));
}

// Flattens a parsed BibLibrary into a list: each OneBibEntry in b.es is
// converted with mapEntry, and the results are kept in iteration order.
BibLib library2list(BibLibrary b) {
    BibLib entries = [];
    for (OneBibEntry entry <- b.es) {
        entries += [mapEntry(entry)];
    }
    return entries;
}

// Converts one parsed entry into a BibEntry tuple: the letters of the entry
// type, the entry name as text, and the attribute map built by mapKVs.
BibEntry mapEntry(OneBibEntry e) {
    str entryKind = "<e.kind.name>";
    str entryName = "<e.name>";
    map[str,str] attributes = mapKVs(e.pairs);
    return <entryKind, entryName, attributes>;
}

// Builds a map from the text of each pair's key to the text of its value.
// Values are stored as their unparsed source text.
map[str,str] mapKVs({BibPair ","}+ ps) {
    return ("<pair.key>" : "<pair.val>" | BibPair pair <- ps);
}

// Smoke-test entry point: parses a fixed BibTeX file from the home directory.
// The resulting BibLib is discarded.
public void main()
{
    loc sample = |home:///workspace/zaytsev.bib|;
    loc2bib(sample);
}
25 changes: 25 additions & 0 deletions rascal/src/io/bibtex/Syntax.rsc
@@ -0,0 +1,25 @@
@contributor{Vadim Zaytsev - vadim@grammarware.net - SWAT, CWI}
module io::bibtex::Syntax

// Layout of the grammar: a (possibly empty) run of whitespace characters.
layout L = WS;
// Whitespace is the no-break space (U+00A0, whose UTF-8 encoding is the byte
// pair 0xC2 0xA0), the plain space, newline, carriage return and tab.
// The follow restriction makes a whitespace run extend as far as possible.
lexical WS = [\u00A0 \ \n\r\t]* !>> [\u00A0 \ \n\r\t];

// One entry: entry type, "{", entry name, ",", one or more comma-separated
// key/value pairs, an optional trailing comma, and the closing "}".
syntax OneBibEntry = BibEntryType kind "{" BibEntryName name "," {BibPair ","}+ pairs ","? "}";
// Entry type: "@" followed by one or more ASCII letters; the letters are labelled name.
lexical BibEntryType = "@" [a-zA-Z]+ name;
// Entry name: one or more characters other than a comma, which must be followed by a comma.
lexical BibEntryName = ![,]+ >> [,];
// A single attribute of the form key = value.
syntax BibPair = BibKey key "=" BibValue val;
// Attribute key: one or more lower-case ASCII letters.
lexical BibKey = [a-z]+;

// Attribute value: a double-quoted string, a brace-delimited group, or a bare
// run of ASCII letters and digits that is not followed by another letter or digit.
lexical BibValue
= BibValueQ
| BibValueC
| [a-zA-Z0-9]+ !>> [a-zA-Z0-9]
;

// Quoted value: zero or more BQElements between double quotes.
lexical BibValueQ = [\"] BQElement* [\"] ;
// Quoted-value element: any character except a double quote, backslash or brace;
// or a backslash followed by one of " ' ` & $ % or an ASCII letter;
// or a nested brace-delimited group.
lexical BQElement = ![\"\\{}] | [\\] [\"\'`&$%a-zA-Z] | BibValueC;

// Braced value: zero or more BCElements between "{" and "}".
lexical BibValueC = [{] BCElement* [}];
// Braced-value element: a nested braced group or any character that is not a brace,
// so braces inside a braced value must be balanced.
lexical BCElement = BibValueC | ![{}];

// Start symbol: a library is one or more entries, labelled es.
start syntax BibLibrary = OneBibEntry+ es;

0 comments on commit 010f9c8

Please sign in to comment.