-
Notifications
You must be signed in to change notification settings - Fork 15
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Python to BGF extractor, implemented in Rascal
- Loading branch information
1 parent
1de61a1
commit 474c0ca
Showing
3 changed files
with
131 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,69 @@ | ||
@contributor{Vadim Zaytsev - vadim@grammarware.net - SWAT, CWI} | ||
module extract::Python2BGF | ||
|
||
import ValueIO; | ||
import IO; | ||
import List; | ||
import String; | ||
import ParseTree; | ||
import syntax::BGF; | ||
import io::WriteBGF; | ||
import extract::PyParsing; | ||
import normal::BGF; | ||
|
||
// Command-line entry point.
// args[0]: path of the PyParsing-based Python source to read.
// args[1]: path of the BGF file to write.
// Both paths are resolved against the current working directory (cwd:///).
// The input is parsed as a PyPaFile, converted to BGF, normalised and saved.
public void main(list[str] args)
{
    loc src = |cwd:///|+args[0];
    loc dst = |cwd:///|+args[1];
    // Reuse src rather than rebuilding the same location inline.
    writeBGF(normalise(pypafile2bgf(parse(#PyPaFile,src))),dst);
    println("Extraction completed.");
}
|
||
// Converts a parsed PyParsing definition file into a BGF grammar.
// Every fragment of the shape "<lhs> <def> <rhs>;" is inspected in turn:
//  - the defaultWhitespace assignment and Forward() declarations yield nothing;
//  - a single-expression left-hand side yields one production via expr2expr;
//  - a comma-separated left-hand side unpacked from a
//    (Keyword(x).suppress() for x in "...".split()) generator yields one
//    terminal production per name.
// Throws a str exception when the unpacking arity does not match or when a
// fragment has any other shape.
BGFGrammar pypafile2bgf((PyPaFile)`<PyPaFragment+ pps>`)
{
    list[BGFProduction] ps = [];
    for ((PyPaFragment)`<PyPaLHS lhs><PyPaDef d><PyPaExpr2 rhs>;` <- pps)
    {
        if ("<lhs>" == "defaultWhitespace")
            ; // not a grammar rule, skipped on purpose
        else if ("<rhs>" == "Forward()")
            ; // a forward declaration carries no definition of its own
        else if ((PyPaLHS)`<PyPaExpr2 lhs1>` := lhs)
            ps += production("","<lhs1>",expr2expr(rhs));
        else if ((PyPaLHS)`<{PyPaExpr2 ","}+ lhss>` := lhs && (PyPaExpr2)`(Keyword(<Id x>).suppress() for <x> in "<QChars t>".split())` := rhs)
        {
            nts = ["<z>" | z <- lhss];
            vals = [terminal(c) | c <- split(" ","<t>")];
            if (size(nts) != size(vals))
                throw "Impossible to match (un)packing.";
            // index(nts) lists every legal index, so the last pair is kept
            // whether ranges are inclusive or exclusive of the upper bound.
            for (i <- index(nts))
                ps += production("",nts[i],vals[i]);
        }
        else
            throw "Give up.";
    }
    return grammar([],ps);
}
// expr2expr maps one PyParsing expression onto the corresponding BGF expression.
// It is defined by concrete-syntax pattern alternatives; expressions matched
// by none of them end up in the default alternative at the bottom.

// Names and literals.
BGFExpression expr2expr((PyPaExpr2)`<Id n>`) = nonterminal("<n>");
BGFExpression expr2expr((PyPaExpr2)`<Id n>.copy()`) = nonterminal("<n>"); // pyparsing idiosyncrasy
BGFExpression expr2expr((PyPaExpr2)`"<QChars t>"`) = terminal("<t>");
//BGFExpression expr2expr((PyPaExpr2)`StringEnd()`) = terminal("\n"); // this works correctly, but the next line is more general
BGFExpression expr2expr((PyPaExpr2)`<Id f>()`) = nonterminal("<f>");
BGFExpression expr2expr((PyPaExpr2)`Suppress("<QChars t>")`) = terminal("<t>"); // not for AST, but we don't care

// Wrappers that do not change the recognised language are unwrapped.
BGFExpression expr2expr((PyPaExpr2)`(<PyPaExpr2 e>)`) = expr2expr(e);
BGFExpression expr2expr((PyPaExpr2)`<PyPaExpr2 e>.setParseAction(<PyPaExpr2 _>)`) = expr2expr(e); // disregard parsing actions
BGFExpression expr2expr((PyPaExpr2)`<PyPaExpr2 e>.setWhitespaceChars(<PyPaExpr2 _>)`) = expr2expr(e); // disregard layout variations

// Binary combinators: "+" becomes a sequence, "^" becomes a choice.
BGFExpression expr2expr((PyPaExpr2)`<PyPaExpr2 e1>+<PyPaExpr2 e2>`) = sequence([expr2expr(e1),expr2expr(e2)]);
BGFExpression expr2expr((PyPaExpr2)`<PyPaExpr2 e1>^<PyPaExpr2 e2>`) = choice([expr2expr(e1),expr2expr(e2)]);
BGFExpression expr2expr((PyPaExpr2)`NotAny(<PyPaExpr2 _>)`) = epsilon(); // no way to represent negative production rules in BGF

// Built-in token classes and repetition/optionality helpers.
BGFExpression expr2expr((PyPaExpr2)`Word(alphas)`) = val(string());
BGFExpression expr2expr((PyPaExpr2)`Word(nums)`) = val(integer());
BGFExpression expr2expr((PyPaExpr2)`Optional(<PyPaExpr2 e>)`) = optional(expr2expr(e));
// oneOf takes a space-separated list of alternatives inside a single string.
BGFExpression expr2expr((PyPaExpr2)`oneOf("<QChars t>")`) = choice([terminal(c) | c <- split(" ","<t>")]);
BGFExpression expr2expr((PyPaExpr2)`ZeroOrMore(<PyPaExpr2 e>)`) = star(expr2expr(e));
BGFExpression expr2expr((PyPaExpr2)`Group(OneOrMore(<PyPaExpr2 e>))`) = plus(expr2expr(e));
BGFExpression expr2expr((PyPaExpr2)`OneOrMore(<PyPaExpr2 e>)`) = plus(expr2expr(e));

// Fallback for expressions none of the alternatives above recognise.
// Prints the offending tree and its text, then throws a str exception: the
// function is declared to return a BGFExpression and has no sensible value to
// return here, so failing loudly beats falling off the end of the body.
default BGFExpression expr2expr(PyPaExpr2 rhs)
{
    iprintln(rhs);
    println("What to do with <rhs>?");
    throw "Cannot convert PyParsing expression to BGF: <rhs>";
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,22 @@ | ||
#!/bin/sh
# Wrapper around the Rascal module extract::Python2BGF.
# Usage: <this-script> <input-py> <output-bgf>
# Copies the input next to the Rascal sources as src.py, runs the extractor
# there, moves the resulting BGF file back to the caller's directory and
# removes the temporary copy.
# All expansions are quoted so that paths containing spaces or glob characters
# survive, and a failed cd/cp aborts instead of continuing in the wrong place.

# Remember where we were called from; SLPS is two levels above this script.
LOCAL="${PWD}"
cd "$(dirname "$0")" || exit 1
cd ../.. || exit 1
SLPS="${PWD}"
cd "${LOCAL}" || exit 1

if [ $# -ne 2 ]; then
    echo "This tool extracts a BGF grammar from a Python source, assuming PyParsing library."
    echo "If you want faster performance, run it directly from the Rascal console."
    echo "Usage: py2rsc <input-py> <output-bgf>"
    exit 1
elif [ ! -r "$1" ]; then
    echo "Oops: $1 not found or not readable."
    exit 1
fi

# The extractor resolves both arguments against its working directory, hence
# the copy into the Rascal source tree and the cd before launching java.
cp "$1" "${SLPS}/shared/rascal/src/src.py" || exit 1
cd "${SLPS}/shared/rascal/src" && java -Xmx1G -Xss32m -jar "${SLPS}/download/rascal.jar" extract::Python2BGF src.py "$2"
mv "${SLPS}/shared/rascal/src/$2" "${LOCAL}"
rm "${SLPS}/shared/rascal/src/src.py"
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,40 @@ | ||
# PyParsing grammar of a small functional language: programs are lists of
# function definitions whose bodies are built from names, integer literals,
# binary operators, applications and if/then/else.
# NOTE(review): every statement ends in " ;" — presumably so that each one
# matches the "<lhs><def><rhs>;" fragment shape expected by the Python-to-BGF
# extractor; confirm before reformatting.
# NOTE(review): confirm that the extractor's grammar tolerates "#" comments
# before feeding this commented version to it.
_Literal = Literal ;
defaultWhitespace = ParserElement.DEFAULT_WHITE_CHARS ;
# Remove "\n" from the default whitespace set; defaultWhitespace keeps the
# original set for use at the end of the program rule.
ParserElement.setDefaultWhitespaceChars(ParserElement.DEFAULT_WHITE_CHARS.replace("\n", "")) ;

# expr is recursive: declared here, defined with "<<" further down.
expr = Forward() ;

# One suppressed Keyword per word of "if then else", unpacked in order.
_IF, _THEN, _ELSE = (Keyword(i).suppress() for i in "if then else".split()) ;

# A name is an alphabetic word that is not one of the keywords.
name = NotAny(_IF | _THEN | _ELSE)+Word(
alphas
).setParseAction(lambda tok: str(tok[0])) ;
# An integer literal with an optional leading minus sign.
literal = (
Optional("-") + Word(nums)
).setParseAction(lambda tok: t.Literal("".join(tok))) ;
atom = name ^ literal ^ (Suppress("(") + expr + Suppress(")")) ;

ifThenElse = (
_IF + expr + _THEN + expr + _ELSE + expr
).setParseAction(lambda tok: t.IfThenElse(*tok)) ;

# Space-separated list of the binary operator symbols.
operators = oneOf("== + -") ;

# An atom followed by any number of (operator, atom) pairs.
binary = (
atom + ZeroOrMore((operators + atom).setParseAction(lambda tok: (tok[0], tok[1])))
).setParseAction(_lassoc) ;
# Function application: a name followed by one or more argument atoms.
apply = (
name.copy() + Group(OneOrMore(atom))
).setParseAction(lambda tok: t.Apply(tok[0], *(i for i in tok[1]))) ;

# Actual definition of the forward-declared expr.
expr << (binary ^ apply ^ ifThenElse) ;

# Function definition: name, one or more argument names, "=", body expression.
function = (
name.copy() + OneOrMore(name.copy().setParseAction(ArgumentCollector.append)) + Suppress("=") + expr
).setParseAction(lambda tok: t.Function(tok[0], ArgumentCollector.get(), tok[-1], ArgumentCollector.reset())) ;

# Replaces the parse action that was set on name where it was defined.
name.setParseAction(ArgumentCollector.bind) ;

# A program is one or more functions up to the end of the input; the end
# marker uses the original whitespace set saved in defaultWhitespace.
program = OneOrMore(
function
).setParseAction(lambda tok: t.Program(list(tok))) + StringEnd().setWhitespaceChars(defaultWhitespace) ;