Skip to content

Commit

Permalink
Guided convergence in Rascal: Abstract Normal Form; prodsigs, etc
Browse files Browse the repository at this point in the history
  • Loading branch information
grammarware committed Jun 18, 2012
1 parent 0f5ef68 commit b6f06b2
Show file tree
Hide file tree
Showing 12 changed files with 404 additions and 47 deletions.
18 changes: 18 additions & 0 deletions shared/rascal/src/analyse/Metrics.rsc
Expand Up @@ -3,18 +3,36 @@ module analyse::Metrics

import syntax::BGF;

@doc{All nonterminals in a grammar: the union of the defined and the used ones}
public set[str] allNs(list[BGFProduction] ps)
{
	set[str] defined = definedNs(ps);
	set[str] used = usedNs(ps);
	return defined + used;
}
@doc{All nonterminals (defined or used) among the productions of a grammar}
public set[str] allNs(BGFGrammar g)
{
	return allNs(g.prods);
}

@doc{Top nonterminals: those that are defined but never used}
public set[str] topNs(list[BGFProduction] ps)
{
	set[str] used = usedNs(ps);
	return {n | n <- definedNs(ps), n notin used};
}
@doc{Top nonterminals among the productions of a grammar}
public set[str] topNs(BGFGrammar g)
{
	return topNs(g.prods);
}

@doc{Bottom nonterminals: those that are used but never defined}
public set[str] bottomNs(list[BGFProduction] ps)
{
	set[str] defined = definedNs(ps);
	return {n | n <- usedNs(ps), n notin defined};
}
@doc{Bottom nonterminals among the productions of a grammar}
public set[str] bottomNs(BGFGrammar g)
{
	return bottomNs(g.prods);
}

@doc{Leaf nonterminals: defined nonterminals whose productions refer to no nonterminal other than themselves}
public set[str] leafNs(list[BGFProduction] ps)
{
	// a nonterminal is a leaf when everything it calls is contained in {itself}
	return {n | n <- definedNs(ps), calls(n,ps) <= {n}};
}
@doc{Leaf nonterminals among the productions of a grammar}
public set[str] leafNs(BGFGrammar g)
{
	return leafNs(g.prods);
}

@doc{All terminal symbols occurring anywhere in the productions}
public set[str] allTs(list[BGFProduction] ps)
{
	return {t | /terminal(str t) := ps};
}
@doc{All terminal symbols occurring in the productions of a grammar}
public set[str] allTs(BGFGrammar g)
{
	return allTs(g.prods);
}
@doc{All nonterminals referenced anywhere in the productions}
public set[str] usedNs(list[BGFProduction] ps)
{
	return {name | /nonterminal(str name) := ps};
}
@doc{All nonterminals referenced in the productions of a grammar}
public set[str] usedNs(BGFGrammar g)
{
	return usedNs(g.prods);
}

@doc{All nonterminals that occur as the left hand side of some production}
public set[str] definedNs(list[BGFProduction] ps)
{
	return {defined | production(_,str defined,_) <- ps};
}
@doc{All nonterminals defined by the productions of a grammar}
public set[str] definedNs(BGFGrammar g)
{
	return definedNs(g.prods);
}

@doc{Call relation: pairs of a defined nonterminal and a nonterminal occurring in one of its right hand sides}
public rel[str,str] calls(list[BGFProduction] ps)
{
	return {<caller,callee> | production(_,caller,body) <- ps, /nonterminal(callee) := body};
}
@doc{Nonterminals occurring in the right hand sides of the productions that define x}
public set[str] calls(str x, list[BGFProduction] ps)
{
	return {callee | production(_,x,body) <- ps, /nonterminal(callee) := body};
}
@doc{The productions that define nonterminal x, in their original order}
public list[BGFProduction] prodsOfN(str x, list[BGFProduction] ps)
{
	return [p | p:production(_,x,_) <- ps];
}
60 changes: 55 additions & 5 deletions shared/rascal/src/converge/Guided.rsc
Expand Up @@ -6,28 +6,78 @@ import Set;
import Map;
import List;
import io::ReadBGF;
import io::WriteCBGF;
import syntax::BGF;
import syntax::CBGF;
import analyse::Metrics;
import normal::ANF;
import transform::XBGF;
import transform::CBGF;

list[str] sources =
["antlr","dcg","ecore","emf","jaxb","om","python","rascal-a","rascal-c","sdf","txl","xsd"];
["master","antlr","dcg","ecore","emf","jaxb","om","python","rascal-a","rascal-c","sdf","txl","xsd"];

// Production signature of nonterminal n: the signatures of all productions
// defining n in ps, joined with "/".
public str prodsig(str n, list[BGFProduction] ps) = prodsig(prodsOfN(n,ps));

// Production signature of a list of productions, joined with "/".
// The empty list is handled here explicitly (yielding "") instead of through a
// separate overlapping prodsig([]) alternative: that way ps[0] can never be
// evaluated on an empty list, whichever order overloads are tried in.
public str prodsig(list[BGFProduction] ps)
{
	if (ps == [])
		return "";
	return (prodsig(ps[0]) | it + "/" + prodsig(p) | p <- tail(ps));
}

// Production signature of a single production: the signature of its right hand
// side relative to its own left hand side.
public str prodsig(BGFProduction p) = signature(p.rhs,p.lhs);

// signature(e,n): short string describing expression e as seen from the
// nonterminal n that is being defined. Expressions without a dedicated
// alternative below are rendered as "_".

// A nonterminal: "0" when it is n itself, "1" otherwise.
str signature(nonterminal(name),n)
{
	if (name == n)
		return "0";
	return "1";
}
// An optional: "O" when it wraps exactly the nonterminal n, "?" otherwise.
str signature(optional(inner),n)
{
	if (nonterminal(n) := inner)
		return "O";
	return "?";
}
// A plus: "P" when it wraps exactly the nonterminal n, "+" otherwise.
str signature(plus(inner),n)
{
	if (nonterminal(n) := inner)
		return "P";
	return "+";
}
// A star: "S" when it wraps exactly the nonterminal n, "*" otherwise.
str signature(star(inner),n)
{
	if (nonterminal(n) := inner)
		return "S";
	return "*";
}
// The universal expression.
str signature(anything(),_)
{
	return "a";
}
// A separator list (one or more): "0*" when its element is the nonterminal n, "1*" otherwise.
str signature(seplistplus(inner,_),n)
{
	if (nonterminal(n) := inner)
		return "0*";
	return "1*";
}
// A separator list (zero or more) is always rendered as "?".
str signature(sepliststar(_,_),n)
{
	return "?";
}
// A sequence: the concatenation of the signatures of its elements.
str signature(sequence(parts),n)
{
	str acc = "";
	for (part <- parts)
		acc += signature(part,n);
	return acc;
}
// Fallback for every other expression.
default str signature(BGFExpression e, str n)
{
	return "_";
}

// Driver: reads one BGF grammar per entry of `sources`, normalises each with
// normal::ANF::normalise, prints the roots of every grammar together with their
// production signatures, and finally writes the normalising CBGF sequences to
// <src>.cbgf files in the same directory the grammars were read from.
public void main()
{
// source name -> grammar; replaced by the normalised grammar in the second loop
map[str,BGFGrammar] bgfs = ();
// source name -> CBGF sequence computed by normalise for that grammar
map[str,CBGFSequence] cbgfs = ();
println("Reading the grammars...");
//BGFGrammar master = readBGF(|home:///projects/slps/topics/convergence/guided/bgf/master.bgf|);
for (src <- sources)
bgfs[src] = readBGF(|home:///projects/slps/topics/convergence/guided/bgf/<src>.bgf|);
println("Parsing the grammarbase with <size(bgfs)> grammars is done.");
println("Normalising the grammars...");
for (src <- sources)
{
cbgfs[src] = normal::ANF::normalise(bgfs[src]);
println("Normalising <src> with");
//iprintln(cbgfs[src]);
// apply the forward direction of the computed sequence to obtain the normalised grammar
bgfs[src] = transform(forward(cbgfs[src]),bgfs[src]);
//if (src == "rascal-c")
// iprintln(bgfs[src]);
}
println("Starting with the root.");
for (src <- sources)
{
println(" * Roots in <src>: <bgfs[src].roots>");
// Only the "master" grammar gets an empty root list replaced by its top
// nonterminals; any other source with no roots falls through to the
// size check below and ends up in the else branch with an empty list.
// NOTE(review): roots[0] below assumes topNs yields at least one nonterminal.
if (isEmpty(bgfs[src].roots))
if(src=="master")
{
bgfs[src].roots = toList(topNs(bgfs[src]));
println(" * Roots in <src> changed to tops: <bgfs[src].roots>");
println(" * In the master grammar, the root is called <bgfs[src].roots[0]>, prodsig <prodsig(bgfs[src].roots[0],bgfs[src].prods)>");
//namemapping?
continue;
}
// exactly one root: report it together with its production signature
if (size(bgfs[src].roots)==1)
{
println(" * In <src>: maps to <bgfs[src].roots[0]>, prodsig <prodsig(bgfs[src].roots[0],bgfs[src].prods)>");
}
else
{
// zero or several roots: list the production signature of each candidate
println(" * In <src>: unconclusive, looking at definitions of <bgfs[src].roots>");
for (r <- bgfs[src].roots)
println(" * <r>: prodsig <prodsig(r,bgfs[src].prods)>");
}
//if (isEmpty(bgfs[src].roots))
//{
// bgfs[src].roots = toList(topNs(bgfs[src]));
// println(" * Roots in <src> changed to tops: <bgfs[src].roots>");
//}
}
println("Writing CBGFs...");
for (src <- sources)
writeCBGF(cbgfs[src],|home:///projects/slps/topics/convergence/guided/bgf/<src>.cbgf|);
println("Done.");
}
29 changes: 7 additions & 22 deletions shared/rascal/src/io/WriteBGF.rsc
Expand Up @@ -7,33 +7,18 @@ import lang::xml::DOM;

// Serialises a BGF grammar as an XML document and writes it to location f.
// The document element is <grammar> in the BGF namespace; its children are one
// unqualified <root> element per root nonterminal, followed by one element per
// production as built by prod2xml.
// The previous pattern-match implementation and its duplicate declarations of
// xml1/xml2 are gone: the grammar is accessed through its roots/prods fields.
public void writeBGF(BGFGrammar bgf, loc f)
{
	list[Node] xml1 = [element(none(),"root",[charData(s)]) | s <- bgf.roots];
	list[Node] xml2 = [prod2xml(p) | p <- bgf.prods];
	writeFile(f,xmlRaw(document(element(namespace("bgf","http://planet-sl.org/bgf"),"grammar",xml1+xml2))));
}

// Converts one production into a <production> element in the BGF namespace.
// Children, in order: an optional <label> (only when the label is non-empty),
// the defined <nonterminal>, and the right hand side as built by expr2xml.
// Only one implementation is kept: the statements that used to follow the
// returning/throwing if-else were unreachable duplicates of its body.
public Node prod2xml(BGFProduction p)
{
	list[Node] kids = [];
	if (p.label!="") kids += element(none(),"label",[charData(p.label)]);
	kids += element(none(),"nonterminal",[charData(p.lhs)]);
	kids += expr2xml(p.rhs);
	return element(namespace("bgf","http://planet-sl.org/bgf"),"production",kids);
}

public Node expr2xml(BGFExpression ex)
Expand Down
91 changes: 91 additions & 0 deletions shared/rascal/src/io/WriteCBGF.rsc
@@ -0,0 +1,91 @@
@contributor{Vadim Zaytsev - vadim@grammarware.net - SWAT, CWI}
module io::WriteCBGF

import IO;
import syntax::BGF;
import syntax::XBGF;
import syntax::CBGF;
import lang::xml::DOM;
import io::WriteBGF;

// Serialises a CBGF sequence as an XML document and writes it to location f.
// Every step is converted with cbgf2xml; the results become the children of a
// <relationship> document element in the CBGF namespace.
public void writeCBGF(CBGFSequence cbgf, loc f)
{
	list[Node] steps = [];
	for (step <- cbgf)
		steps += cbgf2xml(step);
	Node root = element(namespace("cbgf","http://planet-sl.org/cbgf"),"relationship",steps);
	writeFile(f,xmlRaw(document(root)));
}

// Element in the CBGF namespace with the given local name and children.
Node cbgfElement(str name, list[Node] kids)
	= element(namespace("cbgf","http://planet-sl.org/cbgf"),name,kids);

// Unqualified <in> element wrapping the XML form of a scope.
Node cbgfScope(XBGFScope w) = element(none(),"in",[context2xml(w)]);

// Unqualified <nonterminal> element holding the name as character data.
Node cbgfNonterminal(str s) = element(none(),"nonterminal",[charData(s)]);

// All productions of a list, each converted with prod2xml.
list[Node] cbgfProds(list[BGFProduction] ps) = [prod2xml(p) | p <- ps];

// Converts one CBGF command into its XML element.
// The element name is the constructor name with "_" replaced by "-"; the
// children are the serialised arguments (productions via prod2xml, expressions
// via expr2xml, scopes via context2xml wrapped in <in> where the case says so).
// Throws "ERROR: <step>" for any command that has no case below.
//
// TODO: the following commands are not serialised yet, because the code that
// was drafted for them is incompatible with SLPS; they end up in the default
// case and throw:
//   clone_equate(str x, str y, XBGFScope w)
//   concatT_splitT(str y, list[str] xs, XBGFScope w)
//   equate_clone(str x, str y, XBGFScope w)
//   redefine_redefine(list[BGFProduction] ps1, list[BGFProduction] ps2)
//   renameL_renameL(str x, str y)
//   renameS_renameS(str x, str y, XBGFScope w)
//   renameT_renameT(str x, str y)
//   splitN_unite(str x, list[BGFProduction] ps, XBGFScope w)
//   splitT_concatT(str x, list[str] ys, XBGFScope w)
//   unite_splitN(str x, list[BGFProduction] ps, XBGFScope w)
Node cbgf2xml(CBGFCommand step)
{
	switch(step)
	{
		case abridge_detour(BGFProduction p): return cbgfElement("abridge-detour",[prod2xml(p)]);
		case abstractize_concretize(BGFProduction p): return cbgfElement("abstractize-concretize",[prod2xml(p)]);
		case addH_removeH(BGFProduction p): return cbgfElement("addH-removeH",[prod2xml(p)]);
		case addV_removeV(BGFProduction p): return cbgfElement("addV-removeV",[prod2xml(p)]);
		case anonymize_deanonymize(BGFProduction p): return cbgfElement("anonymize-deanonymize",[prod2xml(p)]);
		// Spelling fixed: both the pattern and the element name used to read
		// "disapper", so this case could not match the counterpart of
		// disappear_appear below.
		// NOTE(review): assumes syntax::CBGF spells the constructor appear_disappear.
		case appear_disappear(BGFProduction p): return cbgfElement("appear-disappear",[prod2xml(p)]);
		case chain_unchain(BGFProduction p): return cbgfElement("chain-unchain",[prod2xml(p)]);
		case concretize_abstractize(BGFProduction p): return cbgfElement("concretize-abstractize",[prod2xml(p)]);
		case deanonymize_anonymize(BGFProduction p): return cbgfElement("deanonymize-anonymize",[prod2xml(p)]);
		case define_undefine(list[BGFProduction] ps): return cbgfElement("define-undefine",cbgfProds(ps));
		case designate_unlabel(BGFProduction p): return cbgfElement("designate-unlabel",[prod2xml(p)]);
		case detour_abridge(BGFProduction p): return cbgfElement("detour-abridge",[prod2xml(p)]);
		case deyaccify_yaccify(list[BGFProduction] ps): return cbgfElement("deyaccify-yaccify",cbgfProds(ps));
		case disappear_appear(BGFProduction p): return cbgfElement("disappear-appear",[prod2xml(p)]);
		case downgrade_upgrade(BGFProduction p1,BGFProduction p2): return cbgfElement("downgrade-upgrade",[prod2xml(p1),prod2xml(p2)]);
		case eliminate_introduce(list[BGFProduction] ps): return cbgfElement("eliminate-introduce",cbgfProds(ps));
		case extract_inline(BGFProduction p, XBGFScope w): return cbgfElement("extract-inline",[prod2xml(p),cbgfScope(w)]);
		case factor_factor(BGFExpression e1, BGFExpression e2, XBGFScope w): return cbgfElement("factor-factor",[expr2xml(e1),expr2xml(e2),cbgfScope(w)]);
		// the global scope is expressed by omitting <in>, so it must be tried first
		case fold_unfold(str s, globally()): return cbgfElement("fold-unfold",[cbgfNonterminal(s)]);
		case fold_unfold(str s, XBGFScope w): return cbgfElement("fold-unfold",[cbgfNonterminal(s),cbgfScope(w)]);
		// here the scope is the direct child, without an <in> wrapper
		case horizontal_vertical(XBGFScope w): return cbgfElement("horizontal-vertical",[context2xml(w)]);
		case inject_project(BGFProduction p): return cbgfElement("inject-project",[prod2xml(p)]);
		case inline_extract(BGFProduction p, XBGFScope w): return cbgfElement("inline-extract",[prod2xml(p),cbgfScope(w)]);
		case introduce_eliminate(list[BGFProduction] ps): return cbgfElement("introduce-eliminate",cbgfProds(ps));
		case iterate_assoc(BGFProduction p): return cbgfElement("iterate-assoc",[prod2xml(p)]);
		case assoc_iterate(BGFProduction p): return cbgfElement("assoc-iterate",[prod2xml(p)]);
		case massage_massage(BGFExpression e1, BGFExpression e2, XBGFScope w): return cbgfElement("massage-massage",[expr2xml(e1),expr2xml(e2),cbgfScope(w)]);
		case narrow_widen(BGFExpression e1, BGFExpression e2, XBGFScope w): return cbgfElement("narrow-widen",[expr2xml(e1),expr2xml(e2),cbgfScope(w)]);
		case permute_permute(BGFProduction p1, BGFProduction p2): return cbgfElement("permute-permute",[prod2xml(p1),prod2xml(p2)]);
		case project_inject(BGFProduction p): return cbgfElement("project-inject",[prod2xml(p)]);
		case removeH_addH(BGFProduction p): return cbgfElement("removeH-addH",[prod2xml(p)]);
		case removeV_addV(BGFProduction p): return cbgfElement("removeV-addV",[prod2xml(p)]);
		// NOTE(review): unlike every other case this one emits <rename> in the
		// XBGF namespace - kept as is, confirm that this is intended.
		case renameN_renameN(str s1, str s2): return element(namespace("xbgf","http://planet-sl.org/xbgf"),"rename",[element(none(),"nonterminal",[element(none(),"from",[charData(s1)]),element(none(),"to",[charData(s2)])])]);
		case replace_replace(BGFExpression e1, BGFExpression e2, XBGFScope w): return cbgfElement("replace-replace",[expr2xml(e1),expr2xml(e2),cbgfScope(w)]);
		case reroot_reroot(list[str] xs1, list[str] xs2): return cbgfElement("reroot-reroot",[element(none(),"from",[element(none(),"root",[charData(r)]) | r <- xs1]),element(none(),"to",[element(none(),"root",[charData(r)]) | r <- xs2])]);
		case unchain_chain(BGFProduction p): return cbgfElement("unchain-chain",[prod2xml(p)]);
		case undefine_define(list[BGFProduction] ps): return cbgfElement("undefine-define",cbgfProds(ps));
		case unfold_fold(str s, globally()): return cbgfElement("unfold-fold",[cbgfNonterminal(s)]);
		case unfold_fold(str s, XBGFScope w): return cbgfElement("unfold-fold",[cbgfNonterminal(s),cbgfScope(w)]);
		case unlabel_designate(BGFProduction p): return cbgfElement("unlabel-designate",[prod2xml(p)]);
		case upgrade_downgrade(BGFProduction p1, BGFProduction p2): return cbgfElement("upgrade-downgrade",[prod2xml(p1),prod2xml(p2)]);
		case vertical_horizontal(XBGFScope w): return cbgfElement("vertical-horizontal",[context2xml(w)]);
		case widen_narrow(BGFExpression e1, BGFExpression e2, XBGFScope w): return cbgfElement("widen-narrow",[expr2xml(e1),expr2xml(e2),cbgfScope(w)]);
		case yaccify_deyaccify(list[BGFProduction] ps): return cbgfElement("yaccify-deyaccify",cbgfProds(ps));
		// anything not handled above, including the TODO commands listed in the header
		default:
			throw "ERROR: <step>";
	}
}

// Converts a scope into XML: a label scope becomes <label>, a nonterminal scope
// becomes <nonterminal>, and the global scope becomes the comment "globally".
// Throws "ERROR in context: <w>" for any other scope.
Node context2xml(XBGFScope w)
{
	if (inlabel(str lab) := w)
		return element(none(),"label",[charData(lab)]);
	if (innt(str nt) := w)
		return element(none(),"nonterminal",[charData(nt)]);
	if (globally() := w)
		return comment("globally");
	throw "ERROR in context: <w>";
}
5 changes: 0 additions & 5 deletions shared/rascal/src/io/WriteXBGF.rsc
Expand Up @@ -12,11 +12,6 @@ public void writeXBGF(XBGFSequence xbgf, loc f)
list[Node] xml = [xbgf2xml(x) | x <- xbgf];
//println(xml);
writeFile(f,xmlRaw(document(element(namespace("xbgf","http://planet-sl.org/xbgf"),"sequence",xml))));
//Node N = parseXMLDOMTrim(readFile(f));
//if (document(element(namespace(_,"http://planet-sl.org/xbgf"),"sequence",L)) := N)
// return [mapxbgf(step) | step <- L, element(namespace(_,"http://planet-sl.org/xbgf"),name,kids) := step];
//else
// throw "<f> is not a proper XBGF file";
}

Node xbgf2xml(XBGFCommand step)
Expand Down
77 changes: 77 additions & 0 deletions shared/rascal/src/normal/ANF.rsc
@@ -0,0 +1,77 @@
@contributor{Vadim Zaytsev - vadim@grammarware.net - SWAT, CWI}
@doc{Abstract Normal Form}
module normal::ANF

import syntax::BGF;
import syntax::XBGF;
import syntax::CBGF;
import transform::CBGF;
import transform::XBGF;
import analyse::Metrics;
import Set;
import List;
import IO; //debug

@doc{Computes the CBGF sequence that normalises a grammar: an optional reroot to the top nonterminals that are not leaves, followed by the outcome of both normalisation stages}
CBGFSequence normalise(BGFGrammar g)
{
	set[str] wanted = topNs(g) - leafNs(g);
	// rerooting is only needed when the current roots differ from the wanted ones
	CBGFSequence rooting = [];
	if (wanted != toSet(g.roots))
		rooting = [reroot_reroot(g.roots,toList(wanted))];
	return rooting + normStage2(normStage1(g.prods),g);
}

@doc{First stage for a list of productions: the per-production steps, concatenated in production order}
CBGFSequence normStage1(list[BGFProduction] ps)
{
	CBGFSequence steps = [];
	for (p <- ps)
		steps += normStage1(p);
	return steps;
}

@doc{Second stage: extends the given sequence with steps derived from the grammar as it looks after applying that sequence}
CBGFSequence normStage2(CBGFSequence cbgf, BGFGrammar g)
{
	// the productions as they are once the first-stage steps have been applied
	list[BGFProduction] afterps = transform(forward(cbgf),g).prods;
	set[str] used = usedNs(afterps);
	// one vertical_horizontal step per production whose right hand side is a choice
	CBGFSequence result = cbgf + [vertical_horizontal(innt(q.lhs)) | q <- afterps, choice(_) := q.rhs];
	// nonterminals having a production whose right hand side is epsilon
	set[str] epsilons = {q.lhs | q <- afterps, epsilon() := q.rhs};
	// such a nonterminal is undefined when it is still referenced, eliminated otherwise
	result += [ (n in used)
			? undefine_define([production("",n,epsilon())])
			: eliminate_introduce([production("",n,epsilon())])
		| n <- epsilons];
	return result;
}

@doc{First stage for one production: unlabel it if it has a label, anonymize it if it contains selectors, abstractize it if it contains terminals}
CBGFSequence normStage1(BGFProduction p)
{
	CBGFSequence steps = [];
	if (p.label != "")
		steps += unlabel_designate(p);
	if (/selectable(_,_) := p)
	{
		// the anonymize step always carries the production without its label
		BGFProduction withMarks = markAllSelectors(p);
		steps += anonymize_deanonymize(production("",withMarks.lhs,withMarks.rhs));
	}
	if (/terminal(_) := p)
	{
		// the abstractize step always carries the production without its label
		BGFProduction withMarks = markAllTerminals(p);
		steps += abstractize_concretize(production("",withMarks.lhs,withMarks.rhs));
	}
	// TODO: distribute
	return steps;
}

@doc{Wraps every selectable expression of a production in a marker}
BGFProduction markAllSelectors(BGFProduction p)
{
	return visit(p)
	{
		case selectable(sel,inner) => marked(selectable(sel,inner))
	};
}

@doc{Wraps every terminal of a production in a marker and replaces every selectable expression by the expression it selects}
BGFProduction markAllTerminals(BGFProduction p)
{
	return visit(p)
	{
		case terminal(text) => marked(terminal(text))
		case selectable(_,inner) => inner
	};
}

//void main()
//{
// BGFProduction p = production("","function",sequence([selectable("n",nonterminal("ID")),plus(selectable("a",nonterminal("ID"))),terminal("="),selectable("e",nonterminal("expr")),plus(nonterminal("NEWLINE"))]));
// iprintln(markAllTerminals
//}

0 comments on commit b6f06b2

Please sign in to comment.