Skip to content

Commit

Permalink
Guided convergence: re-implemented according to the paper
Browse files Browse the repository at this point in the history
Also, pretty-printing of BGF as BNF
Also, a "lib::Rascalware" module in order to simplify VVZ's life with Rascal
Also, some massaging of BGF internal syntax wrt beautification of related Rascal code
Also, lots of debugging the Prodsigs code (need a test suite!)
  • Loading branch information
grammarware committed Jul 3, 2012
1 parent 777587d commit fc2e428
Show file tree
Hide file tree
Showing 7 changed files with 296 additions and 33 deletions.
27 changes: 14 additions & 13 deletions shared/rascal/src/analyse/Metrics.rsc
Original file line number Diff line number Diff line change
Expand Up @@ -5,44 +5,45 @@ import syntax::BGF;
import List;

@doc{All nonterminals in a grammar: defined or used}
public set[str] allNs(list[BGFProduction] ps) = definedNs(ps) + usedNs(ps);
public set[str] allNs(BGFProdList ps) = definedNs(ps) + usedNs(ps);
public set[str] allNs(BGFGrammar g) = allNs(g.prods);

@doc{Top nonterminals in a grammar: defined but not used}
public set[str] topNs(list[BGFProduction] ps) = definedNs(ps) - usedNRNs(ps);
public set[str] topNs(BGFProdList ps) = definedNs(ps) - usedNRNs(ps);
// We allow top nonterminals to refer to themselves. In general, this is arguable.
public set[str] topNs(BGFGrammar g) = topNs(g.prods);

@doc{Bottom nonterminals in a grammar: used but not defined}
public set[str] bottomNs(list[BGFProduction] ps) = usedNs(ps) - definedNs(ps);
public set[str] bottomNs(BGFProdList ps) = usedNs(ps) - definedNs(ps);
public set[str] bottomNs(BGFGrammar g) = bottomNs(g.prods);

@doc{Leaf nonterminals in a grammar: not using any others}
//public set[str] leafNs(list[BGFProduction] ps) = {n | n <- definedNs(ps), production(_,n,rhs) <- ps, /nonterminal(n2) := rhs, n2 != n};
public set[str] leafNs(list[BGFProduction] ps) = {n | n <- definedNs(ps), (calls(n,ps)-n)=={} };
//public set[str] leafNs(BGFProdList ps) = {n | n <- definedNs(ps), production(_,n,rhs) <- ps, /nonterminal(n2) := rhs, n2 != n};
public set[str] leafNs(BGFProdList ps) = {n | n <- definedNs(ps), (calls(n,ps)-n)=={} };
public set[str] leafNs(BGFGrammar g) = leafNs(g.prods);
@doc{All terminals used in a grammar}
public set[str] allTs(list[BGFProduction] ps) = {s | /terminal(str s) := ps};
public set[str] allTs(BGFProdList ps) = {s | /terminal(str s) := ps};
public set[str] allTs(BGFGrammar g) = allTs(g.prods);
@doc{All nonterminals used in a grammar}
public set[str] usedNs(list[BGFProduction] ps) = {s | /nonterminal(str s) := ps};
public set[str] usedNs(BGFExpression e) = {s | /nonterminal(str s) := e};
public set[str] usedNs(BGFProdList ps) = {s | /nonterminal(str s) := ps};
public set[str] usedNs(BGFGrammar g) = usedNs(g.prods);
@doc{All nonterminals used non-recursively in a grammar}
public set[str] usedNRNs(list[BGFProduction] ps) = {s | p <- ps, /nonterminal(str s) := p.rhs, s != p.lhs};
public set[str] usedNRNs(BGFProdList ps) = {s | p <- ps, /nonterminal(str s) := p.rhs, s != p.lhs};
public set[str] usedNRNs(BGFGrammar g) = usedNRNs(g.prods);
@doc{All nonterminals defined in a grammar}
public set[str] definedNs(list[BGFProduction] ps) = {s | production(_,str s,_) <- ps};
public set[str] definedNs(BGFProdList ps) = {s | production(_,str s,_) <- ps};
public set[str] definedNs(BGFGrammar g) = definedNs(g.prods);
@doc{All nonterminals defined in a grammar by one production rule each}
public set[str] definedOnceNs(list[BGFProduction] ps) = {x | str x <- definedNs(ps), size(prodsOfN(x,ps))==1 };
public set[str] definedOnceNs(BGFProdList ps) = {x | str x <- definedNs(ps), size(prodsOfN(x,ps))==1 };
public set[str] definedOnceNs(BGFGrammar g) = definedOnceNs(g.prods);
public rel[str,str] calls(list[BGFProduction] ps) = {<n1,n2> | production(_,n1,rhs) <- ps, /nonterminal(n2) := rhs};
public set[str] calls(str x, list[BGFProduction] ps) = {n2 | production(_,x,rhs) <- ps, /nonterminal(n2) := rhs};
public rel[str,str] calls(BGFProdList ps) = {<n1,n2> | production(_,n1,rhs) <- ps, /nonterminal(n2) := rhs};
public set[str] calls(str x, BGFProdList ps) = {n2 | production(_,x,rhs) <- ps, /nonterminal(n2) := rhs};
public list[BGFProduction] prodsOfN(str x, list[BGFProduction] ps) = [p | p <- ps, production(_,x,_) := p];
public BGFProdList prodsOfN(str x, BGFProdList ps) = [p | p <- ps, production(_,x,_) := p];
58 changes: 44 additions & 14 deletions shared/rascal/src/analyse/Prodsigs.rsc
Original file line number Diff line number Diff line change
Expand Up @@ -3,14 +3,17 @@ module analyse::Prodsigs

import syntax::BGF;
import analyse::Metrics;
import lib::Rascalware;
import Relation; //domain
import List;
import Set;

data Footprint
= fpnt()
| fpopt()
| fpplus()
| fpstar()
| fpmany(set[Footprint] fps)
| fpmany(list[Footprint] fps)
| fpempty()
;

Expand All @@ -21,7 +24,16 @@ Footprint makefp(n, nonterminal(n)) = fpnt();
Footprint makefp(n, optional(nonterminal(n))) = fpopt();
Footprint makefp(n, plus(nonterminal(n))) = fpplus();
Footprint makefp(n, star(nonterminal(n))) = fpstar();
Footprint makefp(n, sequence(L)) = fpmany({makefp(e) | e <- L});
// Footprint of n in a sequence: computes makefp(n,e) for every element e of the
// sequence and drops the fpempty() results. Yields fpempty() when nothing is
// left, the sole remaining footprint when exactly one is left, and fpmany(...)
// over all remaining footprints otherwise.
Footprint makefp(n, sequence(L))
{
	s = [fp | e <- L, fp := makefp(n,e), fp != fpempty()];
	if (len(s)==0)
		return fpempty();
	// Rascal has no "elseif" keyword; conditionals are chained with "else if"
	else if (len(s)==1)
		return getOneFrom(s);
	else
		return fpmany(s);
}
default Footprint makefp(str n, BGFExpression x) = fpempty();
Signature makesig(BGFProduction p) = {<n,makefp(n,p.rhs)> | n <- usedNs(p.rhs)};
Expand All @@ -33,41 +45,47 @@ bool eqfp(fpplus(), fpstar()) = true;
bool eqfp(fpstar(), fpplus()) = true;
bool eqfp(fpstar(), fpstar()) = true;
bool eqfp(fpempty(), fpempty()) = true;
bool eqfp(fpmany(L1), fpmany(L2))
bool eqfp(fpmany(L1), fpmany(L2)) = multiseteq(L1,L2);
default bool eqfp(Footprint pi, Footprint xi) = false;
bool equivfp(fpmany(L1), fpmany(L2))
{
//tuple[Footprint,set[Footprint]]
<head,tail> = takeOneFrom(L1);
if (isEmpty(L1)) return isEmpty(L2);
<car,cdr> = List::takeOneFrom(L1);
for (e <- L2)
if (eqfp(head,e))
return eqfp(tail, L2 - e);
if (eqfp(car,e))
return eqfp(fpmany(cdr), fpmany(L2 - e));
return false;
}
default bool eqfp(Footprint pi, Footprint xi) = false;
default bool equivfp(Footprint pi, Footprint xi) = eqfp(pi,xi);
// strong equivalence relies on natural equality of footprints
// (i.e., == of rels)
bool eqps(BGFProduction p1, BGFProduction p2) = eqps(makesig(p1),makesig(p2));
default bool eqps(Signature p, Signature q) = geqps(p,q,bool(p,q){return p == q;});
default bool eqps(Signature p, Signature q) = geqps(p,q,eqfp,true);
// weak equivalence relies on equivalence of footprints
bool weqps(BGFProduction p1, BGFProduction p2) = weqps(makesig(p1),makesig(p2));
default bool weqps(Signature p, Signature q) = geqps(p,q,eqfp);
default bool weqps(Signature p, Signature q) = geqps(p,q,equivfp,false);
// footprint-comparator-parametrised equivalence
bool geqps(Signature p, Signature q, bool(Footprint,Footprint) cmp)// = p == q;
bool geqps(Signature p, Signature q, bool(Footprint,Footprint) cmp, bool strong)// = p == q;
{
if (strong && len(p) != len(q)) return false;
for (<n,pi> <- p)
{
bool match = false;
//for(<m,xi> <- q, cmp(pi,xi))
for(<m,xi> <- q, cmp(pi,xi))
if (match)
return false; // multiple matches!
else
{
match = true;
q -= <m,xi>;
q -= {<m,xi>};
}
if (!match)
if (strong && !match)
return false;
}
// all matched!
Expand All @@ -78,9 +96,10 @@ NameMatch makenamematch(BGFProduction p1, BGFProduction p2) = makenamematch(make
NameMatch makenamematch(Signature p, Signature q)
{
NameMatch nm = {};
set[str] unmatched = range(q);
for (<a,pi> <- p, <b,xi> <- q, !eqfp(pi,xi))
set[str] unmatched = domain(q);
for (<a,pi> <- p, <b,xi> <- q, eqfp(pi,xi))
{
//println("Checking <a>:<pi> vs <b>:<xi>...<eqfp(pi,xi)>");
nm += <a,b>;
// TODO: should exit if we want to work with non-equivalent signatures.
// But do we, really?
Expand All @@ -90,3 +109,14 @@ NameMatch makenamematch(Signature p, Signature q)
nm += {<"",c> | c <- unmatched};
return nm;
}
// Pretty-prints a signature as an angle-bracketed, comma-separated list of
// "name: footprint" entries, using the footprint notation defined below.
public str pp(Signature sig) = "\<"+joinStrings(["<n>: <pp(f)>" | <n,f> <- sig],", ")+"\>";
// Footprint notation: "1" for fpnt, "?" for fpopt, "+" for fpplus, "*" for fpstar.
public str pp(fpnt()) = "1";
public str pp(fpopt()) = "?";
public str pp(fpplus()) = "+";
public str pp(fpstar()) = "*";
// An fpmany is rendered as its members' notations joined with an empty separator.
public str pp(fpmany(L)) = joinStrings([pp(f) | f <- L],"");
public str pp(fpempty()) = "0";
// Fallback for any footprint not matched by the overloads above.
public default str pp(Footprint sig) = "XXX";
// Pretty-prints a name match as an angle-bracketed, comma-separated list of
// "left = right" entries; an empty left-hand name is shown as OMEGA.
public str pp(NameMatch nm) = "\<"+joinStrings(["<(n=="")?"OMEGA":n> = <m>" | <n,m> <- nm],", ")+"\>";
134 changes: 134 additions & 0 deletions shared/rascal/src/converge/PureGuided.rsc
Original file line number Diff line number Diff line change
@@ -0,0 +1,134 @@
@contributor{Vadim Zaytsev - vadim@grammarware.net - SWAT, CWI}
module converge::PureGuided

import syntax::BGF;
import syntax::XBGF;
import syntax::CBGF;
import analyse::Prodsigs;
import analyse::Metrics;
import normal::ANF;
import export::BNF;
import io::ReadBGF;
import transform::XBGF;
import transform::CBGF;
import lib::Rascalware;
//import IO;

list[str] sources =
//["antlr","dcg","ecore","emf","jaxb","om","python","rascal-a","rascal-c","sdf","txl","xsd"];
["sdf"];

// Tells whether the composition of the two name matches contains any pair.
// The composition itself is echoed to the console first.
bool conflicted(NameMatch a, NameMatch b)
{
	println("a o b = <a o b>");
	composed = a o b;
	if (isEmpty(composed))
		return false;
	return true;
}

// Looks for one pair of prodsig-equivalent production rules, one from mps and
// one from sps, among the rules of the nonterminals already listed in `known`.
//
// Returns a triple of
//   - the name match derived from the matched pair,
//   - mps without the matched rule,
//   - sps without the matched rule.
// Strong equivalence (eqps) is tried first; weak equivalence (weqps) is tried
// next, and a weak match is only accepted when conflicted(nm,known) is false.
// When nothing matches, an empty name match is returned together with the
// unchanged production lists, so that the caller can detect the lack of progress.
tuple[NameMatch,BGFProdList,BGFProdList]
matchProds(NameMatch known, BGFProdList mps, BGFProdList sps)
{
	// candidates on both sides are looked up by the left-hand name of each known pair
	BGFProdList ps1 = [*prodsOfN(n,mps) | <n,_> <- known];
	BGFProdList ps2 = [*prodsOfN(n,sps) | <n,_> <- known];
	println("Trying to match production rules:");
	for (p <- ps1) println(" <pp(p)>\t <pp(analyse::Prodsigs::makesig(p))>");
	println(" vs");
	for (p <- ps2) println(" <pp(p)>\t <pp(analyse::Prodsigs::makesig(p))>");
	// check for strong prodsig-equivalence first
	println("Looking for strong equivalence.");
	for (p1 <- ps1, p2 <- ps2, analyse::Prodsigs::eqps(p1,p2))
	{
		nm = analyse::Prodsigs::makenamematch(p1,p2);
		println("Found prodsig-equivalent production rules: <pp(nm)>");
		if (!isEmpty(nm-known))
			println("Will assume that <pp(nm)> after <pp(known)>");
		// the first strong match wins
		return <nm, mps - p1, sps - p2>;
	}
	// check for weak prodsig-equivalence now
	println("Looking for weak equivalence.");
	for (p1 <- ps1, p2 <- ps2, analyse::Prodsigs::weqps(p1,p2))
	{
		nm = analyse::Prodsigs::makenamematch(p1,p2);
		println("Found weakly prodsig-equivalent production rules: <pp(nm)>");
		if (conflicted(nm,known))
			println("Naming conflict, reconsider.");
		else
		{
			if (!isEmpty(nm-known))
				println("Will assume that <pp(nm)> after <pp(known)>");
			return <nm, mps - p1, sps - p2>;
		}
	}
	println("No match found.");
	// A non-void function must not fall off its end: report "no progress"
	// explicitly with an empty name match and untouched production lists.
	return <{}, mps, sps>;
}
// Renames nonterminals in the given productions according to a name match.
// For every pair <n1,n2> of the match, n2 is renamed to n1 unless the names
// are already equal, n1 is the empty string, or n2 does not occur among the
// nonterminals of the productions processed so far.
BGFProdList assumeRenamings(BGFProdList where, NameMatch naming)
{
	BGFProdList renamed = where;
	for (<target,current> <- naming)
	{
		if (target == current || target == "" || !(current in allNs(renamed)))
			continue;
		// the renaming operator works on grammars, so wrap the rules in a rootless one
		wrapped = grammar([],renamed);
		renamed = transform(forward([renameN_renameN(current,target)]),wrapped).prods;
	}
	return renamed;
}
// Guided convergence of a servant grammar towards a master grammar.
//
// The servant is normalised first (normal::ANF::normalise), then production
// rules are matched step by step with matchProds, starting from the pair of
// first roots. Each step removes the matched rules, applies the newly assumed
// renamings to the remaining servant rules and extends the known name match.
// Progress and the final name match are printed to the console.
void converge(BGFGrammar master, BGFGrammar servant)
{
	println("Master grammar:\n<pp(master)>");
	println("Normalising the grammar...");
	CBGFSequence ncbgf = normal::ANF::normalise(servant);
	servant = transform(forward(ncbgf),servant);
	println("Servant grammar:\n<pp(servant)>");
	println("Starting with the root: <master.roots>, <servant.roots>.");
	// The matching is seeded with the first root of each grammar, so both
	// grammars need at least one; indexing an empty list would fail.
	if (isEmpty(master.roots) || isEmpty(servant.roots))
	{
		println("Cannot converge: both grammars need at least one root.");
		return;
	}
	// TODO: multiple roots
	NameMatch known = {<master.roots[0],servant.roots[0]>};
	ps1 = master.prods;
	ps2 = assumeRenamings(servant.prods, known);
	// hard cap on the number of matching steps
	int cx = 10;
	while(!isEmpty(ps1))
	{
		print("...<cx>...");
		cx -= 1;
		<nnm,ps1a,ps2a> = matchProds(known, ps1, ps2);
		ps1 = ps1a;
		ps2 = assumeRenamings(ps2a,nnm);
		known = known + nnm;
		if (cx==0)
			break;
	}
	println("Done with the grammar.");
	println("Nominal matching: <pp(known)>");
	// NOTE(review): this prints the normalised servant; the renamings
	// accumulated in ps2 are not reflected here - confirm this is intended.
	println("<pp(servant)>");
}
// Entry point: loads the master grammar once, then runs converge against
// the grammar of every entry in `sources`.
public void main()
{
	BGFGrammar master = loadSimpleGrammar(|home:///projects/slps/topics/convergence/guided/bgf/master.bgf|);
	for (src <- sources)
	{
		servant = loadSimpleGrammar(|home:///projects/slps/topics/convergence/guided/bgf/<src>.bgf|);
		converge(master,servant);
	}
	println("Done.");
}
// Reads a BGF grammar from the given location and replaces built-in value
// types by ordinary nonterminals: val(string()) becomes STRING and
// val(integer()) becomes INTEGER. Each replacement is applied globally and
// only when the corresponding value type occurs in the grammar.
BGFGrammar loadSimpleGrammar(loc l)
{
	BGFGrammar simplified = readBGF(l);
	if (/val(string()) := simplified)
		simplified = transform([replace(val(string()),nonterminal("STRING"),globally())],simplified);
	if (/val(integer()) := simplified)
		simplified = transform([replace(val(integer()),nonterminal("INTEGER"),globally())],simplified);
	return simplified;
}
59 changes: 59 additions & 0 deletions shared/rascal/src/export/BNF.rsc
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
@contributor{Vadim Zaytsev - vadim@grammarware.net - SWAT, CWI}
module export::BNF

import IO;
import syntax::BGF;
import lang::xml::DOM;
import List;
import String;

// Pretty-prints a grammar: an optional "Roots: ..." line (omitted when the
// grammar has no roots) followed by the pretty-printed production rules.
public str pp(BGFGrammar bgf)
{
	str header = "";
	if (!isEmpty(bgf.roots))
		header = "Roots: <bgf.roots>\n";
	return header + pp(bgf.prods);
}

// Pretty-prints a list of production rules, one rule per line; the result
// is trimmed, so it carries no trailing newline.
public str pp(BGFProdList ps)
	= trim(("" | it + "<pp(p)>\n" | p <- ps));
// Pretty-prints one production rule as "lhs ::= rhs", preceded by the
// bracketed label when the label is not empty.
public str pp(BGFProduction p)
{
	str prefix = (p.label == "") ? "" : "[<p.label>] ";
	return prefix + "<p.lhs> ::= <pp(p.rhs)>";
}
// Pretty-prints a list of expressions separated by single spaces.
// Returns the empty string for an empty list. No space is emitted after the
// last element, so a sequence is rendered as "(a b)" rather than "(a b )".
public str pp(BGFExprList es)
{
	if (isEmpty(es))
		return "";
	str s = "<pp(es[0])>";
	// every further element is preceded, not followed, by the separator
	for(e <- tail(es))
		s += " <pp(e)>";
	return s;
}
// Pretty-prints a list of expressions as alternatives separated by " | ".
// Returns the empty string for an empty list instead of failing on es[0].
public str ppc(BGFExprList es)
{
	if (isEmpty(es))
		return "";
	str s = "<pp(es[0])>";
	for(e <- tail(es))
		s += " | <pp(e)>";
	return s;
}
// Pretty-printing of BGF expressions, one overload per constructor.
// Constants and built-in value types are rendered as fixed keywords.
public str pp(epsilon()) = "EPSILON";
public str pp(empty()) = "EMPTY";
public str pp(val(string())) = "STR";
public str pp(val(integer())) = "INT";
public str pp(anything()) = "ANYTHING";
// Terminals are wrapped in double quotes; nonterminals are printed by name.
public str pp(terminal(str s)) = "\"<s>\"";
public str pp(nonterminal(str s)) = "<s>";
// A selectable expression is printed as "selector::expression".
public str pp(selectable(s,e)) = "<s>::<pp(e)>";
// Sequences and choices are parenthesised; choice alternatives are joined by ppc.
public str pp(sequence(L)) = "(<pp(L)>)";
public str pp(choice(L)) = "(<ppc(L)>)";
// A marked expression is wrapped in angle brackets.
public str pp(marked(e)) = "\<<pp(e)>\>";
// Postfix operators: "?" for optional, "+" for plus, "*" for star.
public str pp(optional(e)) = "<pp(e)>?";
public str pp(plus(e)) = "<pp(e)>+";
public str pp(star(e)) = "<pp(e)>*";
// Separator lists are printed as the two expressions in curly braces followed by "+" or "*".
public str pp(seplistplus(e1,e2)) = "{<pp(e1)> <pp(e2)>}+";
public str pp(sepliststar(e1,e2)) = "{<pp(e1)> <pp(e2)>}*";
// Fallback for any expression not matched by the overloads above.
public default str pp(BGFExpression e) = "UNKNOWN";
Loading

0 comments on commit fc2e428

Please sign in to comment.