Skip to content

Commit

Permalink
Guided Grammar Convergence: treating multiple hypotheses is implement…
Browse files Browse the repository at this point in the history
…ed correctly (finally!)
  • Loading branch information
grammarware committed Jul 6, 2012
1 parent d7c39e2 commit 06e4bbb
Show file tree
Hide file tree
Showing 5 changed files with 130 additions and 161 deletions.
4 changes: 2 additions & 2 deletions shared/rascal/src/analyse/Prodsigs.rsc
Original file line number Diff line number Diff line change
Expand Up @@ -75,15 +75,15 @@ default bool weqps(Signature p, Signature q) = geqps(p,q,equivfp,false);
// footprint-comparator-parametrised equivalence
bool geqps(Signature p, Signature q, bool(Footprint,Footprint) cmp, bool strong)// = p == q;
{
println("[?] Checking <pp(p)> and <pp(q)> for <strong?"strong":"weak"> equivalence.");
//println("[?] Checking <pp(p)> and <pp(q)> for <strong?"strong":"weak"> equivalence.");
if (strong && len(p) != len(q)) return false;
for (<n,pi> <- p)
{
bool match = false;
for(<m,xi> <- q, cmp(pi,xi))
{
// TODO: make recursive
println(" [?] <n> == <m> as <pp(pi)>?");
//println(" [?] <n> == <m> as <pp(pi)>?");
//if (match)
// return false; // multiple matches!
//else
Expand Down
222 changes: 95 additions & 127 deletions shared/rascal/src/converge/PureGuided.rsc
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ import export::BNF;
import io::ReadBGF;
import io::WriteBGF;
import transform::XBGF;
import transform::library::Core;
import transform::CBGF;
import lib::Rascalware;
import IO;
Expand All @@ -19,13 +20,12 @@ import Relation;

list[str] sources =
//["antlr","dcg","ecore","emf","jaxb","om","python","rascal-a","rascal-c","sdf","txl","xsd"];
//["emf","jaxb","om","rascal-c","sdf","xsd"];
["txl"];
//["emf","jaxb","om","rascal-c","sdf","xsd","txl"];
["python"];
// atom/expr: antlr, dcg
// arg/string: ecore, rascal-a
// good: emf, jaxb, om, rascal-c, sdf, xsd
// good: emf, jaxb, om, rascal-c, sdf, xsd, txl
// multiroot: python
// unknown: txl

BGFProduction getSingleProd(str n, BGFProdList ps)
{
Expand All @@ -43,117 +43,9 @@ BGFProduction unwind(BGFProduction p1, BGFProdList ps1)
// Strong equivalence test for two production rules, each taken together with
// the production list it comes from: both productions are first passed
// through unwind(p,ps) and the results are compared with
// analyse::Prodsigs::eqps.
// NOTE(review): nominalMatch prints this function value as a string to derive
// the word "strong" for its log message, so keep the name and signature stable.
bool strong(BGFProduction p1, BGFProdList ps1, BGFProduction p2, BGFProdList ps2)
= analyse::Prodsigs::eqps(unwind(p1,ps1),unwind(p2,ps2));

//bool strong(BGFProduction p1, BGFProdList ps1, BGFProduction p2, BGFProdList ps2)
//{
// println("<unwind(p1,ps1)> vs <unwind(p2,ps2)>");
// s1 = analyse::Prodsigs::makesig(unwind(p1,ps1));
// s2 = analyse::Prodsigs::makesig(unwind(p2,ps2));
// println("<s1> vs <s2>");
// println("EQ: <analyse::Prodsigs::eqps(s1,s2)>; EQUIV: <analyse::Prodsigs::weqps(s1,s2)>");
// return analyse::Prodsigs::eqps(unwind(p1,ps1),unwind(p2,ps2));
//}

// Weak equivalence test for two production rules, each taken together with
// the production list it comes from: both productions are first passed
// through unwind(p,ps) and the results are compared with
// analyse::Prodsigs::weqps.
// NOTE(review): nominalMatch prints this function value as a string to derive
// the word "weak" for its log message, so keep the name and signature stable.
bool weak(BGFProduction p1, BGFProdList ps1, BGFProduction p2, BGFProdList ps2)
= analyse::Prodsigs::weqps(unwind(p1,ps1),unwind(p2,ps2));

NameMatch tryHypothesis(NameMatch known, BGFProdList mps, BGFProdList sps, int cx)
{
NameMatch nnm;
BGFProdList ps1, ps2, ps1a, ps2a;
while(!isEmpty(mps))
{
print("...<cx>...");
cx -= 1;
//<nnm,ps1a,ps2a> = matchProds(known, mps, sps);
// BEGIN
BGFProdList ps1 = [*prodsOfN(n,mps) | <n,_> <- known];
BGFProdList ps2 = [*prodsOfN(n,sps) | <n,_> <- known];
println("Trying to match production rules:");
for (p <- ps1) println(" <pp(p)>\t <pp(analyse::Prodsigs::makesig(p))>");
println(" vs");
for (p <- ps2) println(" <pp(p)>\t <pp(analyse::Prodsigs::makesig(p))>");
// check for strong prodsig-equivalence first, then for the weak one
megabreak = false;
for (bool(BGFProduction,BGFProdList,BGFProduction,BGFProdList) eq <- [strong,weak])
{
println("Looking for <eq> equivalence.");
for (p1 <- ps1, p2 <- ps2, eq(p1,mps,p2,sps))
{
for (nm <- analyse::Prodsigs::makenamematches(p1,p2))
{
println("Trying <nm>...");
truenm = tryMatch(nm,known,p1,mps,p2,sps);
println("Got <truenm>...");
if (!isEmpty(invert(truenm) o known))
println("Naming conflict, reconsider.");
else
{
nnm = truenm;
ps1a = mps - p1;
ps2a = sps - p2;
megabreak = true;
break;
}
}
if (megabreak) break;
}
if (megabreak) break;
}
println("No match found.");
// END
mps = ps1a;
sps = assumeRenamings(ps2a,nnm);
known = known + nnm;
if (cx==0)
break;
}
return known;
}
tuple[NameMatch,BGFProdList,BGFProdList]
matchProds(NameMatch known, BGFProdList mps, BGFProdList sps)
{
BGFProdList ps1 = [*prodsOfN(n,mps) | <n,_> <- known];
BGFProdList ps2 = [*prodsOfN(n,sps) | <n,_> <- known];
println("Trying to match production rules:");
for (p <- ps1) println(" <pp(p)>\t <pp(analyse::Prodsigs::makesig(p))>");
println(" vs");
for (p <- ps2) println(" <pp(p)>\t <pp(analyse::Prodsigs::makesig(p))>");
//println("<pp(analyse::Prodsigs::makesig(p1))> vs <pp(analyse::Prodsigs::makesig(p2))>");
//println("Equality: <analyse::Prodsigs::eqps(p1,p2)>; equivalence: <analyse::Prodsigs::weqps(p1,p2)>");
// check for strong prodsig-equivalence first, then for the weak one
for (bool(BGFProduction,BGFProdList,BGFProduction,BGFProdList) eq <- [strong,weak])
{
println("Looking for <eq> equivalence.");
for (p1 <- ps1, p2 <- ps2, eq(p1,mps,p2,sps))
{
//nm = analyse::Prodsigs::makenamematch(p1,p2);
//nms = analyse::Prodsigs::makenamematches(p1,p2);
//if (len(nms)==1)
// nm = getOneFrom(nms);
//else
// println("----multiple versions!");
//println("!!!nm = <nm>");
//println("!!!nms = <nms>");
//
for (nm <- analyse::Prodsigs::makenamematches(p1,p2))
{
truenm = tryMatch(nm,known,p1,mps,p2,sps);
if (!isEmpty(invert(truenm) o known))
println("Naming conflict, reconsider.");
else
return <truenm, mps - p1, sps - p2>;
}
}
}
println("No match found.");
}
NameMatch tryMatch( NameMatch nm, NameMatch known,
BGFProduction p1, BGFProdList mps,
BGFProduction p2, BGFProdList sps)
Expand All @@ -176,21 +68,87 @@ NameMatch tryMatch( NameMatch nm, NameMatch known,
println("Will assume that <a> == <b>");
truenm += <a,b>;
}
//if (isEmpty(invert(truenm) o known))
return truenm;
// println("Naming conflict, reconsider.");
//return {};
return truenm;
}
BGFProdList assumeRenamings(BGFProdList where, NameMatch naming)
{
BGFProdList ps = where;
BGFProdList ps = [p | p <- where, <"",p.lhs> notin naming];
for (<n1,n2> <- naming)
if (n1 != n2 && n2 in allNs(ps) && n1 != "")
ps = transform(forward([renameN_renameN(n2,n1)]),grammar([],ps)).prods;
if (n1 != n2 && n2 in allNs(ps) && n1 notin [""])
// dirty
ps = transform::library::Core::performRenameN(n2,n1,grammar([],ps)).prods;
//if (n1 in ["STRING","INTEGER"])
// ps = transform(forward([replace_replace(nonterminal(n2),nonterminal(n1),globally())]),grammar([],ps)).prods;
//else
// ps = transform(forward([renameN_renameN(n2,n1)]),grammar([],ps)).prods;
return ps;
}
// Recursively extends the name matching `known` by pairing production rules
// of the master grammar (mps) with production rules of the servant grammar
// (sps), and returns every complete matching that could be reached.
//
// known - name pairs accepted so far
// mps   - master production rules that still have to be matched
// sps   - servant production rules that are still available
//
// Returns:
//   {known}  when no master production rules are left (base case);
//   a non-empty set of extended matchings collected from the first pair of
//            equivalent production rules for which at least one hypothesis
//            succeeded;
//   {}       when no pair leads to a result, or as soon as a pair of
//            equivalent production rules yields no candidate name matches.
set[NameMatch] nominalMatch(NameMatch known, BGFProdList mps, BGFProdList sps)
{
// Base case: every master production rule has been consumed.
if (isEmpty(mps))
{
// Leftover servant production rules are only reported, not treated as failure.
if (!isEmpty(sps))
println("Disregarded servant production rules: <sps>");
return {known};
}
// NOTE(review): nnm is declared but only referenced from commented-out code below.
NameMatch nnm;
//BGFProdList ps1, ps2, ps1a, ps2a;
// Candidate production rules on both sides are those of the nonterminals that
// occur as the first component of a pair in `known`.
// NOTE(review): the servant side is also looked up by the first component;
// presumably the callers have already applied assumeRenamings to sps - confirm.
BGFProdList ps1 = [*prodsOfN(n,mps) | <n,_> <- known];
BGFProdList ps2 = [*prodsOfN(n,sps) | <n,_> <- known];
println("Trying to match production rules:");
for (p <- ps1) println(" <pp(p)>\t <pp(analyse::Prodsigs::makesig(p))>");
println(" vs");
for (p <- ps2) println(" <pp(p)>\t <pp(analyse::Prodsigs::makesig(p))>");
// check for strong prodsig-equivalence first, then for the weak one
// NOTE(review): megabreak is assigned here but only read in commented-out code.
megabreak = false;
for (bool(BGFProduction,BGFProdList,BGFProduction,BGFProdList) eq <- [strong,weak])
{
// The name of the comparator is cut out of the printed form of the function value.
println("Looking for <split(" ",split("(","<eq>")[0])[1]> equivalence.");
for (p1 <- ps1, p2 <- ps2, eq(p1,mps,p2,sps))
{
matches = analyse::Prodsigs::makenamematches(p1,p2);
// An equivalent pair without any candidate name match aborts the whole search
// at this level instead of moving on to the next pair.
if (isEmpty(matches))
return {}; //rollback?
// Matchings collected from all hypotheses for this particular pair.
nms = {};
for (nm <- matches)
{
//println("Trying <pp(nm)>...");
truenm = tryMatch(nm,known,p1,mps,p2,sps);
//println("Got <pp(truenm)> with <pp(known)>...");
// The hypothesis is rejected if either composition with `known` is non-empty,
// i.e. (as far as the relational operators show) a name on either side of
// truenm is already paired in `known`.
if (!isEmpty(invert(truenm) o known) || !isEmpty(truenm o invert(known)))
println("Naming conflict, reconsider.");
else
{
// Accept the hypothesis, drop the matched pair, apply the assumed renamings to
// the remaining servant production rules, and continue recursively.
newmatch = nominalMatch(known + truenm, mps - p1, assumeRenamings(sps - p2, truenm));
// An empty result means this hypothesis led to a dead end further down.
if (!isEmpty(newmatch))
nms += newmatch;
//nnm = truenm;
//ps1a = mps - p1;
//ps2a = sps - p2;
//megabreak = true;
//break;
}
}
// The first pair that produced any complete matching decides the result.
if (!isEmpty(nms))
return nms;
//if (megabreak) break;
}
//if (megabreak) break;
}
println("No match found.");
// END
//mps = ps1a;
//sps = assumeRenamings(ps2a,nnm);
//known = known + nnm;
return {};
}
//BGFGrammar
NameMatch converge(BGFGrammar master, BGFGrammar servant)
{
Expand All @@ -202,28 +160,38 @@ NameMatch converge(BGFGrammar master, BGFGrammar servant)
println("Normalising the grammar...");
acbgf = normal::ANF::normalise(servant);
servant = transform(forward(acbgf),servant);
println("<pp(servant)>");
//iprintln(ncbgf);
println("Servant grammar:\n<pp(servant)>");
println("Starting with the root: <master.roots>, <servant.roots>.");
println("------------------------------");
// TODO: multiple roots
NameMatch known = {<master.roots[0],servant.roots[0]>};
ps1 = master.prods;
ps2 = assumeRenamings(servant.prods, known);
known = tryHypothesis(known,ps1,ps2,2);
NameMatch known;
//= {<master.roots[0],servant.roots[0]>};
set[NameMatch] nknown = {};
//ps1 = master.prods;
//ps2 = assumeRenamings(servant.prods, known);
for (rootmatch <- {<r1,r2> | r1 <- master.roots, r2 <- servant.roots})
nknown += nominalMatch({rootmatch}, master.prods, assumeRenamings(servant.prods, {rootmatch}));
if (len(nknown)==1)
known = getOneFrom(nknown);
elsefor (k <- nknown)
println("Got version: <pp(k)>");
println("Done with the grammar.");
println("Nominal matching: <pp(known)>");
println("[!] Nominal matching: <pp(known)>");
for (<a,b> <- known)
if (a==b)
;
elseif (a=="")
;
else
ncbgf += renameN_renameN(b,a);
println("Done with the grammar.");
// Assume nominal matching!
servant = transform(forward(ncbgf),servant);
println("<pp(servant)>");
//println("<pp(servant)>");
//return servant;
return known;
}
Expand Down
4 changes: 3 additions & 1 deletion shared/rascal/src/lib/Rascalware.rsc
Original file line number Diff line number Diff line change
Expand Up @@ -31,4 +31,6 @@ public bool multiseteq(list[&T] xs, list[&T] ys) = sort(xs) == sort(ys);
public set[&T] toSet(list[&T] x) = List::toSet(x);
public list[&T] toList(set[&T] x) = Set::toList(x);
public &T getOneFrom(set[&T] x) = Set::getOneFrom(x);
public &T getOneFrom(set[&T] x) = Set::getOneFrom(x);
public list[&T] slice(list[&T] lst, int begin, int l) = List::slice(lst,begin,l);
44 changes: 22 additions & 22 deletions shared/rascal/src/normal/ANF.rsc
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,8 @@ CBGFSequence normAllStages(BGFGrammar gr)
dropAllTerminals,
dropAllHorizontals,
dropAllUnknowns,
dropAllChains
dropAllChains,
dropAllChainsToValues
])
{
c2 = f(g);
Expand Down Expand Up @@ -65,22 +66,6 @@ CBGFSequence normAllStages(BGFGrammar gr)
}
//c1 += horizontal_vertical(innt(n));
}
//elseif (production(_,n,choice(L)) := ps[0])
//{
// //println("Horizontal!");
// // go over all horizontal production rules
// for (e <- L)
// if (nonterminal(_) !:= e)
// {
// c2 = [extract_inline(production("",uniqueName(n,allNs(g)),e),innt(n))];
// // global extract can introduce conflicts with subsequent extracts,
// // that's why we need to transform immediately
// g = transform(forward(c2),g);
// c1 += c2;
// }
//}
//else
// iprintln(ps);
}
//iprintln(c1);
// now we can have constructions like this:
Expand Down Expand Up @@ -124,11 +109,26 @@ CBGFSequence dropAllChains(BGFGrammar g)
{
CBGFSequence cbgf = [];
set[str] defined = definedOnceNs(g.prods);
for(p <- g.prods, nonterminal(str n) := p.rhs)
if (n == p.lhs)
cbgf += abridge_detour(p);
elseif (n in defined && n notin usedNs(g.prods - p))
cbgf += unchain_chain(p);
for(p <- g.prods)
if (nonterminal(str n) := p.rhs)
{
if (n == p.lhs)
cbgf += abridge_detour(p);
elseif (n in defined && n notin usedNs(g.prods - p))
cbgf += unchain_chain(p);
}
//elseif (val(_) := p.rhs)
// cbgf += unfold_fold(p.lhs,globally());
return cbgf;
}
// Builds the transformation steps that inline "chains to values": for every
// production rule of g whose right-hand side matches val(_) and whose
// left-hand side is reported by both definedOnceNs and usedNs, one
// inline_extract(p, globally()) step is emitted, in the order of g.prods.
CBGFSequence dropAllChainsToValues(BGFGrammar g)
{
set[str] definedOnce = definedOnceNs(g.prods);
set[str] referenced = usedNs(g.prods);
CBGFSequence steps =
[ inline_extract(rule,globally())
| rule <- g.prods
, val(_) := rule.rhs
, rule.lhs in definedOnce
, rule.lhs in referenced
];
return steps;
}
Expand Down
Loading

0 comments on commit 06e4bbb

Please sign in to comment.