diff --git a/topics/export/hypertext/bgf2fancy.xslt b/topics/export/hypertext/bgf2fancy.xslt index 43ad0175..bacfcbb6 100644 --- a/topics/export/hypertext/bgf2fancy.xslt +++ b/topics/export/hypertext/bgf2fancy.xslt @@ -77,30 +77,30 @@
  • Number of defined nonterminal symbols: - - - - - - +
  • Root nonterminal symbols: - - - + + + - - + + , + + + + +
  • - Top nonterminal symbols: + Other top nonterminal symbols: @@ -148,7 +148,12 @@
  • Number of used terminal symbols: - + + + + + +
  • diff --git a/topics/grammars/ada/Makefile b/topics/grammars/ada/Makefile index 8c5aaae5..da8e8053 100644 --- a/topics/grammars/ada/Makefile +++ b/topics/grammars/ada/Makefile @@ -1,9 +1,9 @@ -build: - cd kempe && make build - cd kellogg && make build - cd laemmel-verhoef && make build - cd lncs-2219 && make build - cd lncs-4348 && make build +extract: + cd kempe && make extract + cd kellogg && make extract + cd laemmel-verhoef && make extract + cd lncs-2219 && make extract + cd lncs-4348 && make extract clean: cd kempe && make clean @@ -20,12 +20,12 @@ test: cd lncs-4348 && make test diff: - cp kempe/ada.bgf kempe.bgf - cp laemmel-verhoef/ada.bgf laemmel-verhoef.bgf - cp lncs-2219/ada.bgf lncs-2219.bgf - cp lncs-4348/ada.bgf lncs-4348.bgf - make build + make extract ../../../shared/tools/gdts kempe/ada.bgf kempe.bgf || exit -1 ../../../shared/tools/gdts laemmel-verhoef/ada.bgf laemmel-verhoef.bgf || exit -1 ../../../shared/tools/gdts lncs-2219/ada.bgf lncs-2219.bgf || exit -1 ../../../shared/tools/gdts lncs-4348/ada.bgf lncs-4348.bgf || exit -1 + cp kempe/ada.bgf kempe.bgf + cp laemmel-verhoef/ada.bgf laemmel-verhoef.bgf + cp lncs-2219/ada.bgf lncs-2219.bgf + cp lncs-4348/ada.bgf lncs-4348.bgf diff --git a/topics/grammars/ada/hunter.py b/topics/grammars/ada/hunter.py deleted file mode 100755 index 9b9c4373..00000000 --- a/topics/grammars/ada/hunter.py +++ /dev/null @@ -1,677 +0,0 @@ -#!/usr/local/bin/python -# -*- coding: utf-8 -*- -import os, sys -import elementtree.ElementTree as ET -sys.path.append(os.getcwd().split('slps')[0]+'slps/shared/python') -import BGF, slpsns - -config = {} - -debug = False - -def isAlpha(x): - return reduce(lambda a,b:a and b=='_' or b.isalnum(),x,True) - -def isQNumber(x): - if x =='.': - return False - else: - return reduce(lambda a,b:a and b=='.' 
or b.isdigit(),x,True) - -def splitTokenStream(s): - ts = [s[0]] - i = 1 - alpha = isAlpha(s[0]) - while (i>>>>',poss - print 'ats >>>>>>',ats - for i in range(0,len(poss[0])): - if poss[0][i][0] == poss[1][i][0]: - # same alternative - tmp = ats[poss[0][i][0]][:poss[0][i][1]] - tmp.append([ats[poss[0][i][0]][poss[0][i][1]:poss[1][i][1]][1:]]) - tmp.extend(ats[poss[0][i][0]][poss[1][i][1]+1:]) - ats[poss[0][i][0]] = tmp - #print '-->',tmp - else: - newats = [] - for j in range(0,len(ats)): - if jposs[1][i][0]: - newats.append(ats[j]) - elif j==poss[0][i][0]: - before = ats[j][:poss[0][i][1]] - tmp = [ats[j][poss[0][i][1]+1:]] - elif j==poss[1][i][0]: - tmp.append(ats[j][:poss[1][i][1]]) - before.append(tmp) - before.extend(ats[j][poss[1][i][1]+1:]) - newats.append(before) - else: - tmp.append(ats[j]) - ats = newats - #print '--> not implemented yet' - if len(ats) == 1 and len(ats[0]) == 1 and type(ats[0][0]) == type([]): - ats = ats[0][0] - return ats - -def findSpecialGroups(ats,start,end): - global debug - poss = [[],[]] - i = j = level = 0 - lp = {} - for i in range(0,len(ats)): - for j in range(0,len(ats[i])): - if ats[i][j] == start: - poss[0].append((i,j)) - poss[1].append(None) - lp[level] = len(poss[0])-1 - level += 1 - if ats[i][j] == end: - level -= 1 - poss[1][lp.pop(level)] = (i,j) - if len(poss[0]) != len(poss[1]): - print 'STEP 6 deadlock: number of start-?-symbol and end-?-symbol occurrences do not match.' 
- return ats - if debug and poss[0]: - print 'poss >>>>>',poss - print 'ats >>>>>>',ats - for i in range(0,len(poss[0])): - if poss[0][i][0] != poss[1][i][0]: - newats = [] - for j in range(0,len(ats)): - if jposs[1][i][0]: - newats.append(ats[j]) - elif j==poss[0][i][0]: - before = ats[j][:poss[0][i][1]+1] - tmp = [ats[j][poss[0][i][1]+1:]] - elif j==poss[1][i][0]: - tmp.append(ats[j][:poss[1][i][1]]) - before.append(tmp) - before.extend(ats[j][poss[1][i][1]:]) - newats.append(before) - else: - tmp.append(ats[j]) - ats = newats - if len(ats) == 1 and len(ats[0]) == 1 and type(ats[0][0]) == type([]): - ats = ats[0][0] - return ats - -def mapsymbols(ts,symbol,special): - for i in range(0,len(ts)): - if ts[i] == symbol: - ts[i] = special - elif type(ts[i]) == type([]): - ts[i] = mapsymbols(ts[i],symbol,special) - return ts - -def mapmany(s): - global debug - i = 0 - es = [] - while(i0: - subexprs.append(s[i]) - if s[i] == 'START-OPTION-SYMBOL': - level += 1 - if s[i] == 'END-OPTION-SYMBOL': - level -= 1 - i += 1 - i -= 1 - if debug: - print '>>>opt>>>',subexprs[:-1] - se = mapmany(subexprs[:-1]) - opt = BGF.Optional() - opt.setExpr(se) - es.append(opt) - elif s[i] == 'START-REPEAT-SYMBOL': - if debug: - print 'REPEAT GO' - subexprs = [] - i += 1 - level = 1 - while level>0: - subexprs.append(s[i]) - if s[i] == 'START-REPEAT-SYMBOL': - level += 1 - if s[i] == 'END-REPEAT-SYMBOL': - level -= 1 - i += 1 - i -= 1 - if debug: - print '>>>rep>>>',subexprs[:-1] - se = mapmany(subexprs[:-1]) - opt = BGF.Star() - opt.setExpr(se) - es.append(opt) - else: - es.append(map2expr(s[i])) - i += 1 - if len(es) == 1: - e = BGF.Expression(es[0]) - else: - e = BGF.Sequence() - for a in es: - e.add(BGF.Expression(a)) - return e - -def endOfContext(a,i,e): - s = a[i] - level = 1 - j = i+1 - while level>0 and j>>zz>>>',z - return z - -def map2expr(ss): - global debug - ess = [] - es = [] - i = 0 - while i>>context>>>',ss[i:j] - e = BGF.Star() - e.setExpr(map2expr(ss[i+1:j-1])) - 
es.append(e) - i = j - elif ss[i] == 'START-PLUS-SYMBOL': - j = endOfContext(ss,i,'END-PLUS-SYMBOL') - if debug: - print '>>>context>>>',ss[i:j] - e = BGF.Plus() - e.setExpr(map2expr(ss[i+1:j-1])) - es.append(e) - i = j - elif ss[i] == 'START-OPTION-SYMBOL': - j = endOfContext(ss,i,'END-OPTION-SYMBOL') - if debug: - print '>>>context>>>',ss[i:j] - e = BGF.Optional() - e.setExpr(map2expr(ss[i+1:j-1])) - es.append(e) - i = j - elif ss[i] == 'START-GROUP-SYMBOL': - j = endOfContext(ss,i,'END-GROUP-SYMBOL') - if i == 0 and j == len(ss): - ss = ss[1:-1] - continue - if debug: - print '>>>context>>>',ss[i:j] - es.append(map2expr(ss[i+1:j-1])) - i = j - elif ss[i] == 'DEFINITION-SEPARATOR-SYMBOL': - if debug: - print 'It is a bar! It is a choice!' - ess.append(es) - es = [] - i += 1 - elif ss[i][0] == config['start-terminal-symbol']: - if debug: - print 'TERMINAL',ss[i][1:-1] - e = BGF.Terminal() - e.setName(ss[i][1:-1]) - es.append(e) - i += 1 - else: - if debug: - print 'NONTERMINAL',ss[i] - e = BGF.Nonterminal() - e.setName(ss[i]) - es.append(e) - i += 1 - ess.append(es) - if len(ess) == 0: - print 'Serialisation error: empty output sequence!' - return - elif len(ess) == 1: - if len(ess[0]) == 0: - print 'Serialisation error: empty internal output sequence!' - return BGF.Expression(BGF.Epsilon()) - elif len(ess[0]) == 1: - return BGF.Expression(ess[0][0]) - else: - e = BGF.Sequence() - for x in ess[0]: - e.add(BGF.Expression(x)) - return BGF.Expression(e) - else: - e = BGF.Choice() - for es in ess: - if len(es) == 0: - print 'Serialisation error: empty internal output sequence!' - return BGF.Expression(BGF.Epsilon()) - elif len(es) == 1: - e.add(BGF.Expression(es[0])) - else: - ee = BGF.Sequence() - for x in es: - ee.add(BGF.Expression(x)) - e.add(BGF.Expression(ee)) - return BGF.Expression(e) - print 'Dead code reached!' 
- return - -def mapglue(b,s): - a = b[:] - i = 0 - while i < len(a): - if type(a[i])==type([]): - a[i] = mapglue(a[i],s) - i += 1 - continue - if a[i][0] != s[0]: - # first char mismatch - i += 1 - continue - combined = a[i] - j = i+1 - while j0 and not isAlpha(q[-1][1:-1]) and not isAlpha(x): - # addition on terminals - q[-1] = q[-1][:-1] + y[1:] - else: - q.append(y) - r = p[:2] - r.extend(q) - if debug and p != r: - print '>>>in>>>>',p - print '>>>out>>>',r - return r - -def assembleQualifiedNumbers(ts): - ds = [] - for x in ts: - if len(ds)>0 and (isQNumber(x) or x=='.') and isQNumber(ds[-1]): - ds[-1] += x - else: - ds.append(x) - return ds - -def decomposeSymbols(p,defd): - # [label, nt, ...] - q = p[:2] - for x in p[2:]: - match = False - if x not in defd and x.find(config['undefined-nonterminals-are-terminals'])>-1: - for d in defd: - if x[:len(d)] == d and x[len(d):].find(config['undefined-nonterminals-are-terminals'])<0: - nt = d - t = x[len(d):] - if debug: - print '->->->->->',x,'matches as',nt,'+',t - q.append(nt) - # todo: need to be adjusted if the order of phases is changed - #q.append(config['start-terminal-symbol']+t+config['end-terminal-symbol']) - q.append(t) - match = True - break - if x[-len(d):] == d and x[:-len(d)].find(config['undefined-nonterminals-are-terminals'])<0: - nt = d - t = x[:-len(d)] - if debug: - print '=>=>=>=>=>',x,'matches as',t,'+',nt - # todo: need to be adjusted if the order of phases is changed - #q.append(config['start-terminal-symbol']+t+config['end-terminal-symbol']) - q.append(t) - q.append(nt) - match = True - break - if not match: - q.append(x) - return q - -if __name__ == "__main__": - if len(sys.argv) != 4: - print 'Usage:' - print ' extract.py input.txt config.edd output.bgf' - sys.exit(-1) - #f = open('src.grammar.txt','r') - f = open(sys.argv[1],'r') - # STEP 0: read the file, remove whitespace (?) - print 'STEP 0: reading the file, removing whitespace, getting the configuration.' 
- tokens = splitTokenStream(f.read()) - f.close() - readConfig(sys.argv[2]) - #print tokens - # STEP 1: assemble terminal symbols - print 'STEP 1: assembling terminal symbols according to start-terminal-symbol and end-terminal-symbol.' - if 'start-terminal-symbol' in config.keys() and 'end-terminal-symbol' in config.keys(): - tokens = assembleBracketedSymbols(tokens,config['start-terminal-symbol'],config['end-terminal-symbol']) - else: - print 'STEP 1 skipped, sorry: start-terminal-symbol and end-terminal-symbol are not both specified.' - # STEP 2: assemble nonterminal symbols - print 'STEP 2: assembling nonterminal symbols according to start-nonterminal-symbol and end-nonterminal-symbol.' - if 'start-nonterminal-symbol' in config.keys() and 'end-nonterminal-symbol' in config.keys(): - tokens = assembleBracketedSymbols(tokens,config['start-nonterminal-symbol'],config['end-nonterminal-symbol']) - else: - print 'STEP 2 skipped, sorry: start-nonterminal-symbol and end-nonterminal-symbol are not both specified.' - # STEP 3: assembling composite metasymbols together - print 'STEP 3: assembling metasymbols according to their possible values.' - tokens = assembleQualifiedNumbers(tokens) - for k in config.keys(): - if len(config[k])>1: - print 'STEP 3: going to glue tokens that resemble', config[k].replace('\n','\\n') - tokens = mapglue(tokens,config[k]) - if debug: - print tokens - # STEP 4: slice according to defining-symbol - print 'STEP 4: splitting the token stream into productions according to defining-symbol.' - if 'defining-symbol' in config.keys(): - prods = useDefiningSymbol(tokens,config['defining-symbol']) - else: - print 'STEP 4 skipped, sorry: defining-symbol is not specified.' - # TODO - # STEP 4a.1: [sanity check] Infer terminator-symbol - print 'STEP 4: inferring terminator-symbol by looking at the productions.' 
- if 'terminator-symbol' not in config.keys(): - ts = findCommonTail(prods[:-1]) - if ts: - print 'STEP 4 successful: inferred terminator-symbol:',ts - config['terminator-symbol'] = ts - need2fix = [-1] - else: - (need2fix,ts) = findMostProbableTail(prods) - if ts: - print 'STEP 4 successful: inferred the most probable terminator-symbol:',ts - config['terminator-symbol'] = ts - else: - # ORLY? - print 'STEP 4 unsuccessful, sorry.' - for p in prods: - print '%40s'%p[1],'>>>>>>',p[-2:] - # STEP 4a.2: adjusting the terminator-symbol on the unfit productions - poststep4 = 0 - for f in need2fix: - for i in range(0,len(config['terminator-symbol'])): - if prods[f][-len(config['terminator-symbol'])+i:] == config['terminator-symbol'][:len(config['terminator-symbol'])-i]: - prods[f] = prods[f][:-len(config['terminator-symbol'])+i] - prods[f].extend(config['terminator-symbol']) - poststep4 += 1 - break - if poststep4 > 0: - print 'STEP 4 also adjusted',poststep4,'productions that did not quite fit the expectations.' - # STEP 4b: splitting the token stream into productions according to terminator-symbol; inferring defining-symbol - # TODO - prods = map(lambda p:p[:-(len(config['terminator-symbol']))] if p[-(len(config['terminator-symbol'])):] == config['terminator-symbol'] else p,prods) - # STEP 5: slice insides according to definition-separator-symbol - step5 = False - for s in ('definition-separator-symbol','start-group-symbol','end-group-symbol','start-star-symbol','end-star-symbol','start-plus-symbol','end-plus-symbol','start-option-symbol','end-option-symbol'): - if s in config.keys(): - print 'STEP 5: marking',s+'.' - step5 = True - prods = map(lambda p:map(lambda x:s.upper() if x==config[s] else x,p),prods) - if not step5: - print 'STEP 5 skipped: sorry, no metasymbols specified.' - # STEP 6: - print 'STEP 6: executing special extraction commands.' 
- step6 = False - defined = map(lambda x:x[1],prods) - defined.append(config['defining-symbol']) - if 'ignore-extra-newlines' in config.keys(): - print 'STEP 6: ignoring extra newlines.' - step6 = True - prods = map(lambda x:filter(lambda y:y!='\n',x),prods) - if 'decompose-symbols' in config.keys(): - print 'STEP 6 (part of rule 4): decomposing compound symbols.' - step6 = True - prods = map(lambda x:decomposeSymbols(x,defined),prods) - if 'undefined-nonterminals-are-terminals' in config.keys(): - print 'STEP 6 (rule 5): turning undefined nonterminals into terminals.' - step6 = True - if 'start-terminal-symbol' not in config.keys() and 'end-terminal-symbol' not in config.keys(): - config['start-terminal-symbol'] = config['end-terminal-symbol'] = '"' - prods = map(lambda p:map(lambda x:x if x in defined or x.find(config['undefined-nonterminals-are-terminals'])>-1 or (x.isupper() and len(x)>1) or x=='' else config['start-terminal-symbol']+x+config['end-terminal-symbol'],p),prods) - if 'glue-nonalphanumeric-terminals' in config.keys(): - print 'STEP 6 (part of rule 3): glueing non-alphanumeric terminal symbols together.' - step6 = True - prods = map(glueTerminals,prods) - if not step6: - print 'STEP 6 skipped, sorry: no special commands found in the configuration.' - # STEP X: validating bracketing? - # ... - # RESULT - if debug: - print 'RESULT:' - for p in prods: - print p[0],'is defined as:' - print '\t',p[2:] - # FINAL STEP: compose BGF - bgf = BGF.Grammar() - for q in prods: - p = BGF.Production() - if 'disregard-labels' not in config.keys() and q[0]: - p.setLabel(q[0]) - p.setNT(q[1]) - p.setExpr(map2expr(q[2:])) - bgf.addProd(p) - ET.ElementTree(bgf.getXml()).write(sys.argv[3]) - print 'FINAL STEP: BGF written.' 
diff --git a/topics/grammars/ada/kempe/Makefile b/topics/grammars/ada/kempe/Makefile index 69c1c055..ec956d85 100644 --- a/topics/grammars/ada/kempe/Makefile +++ b/topics/grammars/ada/kempe/Makefile @@ -1,8 +1,8 @@ -build: +extract: cat src.syntax.summary.txt | grep -v 'quotation mark' > src.prepared.txt perl -pi -w -e 's/{\|/{TERMINALBAR/g;' src.prepared.txt perl -pi -w -e 's/J\./0\./g;' src.prepared.txt - ../hunter.py src.prepared.txt config.edd ada-1.bgf + ../../hunter.py src.prepared.txt config.edd ada-1.bgf ../../../../shared/tools/xbgf post-extract.xbgf ada-1.bgf ada-2.bgf ../../../../shared/tools/xbgf correct.xbgf ada-2.bgf ada.bgf ../../../../shared/tools/bgf2bnf ada.bgf ada.bnf diff --git a/topics/grammars/ada/kempe/config.edd b/topics/grammars/ada/kempe/config.edd index 5545280b..e723c69d 100644 --- a/topics/grammars/ada/kempe/config.edd +++ b/topics/grammars/ada/kempe/config.edd @@ -7,7 +7,9 @@ ] { } - _ + + _ + diff --git a/topics/grammars/ada/laemmel-verhoef/Makefile b/topics/grammars/ada/laemmel-verhoef/Makefile index 91ff6736..406a04d6 100644 --- a/topics/grammars/ada/laemmel-verhoef/Makefile +++ b/topics/grammars/ada/laemmel-verhoef/Makefile @@ -1,5 +1,5 @@ -build: - ../hunter.py src.context.free.syntax.txt config.edd ada.raw.bgf +extract: + ../../hunter.py src.context.free.syntax.txt config.edd ada.raw.bgf ../../../../shared/tools/xbgf correct.xbgf ada.raw.bgf ada.bgf test: diff --git a/topics/grammars/ada/lncs-2219/Makefile b/topics/grammars/ada/lncs-2219/Makefile index 5b10e03f..29ff3e84 100644 --- a/topics/grammars/ada/lncs-2219/Makefile +++ b/topics/grammars/ada/lncs-2219/Makefile @@ -1,10 +1,10 @@ -build: +extract: cp src.syntax.summary.txt src.prepared.txt perl -pi -w -e 's/{\|/{TERMINALBAR/g;' src.prepared.txt perl -pi -w -e 's/–/-/g;' src.prepared.txt perl -pi -w -e 's/:\n/: /g;' src.prepared.txt perl -pi -w -e 's/J\./0\./g;' src.prepared.txt - ../hunter.py src.prepared.txt config.edd ada-1.bgf + ../../hunter.py src.prepared.txt config.edd 
ada-1.bgf ../../../../shared/tools/xbgf post-extract.xbgf ada-1.bgf ada-2.bgf ../../../../shared/tools/xbgf correct.xbgf ada-2.bgf ada.bgf ../../../../shared/tools/bgf2bnf ada.bgf ada.bnf @@ -14,4 +14,4 @@ test: ../../../../shared/tools/checkbgf ada.bgf clean: - rm -f *.bgf *.bnf src.prepared.txt + rm -f *.bgf *.bnf src.prepared.txt *.html diff --git a/topics/grammars/ada/lncs-2219/config.edd b/topics/grammars/ada/lncs-2219/config.edd index 1a1cbfe4..07be4039 100644 --- a/topics/grammars/ada/lncs-2219/config.edd +++ b/topics/grammars/ada/lncs-2219/config.edd @@ -7,9 +7,16 @@ ] { } - _ + + underline + digit + + _ + - + + terminate + diff --git a/topics/grammars/ada/lncs-2219/post-extract.xbgf b/topics/grammars/ada/lncs-2219/post-extract.xbgf index c028dd2f..dd687794 100644 --- a/topics/grammars/ada/lncs-2219/post-extract.xbgf +++ b/topics/grammars/ada/lncs-2219/post-extract.xbgf @@ -9,24 +9,6 @@ | - - - - underline - - - underline - - - - - - digit - - - digit - - diff --git a/topics/grammars/ada/lncs-4348/Makefile b/topics/grammars/ada/lncs-4348/Makefile index 4c6413bc..ba0cc5aa 100644 --- a/topics/grammars/ada/lncs-4348/Makefile +++ b/topics/grammars/ada/lncs-4348/Makefile @@ -1,9 +1,9 @@ -build: +extract: cp src.syntax.summary.txt src.prepared.txt perl -pi -w -e 's/{\|/{TERMINALBAR/g;' src.prepared.txt perl -pi -w -e 's/–/-/g;' src.prepared.txt perl -pi -w -e 's/J\./0\./g;' src.prepared.txt - ../hunter.py src.prepared.txt config.edd ada-1.bgf + ../../hunter.py src.prepared.txt config.edd ada-1.bgf ../../../../shared/tools/xbgf post-extract.xbgf ada-1.bgf ada-2.bgf ../../../../shared/tools/xbgf correct.xbgf ada-2.bgf ada.bgf ../../../../shared/tools/bgf2bnf ada.bgf ada.bnf @@ -13,4 +13,4 @@ test: ../../../../shared/tools/checkbgf ada.bgf clean: - rm -f *.bgf *.bnf src.prepared.txt + rm -f *.bgf *.bnf src.prepared.txt *.html diff --git a/topics/grammars/ada/lncs-4348/config.edd b/topics/grammars/ada/lncs-4348/config.edd index 1a1cbfe4..ac24504b 100644 --- 
a/topics/grammars/ada/lncs-4348/config.edd +++ b/topics/grammars/ada/lncs-4348/config.edd @@ -7,9 +7,15 @@ ] { } - _ + + underline + + _ + - + + terminate + diff --git a/topics/grammars/ada/lncs-4348/post-extract.xbgf b/topics/grammars/ada/lncs-4348/post-extract.xbgf index ff361121..dd687794 100644 --- a/topics/grammars/ada/lncs-4348/post-extract.xbgf +++ b/topics/grammars/ada/lncs-4348/post-extract.xbgf @@ -9,15 +9,6 @@ | - - - - underline - - - underline - - diff --git a/topics/grammars/eiffel/Makefile b/topics/grammars/eiffel/Makefile new file mode 100644 index 00000000..f4e29fad --- /dev/null +++ b/topics/grammars/eiffel/Makefile @@ -0,0 +1,14 @@ +extract: + cp src.manually.fixed.txt src.prepared.txt + perl -pi -w -e 's/C\+\+/CPP/g;' src.prepared.txt + ../hunter.py src.prepared.txt config.edd eiffel-1.bgf + ../../../shared/tools/xbgf post-extraction.xbgf eiffel-1.bgf eiffel-2.bgf + ../../../shared/tools/xbgf correct.xbgf eiffel-2.bgf eiffel.bgf + ../../../shared/tools/bgf2html eiffel.bgf eiffel.html + +test: + ls -1 *.bgf | xargs -n1 ../../../../shared/tools/validate bgf + ../../../../shared/tools/checkbgf ada.bgf + +clean: + rm -f *.bgf *.bnf src.prepared.txt *.html diff --git a/topics/grammars/eiffel/README.txt b/topics/grammars/eiffel/README.txt new file mode 100644 index 00000000..643a496b --- /dev/null +++ b/topics/grammars/eiffel/README.txt @@ -0,0 +1,22 @@ +INTERNATIONAL STANDARD ISO/IEC 25436 +First edition +2006-12-01 + +Information technology — Eiffel: Analysis, Design and Programming Language +Technologies de l'information — Eiffel: Langage d'analyse, conception et programmation + +Reference number ISO/IEC 25436:2006(E) + +© ISO/IEC 2006 +All rights reserved. 
Unless otherwise specified, no part of this publication may be reproduced or utilized in any form or by any means, electronic or mechanical, including photocopying and microfilm, without permission in writing from either ISO at the address below or ISO's member body in the country of the requester. +ISO copyright office Case postale 56 • CH-1211 Geneva 20 Tel. +41227490111 Fax +41227490947 E-mail copyright@iso.org Web www.iso.org +Published in Switzerland + +Obtained via http://standards.iso.org/ittf/PubliclyAvailableStandards/index.html + + + +NB by Vadim Zaytsev: + src.language.specification.txt contains the raw grammar copy-pasted from the all "Syntax:" sections of the PDF + src.manually.fixed.txt contains the same grammar with some layout fixed by hand + both are provided so that everyone can assess the extent of manual changes diff --git a/topics/grammars/eiffel/config.edd b/topics/grammars/eiffel/config.edd new file mode 100644 index 00000000..da7a7eaa --- /dev/null +++ b/topics/grammars/eiffel/config.edd @@ -0,0 +1,30 @@ + + + =∆ + | + " + " + [ + ] + { + ...}* + { + ...}+ + + + + _ + + Result + Current + True + False + + + + + + + '"' + " + + diff --git a/topics/grammars/eiffel/correct.xbgf b/topics/grammars/eiffel/correct.xbgf new file mode 100644 index 00000000..60cd9bce --- /dev/null +++ b/topics/grammars/eiffel/correct.xbgf @@ -0,0 +1,59 @@ + + + + + + C_external + + + + ’" + + + C + + + + + + + + + + + + inline + + + + + + + External_signature + + + + + + + External_file_use + + + + + " + + + + + + + + + ’" + + + " + + + diff --git a/topics/grammars/eiffel/post-extraction.xbgf b/topics/grammars/eiffel/post-extraction.xbgf new file mode 100644 index 00000000..778854f8 --- /dev/null +++ b/topics/grammars/eiffel/post-extraction.xbgf @@ -0,0 +1,12 @@ + + + + + + CPP + + + C++ + + + diff --git a/topics/grammars/eiffel/src.language.specification.txt b/topics/grammars/eiffel/src.language.specification.txt new file mode 100644 index 00000000..2e86992e --- 
/dev/null +++ b/topics/grammars/eiffel/src.language.specification.txt @@ -0,0 +1,103 @@ +Class_name =∆ Identifier +Class_declaration =∆ [Notes] Class_header +[Formal_generics] [Obsolete] [Inheritance] [Creators] [Converters] [Features] [Invariant] +[Notes] end +Notes =∆ note Note_list Note_list =∆ {Note_entry ";" ...}* Note_entry =∆ Note_name Note_values +Note_name =∆ Identifier ":" Note_values =∆ {Note_item ","...}+ Note_item =∆ Identifier | Manifest_constant +Class_header =∆ [Header_mark] class Class_name Header_mark=∆ deferred|expanded|frozen +Obsolete =∆ obsolete Message Message =∆ Manifest_string +Features =∆ Feature_clause+ Feature_clause =∆ feature [Clients] [Header_comment] Feature_declaration_list Feature_declaration_list =∆ {Feature_declaration ";" ...}* Header_comment =∆ Comment +Feature_declaration =∆ New_feature_list Declaration_body Declaration_body =∆ [Formal_arguments] [Query_mark] [Feature_value] Query_mark =∆ Type_mark [Assigner_mark] Type_mark =∆ ":" Type Feature_value =∆ [Explicit_value] +[Obsolete] [Header_comment] [Attribute_or_routine] +Explicit_value =∆ "=" Manifest_constant +New_feature_list =∆ {New_feature "," ...}+ New_feature =∆ [frozen] Extended_feature_name +Attribute_or_routine =∆ [Precondition] [Local_declarations] +Feature_body [Postcondition] [Rescue] end ∆ +Feature_body = Deferred | Effective_routine | Attribute +Extended_feature_name =∆ Feature_name [Alias] Feature_name =∆ Identifier Alias =∆ alias '"' Alias_name '"' [convert] Alias_name =∆ Operator | Bracket +Bracket =∆ "[]" +Operator =∆ Unary | Binary Unary =∆ not | "+" | "–" | Free_unary Binary=∆ "+"|"–"|"*"|"/"|"//"|"\\"|"^"|".."| +"<" | ">" | "<=" | ">=" | and | or | xor | and then | or else | implies | +Free_binary +Assigner_mark =∆ assign Feature_name +Inheritance =∆ Inherit_clause+ Inherit_clause =∆ inherit [Non_conformance] Parent_list Non_conformance =∆ "{" NONE "}" Parent_list =∆ {Parent ";" ...}+ Parent =∆ Class_type [Feature_adaptation] Feature_adaptation =∆ 
[Undefine] +[Redefine] [Rename] [New_exports] [Select] +end +Rename =∆ rename Rename_list Rename_list =∆ {Rename_pair "," ...}+ +Rename_pair =∆ Feature_name as Extended_feature_name +Clients =∆ "{" Class_list "}" Class_list =∆ {Class_name "," ...}+ +New_exports =∆ export New_export_list New_export_list =∆ {New_export_item ";" ...}+ New_export_item =∆ Clients [Header_comment] Feature_set Feature_set =∆ Feature_list | all Feature_list =∆ {Feature_name "," ...}+ +Formal_arguments =∆ "(" Entity_declaration_list ")" Entity_declaration_list =∆ {Entity_declaration_group ";" ...}+ Entity_declaration_group =∆ Identifier_list Type_mark Identifier_list =∆ {Identifier "," ...}+ +Deferred =∆ deferred Effective_routine =∆ Internal | External Internal =∆ Routine_mark Compound Routine_mark =∆ do | Once Once =∆ once [ "("Key_list ")" ] Key_list =∆ {Manifest_string "," ...}+ +Local_declarations =∆ local [Entity_declaration_list] +Compound =∆ {Instruction ";" ...}* +Instruction =∆ Creation_instruction | Call | Assignment | Assigner_call | Conditional | Multi_branch | Loop | Debug | Precursor | Check | Retry +Precondition =∆ require [else] Assertion Postcondition =∆ ensure [then] Assertion [Only] Invariant =∆ invariant Assertion Assertion =∆ {Assertion_clause ";" ...}* Assertion_clause =∆ [Tag_mark] Unlabeled_assertion_clause Unlabeled_assertion_clause =∆ Boolean_expression | Comment Tag_mark =∆ Tag ":" Tag =∆ Identifier +Old =∆ old Expression +Only =∆ only [Feature_list] +Check =∆ check Assertion [Notes] end +Variant =∆ variant [Tag_mark] Expression +Precursor =∆ Precursor [Parent_qualification] [Actuals] Parent_qualification =∆ "{" Class_name "}" +Redefine =∆ redefine Feature_list +Undefine =∆ undefine Feature_list +Type =∆ Class_or_tuple_type | Formal_generic_name | Anchored Class_or_tuple_type =∆ Class_type | Tuple_type Class_type =∆ [Attachment_mark] Class_name [Actual_generics] +Attachment_mark =∆ "?" | "!" 
Anchored =∆ [Attachment_mark] like Anchor Anchor =∆ Feature_name | Current +Actual_generics =∆ "[" Type_list "]" Type_list =∆ {Type "," ...}+ +Formal_generics =∆ "[" Formal_generic_list "]" Formal_generic_list =∆ {Formal_generic ","...}+ Formal_generic =∆ [frozen] Formal_generic_name [Constraint] Formal_generic_name =∆ [?] Identifier +Constraint =∆ "–>" Constraining_types [Constraint_creators] Constraining_types =∆ Single_constraint | Multiple_constraint Single_constraint =∆ Type [Renaming] Renaming =∆ Rename end +Multiple_constraint =∆ "{" Constraint_list "}" Constraint_list =∆ {Single_constraint "," ...}+ Constraint_creators =∆ create Feature_list end +Tuple_type =∆ TUPLE [Tuple_parameter_list] Tuple_parameter_list =∆ "[" Tuple_parameters "]" Tuple_parameters =∆ Type_list | Entity_declaration_list +Manifest_tuple =∆ "[" Expression_list "]" Expression_list =∆ {Expression "," ...}* +Converters=∆ convertConverter_list Converter_list =∆ {Converter ","...}+ Converter =∆ Conversion_procedure | Conversion_query Conversion_procedure =∆ Feature_name "(" "{" Type_list "}" ")" Conversion_query =∆ Feature_name ":" "{" Type_list "}" +Select =∆ select Feature_list +Conditional =∆ if Then_part_list [Else_part] end Then_part_list∆= {Then_partelseif...}+ Then_part =∆ Boolean_expression then Compound Else_part =∆ else Compound +Multi_branch =∆ inspect Expression [When_part_list] [Else_part] endWhen_part_list =∆ When_part+When_part=∆ whenChoicesthenCompoundChoices =∆ {Choice "," ...}+ +Choice =∆ Constant | Manifest_type | Constant_interval | Type_intervalConstant_interval =∆ Constant ".." ConstantType_interval =∆ Manifest_type ".." 
Manifest_type +Loop =∆ Initialization +[Invariant] +Exit_condition +Loop_body +[Variant] +end ∆ +Initialization = from Compound Exit_condition =∆ until Boolean_expression Loop_body =∆ loop Compound +Debug = debug [ "("Key_list ")" ] Compound end +Attribute=∆ attributeCompound +Entity =∆ Variable | Read_only Variable =∆ Variable_attribute | Local Variable_attribute =∆ Feature_name Local =∆ Identifier | Result Read_only =∆ Formal | Constant_attribute | Current Formal =∆ Identifier∆ Constant_attribute = Feature_name +Creators =∆ Creation_clause+ Creation_clause =∆ create [Clients] [Header_comment] Creation_procedure_list Creation_procedure_list =∆ {Creation_procedure ","...}+ Creation_procedure =∆ Feature_name +Creation_instruction =∆ create [Explicit_creation_type] Creation_call Explicit_creation_type =∆ "{" Type "}" Creation_call =∆ Variable [Explicit_creation_call] Explicit_creation_call =∆ "." Unqualified_call +Creation_expression =∆ create Explicit_creation_type [Explicit_creation_call] +Equality =∆ Expression Comparison Expression Comparison =∆ "=" | "/=" | "~" | "/~" +Assignment =∆ Variable ":=" Expression +Assigner_call =∆ Expression ":=" Expression +Call =∆ Object_call | Non_object_call Object_call =∆ [Target "."] Unqualified_call Unqualified_call =∆ Feature_name [Actuals] Target =∆ Local | Read_only | Call | Parenthesized_target Parenthesized_target =∆ "(|" Expression "|)" Non_object_call =∆ "{" Type "}" "." Unqualified_call +Actuals =∆ "(" Actual_list ")" Actual_list =∆ {Expression "," ...}+ +Object_test =∆ "{" Identifier ":" Type "}" Expression +Rescue =∆ rescue Compound Retry =∆ retry +Agent =∆ Call_agent | Inline_agent Call_agent =∆ agent Call_agent_body Inline_agent =∆ agent [Formal_arguments] [Type_mark] [Attribute_or_routine] [Agent_actuals] +Call_agent_body =∆ Agent_qualified | Agent_unqualifiedAgent_qualified =∆ Agent_target ". 
" Agent_unqualifiedAgent_unqualified =∆ Feature_name [Agent_actuals]Agent_target =∆ Entity | Parenthesized | Manifest_typeAgent_actuals =∆ "(" Agent_actual_list ")"Agent_actual_list =∆ {Agent_actual "," ...}+ +Agent_actual =∆ Expression | PlaceholderPlaceholder =∆ [Manifest_type] "?" +Expression=∆ Basic_expression | Special_expression Basic_expression=∆ Read_only | Local | Call | Precursor | Equality | Parenthesized | Old | +Operator_expression | Bracket_expression | Creation_expression Special_expression=∆ Manifest_constant | Manifest_tuple | Agent | Object_test | Once_string | +Address ∆ Parenthesized = "(" Expression ")" Address =∆ "$" Variable Once_string=∆ onceManifest_string Boolean_expression =∆ Basic_expression | Boolean_constant | Object_test +Operator_expression =∆ Unary_expression | Binary_expression Unary_expression =∆ Unary Expression Binary_expression =∆ Expression Binary Expression +Bracket_expression =∆ Bracket_target "[" Actuals "]" Bracket_target =∆ Target | Once_string | Manifest_constant | Manifest_tuple +Constant =∆ Manifest_constant | Constant_attribute Constant_attribute =∆ Feature_name +Manifest_constant =∆ [Manifest_type] Manifest_value Manifest_type =∆ "{" Type "}" Manifest_value =∆ Boolean_constant | +Character_constant | Integer_constant | Real_constant | Manifest_string | Manifest_type +Sign=∆"+"|"–" ∆ Integer_constant = [Sign] Integer Character_constant =∆ "'" Character "'" Boolean_constant =∆ True | False Real_constant =∆ [Sign] Real +Manifest_string =∆ Basic_manifest_string | Verbatim_string Basic_manifest_string =∆ ' " ' String_content ' " ' String_content =∆ {Simple_string Line_wrapping_part ...}+ Verbatim_string =∆ Verbatim_string_opener Line_sequence Verbatim_string_closer Verbatim_string_opener =∆ ' " ' [Simple_string] Open_bracket Verbatim_string_closer =∆ Close_bracket [Simple_string] ' " ' +Open_bracket =∆ "[" | "{" Close_bracket =∆ "]" | "}" +External =∆ external External_language [External_name] External_language =∆ 
Unregistered_language | Registered_language Unregistered_language =∆ Manifest_string External_name =∆ alias Manifest_string +Registered_language=∆ C_external | C++_external | DLL_external +External_signature=∆ signature[External_argument_types][:External_type] External_argument_types =∆ "(" External_type_list ")" External_type_list =∆ {External_type "," ...}* External_type =∆ Simple_string +External_file_use=∆ useExternal_file_list External_file_list =∆ {External_file "," ...}+ External_file =∆ External_user_file | External_system_file External_user_file=∆ '"' Simple_string '"' External_system_file =∆ "<"Simple_string ">" +C_external =∆ ’' " ' C ’[inline] +[External_signature] [External_file_use] '"' +C++_external =∆ ' " ' C++ inline +[External_signature] [External_file_use] '"' +DLL_external =∆ ' " ' dll [windows] +DLL_identifier [DLL_index] [External_signature] [External_file_use] '"' ∆ +DLL_identifier = Simple_string DLL_index =∆ Integer +Comment =∆ "– –" {Simple_string Comment_break ...}* Comment_break =∆ New_line [Blanks_or_tabs] "– –" +Integer =∆ [Integer_base] Digit_sequence Integer_base =∆ "0" Integer_base_letter Integer_base_letter =∆ "b" | "c" | "x" | "B" | "C" | "X" Digit_sequence =∆ Digit+ +Digit=∆ "0"|"1"|"2"|"3"|"4"|"5"|"6"|"7"|"8"|"9" | "a" | "b" | "c" | "d" | "e" | "f" | "A" | "B" | "C" | "D" | "E" | "F" | "_" diff --git a/topics/grammars/eiffel/src.manually.fixed.txt b/topics/grammars/eiffel/src.manually.fixed.txt new file mode 100644 index 00000000..84b2f6b0 --- /dev/null +++ b/topics/grammars/eiffel/src.manually.fixed.txt @@ -0,0 +1,111 @@ +Class_name =∆ Identifier +Class_declaration =∆ [Notes] Class_header +[Formal_generics] [Obsolete] [Inheritance] [Creators] [Converters] [Features] [Invariant] +[Notes] end +Notes =∆ note Note_list Note_list =∆ {Note_entry ";" ...}* Note_entry =∆ Note_name Note_values +Note_name =∆ Identifier ":" Note_values =∆ {Note_item ","...}+ Note_item =∆ Identifier | Manifest_constant +Class_header =∆ [Header_mark] class 
Class_name Header_mark=∆ deferred|expanded|frozen +Obsolete =∆ obsolete Message Message =∆ Manifest_string +Features =∆ Feature_clause+ Feature_clause =∆ feature [Clients] [Header_comment] Feature_declaration_list Feature_declaration_list =∆ {Feature_declaration ";" ...}* Header_comment =∆ Comment +Feature_declaration =∆ New_feature_list Declaration_body Declaration_body =∆ [Formal_arguments] [Query_mark] [Feature_value] Query_mark =∆ Type_mark [Assigner_mark] Type_mark =∆ ":" Type Feature_value =∆ [Explicit_value] +[Obsolete] [Header_comment] [Attribute_or_routine] +Explicit_value =∆ "=" Manifest_constant +New_feature_list =∆ {New_feature "," ...}+ New_feature =∆ [frozen] Extended_feature_name +Attribute_or_routine =∆ [Precondition] [Local_declarations] +Feature_body [Postcondition] [Rescue] end +Feature_body =∆ Deferred | Effective_routine | Attribute +Extended_feature_name =∆ Feature_name [Alias] Feature_name =∆ Identifier Alias =∆ alias '"' Alias_name '"' [convert] Alias_name =∆ Operator | Bracket +Bracket =∆ "[]" +Operator =∆ Unary | Binary Unary =∆ not | "+" | "–" | Free_unary Binary=∆ "+"|"–"|"*"|"/"|"//"|"\\"|"^"|".."| +"<" | ">" | "<=" | ">=" | and | or | xor | and then | or else | implies | +Free_binary +Assigner_mark =∆ assign Feature_name +Inheritance =∆ Inherit_clause+ Inherit_clause =∆ inherit [Non_conformance] Parent_list Non_conformance =∆ "{" NONE "}" Parent_list =∆ {Parent ";" ...}+ Parent =∆ Class_type [Feature_adaptation] Feature_adaptation =∆ [Undefine] +[Redefine] [Rename] [New_exports] [Select] +end +Rename =∆ rename Rename_list Rename_list =∆ {Rename_pair "," ...}+ +Rename_pair =∆ Feature_name as Extended_feature_name +Clients =∆ "{" Class_list "}" Class_list =∆ {Class_name "," ...}+ +New_exports =∆ export New_export_list New_export_list =∆ {New_export_item ";" ...}+ New_export_item =∆ Clients [Header_comment] Feature_set Feature_set =∆ Feature_list | all Feature_list =∆ {Feature_name "," ...}+ +Formal_arguments =∆ "(" 
Entity_declaration_list ")" Entity_declaration_list =∆ {Entity_declaration_group ";" ...}+ Entity_declaration_group =∆ Identifier_list Type_mark Identifier_list =∆ {Identifier "," ...}+ +Deferred =∆ deferred Effective_routine =∆ Internal | External Internal =∆ Routine_mark Compound Routine_mark =∆ do | Once Once =∆ once [ "("Key_list ")" ] Key_list =∆ {Manifest_string "," ...}+ +Local_declarations =∆ local [Entity_declaration_list] +Compound =∆ {Instruction ";" ...}* +Instruction =∆ Creation_instruction | Call | Assignment | Assigner_call | Conditional | Multi_branch | Loop | Debug | Precursor | Check | Retry +Precondition =∆ require [else] Assertion Postcondition =∆ ensure [then] Assertion [Only] Invariant =∆ invariant Assertion Assertion =∆ {Assertion_clause ";" ...}* Assertion_clause =∆ [Tag_mark] Unlabeled_assertion_clause Unlabeled_assertion_clause =∆ Boolean_expression | Comment Tag_mark =∆ Tag ":" Tag =∆ Identifier +Old =∆ old Expression +Only =∆ only [Feature_list] +Check =∆ check Assertion [Notes] end +Variant =∆ variant [Tag_mark] Expression +Precursor =∆ Precursor [Parent_qualification] [Actuals] Parent_qualification =∆ "{" Class_name "}" +Redefine =∆ redefine Feature_list +Undefine =∆ undefine Feature_list +Type =∆ Class_or_tuple_type | Formal_generic_name | Anchored Class_or_tuple_type =∆ Class_type | Tuple_type Class_type =∆ [Attachment_mark] Class_name [Actual_generics] +Attachment_mark =∆ "?" | "!" Anchored =∆ [Attachment_mark] like Anchor Anchor =∆ Feature_name | Current +Actual_generics =∆ "[" Type_list "]" Type_list =∆ {Type "," ...}+ +Formal_generics =∆ "[" Formal_generic_list "]" Formal_generic_list =∆ {Formal_generic ","...}+ Formal_generic =∆ [frozen] Formal_generic_name [Constraint] Formal_generic_name =∆ [?] 
Identifier +Constraint =∆ "–>" Constraining_types [Constraint_creators] Constraining_types =∆ Single_constraint | Multiple_constraint Single_constraint =∆ Type [Renaming] Renaming =∆ Rename end +Multiple_constraint =∆ "{" Constraint_list "}" Constraint_list =∆ {Single_constraint "," ...}+ Constraint_creators =∆ create Feature_list end +Tuple_type =∆ TUPLE [Tuple_parameter_list] Tuple_parameter_list =∆ "[" Tuple_parameters "]" Tuple_parameters =∆ Type_list | Entity_declaration_list +Manifest_tuple =∆ "[" Expression_list "]" Expression_list =∆ {Expression "," ...}* +Converters=∆ convertConverter_list Converter_list =∆ {Converter ","...}+ Converter =∆ Conversion_procedure | Conversion_query Conversion_procedure =∆ Feature_name "(" "{" Type_list "}" ")" Conversion_query =∆ Feature_name ":" "{" Type_list "}" +Select =∆ select Feature_list +Conditional =∆ if Then_part_list [Else_part] end Then_part_list=∆ {Then_partelseif...}+ Then_part =∆ Boolean_expression then Compound Else_part =∆ else Compound +Multi_branch =∆ inspect Expression [When_part_list] [Else_part] end +When_part_list =∆ When_part+When_part=∆ whenChoicesthenCompound +Choices =∆ {Choice "," ...}+ +Choice =∆ Constant | Manifest_type | Constant_interval | Type_interval +Constant_interval =∆ Constant ".." Constant +Type_interval =∆ Manifest_type ".." 
Manifest_type +Loop =∆ Initialization +[Invariant] +Exit_condition +Loop_body +[Variant] +end +Initialization =∆ from Compound Exit_condition =∆ until Boolean_expression Loop_body =∆ loop Compound +Debug =∆ debug [ "("Key_list ")" ] Compound end +Attribute=∆ attributeCompound +Entity =∆ Variable | Read_only Variable =∆ Variable_attribute | Local Variable_attribute =∆ Feature_name Local =∆ Identifier | Result Read_only =∆ Formal | Constant_attribute | Current Formal =∆ Identifier Constant_attribute =∆ Feature_name +Creators =∆ Creation_clause+ Creation_clause =∆ create [Clients] [Header_comment] Creation_procedure_list Creation_procedure_list =∆ {Creation_procedure ","...}+ Creation_procedure =∆ Feature_name +Creation_instruction =∆ create [Explicit_creation_type] Creation_call Explicit_creation_type =∆ "{" Type "}" Creation_call =∆ Variable [Explicit_creation_call] Explicit_creation_call =∆ "." Unqualified_call +Creation_expression =∆ create Explicit_creation_type [Explicit_creation_call] +Equality =∆ Expression Comparison Expression Comparison =∆ "=" | "/=" | "~" | "/~" +Assignment =∆ Variable ":=" Expression +Assigner_call =∆ Expression ":=" Expression +Call =∆ Object_call | Non_object_call Object_call =∆ [Target "."] Unqualified_call Unqualified_call =∆ Feature_name [Actuals] Target =∆ Local | Read_only | Call | Parenthesized_target Parenthesized_target =∆ "(|" Expression "|)" Non_object_call =∆ "{" Type "}" "." Unqualified_call +Actuals =∆ "(" Actual_list ")" Actual_list =∆ {Expression "," ...}+ +Object_test =∆ "{" Identifier ":" Type "}" Expression +Rescue =∆ rescue Compound Retry =∆ retry +Agent =∆ Call_agent | Inline_agent Call_agent =∆ agent Call_agent_body Inline_agent =∆ agent [Formal_arguments] [Type_mark] [Attribute_or_routine] [Agent_actuals] +Call_agent_body =∆ Agent_qualified | Agent_unqualified +Agent_qualified =∆ Agent_target ". 
" Agent_unqualified +Agent_unqualified =∆ Feature_name [Agent_actuals]Agent_target =∆ Entity | Parenthesized | Manifest_type +Agent_actuals =∆ "(" Agent_actual_list ")"Agent_actual_list =∆ {Agent_actual "," ...}+ +Agent_actual =∆ Expression | Placeholder +Placeholder =∆ [Manifest_type] "?" +Expression=∆ Basic_expression | Special_expression Basic_expression=∆ Read_only | Local | Call | Precursor | Equality | Parenthesized | Old | +Operator_expression | Bracket_expression | Creation_expression Special_expression=∆ Manifest_constant | Manifest_tuple | Agent | Object_test | Once_string | +Address Parenthesized =∆ "(" Expression ")" Address =∆ "$" Variable Once_string=∆ onceManifest_string Boolean_expression =∆ Basic_expression | Boolean_constant | Object_test +Operator_expression =∆ Unary_expression | Binary_expression Unary_expression =∆ Unary Expression Binary_expression =∆ Expression Binary Expression +Bracket_expression =∆ Bracket_target "[" Actuals "]" Bracket_target =∆ Target | Once_string | Manifest_constant | Manifest_tuple +Constant =∆ Manifest_constant | Constant_attribute Constant_attribute =∆ Feature_name +Manifest_constant =∆ [Manifest_type] Manifest_value Manifest_type =∆ "{" Type "}" Manifest_value =∆ Boolean_constant | +Character_constant | Integer_constant | Real_constant | Manifest_string | Manifest_type +Sign=∆"+"|"–" Integer_constant =∆ [Sign] Integer Character_constant =∆ "'" Character "'" Boolean_constant =∆ True | False Real_constant =∆ [Sign] Real +Manifest_string =∆ Basic_manifest_string | Verbatim_string Basic_manifest_string =∆ ' " ' String_content ' " ' String_content =∆ {Simple_string Line_wrapping_part ...}+ Verbatim_string =∆ Verbatim_string_opener Line_sequence Verbatim_string_closer Verbatim_string_opener =∆ ' " ' [Simple_string] Open_bracket Verbatim_string_closer =∆ Close_bracket [Simple_string] ' " ' +Open_bracket =∆ "[" | "{" Close_bracket =∆ "]" | "}" +External =∆ external External_language [External_name] External_language =∆ 
Unregistered_language | Registered_language Unregistered_language =∆ Manifest_string External_name =∆ alias Manifest_string +Registered_language=∆ C_external | C++_external | DLL_external +External_signature=∆ signature[External_argument_types][:External_type] External_argument_types =∆ "(" External_type_list ")" External_type_list =∆ {External_type "," ...}* External_type =∆ Simple_string +External_file_use=∆ useExternal_file_list External_file_list =∆ {External_file "," ...}+ External_file =∆ External_user_file | External_system_file External_user_file=∆ '"' Simple_string '"' External_system_file =∆ "<"Simple_string ">" +C_external =∆ ’' " ' C ’[inline] +[External_signature] [External_file_use] '"' +C++_external =∆ ' " ' C++ inline +[External_signature] [External_file_use] '"' +DLL_external =∆ ' " ' dll [windows] +DLL_identifier [DLL_index] [External_signature] [External_file_use] '"' +DLL_identifier =∆ Simple_string DLL_index =∆ Integer +Comment =∆ "– –" {Simple_string Comment_break ...}* Comment_break =∆ New_line [Blanks_or_tabs] "– –" +Integer =∆ [Integer_base] Digit_sequence Integer_base =∆ "0" Integer_base_letter Integer_base_letter =∆ "b" | "c" | "x" | "B" | "C" | "X" Digit_sequence =∆ Digit+ +Digit=∆ "0"|"1"|"2"|"3"|"4"|"5"|"6"|"7"|"8"|"9" | "a" | "b" | "c" | "d" | "e" | "f" | "A" | "B" | "C" | "D" | "E" | "F" | "_" diff --git a/topics/grammars/hunter.py b/topics/grammars/hunter.py new file mode 100755 index 00000000..688541c8 --- /dev/null +++ b/topics/grammars/hunter.py @@ -0,0 +1,898 @@ +#!/Library/Frameworks/Python.framework/Versions/3.1/bin/python3 +# -*- coding: utf-8 -*- +import os, sys +import xml.etree.ElementTree as ET +#sys.path.append(os.getcwd().split('slps')[0]+'slps/shared/python') +import BGF +from functools import reduce + +debug = False + +config = {} +masked = {} +always_terminals = [] +always_nonterminals = [] + +special = \ + [ + 'DEFINING-SYMBOL', + 'DEFINITION-SEPARATOR-SYMBOL', + 'START-TERMINAL-SYMBOL', + 'END-TERMINAL-SYMBOL', + 
def isAlpha(x):
    """Return True iff every character of x is alphanumeric or an underscore.

    Fixes a precedence bug in the previous reduce-based version:
    `a and b=='_' or b.isalnum()` parsed as `(a and b=='_') or b.isalnum()`,
    which reset the accumulator to True on any alphanumeric character, so
    e.g. isAlpha('a-b') was True.  An empty string still yields True,
    matching reduce's behaviour with a True initializer.
    """
    return all(c == '_' or c.isalnum() for c in x)

def isQNumber(x):
    """Return True iff x is a run of digits and dots (a qualified number).

    A lone '.' is explicitly rejected, as before.  Fixes the same
    precedence bug as isAlpha: previously a trailing digit made any
    string "numeric" (isQNumber('a12') was True).
    """
    if x == '.':
        return False
    return all(c == '.' or c.isdigit() for c in x)
def mapsymbols(ts, symbol, special):
    """Replace every occurrence of `symbol` in the (nested) token list
    with the marker `special`, recursing into sublists.

    Mutates `ts` in place and also returns it, so it works both as a
    statement and in an expression position.
    """
    for idx, tok in enumerate(ts):
        if tok == symbol:
            ts[idx] = special
        elif isinstance(tok, list):
            ts[idx] = mapsymbols(tok, symbol, special)
    return ts
Level:',-j) + j = len(ss) + if debug: + print('>>>context>>>',ss[i:j]) + e = BGF.Optional() + e.setExpr(map2expr(ss[i+1:j-1])) + es.append(e) + i = j + elif ss[i] == 'START-SEPLIST-STAR-SYMBOL': + j = endOfContext(ss,i,'END-SEPLIST-STAR-SYMBOL') + if j<0: + print('Unbalanced bracketing, please fix first. Level:',-j) + j = len(ss) + if j-i != 4: + print('Incorrect separator list!') + if debug: + print('>>>context>>>',ss[i:j]) + # {x y}* => (x (yx)*)? + e = BGF.Sequence() + x = map2expr([ss[i+1]]) + y = map2expr([ss[i+2]]) + e.add(x) + e2 = BGF.Sequence() + e2.add(y) + e2.add(x) + s = BGF.Star() + s.setExpr(BGF.Expression(e2)) + e.add(BGF.Expression(s)) + e2 = BGF.Optional() + e2.setExpr(e) + es.append(e2) + i = j + elif ss[i] == 'START-SEPLIST-PLUS-SYMBOL': + j = endOfContext(ss,i,'END-SEPLIST-PLUS-SYMBOL') + if j<0: + print('Unbalanced bracketing, please fix first.') + j = len(ss) + if j-i != 4: + print('Incorrect separator list!') + if debug: + print('>>>context>>>',ss[i:j]) + # {x y}+ => (x (yx)*) + e = BGF.Sequence() + x = map2expr([ss[i+1]]) + y = map2expr([ss[i+2]]) + e.add(x) + e2 = BGF.Sequence() + e2.add(y) + e2.add(x) + s = BGF.Star() + s.setExpr(BGF.Expression(e2)) + e.add(BGF.Expression(s)) + es.append(e) + i = j + elif ss[i] == 'START-GROUP-SYMBOL': + j = endOfContext(ss,i,'END-GROUP-SYMBOL') + if j<0: + print('Unbalanced bracketing, please fix first.') + j = len(ss) + if i == 0 and j == len(ss): + ss = ss[1:-1] + continue + if debug: + print('>>>context>>>',ss[i:j]) + es.append(map2expr(ss[i+1:j-1])) + i = j + elif ss[i] == 'DEFINITION-SEPARATOR-SYMBOL': + if debug: + print('It is a bar! 
def assembleQualifiedNumbers(ts):
    """Glue adjacent number-like tokens back into single tokens.

    The tokeniser splits a dotted literal such as '1.2' into
    ['1', '.', '2']; this pass appends any token that is itself a
    qualified number or a lone '.' onto a preceding number-like token,
    so qualified numbers survive as one token.
    """
    merged = []
    for tok in ts:
        extendable = bool(merged) and (isQNumber(tok) or tok == '.') and isQNumber(merged[-1])
        if extendable:
            merged[-1] += tok
        else:
            merged.append(tok)
    return merged
def decomposeSymbols(p, defd):
    # p is a production as [label, nonterminal, symbol, ...]; defd is the
    # list of nonterminals defined in the grammar.  Returns the production
    # with compound undefined symbols split into runs of known parts
    # (via splitString) where every part can be accounted for.
    result = p[:2]
    for sym in p[2:]:
        if sym in defd:
            # a defined nonterminal: keep as-is
            result.append(sym)
        elif sym in always_terminals:
            # configured exception: keep as-is
            result.append(sym)
        elif sym[0] == config['start-terminal-symbol'] and sym[-1] == config['end-terminal-symbol']:
            # already a quoted terminal: keep as-is
            result.append(sym)
        else:
            # undefined and not dismissible: try to split it apart
            parts = splitString(sym, defd)
            if len(parts) == 1:
                result.append(sym)
            else:
                # accept the split only if every part is either defined or
                # passes the nonterminal-if-contains heuristic
                acceptable = True
                for part in parts:
                    if part in defd:
                        continue
                    if 'nonterminal-if-contains' in config.keys() and part.find(config['nonterminal-if-contains']) < 0:
                        continue
                    acceptable = False
                if acceptable:
                    print('STEP 7:', sym, 'matches as', parts)
                    result.extend(parts)
                else:
                    result.append(sym)
    return result
def postfix2confix(p):
    # Rewrite postfix repetition markers (x* / x+) into paired confix
    # markers (START-...-SYMBOL x END-...-SYMBOL), one occurrence per pass.
    # Operators in impossible positions are demoted to plain terminals.
    for marker in ('POSTFIX-REPETITION-PLUS-SYMBOL', 'POSTFIX-REPETITION-STAR-SYMBOL'):
        while marker in p:
            w = p.index(marker)
            start_tag = marker.replace('POSTFIX-REPETITION', 'START')
            end_tag = marker.replace('POSTFIX-REPETITION', 'END')
            if w == 0:
                # nothing precedes the operator: demote it to a terminal
                print('STEP 6: Impossible place for postfix operator, converted to a terminal.')
                p[w] = config['start-terminal-symbol'] + p[w] + config['end-terminal-symbol']
            elif 'end-group-symbol' in config.keys() and p[w - 1] == config['end-group-symbol']:
                # the operator follows a group: wrap the whole group
                j = startOfContext(p, w - 1, config['start-group-symbol'])
                if j < 0:
                    print('STEP 6: Impossible to balance the group preceding a postfix operator, converted it to a terminal')
                    p[w] = config['start-terminal-symbol'] + p[w] + config['end-terminal-symbol']
                else:
                    print('STEP 6: Converted postfix repetition to confix notation.')
                    p[w - 1] = end_tag
                    p[j] = start_tag
                    p = p[:w] + p[w + 1:]
            else:
                # the operator follows a single element: wrap just that element
                print('STEP 6: Converted postfix repetition to confix notation.')
                p = p[:w - 1] + [start_tag, p[w - 1], end_tag] + p[w + 1:]
    return p
[config['start-terminal-symbol']+x+config['end-terminal-symbol'] if len(x)>1 and x[0]!=config['start-terminal-symbol'] and x.isupper() else x for x in tokens] + tokens = [config['start-terminal-symbol']+x+config['end-terminal-symbol'] if x==config['start-terminal-symbol']+config['end-terminal-symbol'] or x==config['start-terminal-symbol'] else x for x in tokens] + if debug: + print(tokens) + # STEP 2: assemble nonterminal symbols + print('STEP 2: assembling nonterminal symbols according to start-nonterminal-symbol and end-nonterminal-symbol.') + if 'start-nonterminal-symbol' in config.keys() and 'end-nonterminal-symbol' in config.keys(): + tokens = assembleBracketedSymbols(tokens,config['start-nonterminal-symbol'],config['end-nonterminal-symbol']) + else: + print('STEP 2 skipped, sorry: start-nonterminal-symbol and end-nonterminal-symbol are not both specified.') + # STEP 3: assembling composite metasymbols together + print('STEP 3: assembling metasymbols according to their possible values.') + tokens = assembleQualifiedNumbers(tokens) + for k in config.keys(): + if len(config[k])>1: + print('STEP 3: going to glue tokens that resemble metasymbol', config[k].replace('\n','\\n')) + tokens = mapglue(tokens,config[k]) + if debug: + print(tokens) + # STEP 4: slice according to defining-symbol + print('STEP 4: splitting the token stream into productions according to defining-symbol.') + if 'defining-symbol' in config.keys(): + prods = useDefiningSymbol(tokens,config['defining-symbol']) + else: + print('STEP 4 skipped, sorry: defining-symbol is not specified.') + # TODO + # STEP 4a.1: [sanity check] Infer terminator-symbol + print('STEP 4: inferring terminator-symbol by looking at the productions.') + if debug: + print(prods) + if 'terminator-symbol' not in config.keys(): + ts = findCommonTail(prods[:-1]) + if ts: + print('STEP 4 successful: inferred terminator-symbol:',ts) + config['terminator-symbol'] = ts + need2fix = [-1] + else: + (need2fix,ts,prob) = 
findMostProbableTail(prods) + if ts: + print('STEP 4 successful: inferred the most probable terminator-symbol:',repr(ts[0]),',','%i'%prob+'% sure') + config['terminator-symbol'] = ts[0] + else: + # ORLY? + print('STEP 4 unsuccessful, sorry.') + for p in prods: + print('%40s'%p[1],'>>>>>>',p[-2:]) + # STEP 4a.2: adjusting the terminator-symbol on the unfit productions + poststep4 = 0 + for f in need2fix: + for i in range(0,len(config['terminator-symbol'])): + if prods[f][-len(config['terminator-symbol'])+i:] == config['terminator-symbol'][:len(config['terminator-symbol'])-i]: + prods[f] = prods[f][:-len(config['terminator-symbol'])+i] + prods[f].extend(config['terminator-symbol']) + poststep4 += 1 + break + if poststep4 > 0: + print('STEP 4 also adjusted',poststep4,'productions that did not quite fit the expectations.') + # STEP 4b: splitting the token stream into productions according to terminator-symbol; inferring defining-symbol + # TODO + prods = [p[:-(len(config['terminator-symbol']))] if p[-(len(config['terminator-symbol'])):] == config['terminator-symbol'] else p for p in prods] + # STEP 5: slice insides according to definition-separator-symbol + step5 = False + for s in ('definition-separator-symbol','postfix-repetition-star-symbol','postfix-repetition-plus-symbol','start-group-symbol','end-group-symbol','start-star-symbol','end-star-symbol','start-plus-symbol','end-plus-symbol','start-seplist-star-symbol','end-seplist-star-symbol','start-seplist-plus-symbol','end-seplist-plus-symbol','start-option-symbol','end-option-symbol'): + if s in config.keys(): + print('STEP 5: marking',s+'.') + step5 = True + prods = [[s.upper() if x==config[s] else x for x in p] for p in prods] + #prods = list(map(lambda p:list(map(lambda x:s.upper() if x==config[s] else x,p)),prods)) + if not step5: + print('STEP 5 skipped: sorry, no metasymbols specified.') + # STEP 6: validating metasymbols + prods = list(map(postfix2confix,prods)) + prods = list(map(balanceProd,prods)) + # 
STEP 7: various commands + print('STEP 7: executing special extraction commands.') + step7 = False + defined = [x[1] for x in prods] + if debug: + print('Defined are',defined) + defined.append(config['defining-symbol']) + if 'ignore-extra-newlines' in config.keys(): + print('STEP 7: ignoring extra newlines.') + step7 = True + prods = [list(filter(lambda y:y!='\n',p)) for p in prods] + #prods = list(map(lambda x:filter(lambda y:y!='\n',x),prods)) + if 'decompose-symbols' in config.keys(): + print('STEP 7 (part of rule 4): decomposing compound symbols.') + step7 = True + prods = [decomposeSymbols(x,defined) for x in prods] + if 'undefined-nonterminals-are-terminals' in config.keys(): + print('STEP 7 (rule 5): turning undefined nonterminals into terminals.') + step7 = True + prods = [[convert2terminal(x,defined) for x in p] for p in prods] + #for p in prods: + # print(p[1],'is defined as',p[2:]) + if 'glue-nonalphanumeric-terminals' in config.keys(): + print('STEP 7 (part of rule 3): glueing non-alphanumeric terminal symbols together.') + step7 = True + prods = list(map(glueTerminals,prods)) + if not step7: + print('STEP 7 skipped, sorry: no special commands found in the configuration.') + # STEP X: validating bracketing? + # ... 
+ # RESULT + if debug: + print('RESULT:') + for p in prods: + print(p[0],'is defined as:') + print('\t',p[2:]) + # FINAL STEP: compose BGF + bgf = BGF.Grammar() + for q in prods: + p = BGF.Production() + if 'disregard-labels' not in config.keys() and q[0]: + p.setLabel(q[0]) + p.setNT(q[1]) + p.setExpr(map2expr(q[2:])) + bgf.addProd(p) + ET.ElementTree(bgf.getXml()).write(sys.argv[3]) + print('FINAL STEP: BGF written.') diff --git a/topics/investigation/analysis/overview_bgf.py b/topics/investigation/analysis/overview_bgf.py index 8511fb4c..2776b1a2 100755 --- a/topics/investigation/analysis/overview_bgf.py +++ b/topics/investigation/analysis/overview_bgf.py @@ -23,7 +23,7 @@ def isnotalpha(x): mb = bench.MeasurementBench(grammar) print '' wrap = lambda x,y:map(lambda z:'<'+x+'>'+z+'',y) - print ''.join(wrap('bottom',mb.getBottoms()))+''.join(wrap('top',mb.getDeadTops())) + print (''.join(wrap('bottom',mb.getBottoms()))+''.join(wrap('top',mb.getDeadTops()))).replace('&','&') terms = metrics.term(grammar) htmlify = lambda s:map(lambda x:x.replace('&','&').replace('>','>').replace('<','<'),s) print ''.join(wrap('keyword',htmlify(filter(isalpha,terms))))+''.join(wrap('terminal',htmlify(filter(isnotalpha,terms))))