diff --git a/topics/grammars/Makefile.grammar b/topics/grammars/Makefile.grammar index affc7eba..041aef42 100644 --- a/topics/grammars/Makefile.grammar +++ b/topics/grammars/Makefile.grammar @@ -1,6 +1,6 @@ tooldir = ../../../../shared/tools sdflibdir = ../../../../../sdf-library/trunk/library/languages -hunter = ../../hunter.py +hunter = ../../../recovery/hunter/hunter.py clean: rm -f *.bgf *.bnf *.html intermediate.lll tmp.xml *prepared* diff --git a/topics/grammars/Makefile.language b/topics/grammars/Makefile.language index 60758471..27cc0b5b 100644 --- a/topics/grammars/Makefile.language +++ b/topics/grammars/Makefile.language @@ -5,11 +5,11 @@ extract: cd $* && make extract diff: + make extract cat ready.lst | xargs -n1 -I _ make _.diff %.diff: - make $*.extract - gdt $*.bgf $*/grammar.bgf || exit -1 + gdts $*.bgf $*/grammar.bgf save: cat ready.lst | xargs -n1 -I _ make _.save diff --git a/topics/grammars/metasyntax/ebnf-iso-2/Makefile b/topics/grammars/metasyntax/ebnf-iso-2/Makefile index 6a85671a..60be2e0a 100644 --- a/topics/grammars/metasyntax/ebnf-iso-2/Makefile +++ b/topics/grammars/metasyntax/ebnf-iso-2/Makefile @@ -1,5 +1,5 @@ extract: - ../../hunter.py src.8.2.txt config.edd raw.bgf + ${hunter} src.8.2.txt config.edd raw.bgf ${tooldir}/xbgf post-extract.xbgf raw.bgf ext.bgf ${tooldir}/xbgf refactor.xbgf ext.bgf grammar.bgf diff --git a/topics/grammars/metasyntax/ebnf-iso-3/Makefile b/topics/grammars/metasyntax/ebnf-iso-3/Makefile index 9fb1a340..3f90abc8 100644 --- a/topics/grammars/metasyntax/ebnf-iso-3/Makefile +++ b/topics/grammars/metasyntax/ebnf-iso-3/Makefile @@ -1,5 +1,5 @@ extract: - ../../hunter.py src.8.3.txt config.edd raw.bgf + ${hunter} src.8.3.txt config.edd raw.bgf ${tooldir}/xbgf post-extract.xbgf raw.bgf ext.bgf ${tooldir}/xbgf refactor.xbgf ext.bgf grammar.bgf diff --git a/topics/recovery/hunter/Makefile b/topics/recovery/hunter/Makefile new file mode 100644 index 00000000..b331c0fa --- /dev/null +++ b/topics/recovery/hunter/Makefile @@ -0,0 +1,9 @@ +build: + +test: + cd tests && make test + +clean: + cd tests && make clean + rm -f ~* *.bnf + diff --git a/topics/grammars/hunter.py b/topics/recovery/hunter/hunter.py similarity index 93% rename from topics/grammars/hunter.py rename to topics/recovery/hunter/hunter.py index 65c8a658..a2e505ce 100755 --- a/topics/grammars/hunter.py +++ b/topics/recovery/hunter/hunter.py @@ -21,6 +21,8 @@ 'DEFINITION-SEPARATOR-SYMBOL', 'START-TERMINAL-SYMBOL', 'END-TERMINAL-SYMBOL', + 'START-NONTERMINAL-SYMBOL', + 'END-NONTERMINAL-SYMBOL', 'START-GROUP-SYMBOL', 'END-GROUP-SYMBOL', 'START-OPTION-SYMBOL', @@ -270,19 +272,21 @@ def findCommonTail(ps): tail.reverse() return tail -def assembleBracketedSymbols(ts,start,end): +def assembleBracketedSymbols(ts,start,end,preserveSpace): tss = [] - terminal = False + inside = False i = 0 while (i>>>>',poss) @@ -354,7 +358,7 @@ def findSpecialGroups(ats,start,end): level -= 1 poss[1][lp.pop(level)] = (i,j) if len(poss[0]) != len(poss[1]): - print('STEP 7 deadlock: number of start-?-symbol and end-?-symbol occurrences do not match.') + print('STEP 8 deadlock: number of start-?-symbol and end-?-symbol occurrences do not match.') return ats if debug and poss[0]: print('poss >>>>>',poss) @@ -541,7 +545,13 @@ def map2expr(ss): if debug: print('NONTERMINAL',ss[i]) e = BGF3.Nonterminal() - e.setName(ss[i]) + n = ss[i] + if 'start-nonterminal-symbol' in config.keys() or 'end-nonterminal-symbol' in config.keys(): + if n[:len(config['start-nonterminal-symbol'])] == config['start-nonterminal-symbol']: + n = n[len(config['start-nonterminal-symbol']):] + if n[-len(config['end-nonterminal-symbol']):] == config['end-nonterminal-symbol']: + n = n[:-len(config['end-nonterminal-symbol'])] + e.setName(n) es.append(e) i += 1 ess.append(es) @@ -715,7 +725,7 @@ def decomposeSymbols(p,defd): else: pos = False if pos: - print('STEP 7:',x,'matches as',var) + print('STEP 8:',x,'matches as',var) q.extend(var) # todo: need to be adjusted if the order of phases is changed #q.append(config['start-terminal-symbol']+t+config['end-terminal-symbol']) @@ -777,14 +787,14 @@ def balanceProd(p): else: fail = True if fail: - print('STEP 6: Cannot balance a production, reverting',oldpi,'to a terminal.') + print('STEP 7: Cannot balance a production, reverting',oldpi,'to a terminal.') p[i] = config['start-terminal-symbol']+config[oldpi.lower()]+config['end-terminal-symbol'] i += 1 elif p[i] == oldpi: - print('STEP 6: Problem at',oldpi,'in',p) + print('STEP 7: Problem at',oldpi,'in',p) i += 1 else: - print('STEP 6: Rebalanced ambiguity of',oldpi,'with',p[i]) + print('STEP 7: Rebalanced ambiguity of',oldpi,'with',p[i]) i = j else: i = j @@ -795,18 +805,18 @@ def postfix2confix(p): while s in p: w = p.index(s) if w == 0: - print('STEP 6: Impossible place for postfix operator, converted to a terminal.') + print('STEP 7: Impossible place for postfix operator, converted to a terminal.') p[w] = config['start-terminal-symbol']+p[w]+config['end-terminal-symbol'] continue if 'end-group-symbol' in config.keys() and p[w-1] == config['end-group-symbol']: # group j = startOfContext(p,w-1,config['start-group-symbol']) if j<0: - print('STEP 6: Impossible to balance the group preceding a postfix operator, converted it to a terminal') + print('STEP 7: Impossible to balance the group preceding a postfix operator, converted it to a terminal') p[w] = config['start-terminal-symbol']+p[w]+config['end-terminal-symbol'] continue else: - print('STEP 6: Converted postfix metasymbol to confix notation.') + print('STEP 7: Converted postfix metasymbol to confix notation.') p[w-1] = s.replace('POSTFIX','END') p[j] = s.replace('POSTFIX','START') q = p[:w] @@ -814,7 +824,7 @@ def postfix2confix(p): p = q else: # single element - print('STEP 6: Converted postfix metasymbol to confix notation.') + print('STEP 7: Converted postfix metasymbol to confix notation.') q = p[:w-1] q.append(s.replace('POSTFIX','START')) q.append(p[w-1]) @@ -918,7 +928,7 @@ def considerIndentation(ts): print('Token stream:',tokens) if 'start-terminal-symbol' in config.keys() and 'end-terminal-symbol' in config.keys(): tokens = [config['start-terminal-symbol']+masked[x]+config['end-terminal-symbol'] if x in masked.keys() else x for x in tokens] - tokens = assembleBracketedSymbols(tokens,config['start-terminal-symbol'],config['end-terminal-symbol']) + tokens = assembleBracketedSymbols(tokens,config['start-terminal-symbol'],config['end-terminal-symbol'],False) else: print('STEP 1 was of limited use, sorry: start-terminal-symbol and end-terminal-symbol are not both specified.') # technically we still need them to denote terminals in our internal representation @@ -943,7 +953,7 @@ def considerIndentation(ts): # STEP 2: assemble nonterminal symbols print('STEP 2: assembling nonterminal symbols.') if 'start-nonterminal-symbol' in config.keys() and 'end-nonterminal-symbol' in config.keys(): - tokens = assembleBracketedSymbols(tokens,config['start-nonterminal-symbol'],config['end-nonterminal-symbol']) + tokens = assembleBracketedSymbols(tokens,config['start-nonterminal-symbol'],config['end-nonterminal-symbol'],True) else: print('STEP 2 skipped, sorry: start-nonterminal-symbol and end-nonterminal-symbol are not both specified.') # STEP 3: assembling composite metasymbols together @@ -1045,8 +1055,16 @@ def considerIndentation(ts): # STEP 4b: splitting the token stream into productions according to terminator-symbol; inferring defining-symbol # TODO prods = [p[:-(len(config['terminator-symbol']))] if p[-(len(config['terminator-symbol'])):] == config['terminator-symbol'] else p for p in prods] - # STEP 5: slice insides according to definition-separator-symbol - step5 = False + # STEP 5: decompose symbols + defined = [x[1] for x in prods] + if debug: + print('Defined are',defined) + defined.extend(config.keys()) + if 'decompose-symbols' in config.keys(): + print('STEP 5 (part of rule 4): decomposing compound symbols.') + prods = [decomposeSymbols(x,defined) for x in prods] + # STEP 6: slice insides according to definition-separator-symbol + step6 = False for s in \ ('definition-separator-symbol' ,'postfix-repetition-star-symbol' @@ -1065,13 +1083,13 @@ def considerIndentation(ts): ,'start-option-symbol' ,'end-option-symbol'): if s in config.keys(): - print('STEP 5: marking',s+'.') - step5 = True + print('STEP 6: marking',s+'.') + step6 = True prods = [[s.upper() if x==config[s] else x for x in p] for p in prods] #prods = list(map(lambda p:list(map(lambda x:s.upper() if x==config[s] else x,p)),prods)) - if not step5: - print('STEP 5 skipped: sorry, no metasymbols specified.') - # STEP 6: validating metasymbols + if not step6: + print('STEP 6 skipped: sorry, no metasymbols specified.') + # STEP 7: validating metasymbols if debug: print('The grammar is perceived like this:') for p in prods: @@ -1082,35 +1100,27 @@ def considerIndentation(ts): print('The grammar is perceived like this:') for p in prods: print('\t',p[1],'is defined as',p[2:]) - # STEP 7: various commands - print('STEP 7: executing special extraction commands.') - step7 = False - defined = [x[1] for x in prods] - if debug: - print('Defined are',defined) - defined.append(config['defining-symbol']) + # STEP 8: various commands + print('STEP 8: executing special extraction commands.') + step8 = False if len(ignore_tokens)>0: - print('STEP 7: ignoring extra tokens.') - step7 = True + print('STEP 8: ignoring extra tokens.') + step8 = True for x in ignore_tokens: prods = [list(filter(lambda y:y!=x,p)) for p in prods] #prods = list(map(lambda x:filter(lambda y:y!='\n',x),prods)) - if 'decompose-symbols' in config.keys(): - print('STEP 7 (part of rule 4): decomposing compound symbols.') - step7 = True - prods = [decomposeSymbols(x,defined) for x in prods] if 'undefined-nonterminals-are-terminals' in config.keys(): - print('STEP 7 (rule 5): turning undefined nonterminals into terminals.') - step7 = True + print('STEP 8 (rule 5): turning undefined nonterminals into terminals.') + step8 = True prods = [[convert2terminal(x,defined) for x in p] for p in prods] #for p in prods: # print(p[1],'is defined as',p[2:]) if 'glue-nonalphanumeric-terminals' in config.keys(): - print('STEP 7 (part of rule 3): glueing non-alphanumeric terminal symbols together.') - step7 = True + print('STEP 8 (part of rule 3): glueing non-alphanumeric terminal symbols together.') + step8 = True prods = list(map(glueTerminals,prods)) - if not step7: - print('STEP 7 skipped, sorry: no special commands found in the configuration.') + if not step8: + print('STEP 8 skipped, sorry: no special commands found in the configuration.') # STEP X: validating bracketing? # ... # RESULT diff --git a/topics/recovery/hunter/tests/0-ignored.bgf b/topics/recovery/hunter/tests/0-ignored.bgf new file mode 100644 index 00000000..fba5ce6a --- /dev/null +++ b/topics/recovery/hunter/tests/0-ignored.bgf @@ -0,0 +1,21 @@ + + + + foo + + bar + + + + bar + + wez + + + + wez + + foo + + + diff --git a/topics/recovery/hunter/tests/0-ignored.edd b/topics/recovery/hunter/tests/0-ignored.edd new file mode 100644 index 00000000..c0484563 --- /dev/null +++ b/topics/recovery/hunter/tests/0-ignored.edd @@ -0,0 +1,8 @@ + + + : + \n + + // + + diff --git a/topics/recovery/hunter/tests/0-ignored.src b/topics/recovery/hunter/tests/0-ignored.src new file mode 100644 index 00000000..24db2b71 --- /dev/null +++ b/topics/recovery/hunter/tests/0-ignored.src @@ -0,0 +1,6 @@ +// this is foo +foo : bar +bar : wez +// this is not foo +wez : foo + diff --git a/topics/recovery/hunter/tests/0-simple.bgf b/topics/recovery/hunter/tests/0-simple.bgf new file mode 100644 index 00000000..fba5ce6a --- /dev/null +++ b/topics/recovery/hunter/tests/0-simple.bgf @@ -0,0 +1,21 @@ + + + + foo + + bar + + + + bar + + wez + + + + wez + + foo + + + diff --git a/topics/recovery/hunter/tests/0-simple.edd b/topics/recovery/hunter/tests/0-simple.edd new file mode 100644 index 00000000..7f2234bf --- /dev/null +++ b/topics/recovery/hunter/tests/0-simple.edd @@ -0,0 +1,5 @@ + + + : + \n + diff --git a/topics/recovery/hunter/tests/0-simple.src b/topics/recovery/hunter/tests/0-simple.src new file mode 100644 index 00000000..6ce7dd7f --- /dev/null +++ b/topics/recovery/hunter/tests/0-simple.src @@ -0,0 +1,4 @@ +foo : bar +bar : wez +wez : foo + diff --git a/topics/recovery/hunter/tests/1-comment.bgf b/topics/recovery/hunter/tests/1-comment.bgf new file mode 100644 index 00000000..fba5ce6a --- /dev/null +++ b/topics/recovery/hunter/tests/1-comment.bgf @@ -0,0 +1,21 @@ + + + + foo + + bar + + + + bar + + wez + + + + wez + + foo + + + diff --git a/topics/recovery/hunter/tests/1-comment.edd b/topics/recovery/hunter/tests/1-comment.edd new file mode 100644 index 00000000..8d43a56c --- /dev/null +++ b/topics/recovery/hunter/tests/1-comment.edd @@ -0,0 +1,7 @@ + + + : + \n + /* + */ + diff --git a/topics/recovery/hunter/tests/1-comment.src b/topics/recovery/hunter/tests/1-comment.src new file mode 100644 index 00000000..61983332 --- /dev/null +++ b/topics/recovery/hunter/tests/1-comment.src @@ -0,0 +1,4 @@ +foo : /* this is foo */ bar +bar : wez +wez : foo /* this is not foo */ + diff --git a/topics/recovery/hunter/tests/1-terminals.bgf b/topics/recovery/hunter/tests/1-terminals.bgf new file mode 100644 index 00000000..7d748b0b --- /dev/null +++ b/topics/recovery/hunter/tests/1-terminals.bgf @@ -0,0 +1,21 @@ + + + + foo + + bar + + + + bar + + wez + + + + wez + + foo + + + diff --git a/topics/recovery/hunter/tests/1-terminals.edd b/topics/recovery/hunter/tests/1-terminals.edd new file mode 100644 index 00000000..6e2300dc --- /dev/null +++ b/topics/recovery/hunter/tests/1-terminals.edd @@ -0,0 +1,7 @@ + + + : + \n + + + diff --git a/topics/recovery/hunter/tests/1-terminals.src b/topics/recovery/hunter/tests/1-terminals.src new file mode 100644 index 00000000..4d6fc406 --- /dev/null +++ b/topics/recovery/hunter/tests/1-terminals.src @@ -0,0 +1,3 @@ +foo : “bar” +bar : wez +wez : “foo” diff --git a/topics/recovery/hunter/tests/1-uppercase.bgf b/topics/recovery/hunter/tests/1-uppercase.bgf new file mode 100644 index 00000000..6235ae62 --- /dev/null +++ b/topics/recovery/hunter/tests/1-uppercase.bgf @@ -0,0 +1,21 @@ + + + + foo + + BAR + + + + bar + + wez + + + + wez + + FOO + + + diff --git a/topics/recovery/hunter/tests/1-uppercase.edd b/topics/recovery/hunter/tests/1-uppercase.edd new file mode 100644 index 00000000..a4dcc18f --- /dev/null +++ b/topics/recovery/hunter/tests/1-uppercase.edd @@ -0,0 +1,6 @@ + + + : + \n + + diff --git a/topics/recovery/hunter/tests/1-uppercase.src b/topics/recovery/hunter/tests/1-uppercase.src new file mode 100644 index 00000000..7a96970c --- /dev/null +++ b/topics/recovery/hunter/tests/1-uppercase.src @@ -0,0 +1,3 @@ +foo : BAR +bar : wez +wez : FOO diff --git a/topics/recovery/hunter/tests/1-whitespace.bgf b/topics/recovery/hunter/tests/1-whitespace.bgf new file mode 100644 index 00000000..24cd5786 --- /dev/null +++ b/topics/recovery/hunter/tests/1-whitespace.bgf @@ -0,0 +1,35 @@ + + + + foo + + + + x + + + y + + + + + + bar + + + + z + + + z + + + + + + wez + + foo + + + diff --git a/topics/recovery/hunter/tests/1-whitespace.edd b/topics/recovery/hunter/tests/1-whitespace.edd new file mode 100644 index 00000000..7f2234bf --- /dev/null +++ b/topics/recovery/hunter/tests/1-whitespace.edd @@ -0,0 +1,5 @@ + + + : + \n + diff --git a/topics/recovery/hunter/tests/1-whitespace.src b/topics/recovery/hunter/tests/1-whitespace.src new file mode 100644 index 00000000..97a07380 --- /dev/null +++ b/topics/recovery/hunter/tests/1-whitespace.src @@ -0,0 +1,3 @@ +foo : x y +bar : z z +wez : foo diff --git a/topics/recovery/hunter/tests/2-nonterminals.bgf b/topics/recovery/hunter/tests/2-nonterminals.bgf new file mode 100644 index 00000000..9ab83c9a --- /dev/null +++ b/topics/recovery/hunter/tests/2-nonterminals.bgf @@ -0,0 +1,21 @@ + + + + foo + + x y z + + + + bar + + x_y_z + + + + wez + + foo + + + diff --git a/topics/recovery/hunter/tests/2-nonterminals.edd b/topics/recovery/hunter/tests/2-nonterminals.edd new file mode 100644 index 00000000..9af5c491 --- /dev/null +++ b/topics/recovery/hunter/tests/2-nonterminals.edd @@ -0,0 +1,10 @@ + + + : + \n + + + ( + ) + + diff --git a/topics/recovery/hunter/tests/2-nonterminals.src b/topics/recovery/hunter/tests/2-nonterminals.src new file mode 100644 index 00000000..92409089 --- /dev/null +++ b/topics/recovery/hunter/tests/2-nonterminals.src @@ -0,0 +1,3 @@ +foo : “x y z” +bar : (x y z) +wez : “foo” diff --git a/topics/recovery/hunter/tests/3-composite.bgf b/topics/recovery/hunter/tests/3-composite.bgf new file mode 100644 index 00000000..fba5ce6a --- /dev/null +++ b/topics/recovery/hunter/tests/3-composite.bgf @@ -0,0 +1,21 @@ + + + + foo + + bar + + + + bar + + wez + + + + wez + + foo + + + diff --git a/topics/recovery/hunter/tests/3-composite.edd b/topics/recovery/hunter/tests/3-composite.edd new file mode 100644 index 00000000..e91a628b --- /dev/null +++ b/topics/recovery/hunter/tests/3-composite.edd @@ -0,0 +1,5 @@ + + + ::= + END\n\n + diff --git a/topics/recovery/hunter/tests/3-composite.src b/topics/recovery/hunter/tests/3-composite.src new file mode 100644 index 00000000..28f43609 --- /dev/null +++ b/topics/recovery/hunter/tests/3-composite.src @@ -0,0 +1,6 @@ +foo ::= bar END + +bar ::= wez END + +wez ::= foo END + diff --git a/topics/recovery/hunter/tests/Makefile b/topics/recovery/hunter/tests/Makefile new file mode 100644 index 00000000..bd159c8f --- /dev/null +++ b/topics/recovery/hunter/tests/Makefile @@ -0,0 +1,5 @@ +test: + ls -1 *.src | xargs -n1 -I _ basename _ .src | xargs -n1 ./testperform + +clean: + rm -f *.out *.log *.gdt diff --git a/topics/recovery/hunter/tests/testcopy b/topics/recovery/hunter/tests/testcopy new file mode 100755 index 00000000..ecfa7676 --- /dev/null +++ b/topics/recovery/hunter/tests/testcopy @@ -0,0 +1,5 @@ +#!/bin/sh + +cp $1.src $2.src +cp $1.edd $2.edd +cp $1.bgf $2.bgf diff --git a/topics/recovery/hunter/tests/testperform b/topics/recovery/hunter/tests/testperform new file mode 100755 index 00000000..d16a1f5b --- /dev/null +++ b/topics/recovery/hunter/tests/testperform @@ -0,0 +1,7 @@ +#!/bin/sh + +echo [Test Hunter] $1 +rm -f $1.log +../hunter.py $1.src $1.edd $1.out > $1.log || exit -1 +../../../../shared/tools/validate bgf $1.out 2>> $1.log || exit -1 +gdt $1.bgf $1.out > $1.gdt || exit -1 diff --git a/topics/recovery/hunter/tests/testrename b/topics/recovery/hunter/tests/testrename new file mode 100755 index 00000000..a49ec05e --- /dev/null +++ b/topics/recovery/hunter/tests/testrename @@ -0,0 +1,6 @@ +#!/bin/sh + +mv $1.src $2.src +mv $1.edd $2.edd +mv $1.bgf $2.bgf +rm -f $1.out $1.log diff --git a/topics/recovery/hunter/tests/testsave b/topics/recovery/hunter/tests/testsave new file mode 100755 index 00000000..2d661894 --- /dev/null +++ b/topics/recovery/hunter/tests/testsave @@ -0,0 +1,5 @@ +#!/bin/sh + +echo '' > $1.bgf +cat $1.out >> $1.bgf +perl -pi -w -e 's/ns0/bgf/g;' $1.bgf