Skip to content

Commit

Permalink
Grammar Hunter got its own place; test set place initiated
Browse files Browse the repository at this point in the history
git-svn-id: https://slps.svn.sourceforge.net/svnroot/slps@1085 ab42f6e0-554d-0410-b580-99e487e6eeb2
  • Loading branch information
grammarware committed Jun 9, 2011
1 parent 4edc3e6 commit 8944eaa
Show file tree
Hide file tree
Showing 35 changed files with 363 additions and 49 deletions.
2 changes: 1 addition & 1 deletion topics/grammars/Makefile.grammar
@@ -1,6 +1,6 @@
tooldir = ../../../../shared/tools
sdflibdir = ../../../../../sdf-library/trunk/library/languages
hunter = ../../hunter.py
hunter = ../../../recovery/hunter/hunter.py

clean:
rm -f *.bgf *.bnf *.html intermediate.lll tmp.xml *prepared*
Expand Down
4 changes: 2 additions & 2 deletions topics/grammars/Makefile.language
Expand Up @@ -5,11 +5,11 @@ extract:
cd $* && make extract

# Re-run extraction, then invoke the `<name>.diff` target once for every
# entry listed in ready.lst.
# $(MAKE) is used instead of a literal `make` so that command-line flags
# (-n, -k, -j and the jobserver) reach the sub-makes.
.PHONY: diff
diff:
	$(MAKE) extract
	cat ready.lst | xargs -n1 -I _ $(MAKE) _.diff

%.diff:
make $*.extract
gdt $*.bgf $*/grammar.bgf || exit -1
gdts $*.bgf $*/grammar.bgf

save:
cat ready.lst | xargs -n1 -I _ make _.save
Expand Down
2 changes: 1 addition & 1 deletion topics/grammars/metasyntax/ebnf-iso-2/Makefile
@@ -1,5 +1,5 @@
extract:
../../hunter.py src.8.2.txt config.edd raw.bgf
${hunter} src.8.2.txt config.edd raw.bgf
${tooldir}/xbgf post-extract.xbgf raw.bgf ext.bgf
${tooldir}/xbgf refactor.xbgf ext.bgf grammar.bgf

Expand Down
2 changes: 1 addition & 1 deletion topics/grammars/metasyntax/ebnf-iso-3/Makefile
@@ -1,5 +1,5 @@
extract:
../../hunter.py src.8.3.txt config.edd raw.bgf
${hunter} src.8.3.txt config.edd raw.bgf
${tooldir}/xbgf post-extract.xbgf raw.bgf ext.bgf
${tooldir}/xbgf refactor.xbgf ext.bgf grammar.bgf

Expand Down
9 changes: 9 additions & 0 deletions topics/recovery/hunter/Makefile
@@ -0,0 +1,9 @@
# None of these targets produces a file of the same name; declaring them
# phony keeps them runnable even if a file or directory called `build`,
# `test` or `clean` appears next to this Makefile.
.PHONY: build test clean

# Intentionally empty: this directory has no build step.
build:

# Delegate to the test suite's own Makefile. $(MAKE) (rather than a literal
# `make`) forwards -n/-k/-j and the jobserver to the sub-make.
test:
	$(MAKE) -C tests test

# Clean the test suite first, then local leftovers.
# NOTE(review): `~*` matches names that BEGIN with a tilde; if editor
# backup files (`*~`) were meant, confirm and adjust the pattern.
clean:
	$(MAKE) -C tests clean
	rm -f ~* *.bnf

98 changes: 54 additions & 44 deletions topics/grammars/hunter.py → topics/recovery/hunter/hunter.py
Expand Up @@ -21,6 +21,8 @@
'DEFINITION-SEPARATOR-SYMBOL',
'START-TERMINAL-SYMBOL',
'END-TERMINAL-SYMBOL',
'START-NONTERMINAL-SYMBOL',
'END-NONTERMINAL-SYMBOL',
'START-GROUP-SYMBOL',
'END-GROUP-SYMBOL',
'START-OPTION-SYMBOL',
Expand Down Expand Up @@ -270,19 +272,21 @@ def findCommonTail(ps):
tail.reverse()
return tail

def assembleBracketedSymbols(ts,start,end,preserveSpace=False):
    """Glue every token run delimited by start ... end into one token.

    ts            -- flat list of string tokens
    start, end    -- opening and closing symbols; they may be identical
                     and may be longer than one character
    preserveSpace -- when true, tokens glued inside a run are separated by
                     a single space, except directly after the opening
                     symbol and directly before the closing symbol.
                     Defaults to False so that three-argument callers keep
                     working.

    Returns a new list; tokens outside bracketed runs are copied unchanged.
    A run that is never closed absorbs all remaining tokens.
    """
    tss = []
    inside = False
    for t in ts:
        if not inside:
            tss.append(t)
            # An opening symbol starts a run that swallows following tokens.
            inside = t == start
            continue
        # Compare the WHOLE accumulated token with start, not just its last
        # character: a one-character comparison never matches a
        # multi-character start symbol (a space would then be inserted right
        # after the opening bracket), and it wrongly matches any inner token
        # that merely ends with the start character.
        if preserveSpace and t != end and tss[-1] != start:
            tss[-1] += ' '
        tss[-1] += t
        if t == end:
            inside = False
    return tss

Expand All @@ -302,7 +306,7 @@ def findGroups(ats,start,end):
level -= 1
poss[1][lp.pop(level)] = (i,j)
if len(poss[0]) != len(poss[1]):
print('STEP 7 deadlock: number of start-group-symbol and end-group-symbol occurrences do not match.')
print('STEP 8 deadlock: number of start-group-symbol and end-group-symbol occurrences do not match.')
return ats
if debug and poss[0]:
print('poss >>>>>',poss)
Expand Down Expand Up @@ -354,7 +358,7 @@ def findSpecialGroups(ats,start,end):
level -= 1
poss[1][lp.pop(level)] = (i,j)
if len(poss[0]) != len(poss[1]):
print('STEP 7 deadlock: number of start-?-symbol and end-?-symbol occurrences do not match.')
print('STEP 8 deadlock: number of start-?-symbol and end-?-symbol occurrences do not match.')
return ats
if debug and poss[0]:
print('poss >>>>>',poss)
Expand Down Expand Up @@ -541,7 +545,13 @@ def map2expr(ss):
if debug:
print('NONTERMINAL',ss[i])
e = BGF3.Nonterminal()
e.setName(ss[i])
n = ss[i]
if 'start-nonterminal-symbol' in config.keys() or 'end-nonterminal-symbol' in config.keys():
if n[:len(config['start-nonterminal-symbol'])] == config['start-nonterminal-symbol']:
n = n[len(config['start-nonterminal-symbol']):]
if n[-len(config['end-nonterminal-symbol']):] == config['end-nonterminal-symbol']:
n = n[:-len(config['end-nonterminal-symbol'])]
e.setName(n)
es.append(e)
i += 1
ess.append(es)
Expand Down Expand Up @@ -715,7 +725,7 @@ def decomposeSymbols(p,defd):
else:
pos = False
if pos:
print('STEP 7:',x,'matches as',var)
print('STEP 8:',x,'matches as',var)
q.extend(var)
# todo: need to be adjusted if the order of phases is changed
#q.append(config['start-terminal-symbol']+t+config['end-terminal-symbol'])
Expand Down Expand Up @@ -777,14 +787,14 @@ def balanceProd(p):
else:
fail = True
if fail:
print('STEP 6: Cannot balance a production, reverting',oldpi,'to a terminal.')
print('STEP 7: Cannot balance a production, reverting',oldpi,'to a terminal.')
p[i] = config['start-terminal-symbol']+config[oldpi.lower()]+config['end-terminal-symbol']
i += 1
elif p[i] == oldpi:
print('STEP 6: Problem at',oldpi,'in',p)
print('STEP 7: Problem at',oldpi,'in',p)
i += 1
else:
print('STEP 6: Rebalanced ambiguity of',oldpi,'with',p[i])
print('STEP 7: Rebalanced ambiguity of',oldpi,'with',p[i])
i = j
else:
i = j
Expand All @@ -795,26 +805,26 @@ def postfix2confix(p):
while s in p:
w = p.index(s)
if w == 0:
print('STEP 6: Impossible place for postfix operator, converted to a terminal.')
print('STEP 7: Impossible place for postfix operator, converted to a terminal.')
p[w] = config['start-terminal-symbol']+p[w]+config['end-terminal-symbol']
continue
if 'end-group-symbol' in config.keys() and p[w-1] == config['end-group-symbol']:
# group
j = startOfContext(p,w-1,config['start-group-symbol'])
if j<0:
print('STEP 6: Impossible to balance the group preceding a postfix operator, converted it to a terminal')
print('STEP 7: Impossible to balance the group preceding a postfix operator, converted it to a terminal')
p[w] = config['start-terminal-symbol']+p[w]+config['end-terminal-symbol']
continue
else:
print('STEP 6: Converted postfix metasymbol to confix notation.')
print('STEP 7: Converted postfix metasymbol to confix notation.')
p[w-1] = s.replace('POSTFIX','END')
p[j] = s.replace('POSTFIX','START')
q = p[:w]
q.extend(p[w+1:])
p = q
else:
# single element
print('STEP 6: Converted postfix metasymbol to confix notation.')
print('STEP 7: Converted postfix metasymbol to confix notation.')
q = p[:w-1]
q.append(s.replace('POSTFIX','START'))
q.append(p[w-1])
Expand Down Expand Up @@ -918,7 +928,7 @@ def considerIndentation(ts):
print('Token stream:',tokens)
if 'start-terminal-symbol' in config.keys() and 'end-terminal-symbol' in config.keys():
tokens = [config['start-terminal-symbol']+masked[x]+config['end-terminal-symbol'] if x in masked.keys() else x for x in tokens]
tokens = assembleBracketedSymbols(tokens,config['start-terminal-symbol'],config['end-terminal-symbol'])
tokens = assembleBracketedSymbols(tokens,config['start-terminal-symbol'],config['end-terminal-symbol'],False)
else:
print('STEP 1 was of limited use, sorry: start-terminal-symbol and end-terminal-symbol are not both specified.')
# technically we still need them to denote terminals in our internal representation
Expand All @@ -943,7 +953,7 @@ def considerIndentation(ts):
# STEP 2: assemble nonterminal symbols
print('STEP 2: assembling nonterminal symbols.')
if 'start-nonterminal-symbol' in config.keys() and 'end-nonterminal-symbol' in config.keys():
tokens = assembleBracketedSymbols(tokens,config['start-nonterminal-symbol'],config['end-nonterminal-symbol'])
tokens = assembleBracketedSymbols(tokens,config['start-nonterminal-symbol'],config['end-nonterminal-symbol'],True)
else:
print('STEP 2 skipped, sorry: start-nonterminal-symbol and end-nonterminal-symbol are not both specified.')
# STEP 3: assembling composite metasymbols together
Expand Down Expand Up @@ -1045,8 +1055,16 @@ def considerIndentation(ts):
# STEP 4b: splitting the token stream into productions according to terminator-symbol; inferring defining-symbol
# TODO
prods = [p[:-(len(config['terminator-symbol']))] if p[-(len(config['terminator-symbol'])):] == config['terminator-symbol'] else p for p in prods]
# STEP 5: slice insides according to definition-separator-symbol
step5 = False
# STEP 5: decompose symbols
defined = [x[1] for x in prods]
if debug:
print('Defined are',defined)
defined.extend(config.keys())
if 'decompose-symbols' in config.keys():
print('STEP 5 (part of rule 4): decomposing compound symbols.')
prods = [decomposeSymbols(x,defined) for x in prods]
# STEP 6: slice insides according to definition-separator-symbol
step6 = False
for s in \
('definition-separator-symbol'
,'postfix-repetition-star-symbol'
Expand All @@ -1065,13 +1083,13 @@ def considerIndentation(ts):
,'start-option-symbol'
,'end-option-symbol'):
if s in config.keys():
print('STEP 5: marking',s+'.')
step5 = True
print('STEP 6: marking',s+'.')
step6 = True
prods = [[s.upper() if x==config[s] else x for x in p] for p in prods]
#prods = list(map(lambda p:list(map(lambda x:s.upper() if x==config[s] else x,p)),prods))
if not step5:
print('STEP 5 skipped: sorry, no metasymbols specified.')
# STEP 6: validating metasymbols
if not step6:
print('STEP 6 skipped: sorry, no metasymbols specified.')
# STEP 7: validating metasymbols
if debug:
print('The grammar is perceived like this:')
for p in prods:
Expand All @@ -1082,35 +1100,27 @@ def considerIndentation(ts):
print('The grammar is perceived like this:')
for p in prods:
print('\t',p[1],'is defined as',p[2:])
# STEP 7: various commands
print('STEP 7: executing special extraction commands.')
step7 = False
defined = [x[1] for x in prods]
if debug:
print('Defined are',defined)
defined.append(config['defining-symbol'])
# STEP 8: various commands
print('STEP 8: executing special extraction commands.')
step8 = False
if len(ignore_tokens)>0:
print('STEP 7: ignoring extra tokens.')
step7 = True
print('STEP 8: ignoring extra tokens.')
step8 = True
for x in ignore_tokens:
prods = [list(filter(lambda y:y!=x,p)) for p in prods]
#prods = list(map(lambda x:filter(lambda y:y!='\n',x),prods))
if 'decompose-symbols' in config.keys():
print('STEP 7 (part of rule 4): decomposing compound symbols.')
step7 = True
prods = [decomposeSymbols(x,defined) for x in prods]
if 'undefined-nonterminals-are-terminals' in config.keys():
print('STEP 7 (rule 5): turning undefined nonterminals into terminals.')
step7 = True
print('STEP 8 (rule 5): turning undefined nonterminals into terminals.')
step8 = True
prods = [[convert2terminal(x,defined) for x in p] for p in prods]
#for p in prods:
# print(p[1],'is defined as',p[2:])
if 'glue-nonalphanumeric-terminals' in config.keys():
print('STEP 7 (part of rule 3): glueing non-alphanumeric terminal symbols together.')
step7 = True
print('STEP 8 (part of rule 3): glueing non-alphanumeric terminal symbols together.')
step8 = True
prods = list(map(glueTerminals,prods))
if not step7:
print('STEP 7 skipped, sorry: no special commands found in the configuration.')
if not step8:
print('STEP 8 skipped, sorry: no special commands found in the configuration.')
# STEP X: validating bracketing?
# ...
# RESULT
Expand Down
21 changes: 21 additions & 0 deletions topics/recovery/hunter/tests/0-ignored.bgf
@@ -0,0 +1,21 @@
<?xml version="1.0" encoding="UTF-8"?>
<bgf:grammar xmlns:bgf="http://planet-sl.org/bgf">
<bgf:production>
<nonterminal>foo</nonterminal>
<bgf:expression>
<nonterminal>bar</nonterminal>
</bgf:expression>
</bgf:production>
<bgf:production>
<nonterminal>bar</nonterminal>
<bgf:expression>
<nonterminal>wez</nonterminal>
</bgf:expression>
</bgf:production>
<bgf:production>
<nonterminal>wez</nonterminal>
<bgf:expression>
<nonterminal>foo</nonterminal>
</bgf:expression>
</bgf:production>
</bgf:grammar>
8 changes: 8 additions & 0 deletions topics/recovery/hunter/tests/0-ignored.edd
@@ -0,0 +1,8 @@
<?xml version="1.0" encoding="UTF-8"?>
<edd:config xmlns:edd="http://planet-sl.org/edd">
<defining-symbol>:</defining-symbol>
<terminator-symbol>\n</terminator-symbol>
<ignore>
<lines-containing>//</lines-containing>
</ignore>
</edd:config>
6 changes: 6 additions & 0 deletions topics/recovery/hunter/tests/0-ignored.src
@@ -0,0 +1,6 @@
// this is foo
foo : bar
bar : wez
// this is not foo
wez : foo

21 changes: 21 additions & 0 deletions topics/recovery/hunter/tests/0-simple.bgf
@@ -0,0 +1,21 @@
<?xml version="1.0" encoding="UTF-8"?>
<bgf:grammar xmlns:bgf="http://planet-sl.org/bgf">
<bgf:production>
<nonterminal>foo</nonterminal>
<bgf:expression>
<nonterminal>bar</nonterminal>
</bgf:expression>
</bgf:production>
<bgf:production>
<nonterminal>bar</nonterminal>
<bgf:expression>
<nonterminal>wez</nonterminal>
</bgf:expression>
</bgf:production>
<bgf:production>
<nonterminal>wez</nonterminal>
<bgf:expression>
<nonterminal>foo</nonterminal>
</bgf:expression>
</bgf:production>
</bgf:grammar>
5 changes: 5 additions & 0 deletions topics/recovery/hunter/tests/0-simple.edd
@@ -0,0 +1,5 @@
<?xml version="1.0" encoding="UTF-8"?>
<edd:config xmlns:edd="http://planet-sl.org/edd">
<defining-symbol>:</defining-symbol>
<terminator-symbol>\n</terminator-symbol>
</edd:config>
4 changes: 4 additions & 0 deletions topics/recovery/hunter/tests/0-simple.src
@@ -0,0 +1,4 @@
foo : bar
bar : wez
wez : foo

21 changes: 21 additions & 0 deletions topics/recovery/hunter/tests/1-comment.bgf
@@ -0,0 +1,21 @@
<?xml version="1.0" encoding="UTF-8"?>
<bgf:grammar xmlns:bgf="http://planet-sl.org/bgf">
<bgf:production>
<nonterminal>foo</nonterminal>
<bgf:expression>
<nonterminal>bar</nonterminal>
</bgf:expression>
</bgf:production>
<bgf:production>
<nonterminal>bar</nonterminal>
<bgf:expression>
<nonterminal>wez</nonterminal>
</bgf:expression>
</bgf:production>
<bgf:production>
<nonterminal>wez</nonterminal>
<bgf:expression>
<nonterminal>foo</nonterminal>
</bgf:expression>
</bgf:production>
</bgf:grammar>
7 changes: 7 additions & 0 deletions topics/recovery/hunter/tests/1-comment.edd
@@ -0,0 +1,7 @@
<?xml version="1.0" encoding="UTF-8"?>
<edd:config xmlns:edd="http://planet-sl.org/edd">
<defining-symbol>:</defining-symbol>
<terminator-symbol>\n</terminator-symbol>
<start-comment-symbol>/*</start-comment-symbol>
<end-comment-symbol>*/</end-comment-symbol>
</edd:config>
4 changes: 4 additions & 0 deletions topics/recovery/hunter/tests/1-comment.src
@@ -0,0 +1,4 @@
foo : /* this is foo */ bar
bar : wez
wez : foo /* this is not foo */

0 comments on commit 8944eaa

Please sign in to comment.