Skip to content

Commit

Permalink
Hunter 3.0: (almost) no corrective heuristics, but support for labels…
Browse files Browse the repository at this point in the history
…: enough to extract the third Ada grammar

git-svn-id: https://slps.svn.sourceforge.net/svnroot/slps@1038 ab42f6e0-554d-0410-b580-99e487e6eeb2
  • Loading branch information
grammarware committed May 20, 2011
1 parent 1837e61 commit 17176c2
Show file tree
Hide file tree
Showing 11 changed files with 811 additions and 438 deletions.
4 changes: 4 additions & 0 deletions topics/grammars/ada/Makefile
@@ -1,11 +1,15 @@
build:
cd kempe && make build
cd laemmel-verhoef && make build
cd lncs-4348 && make build

clean:
cd kempe && make clean
cd laemmel-verhoef && make clean
cd lncs-4348 && make clean

test:
cd kempe && make test
cd laemmel-verhoef && make test
cd lncs-4348 && make test

63 changes: 55 additions & 8 deletions topics/grammars/ada/hunter.py
Expand Up @@ -12,6 +12,12 @@
def isAlpha(x):
    """Tell whether x consists solely of alphanumeric characters and underscores.

    x -- the string (token) to inspect.

    Returns True when every character is '_' or satisfies str.isalnum();
    the empty string is vacuously accepted.
    """
    # The former reduce-based fold was parsed as `(a and b=='_') or b.isalnum()`,
    # so any alphanumeric character reset the running result to True and
    # strings such as 'a-b' were wrongly accepted. Checking every character
    # independently avoids the precedence trap.
    return all(c == '_' or c.isalnum() for c in x)

def isQNumber(x):
    """Tell whether x looks like a (possibly dot-qualified) number such as '3' or '3.1.4'.

    x -- the string (token) to inspect.

    Returns False for a lone '.'; otherwise True when every character is a
    digit or a dot. The empty string is vacuously accepted.
    """
    if x == '.':
        # A single dot on its own is a separator, not a number.
        return False
    # The former reduce-based fold was parsed as `(a and b=='.') or b.isdigit()`,
    # so a trailing digit made any string (e.g. 'a1') count as a number.
    # Checking every character independently avoids the precedence trap.
    return all(c == '.' or c.isdigit() for c in x)

def splitTokenStream(s):
ts = [s[0]]
i = 1
Expand Down Expand Up @@ -50,7 +56,32 @@ def useDefiningSymbol(ts,d):
poss.append(i)
poss.append(len(ts)+1)
for i in range(0,len(poss)-1):
prods.append(ts[poss[i]-1:poss[i+1]-1])
if 'end-label-symbol' in config.keys():
if ts[poss[i]-2] == config['end-label-symbol']:
if 'start-label-symbol' in config.keys():
# todo: now works only with one-token labels!
if ts[poss[i]-4] == config['start-label-symbol']:
# everything is fine
p = [ts[poss[i]-3],ts[poss[i]-1]]
else:
print 'STEP 4 problem: start-label-symbol mismatch!'
# todo: recover
else:
# no starting symbol for the label
p = [ts[poss[i]-3],ts[poss[i]-1]]
else:
# no label this time
p = ['',ts[poss[i]-1]]
else:
# no labels at all
p = ['',ts[poss[i]-1]]
end = poss[i+1]-1
if 'end-label-symbol' in config.keys() and ts[end-1] == config['end-label-symbol']:
end -= 2
if 'start-label-symbol' in config.keys() and ts[end-1] == config['start-label-symbol']:
end -= 1
p.extend(ts[poss[i]+1:end])
prods.append(p)
return prods

def useDefinitionSeparatorSymbol(ts,d):
Expand Down Expand Up @@ -433,7 +464,7 @@ def filterNewlines(s):

def glueTerminals(p):
q = []
for y in p:
for y in p[2:]:
if y[0] != config['start-terminal-symbol'] or len(q) == 0 or q[-1][0] != config['start-terminal-symbol']:
q.append(y)
continue
Expand All @@ -443,10 +474,21 @@ def glueTerminals(p):
q[-1] = q[-1][:-1] + y[1:]
else:
q.append(y)
if debug and p != q:
r = p[:2]
r.extend(q)
if debug and p != r:
print '>>>in>>>>',p
print '>>>out>>>',q
return q
print '>>>out>>>',r
return r

def assembleQualifiedNumbers(ts):
    """Merge neighbouring tokens that together form a dot-qualified number.

    ts -- the list of string tokens to process (left untouched).

    Returns a new list in which a token is appended to its predecessor
    whenever the predecessor already passes isQNumber and the token itself
    either passes isQNumber or is a lone '.'.
    """
    glued = []
    for token in ts:
        # isQNumber rejects a bare '.', hence the explicit comparison that
        # lets a separator dot attach to the number collected so far.
        continues_number = (
            len(glued) > 0
            and (isQNumber(token) or token == '.')
            and isQNumber(glued[-1])
        )
        if not continues_number:
            glued.append(token)
            continue
        glued[-1] = glued[-1] + token
    return glued

if __name__ == "__main__":
if len(sys.argv) != 4:
Expand Down Expand Up @@ -475,6 +517,7 @@ def glueTerminals(p):
print 'STEP 2 skipped, sorry: start-nonterminal-symbol and end-nonterminal-symbol are not both specified.'
# STEP 3: assembling composite metasymbols together
print 'STEP 3: assembling metasymbols according to their possible values.'
tokens = assembleQualifiedNumbers(tokens)
for k in config.keys():
if len(config[k])>1:
print 'STEP 3: going to glue tokens that resemble', config[k]
Expand All @@ -497,6 +540,8 @@ def glueTerminals(p):
config['terminator-symbol'] = ts
else:
print 'STEP 4 unsuccessful, sorry.'
for p in prods:
print '%40s'%p[1],'>>>>>>',p[-2:]
# ORLY?
# STEP 4a.2: adjusting the terminator-symbol on the last production
for i in range(0,len(config['terminator-symbol'])):
Expand Down Expand Up @@ -528,9 +573,9 @@ def glueTerminals(p):
step6 = True
if 'start-terminal-symbol' not in config.keys() and 'end-terminal-symbol' not in config.keys():
config['start-terminal-symbol'] = config['end-terminal-symbol'] = '"'
defined = map(lambda x:x[0],prods)
defined = map(lambda x:x[1],prods)
defined.append(config['defining-symbol'])
prods = map(lambda p:map(lambda x:x if x in defined or x.find(config['undefined-nonterminals-are-terminals'])>-1 or (x.isupper() and len(x)>1) else config['start-terminal-symbol']+x+config['end-terminal-symbol'],p),prods)
prods = map(lambda p:map(lambda x:x if x in defined or x.find(config['undefined-nonterminals-are-terminals'])>-1 or (x.isupper() and len(x)>1) or x=='' else config['start-terminal-symbol']+x+config['end-terminal-symbol'],p),prods)
if 'glue-nonalphanumeric-terminals' in config.keys():
print 'STEP 6: glueing non-alphanumeric terminal symbols together.'
step6 = True
Expand All @@ -549,7 +594,9 @@ def glueTerminals(p):
bgf = BGF.Grammar()
for q in prods:
p = BGF.Production()
p.setNT(q[0])
if 'disregard-labels' not in config.keys() and q[0]:
p.setLabel(q[0])
p.setNT(q[1])
p.setExpr(map2expr(q[2:]))
bgf.addProd(p)
ET.ElementTree(bgf.getXml()).write(sys.argv[3])
Expand Down
3 changes: 2 additions & 1 deletion topics/grammars/ada/kempe/Makefile
Expand Up @@ -2,8 +2,9 @@ build:
make ada.bgf

ada.bgf: src.syntax.summary.txt
cat src.syntax.summary.txt | grep -v 1. | grep -v 2. | grep -v 3. | grep -v 4. | grep -v 5. | grep -v 6. | grep -v 7. | grep -v 8. | grep -v 9. | grep -v 0. | grep -v 'quotation mark' > src.prepared.txt
cat src.syntax.summary.txt | grep -v 'quotation mark' > src.prepared.txt
perl -pi -w -e 's/{\|/{TERMINALBAR/g;' src.prepared.txt
perl -pi -w -e 's/J\./0\./g;' src.prepared.txt
../hunter.py src.prepared.txt config.edd ada-1.bgf
../../../../shared/tools/xbgf post-extract.xbgf ada-1.bgf ada-2.bgf
../../../../shared/tools/xbgf correct.xbgf ada-2.bgf ada.bgf
Expand Down
2 changes: 2 additions & 0 deletions topics/grammars/ada/kempe/config.edd
@@ -1,5 +1,6 @@
<?xml version="1.0" encoding="UTF-8"?>
<edd:config xmlns:edd="http://planet-sl.org/edd">
<end-label-symbol>:\n</end-label-symbol>
<defining-symbol>::=</defining-symbol>
<definition-separator-symbol>|</definition-separator-symbol>
<start-option-symbol>[</start-option-symbol>
Expand All @@ -9,4 +10,5 @@
<undefined-nonterminals-are-terminals>_</undefined-nonterminals-are-terminals>
<ignore-extra-newlines/>
<glue-nonalphanumeric-terminals/>
<disregard-labels/>
</edd:config>
7 changes: 4 additions & 3 deletions topics/grammars/ada/laemmel-verhoef/Makefile
@@ -1,9 +1,10 @@
build:
../hunter.py src.context.free.syntax.txt config.edd ada.bgf
../hunter.py src.context.free.syntax.txt config.edd ada.raw.bgf
../../../../shared/tools/xbgf correct.xbgf ada.raw.bgf ada.bgf

test:
validate bgf ada.bgf
checkbgf ada.bgf
ls -1 *.bgf | xargs -n1 ../../../../shared/tools/validate bgf
../../../../shared/tools/checkbgf ada.bgf

clean:
rm -rf *.bgf *.bnf
15 changes: 15 additions & 0 deletions topics/grammars/ada/laemmel-verhoef/correct.xbgf
@@ -0,0 +1,15 @@
<?xml version="1.0" encoding="UTF-8"?>
<xbgf:sequence xmlns:xbgf="http://planet-sl.org/xbgf" xmlns:bgf="http://planet-sl.org/bgf">
<!-- Thanks Othon Batista, othonb@yahoo.com!-->
<xbgf:replace>
<bgf:expression>
<terminal>--</terminal>
</bgf:expression>
<bgf:expression>
<terminal>-</terminal>
</bgf:expression>
<in>
<nonterminal>unary_adding_operator</nonterminal>
</in>
</xbgf:replace>
</xbgf:sequence>
20 changes: 20 additions & 0 deletions topics/grammars/ada/lncs-4348/Makefile
@@ -0,0 +1,20 @@
build:
make ada.bgf

ada.bgf: src.syntax.summary.txt
cp src.syntax.summary.txt src.prepared.txt
#cat src.syntax.summary.txt | grep -v 1. | grep -v 2. | grep -v 3. | grep -v 4. | grep -v 5. | grep -v 6. | grep -v 7. | grep -v 8. | grep -v 9. | grep -v 0. | grep -v 'quotation mark' > src.prepared.txt
perl -pi -w -e 's/{\|/{TERMINALBAR/g;' src.prepared.txt
perl -pi -w -e 's/–/-/g;' src.prepared.txt
perl -pi -w -e 's/J\./0\./g;' src.prepared.txt
../hunter.py src.prepared.txt config.edd ada-1.bgf
../../../../shared/tools/xbgf post-extract.xbgf ada-1.bgf ada-2.bgf
../../../../shared/tools/xbgf correct.xbgf ada-2.bgf ada.bgf
../../../../shared/tools/bgf2bnf ada.bgf ada.bnf

test:
ls -1 *.bgf | xargs -n1 ../../../../shared/tools/validate bgf
../../../../shared/tools/checkbgf ada.bgf

clean:
rm -f *.bgf *.bnf src.prepared.txt
14 changes: 14 additions & 0 deletions topics/grammars/ada/lncs-4348/config.edd
@@ -0,0 +1,14 @@
<?xml version="1.0" encoding="UTF-8"?>
<edd:config xmlns:edd="http://planet-sl.org/edd">
<end-label-symbol>:</end-label-symbol>
<defining-symbol>::=</defining-symbol>
<definition-separator-symbol>|</definition-separator-symbol>
<start-option-symbol>[</start-option-symbol>
<end-option-symbol>]</end-option-symbol>
<start-star-symbol>{</start-star-symbol>
<end-star-symbol>}</end-star-symbol>
<undefined-nonterminals-are-terminals>_</undefined-nonterminals-are-terminals>
<ignore-extra-newlines/>
<glue-nonalphanumeric-terminals/>
<disregard-labels/>
</edd:config>
26 changes: 26 additions & 0 deletions topics/grammars/ada/lncs-4348/correct.xbgf
@@ -0,0 +1,26 @@
<?xml version="1.0" encoding="UTF-8"?>
<xbgf:sequence xmlns:xbgf="http://planet-sl.org/xbgf" xmlns:bgf="http://planet-sl.org/bgf">
<!-- Massage step: the first expression is the sequence "variant variant*", the second is "variant+"; presumably the former is rewritten into the latter (confirm against the XBGF massage semantics). -->
<xbgf:massage>
<bgf:expression>
<sequence>
<bgf:expression>
<nonterminal>variant</nonterminal>
</bgf:expression>
<bgf:expression>
<star>
<bgf:expression>
<nonterminal>variant</nonterminal>
</bgf:expression>
</star>
</bgf:expression>
</sequence>
</bgf:expression>
<bgf:expression>
<plus>
<bgf:expression>
<nonterminal>variant</nonterminal>
</bgf:expression>
</plus>
</bgf:expression>
</xbgf:massage>
</xbgf:sequence>
120 changes: 120 additions & 0 deletions topics/grammars/ada/lncs-4348/post-extract.xbgf
@@ -0,0 +1,120 @@
<?xml version="1.0" encoding="UTF-8"?>
<xbgf:sequence xmlns:xbgf="http://planet-sl.org/xbgf" xmlns:bgf="http://planet-sl.org/bgf">
<!-- Interpreting the notation rule that | is a BNF bar unless encountered as {| -->
<xbgf:replace>
<bgf:expression>
<nonterminal>TERMINALBAR</nonterminal>
</bgf:expression>
<bgf:expression>
<terminal>|</terminal>
</bgf:expression>
</xbgf:replace>
<!-- should be defined elsewhere-->
<xbgf:replace>
<bgf:expression>
<terminal>underline</terminal>
</bgf:expression>
<bgf:expression>
<nonterminal>underline</nonterminal>
</bgf:expression>
</xbgf:replace>
<!-- Resolving the confusion of "pragma" and the pragma nonterminal -->
<xbgf:replace>
<bgf:expression>
<nonterminal>pragma</nonterminal>
</bgf:expression>
<bgf:expression>
<terminal>pragma</terminal>
</bgf:expression>
<in>
<nonterminal>pragma</nonterminal>
</in>
</xbgf:replace>
<!-- Disambiguating "range" and the range nonterminal -->
<xbgf:replace>
<bgf:expression>
<nonterminal>range</nonterminal>
</bgf:expression>
<bgf:expression>
<terminal>range</terminal>
</bgf:expression>
</xbgf:replace>
<xbgf:replace>
<bgf:expression>
<sequence>
<bgf:expression>
<terminal>range</terminal>
</bgf:expression>
<bgf:expression>
<terminal>range</terminal>
</bgf:expression>
</sequence>
</bgf:expression>
<bgf:expression>
<sequence>
<bgf:expression>
<terminal>range</terminal>
</bgf:expression>
<bgf:expression>
<nonterminal>range</nonterminal>
</bgf:expression>
</sequence>
</bgf:expression>
<in>
<nonterminal>range_constraint</nonterminal>
</in>
</xbgf:replace>
<xbgf:replace>
<bgf:expression>
<terminal>range</terminal>
</bgf:expression>
<bgf:expression>
<nonterminal>range</nonterminal>
</bgf:expression>
<in>
<nonterminal>discrete_range</nonterminal>
</in>
</xbgf:replace>
<xbgf:replace>
<bgf:expression>
<terminal>range</terminal>
</bgf:expression>
<bgf:expression>
<nonterminal>range</nonterminal>
</bgf:expression>
<in>
<nonterminal>relation</nonterminal>
</in>
</xbgf:replace>
<xbgf:replace>
<bgf:expression>
<terminal>range</terminal>
</bgf:expression>
<bgf:expression>
<nonterminal>range</nonterminal>
</bgf:expression>
<in>
<nonterminal>discrete_subtype_definition</nonterminal>
</in>
</xbgf:replace>
<!-- Disambiguating "body" and the body nonterminal -->
<xbgf:replace>
<bgf:expression>
<nonterminal>body</nonterminal>
</bgf:expression>
<bgf:expression>
<terminal>body</terminal>
</bgf:expression>
</xbgf:replace>
<xbgf:replace>
<bgf:expression>
<terminal>body</terminal>
</bgf:expression>
<bgf:expression>
<nonterminal>body</nonterminal>
</bgf:expression>
<in>
<nonterminal>declarative_item</nonterminal>
</in>
</xbgf:replace>
</xbgf:sequence>

0 comments on commit 17176c2

Please sign in to comment.