Skip to content

Commit

Permalink
fixed a bug in HTML->BGF extractor that failed to correctly split terminals like "Name..." (to "Name" and "...")
Browse files Browse the repository at this point in the history

git-svn-id: https://slps.svn.sourceforge.net/svnroot/slps@322 ab42f6e0-554d-0410-b580-99e487e6eeb2
  • Loading branch information
grammarware committed Oct 6, 2008
1 parent 55ba5c9 commit 9c4fd42
Showing 1 changed file with 23 additions and 14 deletions.
37 changes: 23 additions & 14 deletions topics/extraction/html2bgf/html2bgf.py
Expand Up @@ -124,7 +124,7 @@ def addProduction(name,choices,oneof):
bs.append(ss)
if name in prods.keys():
print 'Duplicate definition of',name,'found, merged.'
pessimistic[2] += 1
#pessimistic[2] += 1
for c in bs:
addifnew(c,name)
else:
Expand Down Expand Up @@ -433,27 +433,36 @@ def automatedImprove():
else:
quote = False
word = bs[i]
print 'Multiple terminals heuristic fix:',bs[i],'in',nt,
if word[0]=='.' or word[-1]=='.':
print 'Multiple terminals heuristic fix:',bs[i],'in',nt,'(1 to 2)'
print '(1 to 2)'
else:
print 'Multiple terminals heuristic fix:',bs[i],'in',nt,'(1 to 3)'
print '(1 to 3)'
pessimistic[2] += 1
if i>0:
newbs = bs[:i-1]
newbs = bs[:i]
else:
newbs = []
if quote:
if word[0]!='.':
if word.find('...')<0:
if word[0]!='.':
newbs.append('"'+word[:word.index('.')]+'"')
newbs.append('"."')
if word[-1]!='.':
newbs.append('"'+word[word.index('.')+1:]+'"')
else:
newbs.append('"'+word[:word.index('.')]+'"')
newbs.append('"."')
if word[-1]!='.':
newbs.append('"'+word[word.index('.')+1:]+'"')
newbs.append('"..."')
else:
if word[0]!='.':
if word.find('...')<0:
if word[0]!='.':
newbs.append(word[:word.index('.')])
newbs.append('"."')
if word[-1]!='.':
newbs.append(word[word.index('.')+1:])
else:
newbs.append(word[:word.index('.')])
newbs.append('"."')
if word[-1]!='.':
newbs.append(word[word.index('.')+1:])
newbs.append('"..."')
if i+1<len(bs):
newbs.extend(bs[i+1:])
bs = newbs
Expand Down Expand Up @@ -638,7 +647,7 @@ def fixBracketPair(nt,arr,left,right):
while(cx>0):
if '"'+right+'"' in arr:
arr[arr.index('"'+right+'"')]=right
print '(transformed terminal)'
print '(transformed terminal bracket)'
elif left in arr:
arr.remove(left)
print '(removed left bracket)'
Expand All @@ -650,7 +659,7 @@ def fixBracketPair(nt,arr,left,right):
while(cx<0):
if '"'+left+'"' in arr:
arr[arr.index('"'+left+'"')]=left
print '(transformed terminal)'
print '(transformed terminal bracket)'
elif right in arr:
arr.remove(right)
print '(removed right bracket)'
Expand Down

0 comments on commit 9c4fd42

Please sign in to comment.