Skip to content

Commit

Permalink
update for Ralf's requests
Browse files Browse the repository at this point in the history
git-svn-id: https://slps.svn.sourceforge.net/svnroot/slps@371 ab42f6e0-554d-0410-b580-99e487e6eeb2
  • Loading branch information
grammarware committed Oct 26, 2008
1 parent 9658702 commit 68119b2
Show file tree
Hide file tree
Showing 18 changed files with 530 additions and 475 deletions.
4 changes: 2 additions & 2 deletions shared/tools/bgf2bnf
Original file line number Diff line number Diff line change
Expand Up @@ -7,11 +7,11 @@ SLPS=${PWD}
cd ${LOCAL}

if [ $# -eq 1 ]; then
OUTPUT=`basename $1 .bgf`.bnf
OUTPUT=/dev/stdout
elif [ $# -ne 2 ]; then
echo "This tool transforms XML BNF-like Grammar Format documents to EBNF dialect used in JLS."
echo "Usage: bgf2bnf <input-bgf-document> [<output-text>]"
echo "When the output file is not specified, the same filename with .bnf extension is used."
echo "When output file is not specified, stdout is used."
exit 1
elif [ ! -r $1 ]; then
echo "Oops: $1 not found or not readable."
Expand Down
46 changes: 46 additions & 0 deletions shared/tools/tokenover
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
#!/bin/sh
#
# tokenover -- print a LaTeX table summarising token classification counts.
#
# Usage: tokenover <topic>
#
# Runs `make debug` in ${SLPS}/topics/<topic> and, for every combination of
# typeface code (ITA/FIX/DEF) and token class (ALNUM/BNBAR/METAS/WEIRD),
# counts the lines of the debug output that contain that pair. The counts
# are written to stdout as a LaTeX tabular.

# Get our hands on basedir: two levels above the directory of this script.
# CDPATH is cleared so that `cd` can never print a path into the result.
SLPS=$(CDPATH= cd "$(dirname "$0")" && cd ../.. && pwd) || exit 1

if [ $# -ne 1 ]; then
	echo "This tool provides a token overview"
	echo "Usage: tokenover <topic>"
	exit 1
fi

# Stop if the topic is missing; otherwise `make debug` would run in
# whatever directory the caller happened to be in.
cd "${SLPS}/topics/$1" || {
	echo "Oops: topic $1 not found under ${SLPS}/topics." >&2
	exit 1
}

# Run the extractors once and reuse their output for all twelve counts
# instead of re-running `make debug` for every table cell.
DEBUG_LOG=$(make debug) ||
	echo "Warning: 'make debug' reported a failure; counts may be incomplete." >&2

# count <pattern>: print the number of debug-output lines matching <pattern>.
count() {
	printf '%s\n' "${DEBUG_LOG}" | grep -c -- "$1"
}

# row <macro> <class> <italic-label> <fixed-label> <default-label>
# Emit one table row. printf is used instead of echo because echo's
# treatment of backslashes differs between shells and platforms.
row() {
	printf '\\%s&%s (' "$1" "$3"
	count "ITA $2"
	printf ') &%s (' "$4"
	count "FIX $2"
	printf ')&%s (' "$5"
	count "DEF $2"
	printf ')\\\\\n'
}

printf '%s\n' '\begin{tabular}{l|c|c|c|}'
printf '%s\n' 'Token&\textit{italic}&\texttt{fixed}&default\\\hline'
row tokenAlNum ALNUM N T 'T?'
row tokenBar   BNBAR M T 'M?'
row tokenMeta  METAS M T 'T?'
row tokenOther WEIRD T T T
printf '%s\n' '\hline\end{tabular}'
84 changes: 38 additions & 46 deletions topics/extraction/html2bgf/html2bgf.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@

pp_mode = MODE_DEFAULT
pp_outer = pp_mode
verbose = False
totalerrors = 0
# pp_mode == MODE_ITALIC
# pp_mode == MODE_FIXED

Expand Down Expand Up @@ -119,6 +121,7 @@ def traverse(c):
return line+'</choice></bgf:expression>'

def addProduction(name,choices,oneof):
global verbose
bs = []
if oneof:
# concatenate all choices
Expand All @@ -129,24 +132,23 @@ def addProduction(name,choices,oneof):
for s in range(0,len(choices)):
ss = []
for i in range(0,len(choices[s][0])):
"""
if choices[s][1][i] == MODE_DEFAULT:
print 'DEF',
elif choices[s][1][i] == MODE_ITALIC:
print 'ITA',
elif choices[s][1][i] == MODE_FIXED:
print 'FIX',
else:
print 'UNK',
if choices[s][0][i].isalnum():
print 'ALNUM'
elif choices[s][0][i] == '|':
print 'BNBAR'
elif choices[s][0][i] in ('[',']','{','}','(',')','?????'):
print 'METAS'
else:
print 'WEIRD'
"""
if verbose:
if choices[s][1][i] == MODE_DEFAULT:
print 'DEF',
elif choices[s][1][i] == MODE_ITALIC:
print 'ITA',
elif choices[s][1][i] == MODE_FIXED:
print 'FIX',
else:
print 'UNK',
if choices[s][0][i].isalnum():
print 'ALNUM'
elif choices[s][0][i] == '|':
print 'BNBAR'
elif choices[s][0][i] in ('[',']','{','}','(',')','?????'):
print 'METAS'
else:
print 'WEIRD'
if choices[s][1][i] == MODE_FIXED:
# terminal
ss.append('"'+choices[s][0][i]+'"')
Expand All @@ -173,7 +175,7 @@ def addProduction(name,choices,oneof):
bs.append(ss)
if name in prods.keys():
print 'Duplicate definition of',name,'found, will be merged.'
#pessimistic[2] += 1
pessimistic[2] += 1
for c in bs:
addifnew(c,name)
else:
Expand All @@ -200,15 +202,6 @@ def structuralEq(arr1,arr2):
return False
return True

def serialiseT(name,choices):
    """Render one production as plain text.

    name    -- the name of the defined symbol (a string).
    choices -- a sequence of alternatives, each a sequence of strings.

    Returns a header line '<name> is defined as:' followed by one line per
    alternative; every line starts with a space and every symbol is
    followed by a space.
    """
    # Collect the pieces and join once instead of growing a string with +=.
    parts = [name+' is defined as:\n']
    for b in choices:
        parts.append(' '+''.join(s+' ' for s in b)+'\n')
    return ''.join(parts)

def addSpaces(line,symb):
    """Return line with every occurrence of symb surrounded by single spaces."""
    padded = ''.join([' ', symb, ' '])
    return line.replace(symb, padded)

Expand All @@ -232,6 +225,7 @@ def mapHTMLtoTokenStream(line):
if line.find('</i>')==0:
if pp_mode != MODE_ITALIC:
print 'Style tag mismatch.'
pessimistic[1]+=1
pp_mode = MODE_DEFAULT
pp_outer = MODE_DEFAULT
line = line[4:]
Expand All @@ -240,12 +234,14 @@ def mapHTMLtoTokenStream(line):
#if pp_mode == MODE_ITALIC:
if pp_mode != MODE_DEFAULT:
print 'Style tag mismatch.'
pessimistic[1]+=1
pp_mode = MODE_ITALIC
line = line[3:]
continue
if line.find('</em>')==0:
if pp_mode != MODE_ITALIC:
print 'Style tag mismatch.'
pessimistic[1]+=1
pp_mode = MODE_DEFAULT
pp_outer = MODE_DEFAULT
line = line[5:]
Expand All @@ -254,6 +250,7 @@ def mapHTMLtoTokenStream(line):
#if pp_mode == MODE_ITALIC:
if pp_mode != MODE_DEFAULT:
print 'Style tag mismatch.'
pessimistic[1]+=1
if (pp_mode == MODE_ITALIC) and tokens and oldline.find(tokens[-1]+'<em>'+line[4:line.index('>')])>=0:
print 'Token-breaking <em> tag endangers',
line = tokens.pop()+line[4:]
Expand All @@ -266,13 +263,15 @@ def mapHTMLtoTokenStream(line):
if line.find('<code>')==0:
if pp_mode == MODE_FIXED:
print 'Style tag mismatch.'
pessimistic[1]+=1
pp_outer = pp_mode
pp_mode = MODE_FIXED
line = line[6:]
continue
if line.find('</code>')==0:
if pp_mode != MODE_FIXED:
print 'Style tag mismatch.'
pessimistic[1]+=1
pp_mode = pp_outer
line = line[7:]
continue
Expand Down Expand Up @@ -304,11 +303,11 @@ def mapHTMLtoTokenStream(line):
if line.find('<a')==0:
print 'Anchor found, skipping everything that is left of this snippet.'
pessimistic[0] = True
pessimistic[1] += 1
#pessimistic[1] += 1
continue
if line.find('<')==0:
print 'Style tag unknown: "'+line+'", skipping!'
pessimistic[2] += 1
pessimistic[1] += 1
line = line[line.index('>')+1:]
else:
if line.find('<')>0:
Expand Down Expand Up @@ -399,7 +398,7 @@ def preprocessConstruct(fn):
if (pp_mode != MODE_ITALIC) and line.find('</em>')<0 and line.find('</i>')<0 and line.find('<code>')<0:
pp_mode = MODE_ITALIC
print 'Style tag enforcing: virtual <em> when new definition of',name,'starts.'
pessimistic[2] += 1
pessimistic[1] += 1
elif len(a)==4 and a[0]==a[2] and a[1]=='$$$$$' and a[-1]=='$$$$$':
# new mingled definition
if choices:
Expand All @@ -409,7 +408,7 @@ def preprocessConstruct(fn):
name = a[0]
oneof = False
print name,'double-declared, fixed'
pessimistic[2] += 1
#pessimistic[2] += 1
elif len(a)==4 and a[1]=='$$$$$' and a[2]=='one' and a[3]=='of':
# new "one-of" definition
if choices:
Expand All @@ -421,7 +420,7 @@ def preprocessConstruct(fn):
# line continuation
if countspaces(oldline)>countspaces(line):
print 'Line continuation enforced while parsing',name,'- indentation went from',countspaces(oldline),'to 0'
pessimistic[2] += 1
pessimistic[1] += 1
for i in range(0,len(a)):
choices[-1][0].append(a[i])
choices[-1][1].append(b[i])
Expand All @@ -432,8 +431,8 @@ def preprocessConstruct(fn):
else:
oldline=line=''
src.close()
if pessimistic[1]:
print 'Skipped',pessimistic[1],'anchor-containing snippets'
#if pessimistic[1]:
# print 'Skipped',pessimistic[1],'anchor-containing snippets'

def countspaces(s):
olds = s
Expand All @@ -447,12 +446,6 @@ def countspaces(s):
s=s[1:]
return cx

def printGrammarText(fn):
    """Write every production of the module-level prods mapping to file fn.

    fn -- path of the output file; it is opened for writing, so an
          existing file is overwritten.

    Each entry of prods is rendered with serialiseT and appended to the
    file in the iteration order of prods.keys().
    """
    ext = open(fn,'w')
    try:
        for nt in prods.keys():
            ext.write(serialiseT(nt,prods[nt]))
    finally:
        # Close the file even when serialisation or a write fails.
        ext.close()

def printGrammar(fn):
ext = open(fn,'w')
ext.write('<bgf:grammar xmlns:bgf="http://planet-sl.org/bgf">')
Expand Down Expand Up @@ -792,23 +785,22 @@ def fixBracketPair(nt,arr,left,right):
print 'HTML to Grammar automated extractor'
if len(sys.argv)==3 or len(sys.argv)==4:
print 'Reading the HTML document...'
if sys.argv[-1]=='-v':
verbose = True
preprocessConstruct(sys.argv[1])
print 'Massaging the grammar...'
glueSymbols()
preprocessCorrect()
killDuplicates()
print 'Writing the extracted grammar...'
if sys.argv[-1]=='-bnf':
printGrammarText(sys.argv[2])
else:
printGrammar(sys.argv[2])
printGrammar(sys.argv[2])
if pessimistic[2]:
print 'Total of',pessimistic[2]+pessimistic[1],'problems encountered and coped with.'
else:
print 'Usage:'
print ' ',sys.argv[0],'''<input> <output> [<options>]
Possible options:
-bnf Outputs in EBNF rather then in BGF'''
-v verbose mode (report the code of each token)'''
sys.exit(1)

6 changes: 6 additions & 0 deletions topics/java/Makefile
Original file line number Diff line number Diff line change
@@ -1,8 +1,14 @@
# Default target: builds nothing, it only prints which targets are available
# (rebuild, debug, test) and a warning about 'make rebuild'.
all:
@echo "Run 'make rebuild' if you want to renew the sources from their URLs."
@echo NOT recommended UNLESS you really know what you are doing!
@echo "Run 'make debug' to execute all extractors in debug mode"
@echo "Run 'make test' to execute all extractors and diff the results with LCI snapshot"

# Run the 'debug' target of every JLS extractor directory (jls1, jls2, jls3).
#
# '&&' keeps a failed 'cd' from re-running 'make debug' in this directory,
# which would recurse into this very rule. $(MAKE) is used instead of a
# literal 'make' so command-line flags and the jobserver reach the sub-makes.
.PHONY: debug
debug:
	cd jls1 && $(MAKE) debug
	cd jls2 && $(MAKE) debug
	cd jls3 && $(MAKE) debug

test:
cd jls1 ; make all
cd jls2 ; make all
Expand Down
6 changes: 6 additions & 0 deletions topics/java/jls1/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,12 @@ all:
make doc
make test

# Debug run of the JLS1 extraction: the same two commands as the 'app' target,
# then the equivalent pair for doc.html, each html2bgf.py call with -v appended.
# html2bgf.py's usage text describes -v as "verbose mode (report the code of
# each token)".
# NOTE(review): presumably getpre.py writes parse.html (its last argument) and
# html2bgf.py then reads it, so the second pair overwrites the first
# intermediate file -- confirm against getpre.py.
debug:
python ../../extraction/html2bgf/getpre.py syntax.kw app.html parse.html
python ../../extraction/html2bgf/html2bgf.py parse.html app1.bgf -v
python ../../extraction/html2bgf/getpre.py collect.kw doc.html parse.html
python ../../extraction/html2bgf/html2bgf.py parse.html doc1.bgf -v

app:
python ../../extraction/html2bgf/getpre.py syntax.kw app.html parse.html
python ../../extraction/html2bgf/html2bgf.py parse.html app1.bgf
Expand Down
5 changes: 5 additions & 0 deletions topics/java/jls2/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,11 @@ all:
make doc
make test

# Debug run of the JLS2 extraction: the 'app' command plus a getpre.py /
# html2bgf.py pair for doc.html, each html2bgf.py call with -v appended.
# html2bgf.py's usage text describes -v as "verbose mode (report the code of
# each token)".
# NOTE(review): presumably getpre.py writes parse.html (its last argument),
# which html2bgf.py then reads -- confirm against getpre.py.
debug:
python ../../extraction/html2bgf/html2bgf.py app.html app2.bgf -v
python ../../extraction/html2bgf/getpre.py collect.kw doc.html parse.html
python ../../extraction/html2bgf/html2bgf.py parse.html doc2.bgf -v

app:
python ../../extraction/html2bgf/html2bgf.py app.html app2.bgf

Expand Down
5 changes: 5 additions & 0 deletions topics/java/jls3/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,11 @@ all:
make doc
make test

# Debug run of the JLS3 extraction: the 'app' command plus a getpre.py /
# html2bgf.py pair for doc.html, each html2bgf.py call with -v appended.
# html2bgf.py's usage text describes -v as "verbose mode (report the code of
# each token)".
# NOTE(review): presumably getpre.py writes parse.html (its last argument),
# which html2bgf.py then reads -- confirm against getpre.py.
debug:
python ../../extraction/html2bgf/html2bgf.py app.html app3.bgf -v
python ../../extraction/html2bgf/getpre.py collect.kw doc.html parse.html
python ../../extraction/html2bgf/html2bgf.py parse.html doc3.bgf -v

app:
python ../../extraction/html2bgf/html2bgf.py app.html app3.bgf

Expand Down
13 changes: 7 additions & 6 deletions topics/java/lci/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -16,12 +16,13 @@ check:
ls -1 bgf/*.bgf | xargs -n1 ${validator} bgf

debug:
../../../shared/tools/html2bgf ../jls1/syntax.kw ../jls1/app.html 1.bnf -bnf >/dev/null
../../../shared/tools/html2bgf ../jls1/collect.kw ../jls1/doc.html q.bnf -bnf >/dev/null
../../../shared/tools/html2bgf ../jls2/app.html 2.bnf -bnf >/dev/null
../../../shared/tools/html2bgf ../jls2/collect.kw ../jls2/doc.html w.bnf -bnf >/dev/null
../../../shared/tools/html2bgf ../jls3/app.html 3.bnf -bnf >/dev/null
../../../shared/tools/html2bgf ../jls3/collect.kw ../jls3/doc.html e.bnf -bnf >/dev/null
../../../shared/tools/bgf2bnf snapshot/app1.bgf 1.bnf
../../../shared/tools/bgf2bnf snapshot/app2.bgf 2.bnf
../../../shared/tools/bgf2bnf snapshot/app3.bgf 3.bnf
../../../shared/tools/bgf2bnf snapshot/doc1.bgf q.bnf
../../../shared/tools/bgf2bnf snapshot/doc2.bgf w.bnf
../../../shared/tools/bgf2bnf snapshot/doc3.bgf e.bnf

clean:
rm -f *~
rm -f bgf/*
Expand Down
Loading

0 comments on commit 68119b2

Please sign in to comment.