Skip to content

Commit

Permalink
streamlines HTML->BGF; connecting second JLS1 grammar to LCI
Browse files (browse the repository at this point in the history)
git-svn-id: https://slps.svn.sourceforge.net/svnroot/slps@259 ab42f6e0-554d-0410-b580-99e487e6eeb2
  • Loading branch information
grammarware committed Sep 11, 2008
1 parent b4f52b9 commit 72f95d1
Show file tree
Hide file tree
Showing 12 changed files with 9,389 additions and 50 deletions.
15 changes: 12 additions & 3 deletions shared/tools/html2bgf
Expand Up @@ -8,12 +8,21 @@ SLPS=${PWD}
cd ${LOCAL1}

if [ $# -lt 2 ]; then
echo "This tool extracts a BGF from Java Language Standard"
echo "Usage: $0 <input-hypertext-document> <output-bgf> [options]"
echo "This tool extracts a BGF from Java Language Specification or a similarly typeset HTML"
echo "Usage:"
echo " html2bgf [<keywords-list>] <input-hypertext-document> <output-bgf>"
echo " or: html2bgf [<keywords-list>] <input-hypertext-document> <output-bnf> -bnf"
exit 1
elif [ ! -r $1 ]; then
echo "Oops: $1 not found or not readable."
exit 1
elif [ $# -eq 2 ]; then
python ${SLPS}/topics/extraction/html2bgf/html2bgf.py $1 $2
elif [ "$3" == "-bnf" ]; then
python ${SLPS}/topics/extraction/html2bgf/html2bgf.py $1 $2 $3
else
python ${SLPS}/topics/extraction/html2bgf/html2bgf.py $@
python ${SLPS}/topics/extraction/html2bgf/xpathpre.py $1 $2 $2.fixed
python ${SLPS}/topics/extraction/html2bgf/html2bgf.py $2.fixed $3 $4
rm -f $2.fixed
fi

21 changes: 0 additions & 21 deletions shared/tools/jls2bgf

This file was deleted.

35 changes: 20 additions & 15 deletions topics/extraction/html2bgf/xpathpre.py
Expand Up @@ -4,7 +4,7 @@
yes = []
no = []

def checkSection(text,tagN,includeFlag):
def checkSection(text,tagN,includeFlag,p):
for chapter in text.split('<h'+`tagN`+'>')[1:]:
grammar = includeFlag
content = chapter.split('</h'+`tagN`+'>')
Expand All @@ -16,34 +16,39 @@ def checkSection(text,tagN,includeFlag):
grammar = False
if grammar and content[1].find('<h')==-1:
for chunk in content[1].split('<pre>')[1:]:
print chunk.split('</pre>')[0].replace('<br>','').replace('&#32;',' ')
print '<hr>'
p.write(chunk.split('</pre>')[0].replace('<br>','').replace('&#32;',' '))
p.write('<hr>')
else:
#print 'Going deeper than',content[0].split()[0]
if grammar:
for chunk in content[1].split('<h'+`tagN+1`+'>')[0].split('<pre>')[1:]:
print chunk.split('</pre>')[0].replace('<br>','').replace('&#32;',' ')
print '<hr>'
checkSection(content[1],tagN+1,grammar)
p.write(chunk.split('</pre>')[0].replace('<br>','').replace('&#32;',' '))
p.write('<hr>')
checkSection(content[1],tagN+1,grammar,p)

if len(sys.argv)<2:
if len(sys.argv)!=4:
print '''This tool simulates a particular XPath query that it can execute upon a badly composed HTML.
Usage:
python xpathpre.py keyword [keyword ...] <input >output
python xpathpre.py <keywords-list> <input-document> <output-bgf>
It will read the input, looking for sections (<h?>) that contain keywords in the title.
Once found, it will output the content of all <pre> tags from such sections.
Keywords can be positive or negative, with positive being default.'''
Keywords can be positive or negative, with positive being default.
<pre> inside <blockquote> is not used.'''
else:
for kw in sys.argv[1:]:
if kw[0]=='-':
for kw in open(sys.argv[1],'r').readlines():
kw = kw.strip()
if not kw:
continue
elif kw[0]=='-':
no.append(kw[1:])
elif kw[0]=='+':
yes.append(kw[1:])
else:
yes.append(kw)
print '<pre>'
checkSection(''.join(sys.stdin.readlines()),1,False)
print '</pre>'

out = open(sys.argv[3],'w')
out.write('<pre>')
checkSection(''.join(open(sys.argv[2],'r').readlines()),1,False,out)
out.write('</pre>')
out.close()
9 changes: 9 additions & 0 deletions topics/java/Makefile
Expand Up @@ -23,3 +23,12 @@ rebuild:
curl -k http://java.sun.com/docs/books/jls/first_edition/html/10.doc.html >>jls1/collected.html
curl -k http://java.sun.com/docs/books/jls/first_edition/html/14.doc.html >>jls1/collected.html
curl -k http://java.sun.com/docs/books/jls/first_edition/html/15.doc.html >>jls1/collected.html
curl -k http://java.sun.com/docs/books/jls/second_edition/html/typesValues.doc.html >jls2/collected.html
curl -k http://java.sun.com/docs/books/jls/second_edition/html/names.doc.html >>jls2/collected.html
curl -k http://java.sun.com/docs/books/jls/second_edition/html/packages.doc.html >>jls2/collected.html
curl -k http://java.sun.com/docs/books/jls/second_edition/html/classes.doc.html >>jls2/collected.html
curl -k http://java.sun.com/docs/books/jls/second_edition/html/interfaces.doc.html >>jls2/collected.html
curl -k http://java.sun.com/docs/books/jls/second_edition/html/arrays.doc.html >>jls2/collected.html
curl -k http://java.sun.com/docs/books/jls/second_edition/html/statements.doc.html >>jls2/collected.html
curl -k http://java.sun.com/docs/books/jls/second_edition/html/expressions.doc.html >>jls2/collected.html

4 changes: 2 additions & 2 deletions topics/java/jls1/Makefile
@@ -1,7 +1,7 @@
all:
python ../../extraction/html2bgf/xpathpre.py LALR -Difficulties <syntax.html >parse.html
python ../../extraction/html2bgf/xpathpre.py syntax.kw syntax.html parse.html
python ../../extraction/html2bgf/html2bgf.py parse.html jls1.bgf
python ../../extraction/html2bgf/xpathpre.py 4 6 7 8 9 10 14 15 <collected.html >parse.html
python ../../extraction/html2bgf/xpathpre.py collect.kw collected.html parse.html
python ../../extraction/html2bgf/html2bgf.py parse.html jls1c.bgf
../../../shared/tools/checkxml bgf jls1.bgf
../../../shared/tools/checkxml bgf jls1c.bgf
Expand Down
8 changes: 8 additions & 0 deletions topics/java/jls1/collect.kw
@@ -0,0 +1,8 @@
4
6
7
8
9
10
14
15
2 changes: 2 additions & 0 deletions topics/java/jls1/syntax.kw
@@ -0,0 +1,2 @@
LALR
-Difficulties
3 changes: 3 additions & 0 deletions topics/java/jls2/Makefile
@@ -1,6 +1,9 @@
all:
python ../../extraction/html2bgf/html2bgf.py syntax.html jls2.bgf
python ../../extraction/html2bgf/xpathpre.py collect.kw collected.html parse.html
python ../../extraction/html2bgf/html2bgf.py parse.html jls2c.bgf
../../../shared/tools/checkxml bgf jls2.bgf
../../../shared/tools/checkxml bgf jls2c.bgf

clean:
rm -f *.bgf
8 changes: 8 additions & 0 deletions topics/java/jls2/collect.kw
@@ -0,0 +1,8 @@
4
6.5
7
8
9
10.6
14
15

0 comments on commit 72f95d1

Please sign in to comment.