update for Ralf's requests

git-svn-id: https://slps.svn.sourceforge.net/svnroot/slps@371 ab42f6e0-554d-0410-b580-99e487e6eeb2
grammarware · Oct 26, 2008 · 68119b2 · 68119b2
1 parent 9658702
commit 68119b2
Show file tree

Hide file tree

Showing 18 changed files with 530 additions and 475 deletions.
diff --git a/shared/tools/bgf2bnf b/shared/tools/bgf2bnf
@@ -7,11 +7,11 @@ SLPS=${PWD}
 cd ${LOCAL}
 
 if [ $# -eq 1 ]; then
-    OUTPUT=`basename $1 .bgf`.bnf
+    OUTPUT=/dev/stdout
 elif [ $# -ne 2 ]; then
     echo "This tool transforms XML BNF-like Grammar Format documents to EBNF dialect used in JLS."
     echo "Usage: bgf2bnf <input-bgf-document> [<output-text>]"
-    echo "When the output file is not specified, the same filename with .bnf extension is used."
+    echo "When output file is not specified, stdout is used."
     exit 1
 elif [ ! -r $1 ]; then
     echo "Oops: $1 not found or not readable."

diff --git a/shared/tools/tokenover b/shared/tools/tokenover
@@ -0,0 +1,61 @@
+#!/bin/sh
+
+# tokenover <topic>: print a LaTeX tabular that counts, for each token class
+# (ALNUM, BNBAR, METAS, WEIRD), the lines of `make debug` output tagged with
+# each style code (ITA, FIX, DEF) in topics/<topic>.
+
+# Get our hands on basedir (two levels above this script)
+LOCAL1=${PWD}
+cd "$(dirname "$0")/../.." || exit 1
+SLPS=${PWD}
+cd "${LOCAL1}" || exit 1
+
+if [ $# -ne 1 ]; then
+    echo "This tool provides a token overview"
+    echo "Usage: tokenover <topic>"
+    exit 1
+fi
+
+# Stop if the topic directory is missing; otherwise `make debug` would run in
+# the caller's current directory instead.
+cd "${SLPS}/topics/$1" || exit 1
+
+# Run the extractors once and reuse the captured report for all twelve counts.
+DEBUG=$(make debug)
+
+# count <pattern>: print how many captured lines match <pattern>.
+count() {
+    printf '%s\n' "${DEBUG}" | grep -c "$1"
+}
+
+/bin/echo "\begin{tabular}{l|c|c|c|}"
+/bin/echo "Token&\textit{italic}&\texttt{fixed}&default\\\\\hline"
+/bin/echo -n "\\tokenAlNum&N ("
+count "ITA ALNUM"
+/bin/echo -n ") &T ("
+count "FIX ALNUM"
+/bin/echo -n ")&T? ("
+count "DEF ALNUM"
+/bin/echo ")\\\\"
+/bin/echo -n "\\tokenBar&M ("
+count "ITA BNBAR"
+/bin/echo -n ") &T ("
+count "FIX BNBAR"
+/bin/echo -n ")&M? ("
+count "DEF BNBAR"
+/bin/echo ")\\\\"
+/bin/echo -n "\\tokenMeta&M ("
+count "ITA METAS"
+/bin/echo -n ") &T ("
+count "FIX METAS"
+/bin/echo -n ")&T? ("
+count "DEF METAS"
+/bin/echo ")\\\\"
+/bin/echo -n "\\tokenOther&T ("
+count "ITA WEIRD"
+/bin/echo -n ") &T ("
+count "FIX WEIRD"
+/bin/echo -n ")&T ("
+count "DEF WEIRD"
+/bin/echo ")\\\\"
+/bin/echo "\hline\end{tabular}"
diff --git a/topics/extraction/html2bgf/html2bgf.py b/topics/extraction/html2bgf/html2bgf.py
@@ -7,6 +7,8 @@
 
 pp_mode = MODE_DEFAULT
 pp_outer = pp_mode
+verbose = False
+totalerrors = 0
 # pp_mode == MODE_ITALIC
 # pp_mode == MODE_FIXED
 
@@ -119,6 +121,7 @@ def traverse(c):
   return line+'</choice></bgf:expression>'
 
 def addProduction(name,choices,oneof):
+ global verbose
  bs = []
  if oneof:
   # concatenate all choices
@@ -129,24 +132,23 @@ def addProduction(name,choices,oneof):
   for s in range(0,len(choices)):
    ss = []
    for i in range(0,len(choices[s][0])):
-    """
-    if choices[s][1][i] == MODE_DEFAULT:
-     print 'DEF',
-    elif choices[s][1][i] == MODE_ITALIC:
-     print 'ITA',
-    elif choices[s][1][i] == MODE_FIXED:
-     print 'FIX',
-    else:
-     print 'UNK',
-    if choices[s][0][i].isalnum():
-     print 'ALNUM'
-    elif choices[s][0][i] == '|':
-     print 'BNBAR'
-    elif choices[s][0][i] in ('[',']','{','}','(',')','?????'):
-     print 'METAS'
-    else:
-     print 'WEIRD'
-    """
+    if verbose:
+     if choices[s][1][i] == MODE_DEFAULT:
+      print 'DEF',
+     elif choices[s][1][i] == MODE_ITALIC:
+      print 'ITA',
+     elif choices[s][1][i] == MODE_FIXED:
+      print 'FIX',
+     else:
+      print 'UNK',
+     if choices[s][0][i].isalnum():
+      print 'ALNUM'
+     elif choices[s][0][i] == '|':
+      print 'BNBAR'
+     elif choices[s][0][i] in ('[',']','{','}','(',')','?????'):
+      print 'METAS'
+     else:
+      print 'WEIRD'
     if choices[s][1][i] == MODE_FIXED:
      # terminal
      ss.append('"'+choices[s][0][i]+'"')
@@ -173,7 +175,7 @@ def addProduction(name,choices,oneof):
    bs.append(ss)
  if name in prods.keys():
   print 'Duplicate definition of',name,'found, will be merged.'
-  #pessimistic[2] += 1
+  pessimistic[2] += 1
   for c in bs:
    addifnew(c,name)
  else:
@@ -200,15 +202,6 @@ def structuralEq(arr1,arr2):
    return False
  return True
 
-def serialiseT(name,choices):
- line=name+' is defined as:\n'
- for b in choices:
-  line += '     '
-  for s in b:
-   line += s+' '
-  line += '\n'
- return line
-
 def addSpaces(line,symb):
  return line.replace(symb,' '+symb+' ')
 
@@ -232,6 +225,7 @@ def mapHTMLtoTokenStream(line):
   if line.find('</i>')==0:
    if pp_mode != MODE_ITALIC:
     print 'Style tag mismatch.'
+    pessimistic[1]+=1
    pp_mode = MODE_DEFAULT
    pp_outer = MODE_DEFAULT
    line = line[4:]
@@ -240,12 +234,14 @@ def mapHTMLtoTokenStream(line):
    #if pp_mode == MODE_ITALIC:
    if pp_mode != MODE_DEFAULT:
     print 'Style tag mismatch.'
+    pessimistic[1]+=1
    pp_mode = MODE_ITALIC
    line = line[3:]
    continue
   if line.find('</em>')==0:
    if pp_mode != MODE_ITALIC:
     print 'Style tag mismatch.'
+    pessimistic[1]+=1
    pp_mode = MODE_DEFAULT
    pp_outer = MODE_DEFAULT
    line = line[5:]
@@ -254,6 +250,7 @@ def mapHTMLtoTokenStream(line):
    #if pp_mode == MODE_ITALIC:
    if pp_mode != MODE_DEFAULT:
     print 'Style tag mismatch.'
+    pessimistic[1]+=1
    if (pp_mode == MODE_ITALIC) and tokens and oldline.find(tokens[-1]+'<em>'+line[4:line.index('>')])>=0:
     print 'Token-breaking <em> tag endangers',
     line = tokens.pop()+line[4:]
@@ -266,13 +263,15 @@ def mapHTMLtoTokenStream(line):
   if line.find('<code>')==0:
    if pp_mode == MODE_FIXED:
     print 'Style tag mismatch.'
+    pessimistic[1]+=1
    pp_outer = pp_mode
    pp_mode = MODE_FIXED
    line = line[6:]
    continue
   if line.find('</code>')==0:
    if pp_mode != MODE_FIXED:
     print 'Style tag mismatch.'
+    pessimistic[1]+=1
    pp_mode = pp_outer
    line = line[7:]
    continue
@@ -304,11 +303,11 @@ def mapHTMLtoTokenStream(line):
   if line.find('<a')==0:
    print 'Anchor found, skipping everything that is left of this snippet.'
    pessimistic[0] = True
-   pessimistic[1] += 1
+   #pessimistic[1] += 1
    continue
   if line.find('<')==0:
    print 'Style tag unknown: "'+line+'", skipping!'
-   pessimistic[2] += 1
+   pessimistic[1] += 1
    line = line[line.index('>')+1:]
   else:
    if line.find('<')>0:
@@ -399,7 +398,7 @@ def preprocessConstruct(fn):
      if (pp_mode != MODE_ITALIC) and line.find('</em>')<0 and line.find('</i>')<0 and line.find('<code>')<0:
       pp_mode = MODE_ITALIC
       print 'Style tag enforcing: virtual <em> when new definition of',name,'starts.'
-      pessimistic[2] += 1
+      pessimistic[1] += 1
     elif len(a)==4 and a[0]==a[2] and a[1]=='$$$$$' and a[-1]=='$$$$$':
      # new mingled definition
      if choices:
@@ -409,7 +408,7 @@ def preprocessConstruct(fn):
      name = a[0]
      oneof = False
      print name,'double-declared, fixed'
-     pessimistic[2] += 1
+     #pessimistic[2] += 1
     elif len(a)==4 and a[1]=='$$$$$' and a[2]=='one' and a[3]=='of':
      # new "one-of" definition
      if choices:
@@ -421,7 +420,7 @@ def preprocessConstruct(fn):
      # line continuation
      if countspaces(oldline)>countspaces(line):
       print 'Line continuation enforced while parsing',name,'- indentation went from',countspaces(oldline),'to 0'
-      pessimistic[2] += 1
+      pessimistic[1] += 1
      for i in range(0,len(a)):
       choices[-1][0].append(a[i])
       choices[-1][1].append(b[i])
@@ -432,8 +431,8 @@ def preprocessConstruct(fn):
    else:
     oldline=line=''
  src.close()
- if pessimistic[1]:
-  print 'Skipped',pessimistic[1],'anchor-containing snippets'
+ #if pessimistic[1]:
+ # print 'Skipped',pessimistic[1],'anchor-containing snippets'
 
 def countspaces(s):
  olds = s
@@ -447,12 +446,6 @@ def countspaces(s):
   s=s[1:]
  return cx
 
-def printGrammarText(fn):
- ext = open(fn,'w')
- for nt in prods.keys():
-  ext.write(serialiseT(nt,prods[nt]))
- ext.close()
-
 def printGrammar(fn):
  ext = open(fn,'w')
  ext.write('<bgf:grammar xmlns:bgf="http://planet-sl.org/bgf">')
@@ -792,23 +785,22 @@ def fixBracketPair(nt,arr,left,right):
  print 'HTML to Grammar automated extractor'
  if len(sys.argv)==3 or len(sys.argv)==4:
   print 'Reading the HTML document...'
+  if sys.argv[-1]=='-v':
+   verbose = True
   preprocessConstruct(sys.argv[1])
   print 'Massaging the grammar...'
   glueSymbols()
   preprocessCorrect()
   killDuplicates()
   print 'Writing the extracted grammar...'
-  if sys.argv[-1]=='-bnf':
-   printGrammarText(sys.argv[2])
-  else:
-   printGrammar(sys.argv[2])
+  printGrammar(sys.argv[2])
   if pessimistic[2]:
    print 'Total of',pessimistic[2]+pessimistic[1],'problems encountered and coped with.'
  else:
   print 'Usage:'
   print ' ',sys.argv[0],'''<input> <output> [<options>]
 
 Possible options:
-	-bnf			Outputs in EBNF rather then in BGF'''
+	-v			verbose mode (report the code of each token)'''
   sys.exit(1)
 
diff --git a/topics/java/Makefile b/topics/java/Makefile
@@ -1,8 +1,14 @@
 all:
 	@echo "Run 'make rebuild' if you want to renew the sources from their URLs."
 	@echo NOT recommended UNLESS you really know what you are doing!
+	@echo "Run 'make debug' to execute all extractors in debug mode"
 	@echo "Run 'make test' to execute all extractors and diff the results with LCI snapshot"
 
+debug:	# run each JLS extractor's debug target; '&&' skips make when cd fails
+	cd jls1 && $(MAKE) debug
+	cd jls2 && $(MAKE) debug
+	cd jls3 && $(MAKE) debug
+
 test:
 	cd jls1 ; make all
 	cd jls2 ; make all

diff --git a/topics/java/jls1/Makefile b/topics/java/jls1/Makefile
@@ -3,6 +3,12 @@ all:
 	make doc
 	make test
 
+debug:
+	python ../../extraction/html2bgf/getpre.py syntax.kw app.html parse.html
+	python ../../extraction/html2bgf/html2bgf.py parse.html app1.bgf -v
+	python ../../extraction/html2bgf/getpre.py collect.kw doc.html parse.html 
+	python ../../extraction/html2bgf/html2bgf.py parse.html doc1.bgf -v
+
 app:
 	python ../../extraction/html2bgf/getpre.py syntax.kw app.html parse.html
 	python ../../extraction/html2bgf/html2bgf.py parse.html app1.bgf

diff --git a/topics/java/jls2/Makefile b/topics/java/jls2/Makefile
@@ -3,6 +3,11 @@ all:
 	make doc
 	make test
 
+debug:
+	python ../../extraction/html2bgf/html2bgf.py app.html app2.bgf -v
+	python ../../extraction/html2bgf/getpre.py collect.kw doc.html parse.html
+	python ../../extraction/html2bgf/html2bgf.py parse.html doc2.bgf -v
+
 app:
 	python ../../extraction/html2bgf/html2bgf.py app.html app2.bgf
 

diff --git a/topics/java/jls3/Makefile b/topics/java/jls3/Makefile
@@ -3,6 +3,11 @@ all:
 	make doc
 	make test
 
+debug:
+	python ../../extraction/html2bgf/html2bgf.py app.html app3.bgf -v
+	python ../../extraction/html2bgf/getpre.py collect.kw doc.html parse.html
+	python ../../extraction/html2bgf/html2bgf.py parse.html doc3.bgf -v
+
 app:
 	python ../../extraction/html2bgf/html2bgf.py app.html app3.bgf
 

diff --git a/topics/java/lci/Makefile b/topics/java/lci/Makefile
@@ -16,12 +16,13 @@ check:
 	ls -1  bgf/*.bgf  | xargs -n1 ${validator} bgf
 
 debug:
-	../../../shared/tools/html2bgf ../jls1/syntax.kw ../jls1/app.html 1.bnf -bnf >/dev/null
-	../../../shared/tools/html2bgf ../jls1/collect.kw ../jls1/doc.html q.bnf -bnf >/dev/null
-	../../../shared/tools/html2bgf ../jls2/app.html 2.bnf -bnf >/dev/null
-	../../../shared/tools/html2bgf ../jls2/collect.kw ../jls2/doc.html w.bnf -bnf >/dev/null
-	../../../shared/tools/html2bgf ../jls3/app.html 3.bnf -bnf >/dev/null
-	../../../shared/tools/html2bgf ../jls3/collect.kw ../jls3/doc.html e.bnf -bnf >/dev/null
+	../../../shared/tools/bgf2bnf snapshot/app1.bgf 1.bnf
+	../../../shared/tools/bgf2bnf snapshot/app2.bgf 2.bnf
+	../../../shared/tools/bgf2bnf snapshot/app3.bgf 3.bnf
+	../../../shared/tools/bgf2bnf snapshot/doc1.bgf q.bnf
+	../../../shared/tools/bgf2bnf snapshot/doc2.bgf w.bnf
+	../../../shared/tools/bgf2bnf snapshot/doc3.bgf e.bnf
+
 clean:
 	rm -f *~
 	rm -f bgf/*