better line continuation detection

git-svn-id: https://slps.svn.sourceforge.net/svnroot/slps@250 ab42f6e0-554d-0410-b580-99e487e6eeb2
grammarware · Sep 10, 2008 · 7530814 · 7530814
1 parent 42880f9
commit 7530814
Showing 1 changed file with 12 additions and 1 deletion.
diff --git a/topics/extraction/html2bgf/html2bgf.py b/topics/extraction/html2bgf/html2bgf.py
@@ -178,6 +178,17 @@ def cleanup(line):
  return line.replace('<!-- </i> -->','').replace('        ','\t')
  #.replace('<code>','"').replace('</code>','"')
 
+def ifContinuation(s):  # True iff s, after skipping any leading <...> tags, starts with a character other than tab or space
+ if not s:  # empty string (also reached when s consisted only of tags): not a continuation
+  return False
+ if s[0]=='\t':  # starts with a tab: not a continuation
+  return False
+ if s[0]==' ':  # starts with a space: not a continuation
+  return False
+ if s[0]=='<':  # starts with a tag: drop everything through the first '>' and re-check the remainder
+  return ifContinuation(s[s.index('>')+1:])  # NOTE(review): s.index raises ValueError if no '>' follows the '<'
+ return True
+
 def readGrammar(fn):
  src = open(fn,'r')
  grammar = False
@@ -193,7 +204,7 @@ def readGrammar(fn):
    grammar = not grammar
    continue
   if grammar:
-   cont = line[0] not in ('\t','<',' ')
+   cont = ifContinuation(line)
    line = preprocess(cleanup(line))
    #print 'Parsing "'+line+'"...'
    a,b=parseLine(line)