Skip to content

Commit

Permalink
new Hunter feature: fetch grammar chunks by delimiters
Browse files Browse the repository at this point in the history
git-svn-id: https://slps.svn.sourceforge.net/svnroot/slps@1099 ab42f6e0-554d-0410-b580-99e487e6eeb2
  • Loading branch information
grammarware committed Jun 20, 2011
1 parent b71627c commit d55aee9
Show file tree
Hide file tree
Showing 4 changed files with 69 additions and 0 deletions.
27 changes: 27 additions & 0 deletions topics/recovery/hunter/hunter.py
Expand Up @@ -1215,6 +1215,24 @@ def t2nt(tokens,check):
else x
for x in tokens]

def processLine(line,inside,chunks):
    """Collect the parts of one input line that lie between grammar delimiters.

    The delimiters are read from the module-level ``config`` mapping under
    the keys 'start-grammar-symbol' and 'end-grammar-symbol'.

    line   -- the text to scan (a whole input line or the rest of one).
    inside -- True if scanning starts within a grammar chunk, i.e. a start
              delimiter has been seen without a matching end delimiter.
    chunks -- list that collected fragments are appended to (mutated in place).

    Returns a tuple (line, inside, chunks): the last fragment examined, the
    updated inside/outside state to pass along with the next line, and the
    same chunks list.

    Text that follows an end delimiter on the same line is scanned again when
    it contains another start delimiter, so several chunks on one line are
    all collected and a chunk opened at the end of a line is not missed.
    """
    start = config['start-grammar-symbol']
    end = config['end-grammar-symbol']
    if inside:
        end_at = line.find(end)
        if end_at == -1:
            # Still within a chunk: the whole line belongs to the grammar.
            chunks.append(line)
            return (line,inside,chunks)
        before = line[:end_at]
        after = line[end_at+len(end):]
        # Keep whatever precedes the end delimiter unless it is blank.
        if before.strip() != '':
            chunks.append(before)
        # Rescan the tail only if it opens another chunk. The non-empty
        # delimiter check guarantees the tail is strictly shorter than the
        # line, so the recursion always terminates.
        if end and start in after:
            return processLine(after,False,chunks)
        return (before,False,chunks)
    start_at = line.find(start)
    if start_at == -1:
        # Outside any chunk and nothing opens one here: ignore the line.
        return (line,inside,chunks)
    rest = line[start_at+len(start):]
    if rest.strip() != '':
        # Grammar text follows the start delimiter on the same line.
        return processLine(rest,True,chunks)
    return (rest,True,chunks)

if __name__ == "__main__":
if len(sys.argv) != 4:
print('Usage:')
Expand All @@ -1230,6 +1248,15 @@ def t2nt(tokens,check):
print('STEP 0: reading the input file.')
lines = f.readlines()
f.close()
if 'start-grammar-symbol' in config.keys() and 'end-grammar-symbol' in config.keys():
chunks = []
inside = False
for line in lines:
(line,inside,chunks) = processLine(line,inside,chunks)
lines = chunks
print('STEP 0 found',len(lines),'in grammar chunks between designated delimiters.')
if debug:
print('Perceived lines:',lines)
if 'line-continuation-symbol' in config.keys():
if 'concatenate-symbol' in config.keys():
sep = config['concatenate-symbol']
Expand Down
21 changes: 21 additions & 0 deletions topics/recovery/hunter/tests/0-chunks.bgf
@@ -0,0 +1,21 @@
<?xml version="1.0" encoding="UTF-8"?>
<bgf:grammar xmlns:bgf="http://planet-sl.org/bgf">
<bgf:production>
<nonterminal>foo</nonterminal>
<bgf:expression>
<nonterminal>bar</nonterminal>
</bgf:expression>
</bgf:production>
<bgf:production>
<nonterminal>bar</nonterminal>
<bgf:expression>
<nonterminal>wez</nonterminal>
</bgf:expression>
</bgf:production>
<bgf:production>
<nonterminal>wez</nonterminal>
<bgf:expression>
<nonterminal>foo</nonterminal>
</bgf:expression>
</bgf:production>
</bgf:grammar>
10 changes: 10 additions & 0 deletions topics/recovery/hunter/tests/0-chunks.edd
@@ -0,0 +1,10 @@
<?xml version="1.0" encoding="UTF-8"?>
<edd:config xmlns:edd="http://planet-sl.org/edd">
<start-grammar-symbol>&lt;grammar&gt;</start-grammar-symbol>
<end-grammar-symbol>&lt;/grammar&gt;</end-grammar-symbol>
<defining-symbol>:</defining-symbol>
<terminator-symbol>.</terminator-symbol>
<ignore>
<newline/>
</ignore>
</edd:config>
11 changes: 11 additions & 0 deletions topics/recovery/hunter/tests/0-chunks.src
@@ -0,0 +1,11 @@
The following is the grammar of foo:
<grammar>
foo : bar.
bar : wez.
</grammar>

The following is also a grammar, but not of foo

<grammar>wez : foo.</grammar>

This is some irrelevant text.

0 comments on commit d55aee9

Please sign in to comment.