From d55aee9acb3fd8549ab148d3c03262ce46b548dd Mon Sep 17 00:00:00 2001 From: grammarware Date: Mon, 20 Jun 2011 15:03:13 +0000 Subject: [PATCH] new Hunter feature: fetch grammar chunks by delimiters git-svn-id: https://slps.svn.sourceforge.net/svnroot/slps@1099 ab42f6e0-554d-0410-b580-99e487e6eeb2 --- topics/recovery/hunter/hunter.py | 27 +++++++++++++++++++++++ topics/recovery/hunter/tests/0-chunks.bgf | 21 ++++++++++++++++++ topics/recovery/hunter/tests/0-chunks.edd | 10 +++++++++ topics/recovery/hunter/tests/0-chunks.src | 11 +++++++++ 4 files changed, 69 insertions(+) create mode 100644 topics/recovery/hunter/tests/0-chunks.bgf create mode 100644 topics/recovery/hunter/tests/0-chunks.edd create mode 100644 topics/recovery/hunter/tests/0-chunks.src diff --git a/topics/recovery/hunter/hunter.py b/topics/recovery/hunter/hunter.py index cacb90aa..35d5d17f 100755 --- a/topics/recovery/hunter/hunter.py +++ b/topics/recovery/hunter/hunter.py @@ -1215,6 +1215,24 @@ def t2nt(tokens,check): else x for x in tokens] +def processLine(line,inside,chunks): + if inside: + if line.find(config['end-grammar-symbol'])>-1: + inside = False + line = line[:line.index(config['end-grammar-symbol'])] + if line.strip() != '': + line,inside,chunks = processLine(line,True,chunks) + return line,False,chunks + else: + chunks.append(line) + else: + if line.find(config['start-grammar-symbol'])>-1: + inside = True + line = line[line.index(config['start-grammar-symbol'])+len(config['start-grammar-symbol']):] + if line.strip() != '': + return processLine(line,inside,chunks) + return (line,inside,chunks) + if __name__ == "__main__": if len(sys.argv) != 4: print('Usage:') @@ -1230,6 +1248,15 @@ def t2nt(tokens,check): print('STEP 0: reading the input file.') lines = f.readlines() f.close() + if 'start-grammar-symbol' in config.keys() and 'end-grammar-symbol' in config.keys(): + chunks = [] + inside = False + for line in lines: + (line,inside,chunks) = processLine(line,inside,chunks) + lines = chunks + print('STEP 0 found',len(lines),'in grammar chunks between designated delimiters.') + if debug: + print('Perceived lines:',lines) if 'line-continuation-symbol' in config.keys(): if 'concatenate-symbol' in config.keys(): sep = config['concatenate-symbol'] diff --git a/topics/recovery/hunter/tests/0-chunks.bgf b/topics/recovery/hunter/tests/0-chunks.bgf new file mode 100644 index 00000000..fba5ce6a --- /dev/null +++ b/topics/recovery/hunter/tests/0-chunks.bgf @@ -0,0 +1,21 @@ + + + + foo + + bar + + + + bar + + wez + + + + wez + + foo + + + diff --git a/topics/recovery/hunter/tests/0-chunks.edd b/topics/recovery/hunter/tests/0-chunks.edd new file mode 100644 index 00000000..22483e45 --- /dev/null +++ b/topics/recovery/hunter/tests/0-chunks.edd @@ -0,0 +1,10 @@ + + + <grammar> + </grammar> + : + . + + + + diff --git a/topics/recovery/hunter/tests/0-chunks.src b/topics/recovery/hunter/tests/0-chunks.src new file mode 100644 index 00000000..1411cf89 --- /dev/null +++ b/topics/recovery/hunter/tests/0-chunks.src @@ -0,0 +1,11 @@ +The following is the grammar of foo: + +foo : bar. +bar : wez. + + +The following is also a grammar, but not of foo + +wez : foo. + +This is some irrelevant text. \ No newline at end of file