From d55aee9acb3fd8549ab148d3c03262ce46b548dd Mon Sep 17 00:00:00 2001
From: grammarware <vadim@grammarware.net>
Date: Mon, 20 Jun 2011 15:03:13 +0000
Subject: [PATCH] new Hunter feature: fetch grammar chunks by delimiters

git-svn-id: https://slps.svn.sourceforge.net/svnroot/slps@1099 ab42f6e0-554d-0410-b580-99e487e6eeb2
---
 topics/recovery/hunter/hunter.py          | 27 +++++++++++++++++++++++
 topics/recovery/hunter/tests/0-chunks.bgf | 21 ++++++++++++++++++
 topics/recovery/hunter/tests/0-chunks.edd | 10 +++++++++
 topics/recovery/hunter/tests/0-chunks.src | 11 +++++++++
 4 files changed, 69 insertions(+)
 create mode 100644 topics/recovery/hunter/tests/0-chunks.bgf
 create mode 100644 topics/recovery/hunter/tests/0-chunks.edd
 create mode 100644 topics/recovery/hunter/tests/0-chunks.src
diff --git a/topics/recovery/hunter/hunter.py b/topics/recovery/hunter/hunter.py
index cacb90aa..35d5d17f 100755
--- a/topics/recovery/hunter/hunter.py
+++ b/topics/recovery/hunter/hunter.py
@@ -1215,6 +1215,24 @@ def t2nt(tokens,check):
 			else x
 				for x in tokens]
 
+def processLine(line,inside,chunks):  # Feed one input line to the chunk extractor; returns (line, inside, chunks), appending to chunks in place.
+	if inside:  # inside == True: a start delimiter was seen and no end delimiter has closed it yet
+		if line.find(config['end-grammar-symbol'])>-1:  # config is defined outside this block (module-level mapping, presumably)
+			inside = False
+			line = line[:line.index(config['end-grammar-symbol'])]  # keep text before the first end delimiter; NOTE(review): the tail after it is dropped, so a second chunk on the same line is never seen
+			if line.strip() != '':
+				line,inside,chunks = processLine(line,True,chunks)  # non-blank remainder: recurse as "inside" so it gets appended to chunks
+				return line,False,chunks  # the recursion reports inside=True; override it because this chunk is now closed
+		else:
+			chunks.append(line)  # no end delimiter here: the whole line (as passed in) is chunk content
+	else:  # outside a chunk: lines are ignored until a start delimiter shows up
+		if line.find(config['start-grammar-symbol'])>-1:
+			inside = True
+			line = line[line.index(config['start-grammar-symbol'])+len(config['start-grammar-symbol']):]  # drop everything up to and including the first start delimiter
+			if line.strip() != '':
+				return processLine(line,inside,chunks)  # rest of the line is already chunk content and may itself hold the end delimiter
+	return (line,inside,chunks)  # fall-through: line is returned possibly truncated; blank remainders are not appended
+
 if __name__ == "__main__":
 	if len(sys.argv) != 4:
 		print('Usage:')
@@ -1230,6 +1248,15 @@ def t2nt(tokens,check):
 	print('STEP 0: reading the input file.')
 	lines = f.readlines()
 	f.close()
+	if 'start-grammar-symbol' in config.keys() and 'end-grammar-symbol' in config.keys():
+		chunks = []
+		inside = False
+		for line in lines:
+			(line,inside,chunks) = processLine(line,inside,chunks)
+		lines = chunks
+		print('STEP 0 found',len(lines),'in grammar chunks between designated delimiters.')
+		if debug:
+			print('Perceived lines:',lines)
 	if 'line-continuation-symbol' in config.keys():
 		if 'concatenate-symbol' in config.keys():
 			sep = config['concatenate-symbol']
diff --git a/topics/recovery/hunter/tests/0-chunks.bgf b/topics/recovery/hunter/tests/0-chunks.bgf
new file mode 100644
index 00000000..fba5ce6a
--- /dev/null
+++ b/topics/recovery/hunter/tests/0-chunks.bgf
@@ -0,0 +1,21 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<bgf:grammar xmlns:bgf="http://planet-sl.org/bgf">
+	<bgf:production>
+		<nonterminal>foo</nonterminal>
+		<bgf:expression>
+			<nonterminal>bar</nonterminal>
+		</bgf:expression>
+	</bgf:production>
+	<bgf:production>
+		<nonterminal>bar</nonterminal>
+		<bgf:expression>
+			<nonterminal>wez</nonterminal>
+		</bgf:expression>
+	</bgf:production>
+	<bgf:production>
+		<nonterminal>wez</nonterminal>
+		<bgf:expression>
+			<nonterminal>foo</nonterminal>
+		</bgf:expression>
+	</bgf:production>
+</bgf:grammar>
diff --git a/topics/recovery/hunter/tests/0-chunks.edd b/topics/recovery/hunter/tests/0-chunks.edd
new file mode 100644
index 00000000..22483e45
--- /dev/null
+++ b/topics/recovery/hunter/tests/0-chunks.edd
@@ -0,0 +1,10 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<edd:config xmlns:edd="http://planet-sl.org/edd">
+	<start-grammar-symbol>&lt;grammar&gt;</start-grammar-symbol>
+	<end-grammar-symbol>&lt;/grammar&gt;</end-grammar-symbol>
+	<defining-symbol>:</defining-symbol>
+	<terminator-symbol>.</terminator-symbol>
+	<ignore>
+		<newline/>
+	</ignore>
+</edd:config>
diff --git a/topics/recovery/hunter/tests/0-chunks.src b/topics/recovery/hunter/tests/0-chunks.src
new file mode 100644
index 00000000..1411cf89
--- /dev/null
+++ b/topics/recovery/hunter/tests/0-chunks.src
@@ -0,0 +1,11 @@
+The following is the grammar of foo:
+<grammar>
+foo : bar.
+bar : wez.
+</grammar>
+
+The following is also a grammar, but not of foo
+
+<grammar>wez : foo.</grammar>
+
+This is some irrelevant text.
\ No newline at end of file