Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
general preprocessor; LCI advancements; much smarter extractor
git-svn-id: https://slps.svn.sourceforge.net/svnroot/slps@257 ab42f6e0-554d-0410-b580-99e487e6eeb2
- Loading branch information
1 parent
1799766
commit 35e33d4
Showing
8 changed files
with
221 additions
and
39 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,13 +1,49 @@ | ||
#!/usr/bin/python | ||
import sys | ||
|
||
# NOTE(review): these ten lines look like the pre-change version of the script
# (the hunk header lists 13 old lines); confirm against the repository.
# Reads the file named by the first command-line argument, splits its text on
# '<pre>', and waits for a piece that contains '19.2' at an index greater than
# zero. That piece only switches the `grammar` flag on; every later piece is
# printed up to its '</pre>', with '<br>' removed.
# NOTE(review): as rendered here the last replace() maps a space to a space;
# the first argument was probably an entity or non-breaking space lost in
# extraction; verify before relying on it.
# Python 2 syntax (print statement). Indentation was lost in this rendering.
f = open(sys.argv[1],'r') | ||
grammar = False | ||
print '<pre>' | ||
for chunk in ''.join(f.readlines()).split('<pre>'): | ||
if chunk.find('19.2')>0: | ||
grammar = True | ||
elif grammar: | ||
print chunk.split('</pre>')[0].replace('<br>','').replace(' ',' ') | ||
print '</pre>' | ||
f.close() | ||
# Keyword lists read by checkSection; they are filled from the command-line
# arguments further down (positive keywords in `yes`, negative ones in `no`).
yes = [] | ||
no = [] | ||
|
||
# checkSection(text, tagN, includeFlag)
#   text        - HTML text to scan.
#   tagN        - heading level; the text is split on '<h' + repr(tagN) + '>'.
#   includeFlag - initial include/exclude state for every section at this level.
# Prints (does not return) the contents of <pre> elements found in included
# sections, and calls itself with tagN+1 for sections that contain further '<h'.
# Python 2 syntax: backticks are repr(), print is a statement.
# NOTE(review): indentation was lost in this rendering, so the nesting of the
# print '<hr>' lines and of the recursive call cannot be read off the text.
def checkSection(text,tagN,includeFlag): | ||
# [1:] skips whatever precedes the first heading of this level.
for chapter in text.split('<h'+`tagN`+'>')[1:]: | ||
grammar = includeFlag | ||
# content[0] is the heading title, content[1] the text after the closing tag.
# A heading with no closing tag gives a one-element list, so content[1]
# raises IndexError.
content = chapter.split('</h'+`tagN`+'>') | ||
# A positive keyword in the title switches the section on.
for kw in yes: | ||
if content[0].find(kw)>=0: | ||
grammar = True | ||
# Negative keywords are checked afterwards, so they override positive ones.
for kw in no: | ||
if content[0].find(kw)>=0: | ||
grammar = False | ||
# Included section with no '<h' substring in its body: dump its <pre> chunks.
if grammar and content[1].find('<h')==-1: | ||
for chunk in content[1].split('<pre>')[1:]: | ||
# NOTE(review): the last replace() reads as space -> space here; the first
# argument was probably an entity or non-breaking space lost in extraction.
print chunk.split('</pre>')[0].replace('<br>','').replace(' ',' ') | ||
print '<hr>' | ||
else: | ||
#print 'Going deeper than',content[0].split()[0] | ||
# If included, first dump the <pre> chunks that come before the first
# heading of the next level.
if grammar: | ||
for chunk in content[1].split('<h'+`tagN+1`+'>')[0].split('<pre>')[1:]: | ||
print chunk.split('</pre>')[0].replace('<br>','').replace(' ',' ') | ||
print '<hr>' | ||
# Recurse one heading level deeper; the current state becomes the default.
# NOTE(review): presumably this call sits outside the `if grammar` above; confirm.
checkSection(content[1],tagN+1,grammar) | ||
|
||
# Command-line driver: with no arguments print the usage text, otherwise sort
# the arguments into the `yes` / `no` keyword lists and run checkSection on
# standard input, starting at heading level 1 with sections excluded by default.
# NOTE(review): indentation was lost in this rendering; the three statements
# after the keyword loop presumably belong to the outer else branch; confirm.
if len(sys.argv)<2: | ||
print '''This tool simulates a particular XPath query that it can execute upon a badly composed HTML. | ||
Usage:
python xpathpre.py keyword [keyword ...] <input >output
It will read the input, looking for sections (<h?>) that contain keywords in the title.
Once found, it will output the content of all <pre> tags from such sections.
Keywords can be positive or negative, with positive being default.''' | ||
else: | ||
# kw[0] raises IndexError for an empty-string argument.
for kw in sys.argv[1:]: | ||
# Leading '-' marks a negative keyword; the marker itself is dropped.
if kw[0]=='-': | ||
no.append(kw[1:]) | ||
# Leading '+' marks an explicit positive keyword; the marker is dropped.
elif kw[0]=='+': | ||
yes.append(kw[1:]) | ||
# Unmarked keywords are positive.
else: | ||
yes.append(kw) | ||
# The extracted chunks are wrapped in a single <pre> element.
print '<pre>' | ||
checkSection(''.join(sys.stdin.readlines()),1,False) | ||
print '</pre>' | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,14 @@ | ||
<xbgf:sequence | ||
xmlns:bgf="http://planet-sl.org/bgf" | ||
xmlns:xbgf="http://planet-sl.org/xbgf"> | ||
|
||
<!-- Single step: an xbgf:add holding one production for the nonterminal
     Modifier whose expression is the terminal "strictfp".
     NOTE(review): presumably this adds the production to the grammar being
     transformed; confirm against the XBGF definition. -->
<xbgf:add> | ||
<bgf:production> | ||
<nonterminal>Modifier</nonterminal> | ||
<bgf:expression> | ||
<terminal>strictfp</terminal> | ||
</bgf:expression> | ||
</bgf:production> | ||
</xbgf:add> | ||
|
||
</xbgf:sequence> |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters