-
Notifications
You must be signed in to change notification settings - Fork 0
/
TextNormalizeFilter.py
63 lines (50 loc) · 1.65 KB
/
TextNormalizeFilter.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
'''
Created on 02-08-2013
@author: http://code.activestate.com/recipes/265881/
'''
from xml.sax.saxutils import XMLFilterBase
class text_normalize_filter(XMLFilterBase):
    """
    SAX filter to ensure that contiguous text and white space nodes are
    delivered merged into a single node.

    Text reported through ``characters`` and ``ignorableWhitespace`` is
    buffered; the buffered pieces are joined and sent to ``downstream`` as
    one ``characters`` call immediately before the next element start/end,
    processing instruction or comment event is forwarded.

    Only the events overridden below are routed to ``downstream``; any
    other event falls through to the ``XMLFilterBase`` defaults.
    NOTE(review): ``endDocument`` is not intercepted, so text still buffered
    when the document ends is not flushed here -- confirm that is acceptable
    for the callers of this filter.
    """

    def __init__(self, upstream, downstream):
        """
        upstream   -- parent reader/filter handed to ``XMLFilterBase``
        downstream -- handler object that receives the normalized events
        """
        # Explicit base-class call (not super()) keeps this usable where the
        # SAX base classes are old-style classes.
        XMLFilterBase.__init__(self, upstream)
        self._downstream = downstream
        # Pending text fragments, in arrival order.
        self._accumulator = []

    def _complete_text_node(self):
        """Deliver any buffered text downstream as one node and reset."""
        if self._accumulator:
            self._downstream.characters(''.join(self._accumulator))
            self._accumulator = []

    def startElement(self, name, attrs):
        """Flush pending text, then forward the element start."""
        self._complete_text_node()
        self._downstream.startElement(name, attrs)

    def startElementNS(self, name, qname, attrs):
        """Flush pending text, then forward the namespaced element start."""
        self._complete_text_node()
        self._downstream.startElementNS(name, qname, attrs)

    def endElement(self, name):
        """Flush pending text, then forward the element end."""
        self._complete_text_node()
        self._downstream.endElement(name)

    def endElementNS(self, name, qname):
        """Flush pending text, then forward the namespaced element end."""
        self._complete_text_node()
        self._downstream.endElementNS(name, qname)

    def processingInstruction(self, target, body):
        """Flush pending text, then forward the processing instruction."""
        self._complete_text_node()
        self._downstream.processingInstruction(target, body)

    def comment(self, body):
        """Flush pending text, then forward the comment.

        Requires ``downstream`` to provide a ``comment`` method.
        """
        self._complete_text_node()
        self._downstream.comment(body)

    def characters(self, text):
        """Buffer character data instead of forwarding it immediately."""
        self._accumulator.append(text)

    def ignorableWhitespace(self, ws):
        """Buffer ignorable white space alongside regular character data."""
        # Fixed: the original appended the undefined name ``text`` here,
        # raising NameError on the first ignorable-whitespace event.
        self._accumulator.append(ws)