-
Notifications
You must be signed in to change notification settings - Fork 7
/
Copy pathPSPParser.py
333 lines (298 loc) · 12.5 KB
/
PSPParser.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
"""The PSP parser.
This module handles the actual reading of the characters in the source
PSP file and checking it for valid psp tokens. When it finds one,
it calls ParseEventHandler with the characters it found.
Copyright (c) by Jay Love, 2000 (mailto:jsliv@jslove.org)
Permission to use, copy, modify, and distribute this software and its
documentation for any purpose and without fee or royalty is hereby granted,
provided that the above copyright notice appear in all copies and that
both that copyright notice and this permission notice appear in
supporting documentation or portions thereof, including modifications,
that you make.
This software is based in part on work done by the Jakarta group.
"""
from io import StringIO
from .PSPUtils import checkAttributes, PSPParserException
# Checker methods in the order they were registered; filled by ``checker``.
checklist = []


def checker(method):
    """Register a callable as a checker and return it unchanged.

    Intended for use as a decorator: the decorated callable is appended
    to the module-level ``checklist`` and then handed back as is, so the
    decorated name still refers to the original callable.
    """
    checklist.append(method)
    return method
class PSPParser:
    """The main PSP parser class.

    The PSPParser class does the actual sniffing through the input file
    looking for anything we're interested in. It works with a PSPReader
    object, which handles the current location in the source. At each
    position it calls the checker methods in ``checklist`` one after the
    other, asking each if it recognizes the upcoming characters as its
    kind of input.

    A checker that wants the input gobbles it up, notifies the event
    handler and returns true; the reader cursor is then positioned past
    the end of the block that the checker accepted. Input that no checker
    accepts is collected in ``cout`` as plain character data (HTML) and
    passed on to the handler by ``flushCharData()``.
    """

    checklist = checklist  # global list of checker methods

    def __init__(self, ctxt):
        """Initialize the parser from the given PSP context.

        ``ctxt`` must provide ``getReader()`` and ``getServletWriter()``.
        An event handler must be set with ``setEventHandler()`` before
        parsing, because the checkers and ``flushCharData()`` call
        methods on it.
        """
        self._reader = ctxt.getReader()
        self._writer = ctxt.getServletWriter()
        self._handler = None
        self.cout = StringIO()  # for dumping HTML that none of the check wants
        self.tmplStart = None  # marks the start of HTML code
        self.tmplStop = None  # marks the end of HTML code
        self.currentFile = self._reader.mark().getFile()

    def setEventHandler(self, handler):
        """Set the handler this parser will use when it finds PSP code."""
        self._handler = handler

    def flushCharData(self, start, stop):
        """Dump everything to the char data handler.

        Dump all the HTML that we've accumulated over to the character data
        handler in the event handler object, then start a fresh buffer.
        The handler is only called if some data has been accumulated.

        ``start`` and ``stop`` are the reader marks delimiting the data;
        they are passed through to ``handleCharData`` unchanged.
        """
        data = self.cout.getvalue()
        self.cout.close()
        if data:  # make sure there's something there
            self._handler.handleCharData(start, stop, data)
        # the old buffer has been closed, so always begin a new one
        self.cout = StringIO()

    @checker
    def commentCheck(self, _handler, reader):
        """Comments just get eaten.

        Recognizes ``<%-- ... --%>``. The comment text itself is dropped,
        but character data accumulated so far is flushed to the handler.

        Returns true if a comment was consumed, false otherwise.
        Raises PSPParserException if the comment is not terminated.
        """
        if reader.matches('<%--'):
            reader.advance(4)
            if reader.skipUntil('--%>') is None:
                raise PSPParserException('Comment not terminated')
            self.flushCharData(self.tmplStart, self.tmplStop)
            return True
        return False

    @checker
    def checkExpression(self, handler, reader):
        """Look for "expressions" and handle them.

        Recognizes ``<%= ... %>`` and passes the marks around the
        expression text (after leading spaces) to ``handleExpression``.

        Returns true if an expression was consumed, false otherwise.
        Raises PSPParserException if the expression is not terminated.
        """
        if not reader.matches('<%='):
            return False
        reader.advance(3)  # eat the opening tag
        # NOTE(review): the result is discarded here; kept because the
        # call may matter to the reader -- confirm before removing.
        reader.peekChar()
        reader.skipSpaces()
        start = reader.mark()
        stop = reader.skipUntil('%>')
        if stop is None:
            raise PSPParserException('Expression not terminated')
        handler.setTemplateInfo(self.tmplStart, self.tmplStop)
        handler.handleExpression(start, stop, None)
        return True

    @checker
    def checkDirective(self, handler, reader):
        """Check for directives; for now we support only page and include.

        Recognizes ``<%@ name attr="value" ... %>``. The ``taglib``
        directive is recognized by name, but rejected as not implemented.

        Returns true if a directive was consumed, false otherwise.
        Raises PSPParserException for an unknown or unimplemented
        directive or if the closing ``%>`` is missing.
        """
        if not reader.matches('<%@'):
            return False
        start = reader.mark()
        reader.advance(3)
        reader.skipSpaces()
        for directive in ('page', 'include', 'taglib'):
            if reader.matches(directive):
                match = directive
                break
        else:  # none of the known directive names followed
            raise PSPParserException('Invalid directive')
        reader.advance(len(match))
        # parse the directive attr:val pair dictionary
        attrs = reader.parseTagAttributes()
        # The last argument is presumably (required, optional) attribute
        # names -- see checkAttributes in PSPUtils.
        if match == 'page':
            checkAttributes('Page directive', attrs, ([], {
                'imports', 'extends', 'method',
                'isThreadSafe', 'isInstanceSafe',
                'indentType', 'indentSpaces',
                'gobbleWhitespace', 'formatter'}))
        elif match == 'include':
            checkAttributes('Include directive', attrs, (['file'], []))
        else:
            raise PSPParserException(f'{match} directive not implemented')
        reader.skipSpaces()  # skip to where we expect a close tag
        if reader.matches('%>'):
            reader.advance(2)  # advance past it
        else:
            raise PSPParserException('Directive not terminated')
        stop = reader.mark()
        handler.setTemplateInfo(self.tmplStart, self.tmplStop)
        handler.handleDirective(match, start, stop, attrs)
        return True

    @checker
    def checkEndBlock(self, handler, reader):
        """Check for the end of a block.

        Recognizes ``<% end %>`` and also ``<% end $%>``; the latter is
        handled the same way, but an informational message is printed.

        Returns true if an end tag was consumed. Otherwise the reader
        is reset to where it was on entry and false is returned.
        """
        start = reader.mark()
        if reader.matches('<%'):
            reader.advance(2)
            reader.skipSpaces()
            if reader.matches('end'):
                reader.advance(3)
                reader.skipSpaces()
                if reader.matches('%>'):
                    reader.advance(2)
                    handler.setTemplateInfo(self.tmplStart, self.tmplStop)
                    handler.handleEndBlock()
                    return True
                if reader.matches('$%>'):
                    reader.advance(3)
                    handler.setTemplateInfo(self.tmplStart, self.tmplStop)
                    handler.handleEndBlock()
                    print('INFO: A $ at the end of an end tag does nothing.')
                    return True
        # that wasn't it; undo any partial advance so that the
        # following checkers see the input from the same position
        reader.reset(start)
        return False

    @checker
    def checkScript(self, handler, reader):
        """The main thing we're after. Check for embedded scripts.

        Recognizes ``<% ... %>`` and passes the marks around the script
        text to ``handleScript``.

        Returns true if a script was consumed, false otherwise.
        Raises PSPParserException if the script is not terminated, and
        re-raises an EOFError from the reader with a clearer message.
        """
        if not reader.matches('<%'):
            return False
        reader.advance(2)
        # don't skip as spaces may be significant; leave this for the generator
        start = reader.mark()
        try:
            stop = reader.skipUntil('%>')
        except EOFError as e:
            raise EOFError(
                "Reached EOF while looking for ending script tag") from e
        if stop is None:
            raise PSPParserException('Script not terminated')
        handler.setTemplateInfo(self.tmplStart, self.tmplStop)
        handler.handleScript(start, stop, None)
        return True

    @checker
    def checkScriptFile(self, handler, reader):
        """Check for file level code.

        Check for Python code that must go to the top of the generated module::

            <psp:file>
                import xyz
                print('hi Mome!')
                def foo(): return 'foo'
            </psp:file>

        Returns true if such a block was consumed, false otherwise.
        Raises PSPParserException if the block is not terminated, and
        re-raises an EOFError from the reader with a clearer message.
        """
        if not reader.matches('<psp:file>'):
            return False
        reader.advance(10)
        start = reader.mark()
        try:
            stop = reader.skipUntil('</psp:file>')
            if stop is None:
                raise PSPParserException(
                    'Script not terminated in <psp:file> block')
        except EOFError as e:
            raise EOFError(
                'Reached EOF while looking for'
                ' ending script tag </psp:file>') from e
        handler.setTemplateInfo(self.tmplStart, self.tmplStop)
        handler.handleScriptFile(start, stop, None)
        return True

    @checker
    def checkScriptClass(self, handler, reader):
        """Check for class level code.

        Check for Python code that should go in the class definition::

            <psp:class>
                def foo(self):
                    return self.dosomething()
            </psp:class>

        Returns true if such a block was consumed, false otherwise.
        Raises PSPParserException if the block is not terminated, and
        re-raises an EOFError from the reader with a clearer message.
        """
        if not reader.matches('<psp:class>'):
            return False
        reader.advance(11)
        start = reader.mark()
        try:
            stop = reader.skipUntil('</psp:class>')
            if stop is None:
                raise PSPParserException(
                    'Script not terminated in <psp:class> block')
        except EOFError as e:
            raise EOFError(
                'Reached EOF while looking for'
                ' ending script tag </psp:class>') from e
        handler.setTemplateInfo(self.tmplStart, self.tmplStop)
        handler.handleScriptClass(start, stop, None)
        return True

    @checker
    def checkMethod(self, handler, reader):
        """Check for class methods defined in the page.

        We only support one format for these,
        ``<psp:method name="xxx" params="xxx,xxx">``
        Then the function body, then </psp:method>.

        The opening tag is reported with ``handleMethod`` and the body
        with ``handleMethodEnd``.

        Returns true if a method was consumed, false otherwise.
        Raises PSPParserException if the opening tag is not closed
        or if the closing ``</psp:method>`` tag is missing.
        """
        if not reader.matches('<psp:method'):
            return False
        start = reader.mark()
        reader.advance(11)
        attrs = reader.parseTagAttributes()
        checkAttributes('method', attrs, (['name'], ['params']))
        reader.skipSpaces()
        if not reader.matches('>'):
            raise PSPParserException('Expected method declaration close')
        reader.advance(1)
        stop = reader.mark()
        handler.setTemplateInfo(self.tmplStart, self.tmplStop)
        handler.handleMethod(start, stop, attrs)
        start = stop
        # skip past the close marker, return the point before the close marker
        stop = reader.skipUntil('</psp:method>')
        if stop is None:
            # Same termination check as in the other checkers, so that
            # the handler is never given a missing end mark.
            raise PSPParserException('Method not terminated')
        handler.handleMethodEnd(start, stop, attrs)
        return True

    @checker
    def checkInclude(self, handler, reader):
        """Check for inserting another pages output in this spot.

        Recognizes ``<psp:include path="...">``.

        Returns true if an include tag was consumed, false otherwise.
        Raises PSPParserException if the tag is not closed with ``>``
        right after its attributes.
        """
        if not reader.matches('<psp:include'):
            return False
        reader.advance(12)
        reader.skipSpaces()
        attrs = reader.parseTagAttributes()
        checkAttributes('include', attrs, (['path'], []))
        reader.skipSpaces()
        if not reader.matches('>'):
            raise PSPParserException('Include bodies not implemented')
        reader.advance(1)
        handler.setTemplateInfo(self.tmplStart, self.tmplStop)
        handler.handleInclude(attrs, None)
        return True

    @checker
    def checkInsert(self, handler, reader):
        """Check for straight character dumps.

        No big hurry for this. It's almost the same as the page include
        directive. This is only a partial implementation of what JSP does.
        JSP can pull it from another server, servlet, JSP page, etc.

        Recognizes ``<psp:insert file="...">``.

        Returns true if an insert tag was consumed, false otherwise.
        Raises PSPParserException if the tag is not closed with ``>``
        right after its attributes.
        """
        if not reader.matches('<psp:insert'):
            return False
        reader.advance(11)
        reader.skipSpaces()
        attrs = reader.parseTagAttributes()
        checkAttributes('insert', attrs, (['file'], []))
        reader.skipSpaces()
        if not reader.matches('>'):
            raise PSPParserException('Insert bodies not implemented')
        reader.advance(1)
        handler.setTemplateInfo(self.tmplStart, self.tmplStop)
        handler.handleInsert(attrs, None)
        return True

    def parse(self, until=None):
        """Parse the PSP file.

        Runs the checkers over the input until the reader has no more
        input, collecting everything that no checker accepts as character
        data. If ``until`` is given, parsing stops as soon as the reader
        matches that string; in this case the collected character data
        is not flushed here.
        """
        reader = self._reader
        handler = self._handler
        # true while we are inside a run of plain character data,
        # so that tmplStart is only set at the beginning of such a run
        noPspElement = False
        while reader.hasMoreInput():
            # This is for XML style blocks, which we're not handling yet:
            if until and reader.matches(until):
                return
            # If the file the reader is working on has changed due to
            # a push or pop, flush any char data from the old file:
            if reader.mark().getFile() != self.currentFile:
                self.flushCharData(self.tmplStart, self.tmplStop)
                self.currentFile = reader.mark().getFile()
                self.tmplStart = reader.mark()
            for checkFunc in self.checklist:
                # the checklist holds plain functions, so pass self explicitly
                if checkFunc(self, handler, reader):
                    noPspElement = False
                    break
            else:  # no checker accepted the input at this position
                if not noPspElement:
                    self.tmplStart = reader.mark()
                    noPspElement = True
                s = reader.nextContent()  # skip till the next possible tag
                self.tmplStop = reader.mark()  # mark the end of HTML data
                self.cout.write(s)  # write out the raw HTML data
        self.flushCharData(self.tmplStart, self.tmplStop)  # dump the rest