This repository has been archived by the owner on Feb 3, 2021. It is now read-only.
/
Cursor-protoregex-peek.pir
290 lines (242 loc) · 7.8 KB
/
Cursor-protoregex-peek.pir
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
# Copyright (C) 2009, Patrick R. Michaud
=head1 NAME
Regex::Cursor-protoregex-peek - simple protoregex implementation
=head1 DESCRIPTION
=over 4
=item !protoregex(name)
Perform a match for protoregex C<name>.
=cut
.sub '!protoregex' :method
.param string name
self.'!cursor_debug'('START ', name)
.local pmc generation
generation = get_global '$!generation'
# Get the protoregex table for the current grammar. If
# a table doesn't exist or it's out of date, generate a
# new one.
.local pmc parrotclass, prototable
parrotclass = typeof self
prototable = getprop '%!prototable', parrotclass
if null prototable goto make_prototable
$P0 = getprop '$!generation', prototable
$I0 = issame $P0, generation
if $I0 goto have_prototable
make_prototable:
prototable = self.'!protoregex_gen_table'(parrotclass)
have_prototable:
# Obtain the toxrk and toklen hashes for the current grammar
# from the protoregex table. If they haven't been computed
# yet for this table, then do that now.
.local pmc tokrx, toklen
$S0 = concat name, '.tokrx'
tokrx = prototable[$S0]
$S0 = concat name, '.toklen'
toklen = prototable[$S0]
unless null tokrx goto have_tokrx
(tokrx, toklen) = self.'!protoregex_gen_tokrx'(prototable, name)
have_tokrx:
# If there are no entries at all for this protoregex, we fail outright.
unless tokrx goto fail
# Figure out where we are in the current match.
.local pmc target
.local int pos
target = getattribute self, '$!target'
$P1 = getattribute self, '$!pos'
pos = $P1
# Use the character at the current match position to determine
# the longest possible token we could encounter at this point.
.local string token
$S0 = substr target, pos, 1
$I0 = toklen[$S0]
token = substr target, pos, $I0
# Create a hash to keep track of the methods we've already called,
# so that we don't end up calling it twice.
.local pmc mcalled
mcalled = new ['Hash']
# Look in the tokrx hash for any rules that are keyed with the
# current token. If there aren't any, or the rules we have don't
# match, then shorten the token by one character and try again
# until we either have a match or we've run out of candidates.
token_loop:
self.'!cursor_debug'('TOKEN ', token)
.local pmc rx, result
rx = tokrx[token]
if null rx goto token_next
$I0 = isa rx, ['ResizablePMCArray']
if $I0 goto rx_array
.local int rxaddr
rxaddr = get_addr rx
result = mcalled[rxaddr]
unless null result goto token_next
result = self.rx()
mcalled[rxaddr] = mcalled
if result goto done
goto token_next
rx_array:
.local pmc rx_it
rx_it = iter rx
cand_loop:
unless rx_it goto cand_done
rx = shift rx_it
rxaddr = get_addr rx
result = mcalled[rxaddr]
unless null result goto token_next
result = self.rx()
mcalled[rxaddr] = mcalled
if result goto done
goto cand_loop
cand_done:
token_next:
unless token goto fail
chopn token, 1
goto token_loop
done:
pos = result.'pos'()
self.'!cursor_debug'('PASS ', name, ' at pos=', pos)
.return (result)
fail:
self.'!cursor_debug'('FAIL ', name)
.return (0)
.end
=item !protoregex_generation()
Set the C<$!generation> flag to indicate that protoregexes need to
be recalculated.
=cut
.sub '!protoregex_generation' :method
$P0 = get_global '$!generation'
# don't change this to 'inc' -- we want to ensure new PMC
$P1 = add $P0, 1
set_global '$!generation', $P1
.return ($P1)
.end
=item !protoregex_gen_table(parrotclass)
Generate a new protoregex table for C<parrotclass>. This involves
creating a hash keyed with method names containing ':sym<' from
C<parrotclass> and all of its superclasses. This new hash is
then given the current C<$!generate> property so we can avoid
recreating it.
The categorization of the protoregex candidate lists
for individual protoregexes is handled (lazily) by
C<!protoregex_gen_tokrx> below.
=cut
.sub '!protoregex_gen_table' :method
.param pmc parrotclass
.local pmc prototable
prototable = new ['Hash']
.local pmc class_it, method_it
$P0 = parrotclass.'inspect'('all_parents')
class_it = iter $P0
class_loop:
unless class_it goto class_done
$P0 = shift class_it
$P0 = $P0.'methods'()
method_it = iter $P0
method_loop:
unless method_it goto class_loop
$S0 = shift method_it
$I0 = index $S0, ':sym<'
if $I0 < 0 goto method_loop
prototable[$S0] = prototable
goto method_loop
class_done:
$P0 = get_global '$!generation'
setprop prototable, '$!generation', $P0
setprop parrotclass, '%!prototable', prototable
.return (prototable)
.end
=item !protoregex_gen_tokrx(prototable, name)
Generate this class' token list in prototable for the protoregex
called C<name>.
=cut
.sub '!protoregex_gen_tokrx' :method
.param pmc prototable
.param string name
self.'!cursor_debug'('Generating protoregex table for ', name)
.local pmc toklen, tokrx
toklen = new ['Hash']
tokrx = new ['Hash']
# The prototable has already collected all of the names of
# protoregex methods into C<prototable>. We set up a loop
# to find all of the method names that begin with "name:sym<".
.local string mprefix
.local int mlen
mprefix = concat name, ':sym<'
mlen = length mprefix
.local pmc peekcur
peekcur = self.'!cursor_start'()
$P0 = box CURSOR_TYPE_PEEK
setattribute peekcur, '$!type', $P0
.local pmc method_it, method
.local string method_name
method_it = iter prototable
method_loop:
unless method_it goto method_done
method_name = shift method_it
$S0 = substr method_name, 0, mlen
if $S0 != mprefix goto method_loop
# Okay, we've found a method name intended for this protoregex.
# Now we look up the method itself, and ask it for its prefix tokens.
# If it doesn't return any, we use '' as its only prefix.
.local pmc rx, tokens, tokens_it
rx = find_method self, method_name
(tokens :slurpy) = peekcur.rx()
# Now loop through all of the tokens for the method, updating
# the longest initial key and adding it to the tokrx hash.
# We automatically promote entries in tokrx to arrays when
# there's more than one method candidate for a given token.
.local pmc seentok
seentok = new ['Hash']
tokens_loop:
unless tokens goto tokens_done
.local string tkey, tfirst
tkey = ''
$P0 = shift tokens
$I0 = isa $P0, ['Regex';'Cursor']
if $I0 goto have_tkey
tkey = $P0
have_tkey:
# If we've already processed this token for this rule, don't enter it twice
$I0 = exists seentok[tkey]
if $I0 goto tokens_loop
seentok[tkey] = seentok
# Keep track of longest token lengths by first character
tfirst = substr tkey, 0, 1
$I0 = length tkey
$I1 = toklen[tfirst]
if $I0 <= $I1 goto toklen_done
toklen[tfirst] = $I0
toklen_done:
# Add the regex to the list under the token key
.local pmc rxlist
rxlist = tokrx[tkey]
if null rxlist goto rxlist_0
$I0 = isa rxlist, ['ResizablePMCArray']
if $I0 goto rxlist_n
rxlist_1:
$I0 = issame rx, rxlist
if $I0 goto tokens_loop
$P0 = rxlist
rxlist = new ['ResizablePMCArray']
push rxlist, $P0
push rxlist, rx
tokrx[tkey] = rxlist
goto tokens_loop
rxlist_n:
push rxlist, rx
goto tokens_loop
rxlist_0:
tokrx[tkey] = rx
goto tokens_loop
tokens_done:
goto method_loop
method_done:
# It's built! Now store the tokrx and toklen hashes in the
# prototable and return them to the caller.
$S0 = concat name, '.tokrx'
prototable[$S0] = tokrx
$S0 = concat name, '.toklen'
prototable[$S0] = toklen
.return (tokrx, toklen)
.end
=back
=cut