This repository has been archived by the owner on Feb 3, 2021. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 8
/
Cursor.pir
620 lines (476 loc) · 13.8 KB
/
Cursor.pir
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
# Copyright (C) 2009, Patrick R. Michaud
# $Id$
=head1 NAME
Regex::Cursor - Regex Cursor nodes
=head1 DESCRIPTION
This file implements the Regex::Cursor class, used for managing regular
expression control flow. Regex::Cursor is also a base class for
grammars.
=cut
.include 'cclass.pasm'
.include 'src/Regex/constants.pir'
.namespace ['Regex';'Cursor']
.sub '' :anon :load :init
    # Runs at load/init time: registers the Regex::Cursor class and creates
    # the namespace globals defined by this file.
    load_bytecode 'P6object.pbc'

    # Create the class together with its attribute list.
    .local pmc metaclass, cursorclass
    metaclass = new 'P6metaclass'
    cursorclass = metaclass.'new_class'('Regex::Cursor', 'attr'=>'$!target $!from $!pos $!match $!action $!names @!bstack @!cstack @!caparray')

    # '$!generation' starts out as a boxed integer 0.
    .local pmc generation
    generation = box 0
    set_global '$!generation', generation

    # '$!TRUE' is the shared Boolean true value that !cursor_pass stores
    # in $!match to flag a successful match.
    .local pmc truth
    truth = new ['Boolean']
    assign truth, 1
    set_global '$!TRUE', truth

    .return ()
.end
=head2 Methods
=over 4
=item MATCH()
Return this cursor's current Match object, generating a new one
for the Cursor if one hasn't been created yet.
=cut
.sub 'MATCH' :method
    # Return this cursor's Match object, building it and caching it in
    # $!match the first time it is requested.
    .local pmc match
    match = getattribute self, '$!match'
    if null match goto match_make
    # $!match may hold a non-Match placeholder (!cursor_pass stores the
    # '$!TRUE' Boolean there); only an existing Regex::Match is reused.
    $I0 = isa match, ['Regex';'Match']
    if $I0 goto match_done

    # First, create a Match object and bind it.
  match_make:
    match = new ['Regex';'Match']
    setattribute self, '$!match', match

    # Copy target/from/pos from the cursor into the new match.
    .local pmc target, from, to
    target = getattribute self, '$!target'
    setattribute match, '$!target', target
    from = getattribute self, '$!from'
    setattribute match, '$!from', from
    to = getattribute self, '$!pos'
    setattribute match, '$!to', to

    # Create any arrayed subcaptures: every name listed in @!caparray gets
    # an empty array in the match, and caphash remembers which names are
    # arrayed so the cstack pass below pushes instead of binding.
    .local pmc caparray, caparray_it, caphash
    .local string subname
    .local pmc arr
    .local int keyint
    caparray = getattribute self, '@!caparray'
    if null caparray goto caparray_done
    caparray_it = iter caparray
    caphash = new ['Hash']
  caparray_loop:
    unless caparray_it goto caparray_done
    subname = shift caparray_it
    arr = new ['ResizablePMCArray']
    caphash[subname] = arr
    # A name whose first character is numeric is bound by integer key.
    keyint = is_cclass .CCLASS_NUMERIC, subname, 0
    if keyint goto caparray_int
    match[subname] = arr
    goto caparray_loop
  caparray_int:
    $I0 = subname
    match[$I0] = arr
    # Keep iterating: falling through to caparray_done here would drop
    # every arrayed capture listed after the first positional one.
    goto caparray_loop
  caparray_done:

    # If it's not a successful match, or if there are
    # no saved subcursors, we're done.
    if to < from goto match_done
    .local pmc cstack, cstack_it
    cstack = getattribute self, '@!cstack'
    if null cstack goto cstack_done
    unless cstack goto cstack_done
    cstack_it = iter cstack
  cstack_loop:
    unless cstack_it goto cstack_done
    .local pmc subcur, submatch
    subcur = shift cstack_it
    # If the subcursor isn't bound with a name, skip it.
    $P0 = getattribute subcur, '$!names'
    if null $P0 goto cstack_loop
    subname = $P0
    submatch = subcur.'MATCH'()
    keyint = is_cclass .CCLASS_NUMERIC, subname, 0
    # caphash only exists when @!caparray was set; without it every
    # capture is bound directly.
    if null caparray goto cstack_bind
    $I0 = exists caphash[subname]
    unless $I0 goto cstack_bind
    # Arrayed capture: append the submatch to the array created above.
    if keyint goto cstack_array_int
    $P0 = match[subname]
    push $P0, submatch
    goto cstack_loop
  cstack_array_int:
    $I0 = subname
    $P0 = match[$I0]
    push $P0, submatch
    goto cstack_loop
    # Scalar capture: bind the submatch directly under its key.
  cstack_bind:
    if keyint goto cstack_bind_int
    match[subname] = submatch
    goto cstack_loop
  cstack_bind_int:
    $I0 = subname
    match[$I0] = submatch
    goto cstack_loop
  cstack_done:

  match_done:
    .return (match)
.end
=item parse(target [, regex])
Parse C<target> in the current grammar starting with C<regex>.
If C<regex> is omitted, then use the C<TOP> rule for the grammar.
=cut
.sub 'parse' :method
    .param pmc target
    .param pmc regex           :optional
    .param int has_regex       :opt_flag
    .param pmc options         :slurpy :named

    # Fall back to the grammar's TOP method when no start rule is given.
    if has_regex goto have_start_rule
    regex = find_method self, 'TOP'
  have_start_rule:

    # Build the initial cursor (named options are forwarded as-is), then
    # invoke the start rule as a method on it and return its result.
    .local pmc initcur, result
    initcur = self.'!cursor_init'(target, options :flat :named)
    result = initcur.regex()
    .return (result)
.end
=item pos()
Return the cursor's current position.
=cut
.sub 'pos' :method
    # Return the value held in the $!pos attribute.
    .local pmc curpos
    curpos = getattribute self, '$!pos'
    .return (curpos)
.end
=item from()
Return the cursor's from position.
=cut
.sub 'from' :method
    # Return the value held in the $!from attribute.
    .local pmc startpos
    startpos = getattribute self, '$!from'
    .return (startpos)
.end
=back
=head2 Private methods
=over 4
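=item !cursor_init(target [, 'from'=>from] [, 'action'=>action])
Create a new cursor for matching C<target>. The optional named
argument C<from> gives the initial value of both C<$!from> and C<$!pos>,
and C<action> is stored in the cursor's C<$!action> attribute.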
=cut
.sub '!cursor_init' :method
    .param string target
    .param int from            :named('from') :optional
    .param pmc action          :named('action') :optional

    # Instantiate a cursor of the invocant's own class.
    .local pmc how, parrotclass, cur
    how = self.'HOW'()
    parrotclass = getattribute how, 'parrotclass'
    cur = new parrotclass

    # The target string is stored wrapped in a CodeString PMC.
    .local pmc tgt
    tgt = new ['CodeString']
    tgt = target
    setattribute cur, '$!target', tgt

    # $!from and $!pos start at the same offset but each gets its own box.
    .local pmc frombox, posbox
    frombox = box from
    setattribute cur, '$!from', frombox
    posbox = box from
    setattribute cur, '$!pos', posbox

    # The action object is stored as passed in.
    setattribute cur, '$!action', action

    .return (cur)
.end
=item !cursor_start([lang])
Create and initialize a new cursor from C<self>. If C<lang> is
provided, then the new cursor has the same type as lang.
=cut
.sub '!cursor_start' :method
    .param pmc lang            :optional
    .param int has_lang        :opt_flag

    # Without an explicit lang, the new cursor takes the invocant's type.
    if has_lang goto lang_chosen
    lang = self
  lang_chosen:

    .local pmc how, parrotclass, cur
    how = lang.'HOW'()
    parrotclass = getattribute how, 'parrotclass'
    cur = new parrotclass

    # The new cursor begins at the invocant's current position; the same
    # position PMC is stored as both $!from and $!pos.
    .local pmc startpos, target, action
    startpos = getattribute self, '$!pos'
    setattribute cur, '$!from', startpos
    setattribute cur, '$!pos', startpos

    # Target and action object are shared with the invocant.
    target = getattribute self, '$!target'
    setattribute cur, '$!target', target
    action = getattribute self, '$!action'
    setattribute cur, '$!action', action

    # Returns: new cursor, start position, target, start position again.
    .return (cur, startpos, target, startpos)
.end
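=item !cursor_fail()
Permanently fail this cursor: set its position to C<CURSOR_FAIL_RULE>
and discard its match, backtracking stack, and subcursor stack.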
=cut
.sub '!cursor_fail' :method
    # Mark the cursor as failed by storing the CURSOR_FAIL_RULE position.
    .local pmc failpos
    failpos = box CURSOR_FAIL_RULE
    setattribute self, '$!pos', failpos

    # Drop the match and both stacks by setting them to null.
    .local pmc nothing
    null nothing
    setattribute self, '$!match', nothing
    setattribute self, '@!bstack', nothing
    setattribute self, '@!cstack', nothing
.end
=item !cursor_pass(pos, name)
Set the Cursor as passing at C<pos>; calling any reduction action
C<name> associated with the cursor. This method simply sets
C<$!match> to a boolean true value to indicate the regex was
successful; the C<MATCH> method above replaces this boolean true
with a "real" Match object when requested.
=cut
.sub '!cursor_pass' :method
    .param pmc pos
    .param string name

    # Record the final position and flag success with the shared '$!TRUE'
    # placeholder; MATCH swaps in a real Match object on demand.
    setattribute self, '$!pos', pos
    .local pmc placeholder
    placeholder = get_global '$!TRUE'
    setattribute self, '$!match', placeholder

    # Only a true (non-empty) name triggers a reduction action.
    if name goto run_action
    .return (self)
  run_action:
    self.'!reduce'(name)
    .return (self)
.end
=item !cursor_caparray(caparray :slurpy)
Set the list of subcaptures that produce arrays to C<caparray>.
=cut
.sub '!cursor_caparray' :method
    .param pmc caparray        :slurpy

    # Store the collected argument list as @!caparray (read by MATCH).
    setattribute self, '@!caparray', caparray
    .return ()
.end
=item !cursor_names(names)
Set the Cursor's name (for binding) to C<names>.
=cut
.sub '!cursor_names' :method
    .param pmc names

    # Store the binding name in $!names (read by MATCH and !BACKREF).
    setattribute self, '$!names', names
    .return ()
.end
=item !cursor_pos(pos)
Set the cursor's position to C<pos>.
=cut
.sub '!cursor_pos' :method
    .param pmc pos

    # Overwrite the $!pos attribute with the supplied position.
    setattribute self, '$!pos', pos
    .return ()
.end
=item !mark_push(rep, pos, mark [, subcur])
Push a new backtracking point onto the cursor with the given
C<rep>, C<pos>, and backtracking C<mark>. (The C<mark> is typically
the address of a label to branch to when backtracking occurs.)
If C<subcur> is given, it is saved on the cursor's subcursor stack
and counted in the new frame.
=cut
.sub '!mark_push' :method
    .param int rep
    .param int pos
    .param int mark
    .param pmc subcur          :optional
    .param int has_subcur      :opt_flag

    # ncaps is the number of cstack entries the new frame will ask for.
    # It starts from the count recorded in the current top frame (the last
    # slot of bstack), or 0 when there is no frame yet.
    .local int ncaps, top
    .local pmc bstack
    ncaps = 0
    bstack = getattribute self, '@!bstack'
    unless null bstack goto have_bstack
    # No backtrack stack yet: create it lazily.
    bstack = new ['ResizableIntegerArray']
    setattribute self, '@!bstack', bstack
    goto frame_ready
  have_bstack:
    unless bstack goto frame_ready
    top = elements bstack
    top -= 1
    ncaps = bstack[top]
  frame_ready:

    # A supplied subcursor is stored at slot ncaps of cstack (creating the
    # cstack if needed) and the count grows by one.
    unless has_subcur goto push_frame
    .local pmc cstack
    cstack = getattribute self, '@!cstack'
    unless null cstack goto store_subcur
    cstack = new ['ResizablePMCArray']
    setattribute self, '@!cstack', cstack
  store_subcur:
    cstack[ncaps] = subcur
    ncaps += 1

    # Frame layout, in push order: mark, pos, rep, cstack count.
  push_frame:
    push bstack, mark
    push bstack, pos
    push bstack, rep
    push bstack, ncaps
.end
=item !mark_peek(mark)
Return information about the latest frame for C<mark>.
If C<mark> is zero, return information about the latest frame.
=cut
.sub '!mark_peek' :method
    .param int tomark

    .local pmc bstack
    bstack = getattribute self, '@!bstack'
    if null bstack goto no_mark
    unless bstack goto no_mark

    # Walk the stack from the top, one four-slot frame at a time.
    .local int frame, slot
    .local int rep, pos, mark, cptr
    frame = elements bstack
  scan:
    frame -= 4
    if frame < 0 goto no_mark
    mark = bstack[frame]
    # A zero tomark accepts the topmost frame; otherwise the frame's mark
    # has to equal tomark.
    if tomark == 0 goto found
    if mark != tomark goto scan

    # Read the remaining slots of the frame: pos, rep, cstack count.
  found:
    slot = frame + 1
    pos = bstack[slot]
    slot = frame + 2
    rep = bstack[slot]
    slot = frame + 3
    cptr = bstack[slot]
    .return (rep, pos, mark, frame, bstack, cptr)

    # No usable frame: report CURSOR_FAIL_GROUP as the position and zeros
    # elsewhere, still handing back whatever bstack is (possibly null).
  no_mark:
    .return (0, CURSOR_FAIL_GROUP, 0, 0, bstack, 0)
.end
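=item !mark_fail(tomark)
Remove the most recent frame for C<tomark> (and any frames above it)
and backtrack the cursor to the point given by that frame. If
C<tomark> is zero, then backtrack the most recent mark. Returns the
backtracked values of repetition count, cursor position, and mark
(address), followed by the subcursor found at the top of that frame's
subcursor stack (null if there is none).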
=cut
.sub '!mark_fail' :method
    # The parameter is named 'tomark' (as in !mark_peek) so that it no
    # longer shares a name with the 'mark' local declared below.
    .param int tomark

    # Get the frame information for C<tomark>.
    .local int rep, pos, mark, bptr, cptr
    .local pmc bstack
    (rep, pos, mark, bptr, bstack, cptr) = self.'!mark_peek'(tomark)

    .local pmc subcur
    null subcur

    # If there's no bstack, there's nothing else to do.
    if null bstack goto done

    # If there's a subcursor associated with this mark, return it.
    # cptr is the frame's cstack count, so the subcursor is at cptr-1.
    unless cptr > 0 goto cstack_done
    .local pmc cstack
    cstack = getattribute self, '@!cstack'
    dec cptr
    subcur = cstack[cptr]
    # Set the cstack to the size requested by the soon-to-be-top mark frame
    # (its count lives in the slot just below bptr).
    unless bptr > 0 goto cstack_zero
    $I0 = bptr - 1
    $I0 = bstack[$I0]
    assign cstack, $I0
    goto cstack_done
  cstack_zero:
    # No frame remains below, so no subcursors are kept.
    assign cstack, 0
  cstack_done:

    # Pop the current mark frame and all above it.
    assign bstack, bptr

  done:
    .return (rep, pos, mark, subcur)
.end
=item !mark_commit(mark)
Like C<!mark_fail> above, this backtracks the cursor to C<mark>
(releasing any intermediate marks), but preserves the current
capture states. Returns the repetition count, cursor position, and
mark (address) of the committed frame.
=cut
.sub '!mark_commit' :method
    # The parameter is named 'tomark' (as in !mark_peek) so that it no
    # longer shares a name with the 'mark' local declared below.
    .param int tomark

    # Find the frame for C<tomark>.
    .local int rep, pos, mark, bptr, cptr
    .local pmc bstack
    (rep, pos, mark, bptr, bstack) = self.'!mark_peek'(tomark)

    # Get the current cstack size into cptr; it is the last slot of the
    # topmost frame.
    if null bstack goto done
    unless bstack goto done
    $I0 = elements bstack
    dec $I0
    cptr = bstack[$I0]

    # Pop the mark frame and everything above it.
    assign bstack, bptr

    # If we don't need to hold any cstack information, we're done.
    unless cptr > 0 goto done

    # If the top frame is an auto-fail frame (negative pos), (re)use it to
    # hold our needed cptr, otherwise create a new auto-fail frame to do it.
    unless bptr > 0 goto cstack_push
    $I0 = bptr - 3              # pos is at top-3
    $I1 = bstack[$I0]
    unless $I1 < 0 goto cstack_push
    $I0 = bptr - 1              # cptr is at top-1
    bstack[$I0] = cptr
    goto done
  cstack_push:
    push bstack, 0              # mark
    push bstack, CURSOR_FAIL    # pos
    push bstack, 0              # rep
    push bstack, cptr           # cptr

  done:
    .return (rep, pos, mark)
.end
=item !reduce(name [, key])
Perform any action associated with the current regex match.
=cut
.sub '!reduce' :method
    .param string name
    .param string key          :optional
    .param int has_key         :opt_flag

    # Nothing to do without an action object that has a method called C<name>.
    .local pmc actions
    actions = getattribute self, '$!action'
    if null actions goto no_action
    $I0 = can actions, name
    unless $I0 goto no_action

    # The action method receives this cursor's Match object.
    .local pmc m
    m = self.'MATCH'()
    unless has_key goto without_key
    # Keyed form: tail-call the action with the match and the key.
    .tailcall actions.name(m, key)

  without_key:
    # Unkeyed form: call the action and discard whatever it returns.
    actions.name(m)

  no_action:
    .return ()
.end
=item !BACKREF(name)
Match the backreference given by C<name>.
=cut
.sub '!BACKREF' :method
    .param string name

    # Start a fresh cursor at the invocant's current position.
    .local pmc cur
    .local int pos
    .local string tgt
    (cur, pos, tgt) = self.'!cursor_start'()

    # Walk the invocant's cstack from the newest entry downwards, looking
    # for a subcursor bound to C<name>.  With no cstack, or no such
    # subcursor, the backreference passes without consuming anything.
    .local pmc cstack, subcur, bound
    .local int idx
    cstack = getattribute self, '@!cstack'
    if null cstack goto pass
    idx = elements cstack
  search:
    idx -= 1
    if idx < 0 goto pass
    subcur = cstack[idx]
    bound = getattribute subcur, '$!names'
    if null bound goto search
    $S0 = bound
    unless name == $S0 goto search

    # Found it: recover the text it matched from its from..pos span.
    .local int subfrom, subto, litlen
    .local string litstr, seen
    subto = subcur.'pos'()
    subfrom = subcur.'from'()
    litlen = subto - subfrom
    litstr = substr tgt, subfrom, litlen

    # The same text has to appear at the current position.
    seen = substr tgt, pos, litlen
    if seen != litstr goto fail
    pos += litlen

  pass:
    cur.'!cursor_pass'(pos, '')
  fail:
    # On failure the new cursor is returned without having been passed.
    .return (cur)
.end
=back
=head2 Vtable functions
=over 4
=item get_bool
=cut
.sub '' :vtable('get_bool') :method
    # A cursor is true exactly when $!match is set and itself true.
    .local pmc m
    .local int truth
    truth = 0
    m = getattribute self, '$!match'
    if null m goto done
    truth = istrue m
  done:
    .return (truth)
.end
=back
=head1 AUTHORS
Patrick Michaud <pmichaud@pobox.com> is the author and maintainer.
=cut
# Local Variables:
# mode: pir
# fill-column: 100
# End:
# vim: expandtab shiftwidth=4 ft=pir: