From 064e443fda325a34508ad68acf7a29329215ed41 Mon Sep 17 00:00:00 2001 From: Stefan O'Rear Date: Mon, 13 Sep 2010 00:27:14 -0700 Subject: [PATCH] Implement capturing for quantifiers and protoregexes --- lib/Cursor.cs | 8 ++++-- lib/SAFE.setting | 8 ++++++ src/Op.pm | 13 +++++++-- src/RxOp.pm | 38 +++++++++++++++++++++---- test.pl | 74 +++++++++++++++++++++++++++++++++++++++++++++++- test2.pl | 44 ---------------------------- 6 files changed, 130 insertions(+), 55 deletions(-) diff --git a/lib/Cursor.cs b/lib/Cursor.cs index cf6a2979..cd948aec 100644 --- a/lib/Cursor.cs +++ b/lib/Cursor.cs @@ -239,12 +239,16 @@ public sealed class RxFrame { public static DynMetaObject GatherIteratorMO; public static IP6 EMPTYP; public Frame End(Frame th) { + return End(th, MakeMatch()); + } + // currently just used for protoregex + public Frame End(Frame th, Cursor m) { if (return_one) { - return Kernel.Take(th, Kernel.NewROScalar(MakeMatch())); + return Kernel.Take(th, Kernel.NewROScalar(m)); } else { return_one = true; VarDeque ks = new VarDeque(); - ks.Push(Kernel.NewROScalar(MakeMatch())); + ks.Push(Kernel.NewROScalar(m)); DynObject it = new DynObject(GatherIteratorMO); it.SetSlot("reify", Kernel.NewRWScalar(Kernel.AnyP)); it.SetSlot("frame", Kernel.NewRWScalar(th)); diff --git a/lib/SAFE.setting b/lib/SAFE.setting index c747dde1..43b65d14 100644 --- a/lib/SAFE.setting +++ b/lib/SAFE.setting @@ -829,9 +829,17 @@ my class Cursor { method orig { Q:CgOp { (box Str (getfield backing (cast Cursor (@ {self})))) } } method ws() { Q:CgOp { (rawcall (cast Cursor (@ {self})) SimpleWS) } } + method at-key($k) { Q:CgOp { + (rawcall (cast Cursor (@ {self})) GetKey (unbox String (@ {$k.Str}))) + } } + method at-pos($i) { self.at-key($i) } } my class Match { + method at-key($k) { Q:CgOp { + (rawcall (cast Cursor (@ {self})) GetKey (unbox String (@ {$k.Str}))) + } } + method at-pos($i) { self.at-key($i) } method new($) { die "Match.new NYI" } method from { Q:CgOp { (box Num (cast Double (getfield from (cast Cursor (@ {self}))))) } } diff --git a/src/Op.pm b/src/Op.pm index ddd084d4..3160869a 100644 --- a/src/Op.pm +++ b/src/Op.pm @@ -1049,14 +1049,23 @@ use CgOp; my ($self, $body) = @_; local $::symtext = $self->sym; + my @mcaps; + local $::in_quant = 0; + my $u = $self->rxop->used_caps; + for (keys %$u) { + push @mcaps, CgOp::clr_string($_) if $u->{$_} >= 2; + } CgOp::prog( CgOp::setfield('rx', CgOp::callframe, CgOp::rawnew('RxFrame', CgOp::clr_string($self->name), CgOp::cast('Cursor', CgOp::fetch(CgOp::scopedlex('$ยข'))))), + CgOp::rawcall(CgOp::rxframe, 'PushCapture', + CgOp::const(CgOp::rawnewarr('String', @mcaps)), + CgOp::null('Cursor')), $self->rxop->code($body), - CgOp::rawccall(CgOp::getfield('rx', CgOp::callframe), 'End'), - CgOp::rawccall(CgOp::getfield('rx', CgOp::callframe), 'Backtrack'), + CgOp::rawccall(CgOp::rxframe, 'End'), + CgOp::rawccall(CgOp::rxframe, 'Backtrack'), CgOp::null('Variable')); } diff --git a/src/RxOp.pm b/src/RxOp.pm index e6fc4b2f..4cca7df8 100644 --- a/src/RxOp.pm +++ b/src/RxOp.pm @@ -13,6 +13,18 @@ use CgOp; sub opzyg { map { $_->opzyg } @{ $_[0]->zyg } } + # all that matters is 0-1-infty; $*in_quant valid here + sub used_caps { + my %r; + for my $k (@{ $_[0]->zyg }) { + my $re = $k->used_caps; + for my $cn (keys %$re) { + $r{$cn} += $re->{$cn}; + } + } + \%r; + } + my $nlabel = 0; sub label { "b" . ($nlabel++) } @@ -55,6 +67,8 @@ use CgOp; has min => (isa => 'Int', is => 'ro', required => 1); has max => (isa => 'Maybe[Int]', is => 'ro', default => undef); + sub used_caps { local $::in_quant = 1; $_[0]->zyg->[0]->used_caps } + sub code { my ($self, $body) = @_; my @code; @@ -289,6 +303,11 @@ use CgOp; has arglist => (isa => 'Maybe[ArrayRef[Op]]', is => 'ro'); has selfcut => (isa => 'Bool', is => 'ro', default => 0); + sub used_caps { + my ($self) = @_; + +{ map { $_ => $::in_quant ? 2 : 1 } @{ $self->captures } }; + } + sub true { my ($self) = @_; # all not quite right in the capturey case @@ -430,6 +449,17 @@ use CgOp; use Moose; extends 'RxOp'; + sub used_caps { + my %used; + for my $x (@{ $_[0]->zyg }) { + my $used_br = $x->used_caps; + for my $y (keys %$used_br) { + $used{$y} = $used_br->{$y} if $used_br->{$y} > ($used{$y} // 0); + } + } + \%used; + } + sub code { my ($self, $body) = @_; my @ls = map { $self->label } @{ $self->zyg }; @@ -521,12 +551,8 @@ use CgOp; CgOp::whileloop(0, 0, CgOp::unbox('Boolean', CgOp::fetch( CgOp::methodcall(CgOp::letvar('ks'), 'Bool'))), - CgOp::prog( - CgOp::rawcall(CgOp::rxframe, 'SetPos', - CgOp::getfield('pos', CgOp::cast('Cursor', - CgOp::fetch(CgOp::methodcall(CgOp::letvar('ks'), - 'shift'))))), - CgOp::rawccall(CgOp::rxframe, 'End')))))), + CgOp::rawccall(CgOp::rxframe, 'End', CgOp::cast('Cursor', + CgOp::fetch(CgOp::methodcall(CgOp::letvar('ks'), 'shift')))))))), CgOp::rawccall(CgOp::rxframe, 'Backtrack')); } diff --git a/test.pl b/test.pl index ed3b265c..b01a2a09 100644 --- a/test.pl +++ b/test.pl @@ -2,7 +2,7 @@ use Test; -plan 430; +plan 454; ok 1, "one is true"; ok 2, "two is also true"; @@ -957,3 +957,75 @@ is $ma.to, 4, '$ma.to works'; is $ma.Str, '29', '$ma.Str works'; } + +{ + use MONKEY_TYPING; + my class Foo { + method foo { 1 } + } + is Foo.foo, 2, "augments run early"; + is Any.g4077, 4077, "can augment core classes"; + is Cool.g4077, 4077, "augments visible in subclasses"; + + augment class Foo { + method foo { 2 } + } + augment class Any { + method g4077 { 4077 } + } +} + +{ + my $ma = (grammar { + token TOP { } + token foo { \d+ } + token bar { \D+ } + }).parse("123abc"); + ok $ma ~~ Match, " sub is a Match"; + is $ma, "123", " sub got 123"; + ok $ma ~~ Match, " sub is a Match"; + is $ma, "abc", " sub got 123"; + ok !$ma, "no sub"; + my $mb = (grammar { + token TOP { } + token foo { \D+ | \d+ } + }).parse("def456"); + ok $mb.defined, "grammar b matched"; + ok $mb ~~ List, " sub isa List"; + is +$mb, 2, "2 matches"; + ok $mb[0] ~~ Match, "[0] sub isa Match"; + is $mb[0], "def", "[0] got def"; + is $mb[1], "456", "[1] got 456"; + my $mc = (grammar { + token TOP { + } + token foo { \D+ | \d+ } + }).parse("def"); + ok $mc ~~ List, "+ makes a list"; + is +$mc, 1, "despite only one element"; + my $md = (grammar { + token TOP { \D+ \D+ } + token foo { \d+ \d+ } + token bar { \D+ } + }).parse("a1b2c"); + is $md, "b", " works"; + my $mf = (grammar { + proto token TOP {*} + token TOP:x { } + token foo { \w+ } + }).parse("abc"); + is $mf, "abc", "protoregex captures work"; + my $me = (grammar { + token TOP { + } + proto token tok {*} + token tok:num { ? + } + token tok:ws { \s+ } + token sign { \+ | \- } + token digit { \d | _ } + }).parse("2 -34 5"); + ok $me ~~ List, "a list of tokens"; + is +$me, 5, "5 of them"; + is +$me[0], 0, "first no sign"; + is +$me[2], 1, "third sign"; + is +$me[4], 0, "fifth no sign"; + is $me[2], '-34', "3rd token '-34'"; +} diff --git a/test2.pl b/test2.pl index ea9ca337..a82920e2 100644 --- a/test2.pl +++ b/test2.pl @@ -1,48 +1,4 @@ # vim: ft=perl6 use Test; -{ - use MONKEY_TYPING; - my class Foo { - method foo { 1 } - } - is Foo.foo, 2, "augments run early"; - is Any.g4077, 4077, "can augment core classes"; - is Cool.g4077, 4077, "augments visible in subclasses"; - - augment class Foo { - method foo { 2 } - } - augment class Any { - method g4077 { 4077 } - } -} - -use MONKEY_TYPING; -augment class Match { - method at-key($k) { Q:CgOp { - (rawcall (cast Cursor (@ {self})) GetKey (unbox String (@ {$k.Str}))) - } } - method at-pos($i) { self.at-key($i) } -} -augment class Cursor { - method at-key($k) { Q:CgOp { - (rawcall (cast Cursor (@ {self})) GetKey (unbox String (@ {$k.Str}))) - } } - method at-pos($i) { self.at-key($i) } -} - -{ - my $ma = (grammar { - regex TOP { } - regex foo { \d+ } - regex bar { \w+ } - }).parse("123abc"); - ok $ma ~~ Match, " sub is a Match"; - is $ma, "123", " sub got 123"; - ok $ma ~~ Match, " sub is a Match"; - is $ma, "abc", " sub got 123"; - ok !$ma, "no sub"; -} - done-testing;