Skip to content

Commit

Permalink
Implement capturing for quantifiers and protoregexes
Browse files Browse the repository at this point in the history
  • Loading branch information
sorear committed Sep 13, 2010
1 parent bedd19a commit 064e443
Show file tree
Hide file tree
Showing 6 changed files with 130 additions and 55 deletions.
8 changes: 6 additions & 2 deletions lib/Cursor.cs
Expand Up @@ -239,12 +239,16 @@ public sealed class RxFrame {
public static DynMetaObject GatherIteratorMO;
public static IP6 EMPTYP;
public Frame End(Frame th) {
return End(th, MakeMatch());
}
// currently just used for protoregex
public Frame End(Frame th, Cursor m) {
if (return_one) {
return Kernel.Take(th, Kernel.NewROScalar(MakeMatch()));
return Kernel.Take(th, Kernel.NewROScalar(m));
} else {
return_one = true;
VarDeque ks = new VarDeque();
ks.Push(Kernel.NewROScalar(MakeMatch()));
ks.Push(Kernel.NewROScalar(m));
DynObject it = new DynObject(GatherIteratorMO);
it.SetSlot("reify", Kernel.NewRWScalar(Kernel.AnyP));
it.SetSlot("frame", Kernel.NewRWScalar(th));
Expand Down
8 changes: 8 additions & 0 deletions lib/SAFE.setting
Expand Up @@ -829,9 +829,17 @@ my class Cursor {
method orig { Q:CgOp {
(box Str (getfield backing (cast Cursor (@ {self})))) } }
method ws() { Q:CgOp { (rawcall (cast Cursor (@ {self})) SimpleWS) } }
method at-key($k) { Q:CgOp {
(rawcall (cast Cursor (@ {self})) GetKey (unbox String (@ {$k.Str})))
} }
method at-pos($i) { self.at-key($i) }
}

my class Match {
method at-key($k) { Q:CgOp {
(rawcall (cast Cursor (@ {self})) GetKey (unbox String (@ {$k.Str})))
} }
method at-pos($i) { self.at-key($i) }
method new($) { die "Match.new NYI" }
method from { Q:CgOp { (box Num (cast Double (getfield from
(cast Cursor (@ {self}))))) } }
Expand Down
13 changes: 11 additions & 2 deletions src/Op.pm
Expand Up @@ -1049,14 +1049,23 @@ use CgOp;
my ($self, $body) = @_;

local $::symtext = $self->sym;
my @mcaps;
local $::in_quant = 0;
my $u = $self->rxop->used_caps;
for (keys %$u) {
push @mcaps, CgOp::clr_string($_) if $u->{$_} >= 2;
}

CgOp::prog(
CgOp::setfield('rx', CgOp::callframe,
CgOp::rawnew('RxFrame', CgOp::clr_string($self->name),
CgOp::cast('Cursor', CgOp::fetch(CgOp::scopedlex(''))))),
CgOp::rawcall(CgOp::rxframe, 'PushCapture',
CgOp::const(CgOp::rawnewarr('String', @mcaps)),
CgOp::null('Cursor')),
$self->rxop->code($body),
CgOp::rawccall(CgOp::getfield('rx', CgOp::callframe), 'End'),
CgOp::rawccall(CgOp::getfield('rx', CgOp::callframe), 'Backtrack'),
CgOp::rawccall(CgOp::rxframe, 'End'),
CgOp::rawccall(CgOp::rxframe, 'Backtrack'),
CgOp::null('Variable'));
}

Expand Down
38 changes: 32 additions & 6 deletions src/RxOp.pm
Expand Up @@ -13,6 +13,18 @@ use CgOp;

sub opzyg { map { $_->opzyg } @{ $_[0]->zyg } }

# all that matters is 0-1-infty; $*in_quant valid here
sub used_caps {
my %r;
for my $k (@{ $_[0]->zyg }) {
my $re = $k->used_caps;
for my $cn (keys %$re) {
$r{$cn} += $re->{$cn};
}
}
\%r;
}

my $nlabel = 0;
sub label { "b" . ($nlabel++) }

Expand Down Expand Up @@ -55,6 +67,8 @@ use CgOp;
has min => (isa => 'Int', is => 'ro', required => 1);
has max => (isa => 'Maybe[Int]', is => 'ro', default => undef);

sub used_caps { local $::in_quant = 1; $_[0]->zyg->[0]->used_caps }

sub code {
my ($self, $body) = @_;
my @code;
Expand Down Expand Up @@ -289,6 +303,11 @@ use CgOp;
has arglist => (isa => 'Maybe[ArrayRef[Op]]', is => 'ro');
has selfcut => (isa => 'Bool', is => 'ro', default => 0);

sub used_caps {
my ($self) = @_;
+{ map { $_ => $::in_quant ? 2 : 1 } @{ $self->captures } };
}

sub true {
my ($self) = @_;
# all not quite right in the capturey case
Expand Down Expand Up @@ -430,6 +449,17 @@ use CgOp;
use Moose;
extends 'RxOp';

sub used_caps {
my %used;
for my $x (@{ $_[0]->zyg }) {
my $used_br = $x->used_caps;
for my $y (keys %$used_br) {
$used{$y} = $used_br->{$y} if $used_br->{$y} > ($used{$y} // 0);
}
}
\%used;
}

sub code {
my ($self, $body) = @_;
my @ls = map { $self->label } @{ $self->zyg };
Expand Down Expand Up @@ -521,12 +551,8 @@ use CgOp;
CgOp::whileloop(0, 0,
CgOp::unbox('Boolean', CgOp::fetch(
CgOp::methodcall(CgOp::letvar('ks'), 'Bool'))),
CgOp::prog(
CgOp::rawcall(CgOp::rxframe, 'SetPos',
CgOp::getfield('pos', CgOp::cast('Cursor',
CgOp::fetch(CgOp::methodcall(CgOp::letvar('ks'),
'shift'))))),
CgOp::rawccall(CgOp::rxframe, 'End')))))),
CgOp::rawccall(CgOp::rxframe, 'End', CgOp::cast('Cursor',
CgOp::fetch(CgOp::methodcall(CgOp::letvar('ks'), 'shift')))))))),
CgOp::rawccall(CgOp::rxframe, 'Backtrack'));
}

Expand Down
74 changes: 73 additions & 1 deletion test.pl
Expand Up @@ -2,7 +2,7 @@

use Test;

plan 430;
plan 454;

ok 1, "one is true";
ok 2, "two is also true";
Expand Down Expand Up @@ -957,3 +957,75 @@
is $ma.to, 4, '$ma.to works';
is $ma.Str, '29', '$ma.Str works';
}
{
use MONKEY_TYPING;
my class Foo {
method foo { 1 }
}
is Foo.foo, 2, "augments run early";
is Any.g4077, 4077, "can augment core classes";
is Cool.g4077, 4077, "augments visible in subclasses";
augment class Foo {
method foo { 2 }
}
augment class Any {
method g4077 { 4077 }
}
}
{
my $ma = (grammar {
token TOP { <foo> <bar> }
token foo { \d+ }
token bar { \D+ }
}).parse("123abc");
ok $ma<foo> ~~ Match, "<foo> sub is a Match";
is $ma<foo>, "123", "<foo> sub got 123";
ok $ma<bar> ~~ Match, "<bar> sub is a Match";
is $ma<bar>, "abc", "<bar> sub got 123";
ok !$ma<quux>, "no <quux> sub";
my $mb = (grammar {
token TOP { <foo> <foo> }
token foo { \D+ | \d+ }
}).parse("def456");
ok $mb.defined, "grammar b matched";
ok $mb<foo> ~~ List, "<foo> sub isa List";
is +$mb<foo>, 2, "2 matches";
ok $mb<foo>[0] ~~ Match, "<foo>[0] sub isa Match";
is $mb<foo>[0], "def", "<foo>[0] got def";
is $mb<foo>[1], "456", "<foo>[1] got 456";
my $mc = (grammar {
token TOP { <foo>+ }
token foo { \D+ | \d+ }
}).parse("def");
ok $mc<foo> ~~ List, "<foo>+ makes a list";
is +$mc<foo>, 1, "despite only one element";
my $md = (grammar {
token TOP { \D+ <foo> \D+ }
token foo { \d+ <bar> \d+ }
token bar { \D+ }
}).parse("a1b2c");
is $md<foo><bar>, "b", "<foo><bar> works";
my $mf = (grammar {
proto token TOP {*}
token TOP:x { <foo> }
token foo { \w+ }
}).parse("abc");
is $mf<foo>, "abc", "protoregex captures work";
my $me = (grammar {
token TOP { <tok>+ }
proto token tok {*}
token tok:num { <sign>? <digit>+ }
token tok:ws { \s+ }
token sign { \+ | \- }
token digit { \d | _ }
}).parse("2 -34 5");
ok $me<tok> ~~ List, "a list of tokens";
is +$me<tok>, 5, "5 of them";
is +$me<tok>[0]<sign>, 0, "first no sign";
is +$me<tok>[2]<sign>, 1, "third sign";
is +$me<tok>[4]<sign>, 0, "fifth no sign";
is $me<tok>[2], '-34', "3rd token '-34'";
}
44 changes: 0 additions & 44 deletions test2.pl
@@ -1,48 +1,4 @@
# vim: ft=perl6
use Test;

{
use MONKEY_TYPING;
my class Foo {
method foo { 1 }
}
is Foo.foo, 2, "augments run early";
is Any.g4077, 4077, "can augment core classes";
is Cool.g4077, 4077, "augments visible in subclasses";

augment class Foo {
method foo { 2 }
}
augment class Any {
method g4077 { 4077 }
}
}

use MONKEY_TYPING;
augment class Match {
method at-key($k) { Q:CgOp {
(rawcall (cast Cursor (@ {self})) GetKey (unbox String (@ {$k.Str})))
} }
method at-pos($i) { self.at-key($i) }
}
augment class Cursor {
method at-key($k) { Q:CgOp {
(rawcall (cast Cursor (@ {self})) GetKey (unbox String (@ {$k.Str})))
} }
method at-pos($i) { self.at-key($i) }
}

{
my $ma = (grammar {
regex TOP { <foo> <bar> }
regex foo { \d+ }
regex bar { \w+ }
}).parse("123abc");
ok $ma<foo> ~~ Match, "<foo> sub is a Match";
is $ma<foo>, "123", "<foo> sub got 123";
ok $ma<bar> ~~ Match, "<bar> sub is a Match";
is $ma<bar>, "abc", "<bar> sub got 123";
ok !$ma<quux>, "no <quux> sub";
}

done-testing;

0 comments on commit 064e443

Please sign in to comment.