Skip to content

Commit

Permalink
Implement character classes
Browse files Browse the repository at this point in the history
  • Loading branch information
sorear committed Aug 25, 2010
1 parent 2667d2b commit 6cc2651
Show file tree
Hide file tree
Showing 7 changed files with 85 additions and 36 deletions.
11 changes: 11 additions & 0 deletions lib/Cursor.cs
Expand Up @@ -63,6 +63,14 @@ public class Cursor {
}
}

public Cursor CClass(CC cc) {
if (backing.Length - 1 >= pos && cc.Accepts(backing[pos])) {
return At(pos + 1);
} else {
return null;
}
}

public Cursor SetCaps(Matched caps) {
return new Cursor(caps, backing, pos);
}
Expand Down Expand Up @@ -99,6 +107,9 @@ public sealed class CC {
int l = 0;
int h = vec.Length / 2;

if (h == 0 || ch < vec[0])
return false;

while (h - l > 1) {
int m = l + (h - l) / 2;
if (vec[m * 2] > ch) {
Expand Down
21 changes: 21 additions & 0 deletions lib/SAFE.setting
Expand Up @@ -905,6 +905,27 @@ sub _rxstr($C, $str, $k) {
};
}
sub _rxdot($C, $k) {
Q:CgOp {
(letn rt (rawcall (unbox Cursor (@ (l $C))) AnyChar)
[ternary
(!= (l rt) (null Cursor))
(subcall (@ (l $k)) (box (@ (l $C)) (l rt)))
(null Variable)])
};
}
sub _rxcc($C, $cc, $k) {
Q:CgOp {
(letn rt (rawcall (unbox Cursor (@ {$C})) CClass
(unwrap CC (@ {$cc})))
[ternary
(!= (l rt) (null Cursor))
(subcall (@ {$k}) (box (@ {$C}) (l rt)))
(null Variable)])
};
}
sub _rxcut($C, $f, $k) {
my @l := gather $f($C, &take);
@l && $k(@l.shift);
Expand Down
1 change: 1 addition & 0 deletions src/CodeGen.pm
Expand Up @@ -65,6 +65,7 @@ use 5.010;
{ At => [m => 'Cursor'],
Exact => [m => 'Cursor'],
AnyChar => [m => 'Cursor'],
CClass => [m => 'Cursor'],
pos => [f => 'Int32'],
backing => [f => 'String'],
captures => [f => 'Matched'],
Expand Down
4 changes: 2 additions & 2 deletions src/Niecza/Actions.pm
Expand Up @@ -539,7 +539,7 @@ sub metachar__S_Lt_Gt { my ($cl, $M) = @_;
sub metachar__S_Back { my ($cl, $M) = @_;
my $cc = $M->{backslash}{_ast};
$M->{_ast} = ref($cc) ?
RxOp::CClass->new(cc => $cc,
RxOp::CClassElem->new(cc => $cc,
igcase => $::RX{i}, igmark => $::RX{a}) :
RxOp::String->new(text => $cc,
igcase => $::RX{i}, igmark => $::RX{a});
Expand Down Expand Up @@ -609,7 +609,7 @@ sub do_cclass { my ($cl, $M) = @_;
RxOp::CallMethod->new(name => $_->{name}->Str); # assumes no capture

if ($sign) {
$rxop = $rxop ? RxOp::SeqAlt->new(zyg => [ $exp, $rxop ]) : $rxop;
$rxop = $rxop ? RxOp::SeqAlt->new(zyg => [ $exp, $rxop ]) : $exp;
} else {
$rxop = RxOp::Sequence->new(zyg => [
RxOp::NotBefore->new(zyg => [ $exp ]),
Expand Down
25 changes: 25 additions & 0 deletions src/RxOp.pm
Expand Up @@ -460,6 +460,31 @@ use CgOp;

has cc => (isa => 'CClass', is => 'ro', required => 1);

# TODO: some kind of constant table
sub ccop {
my ($self) = @_;
my @ints = @{ $self->cc };
CgOp::rawnew('CC', CgOp::rawnewarr('int',
map { CgOp::int($_) } @ints));
}

sub op {
my ($self, $cn, $cont) = @_;
my $icn = Niecza::Actions->gensym;
$icn, Op::CallSub->new(
invocant => Op::Lexical->new(name => '&_rxcc'),
positionals => [
Op::Lexical->new(name => $icn),
Op::CgOp->new(op => CgOp::wrap($self->ccop)),
$self->_close_k($cn, $cont)
]);
}

sub lad {
my ($self) = @_;
CgOp::rawnew('LADCC', $self->ccop);
}

__PACKAGE__->meta->make_immutable;
no Moose;
}
Expand Down
26 changes: 25 additions & 1 deletion test.pl
Expand Up @@ -2,7 +2,7 @@

use Test;

plan 347;
plan 386;

ok 1, "one is true";
ok 2, "two is also true";
Expand Down Expand Up @@ -881,3 +881,27 @@
sub foo(Str $x) { $x ~ $x }
is foo("bar"), "barbar", "can parse type constraints";
}
sub rxtest($rgx, $rgxname, @y, @n) {
for @y {
my $k = $_ ~~ Pair ?? $_.key !! $_;
my $v = $_ ~~ Pair ?? $_.value !! $_;
ok $k ~~ $rgx, "$rgxname ~~ $v";
}
for @n {
my $k = $_ ~~ Pair ?? $_.key !! $_;
my $v = $_ ~~ Pair ?? $_.value !! $_;
ok !($k ~~ $rgx), "$rgxname !~~ $v";
}
}
rxtest /x.y/, "x.y", ("xay", "x y"), ("xy", "xaay");
rxtest /<!>/, '<!>', Nil, ("", "x");
rxtest /\s/, '\s', (" ", ("\n" => '\n'), ("\r" => '\r'), "\x3000"),
("x", "1", "+");
rxtest /\S/, '\S', ("x", "1", "+"),
(" ", ("\n" => '\n'), ("\r" => '\r'), ("\x3000" => 'id space'));
rxtest /\w/, '\w', ("x", "1", "_", "\x4E00"), ("+", " ");
rxtest /<[ y ]>/, '<[ y ]>', ("y"), (" ", "x", "z");
rxtest /<[ i .. k ]>/, '<[ i .. k ]>', ("i", "j", "k"), ("h", "l");
rxtest /<[ \W a..z ]>/, '<[\W a..z]>', ("a", "z", "+"), ("\x4E00");
33 changes: 0 additions & 33 deletions test2.pl
@@ -1,37 +1,4 @@
# vim: ft=perl6
use Test;

sub rxtest($rgx, $rgxname, @y, @n) {
for @y {
ok $_ ~~ $rgx, "$rgxname ~~ $_";
}
for @n {
ok !($_ ~~ $rgx), "$rgxname !~~ $_";
}
}

sub _rxdot($C, $k) {
Q:CgOp {
(letn rt (rawcall (unbox Cursor (@ (l $C))) AnyChar)
[ternary
(!= (l rt) (null Cursor))
(subcall (@ (l $k)) (box (@ (l $C)) (l rt)))
(null Variable)])
};
}
sub _rxcc($C, $cc, $k) {
Q:CgOp {
(letn rt (rawcall (unbox Cursor (@ {$C})) CClass
(unwrap CC (@ {$cc})))
[ternary
(!= (l rt) (null Cursor))
(subcall (@ {$k}) (box (@ {$C}) (l rt)))
(null Variable)])
};
}

rxtest /x.y/, "x.y", ("xay", "x y"), ("xy", "xaay");
rxtest /<!>/, '<!>', Nil, ("", "x");

done-testing;

0 comments on commit 6cc2651

Please sign in to comment.