Skip to content

Commit

Permalink
Hook up LTM engine to regex processing
Browse files Browse the repository at this point in the history
  • Loading branch information
sorear committed Aug 13, 2010
1 parent 052ee48 commit cd7f153
Show file tree
Hide file tree
Showing 8 changed files with 183 additions and 39 deletions.
6 changes: 6 additions & 0 deletions CgOp.pm
Expand Up @@ -566,6 +566,12 @@ use warnings;
zyg => \@args);
}

sub rawnewarr {
my ($name, @args) = @_;
CgOp::Primitive->new(op => [ 'clr_new_arr', $name, scalar @args ],
zyg => \@args);
}

# must only be called during to_cgop phase!
sub protosub {
my ($body, @extra) = @_;
Expand Down
8 changes: 8 additions & 0 deletions CodeGen.pm
Expand Up @@ -63,6 +63,8 @@ use 5.010;
SetCaps => [m => 'Cursor'],
SimpleWS => [m => 'Cursor'],
Bind => [m => 'Cursor'] },
'Lexer' =>
{ Run => [m => 'Int32[]'] },
'System.IO.File.ReadAllText' => [m => 'System.String'],
Expand Down Expand Up @@ -417,6 +419,12 @@ use 5.010;
$self->_push($class, "new $class(" . join(", ", @args) . ")");
}

sub clr_new_arr {
my ($self, $class, $nitems) = @_;
my @args = reverse map { ($self->_popn(1))[0] } 1 .. $nitems;
$self->_push($class . "[]", "new $class []{" . join(", ", @args) . "}");
}

sub clr_string {
my ($self, $text) = @_;
$self->_push('System.String', qm($text));
Expand Down
25 changes: 24 additions & 1 deletion Cursor.cs
Expand Up @@ -326,6 +326,28 @@ public class LADSequence : LAD {
}
}

public class LADImp : LAD {
public override void ToNFA(NFA pad, int from, int to) {
int knot = pad.AddNode();
pad.nodes[knot].final = true;
pad.AddEdge(from, knot, null);
}

public override void Dump(int indent) {
Console.WriteLine(new string(' ', indent) + "imp");
}
}

public class LADNull : LAD {
public override void ToNFA(NFA pad, int from, int to) {
pad.AddEdge(from, to, null);
}

public override void Dump(int indent) {
Console.WriteLine(new string(' ', indent) + "null");
}
}

// These objects get put in hash tables, so don't change nstates[] after
// that happens
public class LexerState {
Expand Down Expand Up @@ -403,7 +425,8 @@ public class Lexer {
public NFA pad = new NFA();
public string tag;

public static bool LtmTrace = true;
public static bool LtmTrace =
Environment.GetEnvironmentVariable("NIECZA_LTM_TRACE") != null;

public Lexer(string tag, LAD[] alts) {
this.alts = alts;
Expand Down
20 changes: 20 additions & 0 deletions Niecza/Actions.pm
Expand Up @@ -355,6 +355,23 @@ sub quant_atom_list { my ($cl, $M) = @_;
[ map { $_->{_ast} } @{ $M->{quantified_atom} } ]);
}

my %LISTrx_types = (
'&' => 'RxOp::Conj',
'|' => 'RxOp::Alt',
'&&' => 'RxOp::SeqConj',
'||' => 'RxOp::SeqAlt',
);
sub LISTrx { my ($cl, $M) = @_;
$M->{_ast} = $LISTrx_types{$M->{delims}[0]{sym}}->new(zyg =>
[ map { $_->{_ast} } @{ $M->{list} } ]);
}

sub regex_infix {}
sub regex_infix__S_Vert {}
sub regex_infix__S_VertVert {}
sub regex_infix__S_Amp {}
sub regex_infix__S_AmpAmp {}

sub metachar {}
sub metachar__S_sigwhite { my ($cl, $M) = @_;
$M->{_ast} = $::RX{s} ? RxOp::Sigspace->new : RxOp::Sequence->new;
Expand Down Expand Up @@ -663,6 +680,9 @@ my %loose2tight = (
'orelse' => '//', 'and' => '&&', 'or' => '||',
);
sub LIST { my ($cl, $M) = @_;
if ($M->isa('STD::Regex')) {
goto &LISTrx;
}
# STD guarantees that all elements of delims have the same sym
# the last item may have an ast of undef due to nulltermish
my $op = $M->{delims}[0]{sym};
Expand Down
92 changes: 92 additions & 0 deletions RxOp.pm
Expand Up @@ -79,6 +79,11 @@ use CgOp;
]);
}

sub lad {
my ($self) = @_;
CgOp::rawnew('LADStr', CgOp::clr_string($self->text));
}

__PACKAGE__->meta->make_immutable;
no Moose;
}
Expand Down Expand Up @@ -106,6 +111,13 @@ use CgOp;
$self->_close_k($cn, $cont)]);
}

sub lad {
my ($self) = @_;
$self->minimal ? CgOp::rawnew('LADImp') :
CgOp::rawnew('LAD' . ucfirst($qf{$self->type}),
$self->zyg->[0]->lad);
}

__PACKAGE__->meta->make_immutable;
no Moose;
}
Expand All @@ -127,6 +139,37 @@ use CgOp;
$cn, $cont;
}

sub lad {
my ($self) = @_;
my @z = map { $_->lad } @{ $self->zyg };
while (@z >= 2) {
my $x = pop @z;
$z[-1] = CgOp::rawnew('LADSequence', $z[-1], $x);
}
$z[0] // CgOp::rawnew('LADNull');
}


__PACKAGE__->meta->make_immutable;
no Moose;
}

{
package RxOp::ConfineLang;
use Moose;
extends 'RxOp';

# TODO once :lang is implemented, this will be a bit more complicated
sub op {
my ($self, $cn, $cont) = @_;
$self->zyg->[0]->op($cn, $cont);
}

sub lad {
my ($self) = @_;
$self->zyg->[0]->lad;
}

__PACKAGE__->meta->make_immutable;
no Moose;
}
Expand All @@ -150,6 +193,11 @@ use CgOp;
$self->_close_k($cn, $cont)]);
}

sub lad {
my ($self) = @_;
$self->zyg->[0]->lad;
}

__PACKAGE__->meta->make_immutable;
no Moose;
}
Expand Down Expand Up @@ -183,6 +231,11 @@ use CgOp;
$self->_close_k($cn, $cont)]);
}

sub lad {
my ($self) = @_;
$self->zyg->[0]->lad;
}

__PACKAGE__->meta->make_immutable;
no Moose;
}
Expand All @@ -205,6 +258,11 @@ use CgOp;
$self->_close_k($cn, $cont)]);
}

sub lad {
my ($self) = @_;
CgOp::rawnew('LADMethod', CgOp::clr_string($self->name));
}

__PACKAGE__->meta->make_immutable;
no Moose;
}
Expand All @@ -225,6 +283,40 @@ use CgOp;
$self->_close_k($cn, $cont)]);
}

sub lad {
my ($self) = @_;
CgOp::rawnew('LADImp');
}

__PACKAGE__->meta->make_immutable;
no Moose;
}

{
package RxOp::Alt;
use Moose;
extends 'RxOp';

sub op {
my ($self, $cn, $cont) = @_;
my $icn = Niecza::Actions->gensym;
$icn, Op::CallSub->new(
invocant => Op::Lexical->new(name => '&_rxalt'),
positionals => [
Op::Lexical->new(name => $icn),
Op::CgOp->new(op => CgOp::wrap(CgOp::rawnewarr('LAD',
map { $_->lad } @{ $self->zyg }))), #XXX
$self->_close_k($cn, $cont),
map { $self->_close_op($_) } @{ $self->zyg }
]);
}

sub lad {
my ($self) = @_;
CgOp::rawnew('LADAny', CgOp::rawnewarr('LAD',
map { $_->lad } @{ $self->zyg }));
}

__PACKAGE__->meta->make_immutable;
no Moose;
}
Expand Down
19 changes: 19 additions & 0 deletions SAFE.setting
Expand Up @@ -933,6 +933,25 @@ sub _rxcut($C, $f, $k) {
@l && $k(@l.shift);
}
sub _rxalt($C, $lad, $k, *@alts) {
sub lbody($ix) { @alts[$ix]($C, $k) }
Q:CgOp {
(letn csr (unbox Cursor (@ (l $C)))
lexer (rawnew Lexer (clr_string "")
(unwrap 'LAD[]' (@ (l $lad))))
fates (rawcall (l lexer) Run (getfield backing (l csr))
(getfield pos (l csr)))
i (int 0)
nfate (getfield Length (l fates))
(whileloop 0 0 (< (l i) (l nfate)) (prog
(sink (subcall (@ (l &lbody))
(box Num (cast Double (getindex (l i) (l fates))))))
(l i (+ (l i) (int 1)))))
(null Variable))
};
}
sub _rxcall(@list, $k) {
$k(@list.shift) while @list;
}
Expand Down
15 changes: 14 additions & 1 deletion test.pl
Expand Up @@ -2,7 +2,7 @@

use Test;

plan 276;
plan 280;

ok 1, "one is true";
ok 2, "two is also true";
Expand Down Expand Up @@ -721,3 +721,16 @@
ok !("abab" ~~ / ab <.ws> ab /), "ws does not match nothing";
ok ("ab ab" ~~ rule { ab ab }), "rule gives space";
}
{
sub meow(*@x) {
is @x[0], 'a', "can index [0] slurpies";
is @x[1], 'b', "can index [1] slurpies";
}
meow('a', 'b');
# doing a more reasonable test will probably require embedded blocks
ok "foobarx" ~~ / [ foo | foobar ]: x /, "LTM picks longest even if second";
ok "foobarx" ~~ / [ foobar | foo ]: x /, "LTM picks longest even if first";
}
37 changes: 0 additions & 37 deletions test2.pl
@@ -1,41 +1,4 @@
# vim: ft=perl6
use Test;

sub _rxcut($C, $f, $k) {
my @l := gather $f($C, &take);
@l && $k(@l.shift);
}

PRE-INIT {
Cursor.HOW.add-method("orig", anon method orig { Q:CgOp {
(box Str (getfield backing (unbox Cursor (@ (l self))))) } });
Cursor.HOW.add-method("ws", anon method ws() {
gather
Q:CgOp {
(letn rt (rawcall (unbox Cursor (@ (l self))) SimpleWS)
[ternary
(!= (l rt) (null Cursor))
(subcall (@ (l &take)) (box (@ (l self)) (l rt)))
(null Variable)])
};
});
}

ok ("aab" ~~ /a* ab/), "a*ab backtracks";
ok !("aab" ~~ /a*: ab/), "a*: ab doesn't";
ok ("aab" ~~ /a*! ab/), "a*! ab backtracks";
ok !("aab" ~~ /:r a* ab/), "ratcheting a* ab does not";
ok !("aab" ~~ /:r a*: ab/), "ratcheting a*: ab does not";
ok ("aab" ~~ /:r a*! ab/), "ratcheting a*! ab does";
ok !("aab" ~~ token { a* ab }), "a* ab in a token does not";

ok ("ab ab" ~~ / ab <.ws> ab /), "ws matches a space";
ok (q:to/end/ ~~ / ab <.ws> ab /), "ws matches a newline";
ab
ab
end
ok ("ab ab" ~~ / ab <.ws> ab /), "ws matches several spaces";
ok !("abab" ~~ / ab <.ws> ab /), "ws does not match nothing";
ok ("ab ab" ~~ rule { ab ab }), "rule gives space";

done-testing;

0 comments on commit cd7f153

Please sign in to comment.