From 09519c34af1c40fd955b0ff524d845445e4e5ccf Mon Sep 17 00:00:00 2001 From: Stefan O'Rear Date: Tue, 7 Sep 2010 01:15:54 -0700 Subject: [PATCH] Add a very simple regex optimizer. ~2% win so far --- Niecza.proj | 2 +- src/Niecza/Actions.pm | 5 ++++- src/Optimizer/RxSimple.pm | 47 +++++++++++++++++++++++++++++++++++++++ t/JsonTinyG.pl | 10 ++++----- 4 files changed, 57 insertions(+), 7 deletions(-) create mode 100644 src/Optimizer/RxSimple.pm diff --git a/Niecza.proj b/Niecza.proj index 8436040b..f3236ef1 100644 --- a/Niecza.proj +++ b/Niecza.proj @@ -16,7 +16,7 @@ - + diff --git a/src/Niecza/Actions.pm b/src/Niecza/Actions.pm index aa320e56..2ee06ca4 100644 --- a/src/Niecza/Actions.pm +++ b/src/Niecza/Actions.pm @@ -11,6 +11,7 @@ use Body; use Unit; use Sig; use CClass; +use Optimizer::RxSimple; use Try::Tiny; @@ -293,13 +294,14 @@ sub quote__S_Q { my ($cl, $M) = @_; } sub quote__S_Slash_Slash { my ($cl, $M) = @_; + my $rxop = Optimizer::RxSimple::run($M->{nibble}{_ast}); $M->{_ast} = Op::SubDef->new( var => $cl->gensym, body => Body->new( class => 'Regex', type => 'regex', signature => Sig->simple->for_regex, - do => Op::RegexBody->new(rxop => $M->{nibble}{_ast}))); + do => Op::RegexBody->new(rxop => $rxop))); } sub regex_block { my ($cl, $M) = @_; @@ -372,6 +374,7 @@ sub regex_def { my ($cl, $M) = @_; } local $::symtext = $symtext; + $ast = Optimizer::RxSimple::run($ast); $M->{_ast} = Op::SubDef->new( var => $var, method_too => ($scope eq 'has' ? $name : undef), diff --git a/src/Optimizer/RxSimple.pm b/src/Optimizer/RxSimple.pm new file mode 100644 index 00000000..57b8de2d --- /dev/null +++ b/src/Optimizer/RxSimple.pm @@ -0,0 +1,47 @@ +package Optimizer::RxSimple; +use 5.010; +use utf8; +use strict; +use warnings; + +sub run { + $_[0]->rxsimp(0); +} + +# XXX should use a multi sub. +sub RxOp::rxsimp { my ($self, $cut) = @_; + my $selfp = bless { %$self }, ref($self); + $selfp->{zyg} = [ map { $_->rxsimp(0) } @{ $self->zyg } ]; + $selfp; +} + +sub RxOp::mayback { 1 } + +sub RxOp::Sequence::rxsimp { my ($self, $cut) = @_; + my @kids; + for my $i (0 .. $#{ $self->zyg }) { + my $k = $self->zyg->[$i]; + $k = $k->rxsimp($cut && $i == $#{ $self->zyg }); + if ($k->isa('RxOp::Sequence')) { + push @kids, @{ $k->zyg }; + } else { + push @kids, $k; + } + } + (@kids == 1) ? $kids[0] : RxOp::Sequence->new(zyg => \@kids); +} + +sub RxOp::Sequence::mayback { my ($self) = @_; + for (@{ $self->zyg }) { + return 1 if $_->mayback; + } + return 0; +} + +sub RxOp::Cut::rxsimp { my ($self, $cut) = @_; + return $self->zyg->[0]->rxsimp(0) if !$self->zyg->[0]->mayback; + + return RxOp::Cut->new(zyg => [$self->zyg->[0]->rxsimp(1)]); +} + +1; diff --git a/t/JsonTinyG.pl b/t/JsonTinyG.pl index 76c21b4f..729d2e65 100644 --- a/t/JsonTinyG.pl +++ b/t/JsonTinyG.pl @@ -6,23 +6,23 @@ ] { say "parsed {$/.pos} chars" } } - rule object { '{' :: '}' } + rule object { '{' '}' } rule pairlist { [ [ "," ]* ]? } rule pair { ':' } - rule array { '[' :: [ [ "," ]* ]? ']' } + rule array { '[' [ [ "," ]* ]? ']' } proto token value {*} token value:sym { '-'? - [ 0 || <[1..9]> <[0..9]>* ] + [ 0 | <[1..9]> <[0..9]>* ] [ "." <[0..9]>+ ]? [ <[eE]> ["+"|"-"]? <[0..9]>+ ]? } token value:sym { } token value:sym { } token value:sym { } - token value:sym { } - token value:sym { } + token value:sym { :: } + token value:sym { :: } token value:sym { :: } token string {