Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
NQP RegexOptimizer for before assertions
  • Loading branch information
timo committed Nov 20, 2013
1 parent 795d519 commit 5c4d88d
Show file tree
Hide file tree
Showing 2 changed files with 139 additions and 48 deletions.
139 changes: 139 additions & 0 deletions src/NQP/Optimizer.nqp
@@ -1,7 +1,144 @@
# Optimization pass over QAST::Regex trees.
#
# Walks a regex tree, collapses single-child concat nodes and rewrites
# simple zero-width `before` assertions (literal, enumcharlist, charrange,
# cclass) into a direct zero-width node of the same kind, so that no subrule
# call is needed for them.
class NQP::RegexOptimizer {
    # Stack of the blocks enclosing the node currently being visited; the last
    # element is the innermost one. Seeded by optimize() with the caller's
    # block and extended by visit_children() for every QAST::Block it enters.
    has @!outer;

    # Entry point: optimizes the children of $node in place.
    #
    #   $node     - the regex node whose children are to be optimized
    #   $outer    - the block enclosing $node; blocks that get eliminated are
    #               rescued into its first child (see stub_out_block)
    #   %adverbs  - accepted for interface compatibility; not used here
    method optimize($node, $outer, *%adverbs) {
        # NOTE(review): debugging output, kept as-is; consider removing it or
        # putting it behind an adverb.
        say("going to optimize this:");
        say($node.dump);
        # Bind the attribute itself. The previous `my @!outer := ...` was a
        # declaration rather than a bind of the `has @!outer` attribute that
        # the other methods read.
        @!outer := [$outer];
        self.visit_children($node);
    }

    # Returns the number of children of $node if every child is a QAST::Regex
    # whose rxtype equals $type, and -1 as soon as one child is not.
    method all_subtypes($node, $type) {
        my $res := 0;
        for $node {
            # Inspect the child ($_), not the parent: testing $node.rxtype
            # here made the answer independent of the children.
            if nqp::istype($_, QAST::Regex) && $_.rxtype eq $type {
                $res := $res + 1;
            } else {
                return -1;
            }
        }
        return $res;
    }

    # Optimizes the children of a concat node and returns the node that
    # should take its place: a concat with exactly one child is replaced by
    # that child, any other concat is returned unchanged.
    method visit_concat($node) {
        self.visit_children($node);
        if +@($node) == 1 {
            return $node[0];
        } else {
            $node;
        }
    }

    # Neutralizes a block whose regex has been replaced by a simpler node.
    #
    # The first two children of $block are removed, the child that is then
    # first is replaced by a `die_s` op, and the block is pushed onto the
    # first child of the innermost enclosing block.
    method stub_out_block($block) {
        # remove both the stmts and the contents.
        $block.shift();
        $block.shift();
        # put in an explosive die statement.
        $block[0] := QAST::Op.new( :op('die_s'),
            QAST::SVal.new( :value('INTERNAL ERROR: Execution of block eliminated by optimizer') ) );
        # rescue the block into the first stmts of our current outer block
        @!outer[+@!outer - 1][0].push($block);
    }

    # Tries to replace a zero-width `before` subrule call by a direct
    # zero-width node.
    #
    # The rewrite applies when $qast is a zerowidth subrule whose first child
    # names `before` and carries a block whose regex is
    # concat(scan, <simple>, pass), with <simple> being a case-sensitive
    # literal, enumcharlist, charrange or cclass. Returns the replacement
    # node, or $qast itself when no rewrite applies.
    method simplify_assertion($qast) {
        if $qast.rxtype eq 'subrule' && $qast.subtype eq 'zerowidth'
                && nqp::istype($qast[0], QAST::Node)
                && nqp::istype($qast[0][0], QAST::SVal)
                && $qast[0][0].value eq 'before' {
            if nqp::istype($qast[0][1], QAST::Block)
                    && nqp::istype((my $regex := $qast[0][1][2]), QAST::Regex)
                    && $regex.rxtype eq 'concat'
                    && $regex[0].rxtype eq 'scan'
                    && $regex[2].rxtype eq 'pass' {
                my $simple := $regex[1];
                # a concat with a single child becomes the child itself
                if nqp::istype($simple, QAST::Regex) && $simple.rxtype eq 'concat' && +@($simple) == 1 {
                    $simple := $simple[0];
                    # even though it's not worth terribly much, we can do this optimization
                    # in any case, even if we're not going to do the optimization below:
                    $qast[0][1][2][1] := $simple;
                }
                # Stays 0 (false) unless one of the branches below applies.
                my $result := 0;
                # The case-sensitivity test has to look at the subtype; the
                # former `.rxtype ne 'ignorecase'` was always true, so
                # ignorecase atoms were rewritten into nodes that no longer
                # carry the ignorecase subtype.
                if $simple.rxtype eq 'literal' && $simple.subtype ne 'ignorecase' {
                    say("simplified a literal inside a before");
                    say($qast.dump);
                    $result := QAST::Regex.new(:rxtype<literal>, :subtype<zerowidth>, :node($simple.node),
                        :negate($qast.negate),
                        $simple[0]);
                } elsif $simple.rxtype eq 'enumcharlist' && $simple.subtype ne 'ignorecase' {
                    say("simplified an enumcharlist inside a before");
                    say($qast.dump);
                    $result := QAST::Regex.new(:rxtype<enumcharlist>, :subtype<zerowidth>, :node($simple.node),
                        :negate($qast.negate),
                        $simple[0]);
                } elsif $simple.rxtype eq 'charrange' && $simple.subtype ne 'ignorecase' {
                    say("simplified a charrange inside a before");
                    say($qast.dump);
                    $result := QAST::Regex.new(:rxtype<charrange>, :subtype<zerowidth>, :node($simple.node),
                        :negate($qast.negate),
                        $simple[0],
                        $simple[1],
                        $simple[2]);
                } elsif $simple.rxtype eq 'cclass' && $simple.subtype ne 'ignorecase' {
                    say("simplified a cclass inside a before");
                    say($qast.dump);
                    $result := QAST::Regex.new(:rxtype<cclass>, :subtype<zerowidth>, :node($simple.node),
                        :negate($qast.negate), :name($simple.name));
                }
                if $result {
                    # The block that held the regex is no longer reachable
                    # from the rewritten node; stub it out and keep it
                    # attached to the enclosing block.
                    self.stub_out_block($qast[0][1]);
                    $qast := $result;
                    say("this is our result:");
                    say($qast.dump);
                }
            }
        }
        $qast;
    }

    # Visits every child of $node and optimizes it according to its kind,
    # replacing the child in place where an optimization yields a new node.
    # String nodes have no children to visit and are skipped.
    method visit_children($node) {
        my int $i := 0;
        unless nqp::isstr($node) {
            while $i < +@($node) {
                my $visit := $node[$i];
                # Stays '' for children that are neither a regex nor a block;
                # those end up in the generic recursion below.
                my $type := '';
                if nqp::istype($visit, QAST::Regex) {
                    $type := $visit.rxtype;
                } elsif nqp::istype($visit, QAST::Block) {
                    $type := "block";
                    # Track the block so that stub_out_block rescues into the
                    # innermost enclosing block; popped again below.
                    @!outer.push($visit);
                }
                if $type eq 'scan' {
                } elsif $type eq 'literal' {
                } elsif $type eq 'quant' {
                    self.visit_children($visit);
                } elsif $type eq 'subrule' {
                    $node[$i] := self.simplify_assertion($visit);
                } elsif $type eq 'anchor' {
                } elsif $type eq 'subcapture' {
                } elsif $type eq 'enumcharlist' {
                } elsif $type eq 'cclass' {
                } elsif $type eq 'charrange' {
                } elsif $type eq 'concat' {
                    $node[$i] := self.visit_concat($visit);
                } elsif $type eq 'pass' || $type eq 'fail' {
                } else {
                    # alt, altseq, conjseq, conj, blocks and anything else
                    self.visit_children($visit);
                }
                $i := $i + 1;
                if $type eq 'block' {
                    @!outer.pop();
                }
            }
        }
    }
}

class NQP::Optimizer {
has @!block_stack;
has %!adverbs;

method optimize($ast, *%adverbs) {
%!adverbs := %adverbs;
@!block_stack := [$ast[0]];
self.visit_children($ast);
$ast;
Expand Down Expand Up @@ -120,6 +257,8 @@ class NQP::Optimizer {
$node[$i] := self.visit_block($visit)
} elsif nqp::istype($visit, QAST::Want) {
self.visit_children($visit, :skip_selectors)
} elsif nqp::istype($visit, QAST::Regex) {
NQP::RegexOptimizer.new().optimize($visit, @!block_stack[+@!block_stack - 1], |%!adverbs);
} else {
self.visit_children($visit);
}
Expand Down
48 changes: 0 additions & 48 deletions src/QRegex/P6Regex/Actions.nqp
Expand Up @@ -433,58 +433,11 @@ class QRegex::P6Regex::Actions is HLL::Actions {
make $qast;
}

# Tries to replace a zero-width `before` subrule call by a direct zero-width
# node.
#
# The rewrite applies when $qast is a zerowidth subrule whose first child
# names `before` and carries a block whose regex is
# concat(scan, <simple>, pass), with <simple> being a case-sensitive literal,
# enumcharlist, charrange or cclass. Returns the replacement node, or $qast
# itself when no rewrite applies.
method simplify_assertion($qast) {
    if $qast.rxtype eq 'subrule' && $qast.subtype eq 'zerowidth'
            && nqp::istype($qast[0], QAST::Node)
            && nqp::istype($qast[0][0], QAST::SVal)
            && $qast[0][0].value eq 'before' {
        if nqp::istype($qast[0][1], QAST::Block)
                && nqp::istype((my $regex := $qast[0][1][2]), QAST::Regex)
                && $regex.rxtype eq 'concat'
                && $regex[0].rxtype eq 'scan'
                && $regex[2].rxtype eq 'pass' {
            my $simple := $regex[1];
            # a concat with a single child becomes the child itself
            if nqp::istype($simple, QAST::Regex) && $simple.rxtype eq 'concat' && +@($simple) == 1 {
                $simple := $simple[0];
                # even though it's not worth terribly much, we can do this optimization
                # in any case, even if we're not going to do the optimization below:
                $qast[0][1][2][1] := $simple;
            }
            # The case-sensitivity test has to look at the subtype; the former
            # `.rxtype ne 'ignorecase'` was always true, so ignorecase atoms
            # were rewritten into nodes that no longer carry the ignorecase
            # subtype.
            if $simple.rxtype eq 'literal' && $simple.subtype ne 'ignorecase' {
                say("simplified a literal inside a before");
                say($qast.dump);
                return QAST::Regex.new(:rxtype<literal>, :subtype<zerowidth>, :node($simple.node),
                    :negate($qast.negate),
                    $simple[0]);
            } elsif $simple.rxtype eq 'enumcharlist' && $simple.subtype ne 'ignorecase' {
                say("simplified an enumcharlist inside a before");
                say($qast.dump);
                return QAST::Regex.new(:rxtype<enumcharlist>, :subtype<zerowidth>, :node($simple.node),
                    :negate($qast.negate),
                    $simple[0]);
            } elsif $simple.rxtype eq 'charrange' && $simple.subtype ne 'ignorecase' {
                say("simplified a charrange inside a before");
                say($qast.dump);
                return QAST::Regex.new(:rxtype<charrange>, :subtype<zerowidth>, :node($simple.node),
                    :negate($qast.negate),
                    $simple[0],
                    $simple[1],
                    $simple[2]);
            } elsif $simple.rxtype eq 'cclass' && $simple.subtype ne 'ignorecase' {
                say("simplified a cclass inside a before");
                say($qast.dump);
                # QAST::Regex, like the other branches; this branch used to
                # construct `QAST::QRegex`.
                return QAST::Regex.new(:rxtype<cclass>, :subtype<zerowidth>, :node($simple.node),
                    :negate($qast.negate), :name($simple.name));
            }
        }
    }
    $qast;
}

method assertion:sym<?>($/) {
my $qast;
if $<assertion> {
$qast := $<assertion>.ast;
$qast.subtype('zerowidth');
$qast := self.simplify_assertion($qast);
}
else {
$qast := QAST::Regex.new( :rxtype<anchor>, :subtype<pass>, :node($/) );
Expand All @@ -498,7 +451,6 @@ class QRegex::P6Regex::Actions is HLL::Actions {
$qast := $<assertion>.ast;
$qast.negate( !$qast.negate );
$qast.subtype('zerowidth');
$qast := self.simplify_assertion($qast);
}
else {
$qast := QAST::Regex.new( :rxtype<anchor>, :subtype<fail>, :node($/) );
Expand Down

0 comments on commit 5c4d88d

Please sign in to comment.