Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
use node name instead of subtype to identify cclass codes
This frees the subtype slot, which allows us to set zerowidth on
character classes. This patch also builds the AST of cclass subtractions
in a way that makes <[\w]-[\d]> work.
  • Loading branch information
FROGGS committed Mar 2, 2013
1 parent e79c768 commit 8fba6d4
Show file tree
Hide file tree
Showing 5 changed files with 41 additions and 24 deletions.
7 changes: 3 additions & 4 deletions src/QAST/Compiler.nqp
Expand Up @@ -1274,21 +1274,20 @@ class QAST::Compiler is HLL::Compiler {
%cclass_code<s> := '.CCLASS_WHITESPACE';
%cclass_code<w> := '.CCLASS_WORD';
%cclass_code<n> := '.CCLASS_NEWLINE';
%cclass_code<nl> := '.CCLASS_NEWLINE';
}

method cclass($node) {
my $ops := self.post_new('Ops', :result(%*REG<cur>));
my $subtype := $node.subtype;
my $subtype := $node.name;
$ops.push_pirop('ge', %*REG<pos>, %*REG<eos>, %*REG<fail>);
my $cclass := %cclass_code{nqp::lc($subtype)};
my $cclass := %cclass_code{ $subtype };
self.panic("Unrecognized subtype '$subtype' in QAST::Regex cclass")
unless $cclass;
if $cclass ne '.CCLASS_ANY' {
my $testop := $node.negate ?? 'if' !! 'unless';
$ops.push_pirop('is_cclass', '$I11', $cclass, %*REG<tgt>, %*REG<pos>);
$ops.push_pirop($testop, '$I11', %*REG<fail>);
if $subtype eq 'nl' {
if $cclass eq '.CCLASS_NEWLINE' {
$ops.push_pirop('substr', '$S10', %*REG<tgt>, %*REG<pos>, 2);
$ops.push_pirop('iseq', '$I11', '$S10', '"\r\n"');
$ops.push_pirop('add', %*REG<pos>, '$I11');
Expand Down
2 changes: 1 addition & 1 deletion src/QRegex/NFA.nqp
Expand Up @@ -100,7 +100,7 @@ class QRegex::NFA {

method cclass($node, int $from, int $to) {
self.addedge($from, $to, $EDGE_CHARCLASS + ?$node.negate,
%cclass_code{nqp::lc($node.subtype)});
%cclass_code{ $node.name });
}

method concat($node, int $from, int $to) {
Expand Down
10 changes: 5 additions & 5 deletions src/QRegex/P5Regex/Actions.nqp
Expand Up @@ -80,8 +80,8 @@ class QRegex::P5Regex::Actions is HLL::Actions {

method p5metachar:sym<.>($/) {
make %*RX<s>
?? QAST::Regex.new( :rxtype<cclass>, :subtype<.>, :node($/) )
!! QAST::Regex.new( :rxtype<cclass>, :subtype<nl>, :negate(1), :node($/) );
?? QAST::Regex.new( :rxtype<cclass>, :name<.>, :node($/) )
!! QAST::Regex.new( :rxtype<cclass>, :name<n>, :negate(1), :node($/) );
}

method p5metachar:sym<^>($/) {
Expand Down Expand Up @@ -159,6 +159,7 @@ class QRegex::P5Regex::Actions is HLL::Actions {
elsif $_[0]<backslash> {
my $bs := $_[0]<backslash>.ast;
$bs.negate(!$bs.negate) if $<sign> eq '^';
$bs.subtype('zerowidth') if $bs.negate;
@alts.push($bs);
}
else {
Expand All @@ -172,7 +173,7 @@ class QRegex::P5Regex::Actions is HLL::Actions {
$<sign> eq '^' ??
QAST::Regex.new( :rxtype<concat>, :node($/),
QAST::Regex.new( :rxtype<conj>, :subtype<zerowidth>, |@alts ),
QAST::Regex.new( :rxtype<cclass>, :subtype<.> ) ) !!
QAST::Regex.new( :rxtype<cclass>, :name<.> ) ) !!
QAST::Regex.new( :rxtype<altseq>, |@alts );
make $qast;
}
Expand All @@ -183,8 +184,7 @@ class QRegex::P5Regex::Actions is HLL::Actions {
}

method p5backslash:sym<s>($/) {
make QAST::Regex.new(:rxtype<cclass>, '.CCLASS_WHITESPACE',
:subtype($<sym> eq 'n' ?? 'nl' !! ~$<sym>),
make QAST::Regex.new(:rxtype<cclass>, :name( nqp::lc(~$<sym>) ),
:negate($<sym> le 'Z'), :node($/));
}

Expand Down
25 changes: 11 additions & 14 deletions src/QRegex/P6Regex/Actions.nqp
Expand Up @@ -167,7 +167,7 @@ class QRegex::P6Regex::Actions is HLL::Actions {
}

method metachar:sym<.>($/) {
make QAST::Regex.new( :rxtype<cclass>, :subtype<.>, :node($/) );
make QAST::Regex.new( :rxtype<cclass>, :name<.>, :node($/) );
}

method metachar:sym<^>($/) {
Expand Down Expand Up @@ -263,8 +263,7 @@ class QRegex::P6Regex::Actions is HLL::Actions {
method metachar:sym<mod>($/) { make $<mod_internal>.ast; }

method backslash:sym<s>($/) {
make QAST::Regex.new(:rxtype<cclass>, '.CCLASS_WHITESPACE',
:subtype($<sym> eq 'n' ?? 'nl' !! ~$<sym>),
make QAST::Regex.new(:rxtype<cclass>, :name( nqp::lc(~$<sym>) ),
:negate($<sym> le 'Z'), :node($/));
}

Expand Down Expand Up @@ -425,20 +424,18 @@ class QRegex::P6Regex::Actions is HLL::Actions {
$qast.subtype('zerowidth');
$qast := QAST::Regex.new(:rxtype<concat>, :node($/),
$qast,
QAST::Regex.new( :rxtype<cclass>, :subtype<.> ));
QAST::Regex.new( :rxtype<cclass>, :name<.> ));
}

my $i := 1;
my $n := +$clist;
while $i < $n {
my $ast := $clist[$i].ast;
if $ast.negate {
if $ast.rxtype eq 'cclass' {
$ast := QAST::Regex.new( :rxtype<conj>, :subtype<zerowidth>, $ast );
}
else {
$ast.subtype('zerowidth');
}
$qast := QAST::Regex.new( $ast, $qast, :rxtype<concat>, :node($/));
if $ast.negate || $ast.rxtype eq 'cclass' && ~$ast.node le 'Z' {
$ast.subtype('zerowidth');
$qast := QAST::Regex.new( :rxtype<concat>, :node($/),
QAST::Regex.new( :rxtype<conj>, :subtype<zerowidth>, $ast ),
$qast );
}
else {
$qast := QAST::Regex.new( $qast, $ast, :rxtype<altseq>, :node($/));
Expand Down Expand Up @@ -511,6 +508,7 @@ class QRegex::P6Regex::Actions is HLL::Actions {
elsif $_[0]<backslash> {
my $bs := $_[0]<backslash>.ast;
$bs.negate(!$bs.negate) if $<sign> eq '-';
$bs.subtype('zerowidth') if $bs.negate;
@alts.push($bs);
}
else { $str := $str ~ ~$_[0]; }
Expand All @@ -521,10 +519,9 @@ class QRegex::P6Regex::Actions is HLL::Actions {
$<sign> eq '-' ??
QAST::Regex.new( :rxtype<concat>, :node($/),
QAST::Regex.new( :rxtype<conj>, :subtype<zerowidth>, |@alts ),
QAST::Regex.new( :rxtype<cclass>, :subtype<.> ) ) !!
QAST::Regex.new( :rxtype<cclass>, :name<.> ) ) !!
QAST::Regex.new( :rxtype<altseq>, |@alts );
}
#$qast.negate( $<sign> eq '-' );
make $qast;
}

Expand Down
21 changes: 21 additions & 0 deletions t/qregex/rx_charclass
Expand Up @@ -91,4 +91,25 @@
# todo :pge<regression>
abc <![d]> abc y negated charclass at end of string (issue 9)

<[abc]-[a]> abc <b> character class substraction 1
<[abc]-[a]>+ abc <bc> character class substraction 2
<[abc]-[\w]> abc n character class substraction 3
<[abc]-[a\w]> abc n character class substraction 4
<[abc]-[\W]> abc <a> character class substraction 5
<[abc]-[\W]>+ abc <abc> character class substraction 6
<[abc]-[a\W]> abc <b> character class substraction 7
<[abc]-[a\W]>+ abc <bc> character class substraction 8
<[abc]-[ab\Wc]> abc n character class substraction 9
<[ab1]-[\w\d]> ab1 n character class substraction 10
<[ab1]-[\w1]> ab1 n character class substraction 11
<[ab1]-[ab1]> ab1 n character class substraction 12
<[ab1]-[a\d]> ab1 <b> character class substraction 13
<[ab1]-[a\d]>+ ab1 <b> character class substraction 14
<[abc]-[a\d]> ab123c <b> character class substraction 15
<[abc]-[a\d]>+ ab123c <b> character class substraction 16
<[\w]-[\d]> a <b> character class substraction 17
<[\w]-[\d]>+ ab <b> character class substraction 18
<[\w]-[\D]> 1 <b> character class substraction 19
<[\w]-[\D]>+ 123 <b> character class substraction 20

## vim: noexpandtab tabstop=4 shiftwidth=4

0 comments on commit 8fba6d4

Please sign in to comment.