Skip to content
This repository has been archived by the owner on Feb 3, 2021. It is now read-only.

Commit

Permalink
Revert regex-interpolation branch changes back to 3ad311, fresh
Browse files Browse the repository at this point in the history
starting point for new approach to regex interpolation.
  • Loading branch information
pmichaud committed May 24, 2010
1 parent c62409b commit 3fcffdb
Show file tree
Hide file tree
Showing 5 changed files with 32 additions and 265 deletions.
78 changes: 18 additions & 60 deletions src/NQP/Actions.pm
Expand Up @@ -814,71 +814,29 @@ class NQP::RegexActions is Regex::P6Regex::Actions {
make PAST::Regex.new( $past, :pasttype('pastnode') );
}

# Action for the <var> metachar. Two shapes are handled:
#   * an aliased capture (a '= quantified_atom' suffix is present): the atom's
#     AST is given the capture name, either by aliasing a subrule node or by
#     wrapping the atom in a 'subcapture' node;
#   * a bare reference: either a variable to be matched as a literal
#     (case-insensitively when the 'i' modifier is active) or a '!BACKREF'
#     subrule call for a named/positional capture.
method metachar:sym<var>($/) {
    my $ast;
    # Positional captures are numified, named captures stringified.
    my $capname := $<pos> ?? +$<pos> !! ~$<name>;
    if $<quantified_atom> {
        if $<var> {
            $/.CURSOR.panic('"$var = " syntax not yet supported in regexes');
        }
        $ast := $<quantified_atom>[0].ast;
        my $kind := $ast.pasttype;
        if $kind eq 'subrule' {
            Regex::P6Regex::Actions::subrule_alias($ast, $capname);
        }
        elsif $kind eq 'quant' && $ast[0].pasttype eq 'subrule' {
            # A quantified subrule: alias the subrule underneath the quantifier.
            Regex::P6Regex::Actions::subrule_alias($ast[0], $capname);
        }
        else {
            $ast := PAST::Regex.new( $ast, :name($capname), :pasttype('subcapture'), :node($/) );
        }
    }
    elsif $<var> {
        # Consult the active regex modifiers to choose the literal subtype.
        my @modifiers := Q:PIR {
            %r = get_hll_global ['Regex';'P6Regex';'Actions'], '@MODIFIERS'
        };
        my $subtype := 'interp_literal';
        if @modifiers[0]<i> {
            $subtype := 'interp_literal_i';
        }
        $ast := PAST::Regex.new( $<var>.ast, :pasttype('pastnode'),
                                 :subtype($subtype), :node($/) );
    }
    else {
        $ast := PAST::Regex.new( '!BACKREF', $capname, :pasttype('subrule'),
                                 :subtype('method'), :node($/) );
    }
    make $ast;
}

# Action for a variable used as an assertion: wraps the variable's AST in a
# 'pastnode' regex node tagged with the 'interp_regex' subtype.
method assertion:sym<var>($/) {
    my $var_ast := $<var>.ast;
    my $node := PAST::Regex.new( $var_ast, :node($/),
                                 :pasttype('pastnode'), :subtype('interp_regex') );
    make $node;
}
# Action for a code-block metachar: the result is simply the AST already
# produced for the enclosed <codeblock>.
method metachar:sym<{ }>($/) {
    my $code_ast := $<codeblock>.ast;
    make $code_ast;
}


# Action for a code-block metachar: wraps the <codeblock> AST in a
# 'pastnode' regex node (no subtype is set).
method metachar:sym<{ }>($/) {
    my $code_ast := $<codeblock>.ast;
    my $node := PAST::Regex.new( $code_ast, :pasttype('pastnode'), :node($/) );
    make $node;
}

# Action for a code-block assertion: wraps the <codeblock> AST in a
# 'pastnode' regex node tagged with the 'interp_regex' subtype.
method assertion:sym<{ }>($/) {
    my $code_ast := $<codeblock>.ast;
    my $node := PAST::Regex.new( $code_ast, :subtype('interp_regex'),
                                 :pasttype('pastnode'), :node($/) );
    make $node;
}
# Action for a code-block assertion: the result is simply the AST already
# produced for the enclosed <codeblock>.
method assertion:sym<{ }>($/) {
    my $code_ast := $<codeblock>.ast;
    make $code_ast;
}

# Action for <codeblock>: takes the AST of the parsed <block>, marks it as an
# 'immediate' block, and arranges for $/ to be bound to the result of a MATCH
# method call before the block's code.
# NOTE(review): the lines below carry two shapes of that wrapper -- a
# bindmatch() helper returning PAST::Stmts, and an inline PAST::Regex
# 'pastnode' built directly in this method -- confirm which one is in effect.
# NOTE(review): the invocant of MATCH is a PAST::Var with an empty name;
# presumably that resolves to the current cursor -- TODO confirm.
method codeblock($/) {
my $block := $<block>.ast;
$block.blocktype('immediate');
make bindmatch($block);
}

sub bindmatch($past) {
PAST::Stmts.new(
PAST::Op.new(
PAST::Var.new( :name('$/') ),
PAST::Op.new(
PAST::Var.new( :name('') ),
:name('MATCH'),
:pasttype('callmethod')
my $past :=
PAST::Regex.new(
PAST::Stmts.new(
PAST::Op.new(
PAST::Var.new( :name('$/') ),
PAST::Op.new(
PAST::Var.new( :name('') ),
:name('MATCH'),
:pasttype('callmethod')
),
:pasttype('bind')
),
$block
),
:pasttype('bind')
),
$past,
);
:pasttype('pastnode')
);
make $past;
}
}
20 changes: 0 additions & 20 deletions src/NQP/Grammar.pm
Expand Up @@ -550,26 +550,6 @@ grammar NQP::Regex is Regex::P6Regex::Grammar {
':' <?before 'my'> <statement=.LANG('MAIN', 'statement')> <.ws> ';'
}

# Matches the '$' metachar (via <sym>) only when it is not immediately
# followed by a word character.
token metachar:sym<$> {
<sym> <!before \w>
}

# Variable-like metachar. The first group accepts one of:
#   - '$<' ... '>'  : the text between the angles is captured as $<name>
#   - '$' digits    : the digits are captured as $<pos>
#   - a '$' or '@' sigil followed by a word character: parsed with the MAIN
#     language's 'variable' rule and captured as $<var>
#   - a '%' followed by a word character: panics (reserved syntax)
# The optional second group accepts '=' followed by a <quantified_atom>,
# with <.ws> allowed on either side of the '='.
token metachar:sym<var> {
[
| '$<' $<name>=[<-[>]>+] '>'
| '$' $<pos>=[\d+]
| <?before <[$@]> \w> <var=.LANG('MAIN', 'variable')>
| <?before '%' \w> <.panic: "Use of hash variable in patterns is reserved">
]

[ <.ws> '=' <.ws> <quantified_atom> ]?
}

# Variable used as an assertion. A '$' or '@' sigil followed by a word
# character is parsed with the MAIN language's 'variable' rule and captured
# as $<var>; a '%' followed by a word character panics (reserved syntax).
token assertion:sym<var> {
| <?before <[$@]> \w> <var=.LANG('MAIN', 'variable')>
| <?before '%' \w> <.panic: "Use of hash variable in patterns is reserved">
}

# Code-block metachar: a zero-width check that the next character is '{',
# followed by a <codeblock>.
token metachar:sym<{ }> {
<?[{]> <codeblock>
}
Expand Down
111 changes: 8 additions & 103 deletions src/PAST/Compiler-Regex.pir
Expand Up @@ -744,127 +744,32 @@ second child of this node.

=item 'pastnode'(PAST::Regex node)

Evaluates the supplied PAST node and then processes the result according to the node's subtype.

Subtype can be any of:

=over 4

=item zerowidth

Only test for truthiness and fail or not. No interpolation.

=item interp_regex

String values should be compiled into regexes and then interpolated.

=item interp_literal

String values should be treated as literals.

=item interp_literal_i

String values should be treated as literals and matched case-insensitively.

=item <nothing>

Don't interpolate anything; just execute the PAST code.

=back

=cut

# Generates POST for a 'pastnode' regex node: emits a '!cursor_pos' cursor op
# with the current pos, pushes the code compiled from the node's first child,
# and then acts on that result according to the node's subtype.
# NOTE(review): this span carries two shapes of the sub -- a short one that
# only handles the 'zerowidth' subtype, and a longer one that also
# interpolates the result for other non-empty subtypes -- confirm which one is
# in effect before relying on any single line below.
.sub 'pastnode' :method :multi(_, ['PAST'; 'Regex'])
.sub 'pastnode' :method :multi(_, ['PAST';'Regex'])
.param pmc node
.local pmc cur, pos, fail, ops, eos, off, tgt
(cur, pos, eos, off, tgt, fail) = self.'!rxregs'('cur pos eos off tgt fail')
.local pmc cur, pos, fail, ops
(cur, pos, fail) = self.'!rxregs'('cur pos fail')
ops = self.'post_new'('Ops', 'node'=>node, 'result'=>cur)
.local pmc zerowidth, negate, testop, subtype
subtype = node.'subtype'()

# NOTE(review): 'negate' is passed here but is not assigned until the
# zerowidth_test section further down; at this point it is still unset.
ops.'push_pirop'('inline', subtype, negate, 'inline'=>' # rx pastnode subtype=%1 negate=%2')
.local pmc cpast, cpost
cpast = node[0]
cpost = self.'as_post'(cpast, 'rtype'=>'P')

self.'!cursorop'(ops, '!cursor_pos', 0, pos)
ops.'push'(cpost)

# If this is just a zerowidth assertion, we don't actually interpolate anything. Just evaluate
# and fail or not.
if subtype == 'zerowidth' goto zerowidth_test

# Retain backwards compatibility with old pastnode semantics
unless subtype goto done

# Build unique label names and registers for the generated interpolation code.
.local string prefix
prefix = self.'unique'('pastnode_')
.local pmc precompiled_label, done_label, loop_label, iterator_reg, label_reg
$S0 = concat prefix, '_precompiled'
precompiled_label = self.'post_new'('Label', 'result'=>$S0)
$S0 = concat prefix, '_done'
done_label = self.'post_new'('Label', 'result'=>$S0)
$S0 = concat prefix, '_loop'
loop_label = self.'post_new'('Label', 'result'=>$S0)
iterator_reg = self.'uniquereg'("P")
label_reg = self.'uniquereg'("I")

# The subtype is passed to the cursor helper as a double-quoted string constant.
$S10 = subtype
$S10 = concat '"', $S10
$S10 = concat $S10, '"'
self.'!cursorop'(ops, '!process_pastnode_results_for_interpolation', 1, '$P10', cpost, $S10)

# Generated code: iterate over the helper's result, failing once the iterator
# is exhausted; a mark is pushed for each element using the loop label's address.
ops.'push_pirop'('iter', iterator_reg, '$P10')
ops.'push_pirop'('set_addr', label_reg, loop_label)
ops.'push'(loop_label)
ops.'push_pirop'('unless', iterator_reg, fail)
ops.'push_pirop'('shift', '$P10', iterator_reg)
self.'!cursorop'(ops, '!mark_push', 0, 0, pos, label_reg)

# Check if it's already a compiled Regex, and call it as a method if so
ops.'push_pirop'('isa', '$I10', '$P10', "['Sub']")
ops.'push_pirop'('if', '$I10', precompiled_label)

# XXX This is rakudo's Regex class. I'm not sure why the above test doesn't catch it, but
# need to figure it out so NQP doesn't have rakudo knowledge :(
ops.'push_pirop'('isa', '$I10', '$P10', "['Regex']")
ops.'push_pirop'('if', '$I10', precompiled_label)

# Otherwise, treat it as a literal
# Generated code: fail if the literal would run past eos, otherwise compare it
# with the substring of the target at the current position.
ops.'push_pirop'('set', '$S10', '$P10')
ops.'push_pirop'('length', '$I10', '$S10')
ops.'push_pirop'('add', '$I11', pos, '$I10')
ops.'push_pirop'('gt', '$I11', eos, fail)
ops.'push_pirop'('sub', '$I11', pos, off)
ops.'push_pirop'('substr', '$S11', tgt, '$I11', '$I10')
# This branch runs in this sub (it is not a pushed op): the two downcase ops
# are only emitted when the subtype is 'interp_literal_i'.
ne subtype, 'interp_literal_i', dont_downcase
ops.'push_pirop'('downcase', '$S10', '$S10')
ops.'push_pirop'('downcase', '$S11', '$S11')
dont_downcase:
ops.'push_pirop'('ne', '$S11', '$S10', fail)
ops.'push_pirop'('add', pos, '$I10')
ops.'push_pirop'('goto', done_label)

# Generated code for an already-compiled value: invoke it as a method on the
# cursor, fail if the result is false, otherwise take the new pos from it.
ops.'push'(precompiled_label)
ops.'push_pirop'('callmethod', '$P10', cur, 'result'=>'$P10')
ops.'push_pirop'('unless', '$P10', fail)
self.'!cursorop'(ops, '!mark_push', 0, 0, CURSOR_FAIL, 0, '$P10')
ops.'push_pirop'('callmethod', '"pos"', '$P10', 'result'=>pos)

ops.'push'(done_label)

goto done

zerowidth_test:
.local pmc subtype, negate, testop
subtype = node.'subtype'()
if subtype != 'zerowidth' goto done
# Zero-width test: emit 'unless <result> goto fail', or 'if <result> goto fail'
# when the node is negated.
negate = node.'negate'()
testop = self.'??!!'(negate, 'if', 'unless')
ops.'push_pirop'(testop, cpost, fail)
done:
.return (ops)

.end


=item pass(PAST::Regex node)

=cut
Expand Down
76 changes: 0 additions & 76 deletions src/Regex/Cursor.pir
Expand Up @@ -699,82 +699,6 @@ Match the backreference given by C<name>.
.return (cur)
.end

=item !process_pastnode_results_for_interpolation

Used by the pastnode PAST::Regex type to prepare the results of the evaluation for interpolation.

Takes two arguments:

=over 4

=item The node results

=item The subtype of the PAST::Regex node, which is one of:

=over 4

=item interp_regex

String values should be compiled into regexes and then interpolated.

=item interp_literal

String values should be treated as literals.

=item interp_literal_i

String values should be treated as literals and matched case-insensitively.

=back

=back

Returns a ResizablePMCArray (RPA) containing the elements to be interpolated.

=cut

# Normalizes the value produced by a pastnode into a ResizablePMCArray of
# items to interpolate.
#   node    - the evaluated value; either a ResizablePMCArray or a single value
#   subtype - when 'interp_regex', each item that is not already a Sub is
#             compiled into a regex; any other subtype passes items through
.sub '!process_pastnode_results_for_interpolation' :method
.param pmc node
.param string subtype

.local pmc it, result, compiler, context
.local string codestr

result = new ['ResizablePMCArray']
# Only an exact 'ResizablePMCArray' type name is treated as a list of items.
$S0 = typeof node
if $S0 == 'ResizablePMCArray' goto array
$P1 = node
# Single value: 'it' is a boxed 0, so after this one item is pushed the
# 'unless it' test at loop: is false and the loop ends.
it = box 0
goto not_array
array:
it = iter node
loop:
unless it, loop_done
$P1 = shift it
not_array:
if subtype != 'interp_regex' goto literal
# Don't need to compile it if it's already a Sub
$I0 = isa $P1, ['Sub']
if $I0 goto literal
# Replace each '/' in the string, then wrap the whole string in '/' ... '/'
# so it can be handed to the compiler as a regex literal.
# NOTE(review): single-quoted PIR strings are presumably not escape-processed,
# in which case '\\/' inserts two backslashes before each '/' -- confirm that
# this is the intended escaping.
codestr = $P1
$P1 = split '/', codestr
codestr = join '\\/', $P1
codestr = concat '/', codestr
codestr = concat codestr, '/'
# Compile with the 'NQP-rx' compiler, passing the interpreter's context entry
# as outer_ctx; take element 0 of the compile result, set its outer to that
# context's current_sub, and invoke it. The invocation's return value
# replaces $P1.
compiler = compreg 'NQP-rx'
$P2 = getinterp
context = $P2['context';0]
$P2 = compiler.'compile'(codestr, 'outer_ctx'=>context)
$P1 = $P2[0]
$P2 = getattribute context, 'current_sub'
$P1.'set_outer'($P2)
$P1 = $P1()
literal:
push result, $P1
goto loop
loop_done:
.return (result)
.end

=back

Expand Down
12 changes: 6 additions & 6 deletions src/setting/Regex.pm
Expand Up @@ -13,17 +13,17 @@ given, then return an array of all non-overlapping matches.

# Matches $text against $regex. With :global, successive matches are gathered
# into @matches by re-parsing $text with the same rule, each time continuing
# from the end position ($match.to) of the previous match, until a match fails.
# NOTE(review): the lines below carry two shapes for the result -- one that
# always returns @matches (holding at most one match when :global is not set),
# and one that returns the single $match itself when :global is not set --
# confirm which one callers expect.
our sub match ($text, $regex, :$global?) {
my $match := $text ~~ $regex;
my @matches;
if $global {
my @matches;
while $match {
@matches.push($match);
$match := $match.CURSOR.parse($text, :rule($regex), :c($match.to));
}
@matches;
}
elsif $match {
@matches.push($match);
else {
$match;
}
@matches;
}


Expand All @@ -34,8 +34,8 @@ perform the replacement on all matches of C<$text>.
=end item

our sub subst ($text, $regex, $repl, :$global?) {
my @matches := match($text, $regex, $global);

my @matches := $global ?? match($text, $regex, :global)
!! [ $text ~~ $regex ];
my $is_code := pir::isa($repl, 'Sub');
my $offset := 0;
my $result := pir::new__Ps('StringBuilder');
Expand Down

0 comments on commit 3fcffdb

Please sign in to comment.