From 1f30c77d1730ffe9b2969edf56352e9105e4d307 Mon Sep 17 00:00:00 2001 From: pmichaud Date: Fri, 9 Oct 2009 03:09:01 -0500 Subject: [PATCH] Add more backslash sequences: \b, \e, \f, \h, \r, \t, \v and negations. --- STATUS | 8 ++++--- src/PAST/Compiler-Regex.pir | 6 ++--- src/Regex/P6Regex/Actions.pm | 45 +++++++++++++++++++++++++++++++++++- src/Regex/P6Regex/Grammar.pm | 8 ++++++- 4 files changed, 59 insertions(+), 8 deletions(-) diff --git a/STATUS b/STATUS index 8d6e5b6..e287386 100644 --- a/STATUS +++ b/STATUS @@ -1,4 +1,4 @@ -2009-10-08: +2009-10-09: At the moment, nqp-rx is configured to build an executable called "p6regex", which is a Perl 6 regular expression compiler for Parrot. @@ -41,7 +41,7 @@ The key files for the p6regex compiler are: src/Regex/P6Regex/Actions.pm # actions to create PAST from parse -Things that work (2009-10-08, 22h23 UTC): +Things that work (2009-10-09, 08h07 UTC): * bare literal strings * quantifiers *, +, ?, *:, +:, ?:, *?, +?, ??, *!, +!, ?! @@ -52,4 +52,6 @@ Things that work (2009-10-08, 22h23 UTC): * anchors ^, ^^, $, $$, <<, >> * backslash-quoted punctuation * #-comments (mostly) -* obsolete backslash sequences \A \Z \z \Q \E +* obsolete backslash sequences \A \Z \z \Q +* \b, \B, \e, \E, \f, \F, \h, \H, \r, \R, \t, \T, \v, \V +* enumerated character lists <[ab0..9]> diff --git a/src/PAST/Compiler-Regex.pir b/src/PAST/Compiler-Regex.pir index 34117f5..9b9eae2 100644 --- a/src/PAST/Compiler-Regex.pir +++ b/src/PAST/Compiler-Regex.pir @@ -477,16 +477,16 @@ character list. .local string charlist charlist = node[0] + charlist = self.'escape'(charlist) .local pmc negate, testop negate = node.'negate'() testop = self.'??!!'(negate, 'ge', 'lt') - ops.'push_pirop'('inline', charlist, negate, 'inline'=>' # rx enumcharlist %0 negate=%1') + ops.'push_pirop'('inline', negate, 'inline'=>' # rx enumcharlist negate=%0') ops.'push_pirop'('ge', pos, eos, fail) ops.'push_pirop'('sub', '$I10', pos, off) ops.'push_pirop'('substr', '$S10', tgt, '$I10', 1) - $S0 = self.'escape'(charlist) - ops.'push_pirop'('index', '$I11', $S0, '$S10') + ops.'push_pirop'('index', '$I11', charlist, '$S10') ops.'push_pirop'(testop, '$I11', 0, fail) ops.'push_pirop'('inc', pos) .return (ops) diff --git a/src/Regex/P6Regex/Actions.pm b/src/Regex/P6Regex/Actions.pm index 305e0dc..9aeab7b 100644 --- a/src/Regex/P6Regex/Actions.pm +++ b/src/Regex/P6Regex/Actions.pm @@ -136,6 +136,49 @@ method backslash:sym($/) { make $past; } +method backslash:sym($/) { + my $past := PAST::Regex.new( "\b", :pasttype('enumcharlist'), + :negate($ eq 'B')); + make $past; +} + +method backslash:sym($/) { + my $past := PAST::Regex.new( "\e", :pasttype('enumcharlist'), + :negate($ eq 'E')); + make $past; +} + +method backslash:sym($/) { + my $past := PAST::Regex.new( "\f", :pasttype('enumcharlist'), + :negate($ eq 'F')); + make $past; +} + +method backslash:sym($/) { + my $past := PAST::Regex.new( "\x[09,20,a0,1680,180e,2000,2001,2002,2003,2004,2005,2006,2007,2008,2009,200a,202f,205f,3000]", :pasttype('enumcharlist'), + :negate($ eq 'H')); + make $past; +} + +method backslash:sym($/) { + my $past := PAST::Regex.new( "\r", :pasttype('enumcharlist'), + :negate($ eq 'R')); + make $past; +} + +method backslash:sym($/) { + my $past := PAST::Regex.new( "\t", :pasttype('enumcharlist'), + :negate($ eq 'T')); + make $past; +} + +method backslash:sym($/) { + my $past := PAST::Regex.new( "\x[0a,0b,0c,0d,85,2028,2029]", :pasttype('enumcharlist'), + :negate($ eq 'V')); + make $past; +} + + method backslash:sym($/) { my $past := PAST::Regex.new( ~$/ , :pasttype('literal') ); make $past; @@ -173,6 +216,6 @@ method cclass_elem($/) { else { $str := $str ~ $_[0]; } } my $past := PAST::Regex.new( $str, :pasttype('enumcharlist') ); - if $ eq '-' { $past.negate(1); } + $past.negate( $ eq '-' ); make $past; } diff --git a/src/Regex/P6Regex/Grammar.pm b/src/Regex/P6Regex/Grammar.pm index 79d4ab7..4c9ac7e 100644 --- a/src/Regex/P6Regex/Grammar.pm +++ b/src/Regex/P6Regex/Grammar.pm @@ -64,10 +64,16 @@ grammar Regex::P6Regex::Grammar is PCT::Grammar; # proto token backslash { <...> } token backslash:sym { $:=[<[dswnDSWN]>] {*} } + token backslash:sym { $:=[<[bB]>] {*} } + token backslash:sym { $:=[<[eE]>] {*} } + token backslash:sym { $:=[<[fF]>] {*} } + token backslash:sym { $:=[<[hH]>] {*} } + token backslash:sym { $:=[<[rR]>] {*} } + token backslash:sym { $:=[<[tT]>] {*} } + token backslash:sym { $:=[<[vV]>] {*} } token backslash:sym { 'A' <.obs: '\\A as beginning-of-string matcher;^'> } token backslash:sym { 'z' <.obs: '\\z as end-of-string matcher;$'> } token backslash:sym { 'Z' <.obs: '\\Z as end-of-string matcher;\\n?$'> } - token backslash:sym { 'E' <.obs: '\\E as quotemeta;quotes or literal variable match'> } token backslash:sym { 'Q' <.obs: '\\Q as quotemeta;quotes or literal variable match'> } token backslash:sym { \W {*} }