Skip to content
This repository has been archived by the owner on Feb 3, 2021. It is now read-only.

Commit

Permalink
Regexes no longer need explicit action tokens at the end.
Browse files: browse the repository at this point in the history
  • Loading branch information
pmichaud committed Oct 18, 2009
1 parent 4dabcc0 commit 180f18e
Showing 1 changed file with 33 additions and 46 deletions.
79 changes: 33 additions & 46 deletions src/Regex/P6Regex/Grammar.pm
Expand Up @@ -4,22 +4,20 @@ grammar Regex::P6Regex::Grammar;

# normspace: consume whitespace via <.ws>, but only when the next character
# is whitespace or a '#'.
token normspace { <?before \s | '#' > <.ws> }

# quote: a single-quoted literal. The text between the quotes (any run of
# characters other than "'") is captured as $<val>.
# NOTE(review): the two definitions below differ only by the trailing {*};
# they look like the before/after lines of a rendered diff -- confirm which
# one is live.
token quote { \' $<val>=[<-[']>*] \' {*} }
token quote { \' $<val>=[<-[']>*] \' }

# arg: a single argument -- either a <quote> or a run of digits captured
# as $<value>.
# NOTE(review): the standalone {*} line is presumably one removed by this
# commit -- confirm.
token arg {
[
| <quote>
| $<value>=[\d+]
]
{*}
}

# arglist: one <arg> followed by zero or more ',' <arg> pairs. Declared as a
# rule rather than a token.
# NOTE(review): the two definitions below differ only by the trailing {*};
# they look like the before/after lines of a rendered diff -- confirm which
# one is live.
rule arglist { <arg> [ ',' <arg> ]* {*} }
rule arglist { <arg> [ ',' <arg> ]* }

# TOP: entry rule. Parses a <nibbler> and then requires '$' to match;
# if it does not, <.panic> is called with the message 'Confused'.
# NOTE(review): the standalone {*} line is presumably one removed by this
# commit -- confirm.
token TOP {
<nibbler>
[ $ || <.panic: 'Confused'> ]
{*}
}

token nibbler {
Expand All @@ -29,17 +27,14 @@ grammar Regex::P6Regex::Grammar;
[ ['||'|'|']
[ <termish> || <.panic: 'Null pattern not allowed'> ]
]*
{*}
}

# termish: one or more <quantified_atom> matches, each captured under the
# alias 'noun'.
# NOTE(review): the standalone {*} line is presumably one removed by this
# commit -- confirm.
token termish {
<noun=quantified_atom>+
{*}
}

# quantified_atom: an <atom>, optionally followed (after <.ws>) by either
#   - a <quantifier>, or
#   - when the next character is ':', a <backmod> that is not followed by
#     an alphabetic character (<!alpha>).
# NOTE(review): the standalone {*} line is presumably one removed by this
# commit -- confirm.
token quantified_atom {
<atom> [ <.ws> [ <quantifier> | <?before ':'> <backmod> <!alpha> ] ]?
{*}
}

token atom {
Expand All @@ -48,50 +43,46 @@ grammar Regex::P6Regex::Grammar;
| \w [ \w+! <?before \w> ]?
| <metachar>
]
{*}
}

# proto token quantifier { <...> }
# Quantifier variants. Each captures its operator text as $<sym> and is
# followed by a <backmod>:
#   '*', '+', '?'  -- operator then <backmod>, nothing else.
#   '**'           -- operator, optional whitespace around <backmod>, then
#                     either a numeric $<min> with an optional '..' $<max>
#                     (digits or '*'), or else a <quantified_atom>.
# NOTE(review): each one-line variant appears twice (with and without a
# trailing {*}); these look like the before/after lines of a rendered diff,
# and the standalone {*} in the '**' variant is presumably a removed line --
# confirm which lines are live.
token quantifier:sym<*> { $<sym>=['*'] <backmod> {*} }
token quantifier:sym<+> { $<sym>=['+'] <backmod> {*} }
token quantifier:sym<?> { $<sym>=['?'] <backmod> {*} }
token quantifier:sym<*> { $<sym>=['*'] <backmod> }
token quantifier:sym<+> { $<sym>=['+'] <backmod> }
token quantifier:sym<?> { $<sym>=['?'] <backmod> }
token quantifier:sym<**> {
$<sym>=['**'] \s* <backmod> \s*
[
|| $<min>=[\d+] [ '..' $<max>=[\d+|'*'] ]?
|| <quantified_atom>
]
{*}
}

# backmod: an optional ':' followed by '?', '!', or nothing at all -- the
# empty alternative is only taken when the next character is not ':'.
token backmod { ':'? [ '?' | '!' | <!before ':'> ] }

# proto token metachar { <...> }
# Simple metachar variants:
#   ws            -- whitespace via <.normspace>
#   [ ] and ( )   -- a <nibbler> enclosed in square brackets / parentheses
#   '             -- a <quote>
#   . ^ ^^ $ $$ ::: ::  -- the literal operator text, captured as $<sym>
#   lwb / rwb     -- '<<' or '«' / '>>' or '»', captured as $<sym>
#   bs            -- a backslash followed by <backslash>
#   mod           -- a <mod_internal>
# NOTE(review): every variant appears twice (with and without a trailing
# {*}); these look like the before/after lines of a rendered diff -- confirm
# which set is live.
token metachar:sym<ws> { <.normspace> {*} }
token metachar:sym<[ ]> { '[' <nibbler> ']' {*} }
token metachar:sym<( )> { '(' <nibbler> ')' {*} }
token metachar:sym<'> { <quote> {*} }
token metachar:sym<.> { $<sym>=['.'] {*} }
token metachar:sym<^> { $<sym>=['^'] {*} }
token metachar:sym<^^> { $<sym>=['^^'] {*} }
token metachar:sym<$> { $<sym>=['$'] {*} }
token metachar:sym<$$> { $<sym>=['$$'] {*} }
token metachar:sym<:::> { $<sym>=[':::'] {*} }
token metachar:sym<::> { $<sym>=['::'] {*} }
token metachar:sym<lwb> { $<sym>=['<<'|'«'] {*} }
token metachar:sym<rwb> { $<sym>=['>>'|'»'] {*} }
token metachar:sym<bs> { \\ <backslash> {*} }
token metachar:sym<mod> { <mod_internal> {*} }
token metachar:sym<ws> { <.normspace> }
token metachar:sym<[ ]> { '[' <nibbler> ']' }
token metachar:sym<( )> { '(' <nibbler> ')' }
token metachar:sym<'> { <quote> }
token metachar:sym<.> { $<sym>=['.'] }
token metachar:sym<^> { $<sym>=['^'] }
token metachar:sym<^^> { $<sym>=['^^'] }
token metachar:sym<$> { $<sym>=['$'] }
token metachar:sym<$$> { $<sym>=['$$'] }
token metachar:sym<:::> { $<sym>=[':::'] }
token metachar:sym<::> { $<sym>=['::'] }
token metachar:sym<lwb> { $<sym>=['<<'|'«'] }
token metachar:sym<rwb> { $<sym>=['>>'|'»'] }
token metachar:sym<bs> { \\ <backslash> }
token metachar:sym<mod> { <mod_internal> }

# metachar {*}: matches the literal text '{*}' (captured as $<sym>),
# optionally followed on the same line by a '#= ' marker and a key made of
# one or more non-space words separated by horizontal whitespace, captured
# as $<key>.
# NOTE(review): the final standalone {*} line is presumably one removed by
# this commit (the first line's '{*}' is a quoted literal and stays) --
# confirm.
token metachar:sym<{*}> {
$<sym>=['{*}']
[ \h* '#= ' \h* $<key>=[\S+ [\h+ \S+]*] ]?
{*}
}
# metachar assert: '<' followed by an <assertion> and a closing '>'.
# If the closing '>' is missing, <.panic> is called with
# 'regex assertion not terminated by angle bracket'.
# NOTE(review): the standalone {*} line is presumably one removed by this
# commit -- confirm.
token metachar:sym<assert> {
'<' <assertion>
[ '>' || <.panic: 'regex assertion not terminated by angle bracket'> ]
{*}
}

token metachar:sym<var> {
Expand All @@ -101,31 +92,30 @@ grammar Regex::P6Regex::Grammar;
]

[ <.ws> '=' <.ws> <quantified_atom> ]?
{*}
}

# proto token backslash { <...> }
# Backslash-sequence variants (the character following a backslash):
#   w     -- one of d s w n D S W N, captured as $<sym>
#   b e f h r t v -- the letter in either case, captured as $<sym>
#   A z Z Q -- matched literally, then <.obs> is called with a message
#              naming the old construct and its replacement
#   misc  -- any single non-word character (\W)
# NOTE(review): the $<sym> variants and 'misc' appear twice (with and
# without a trailing {*}); these look like the before/after lines of a
# rendered diff -- confirm which set is live.
token backslash:sym<w> { $<sym>=[<[dswnDSWN]>] {*} }
token backslash:sym<b> { $<sym>=[<[bB]>] {*} }
token backslash:sym<e> { $<sym>=[<[eE]>] {*} }
token backslash:sym<f> { $<sym>=[<[fF]>] {*} }
token backslash:sym<h> { $<sym>=[<[hH]>] {*} }
token backslash:sym<r> { $<sym>=[<[rR]>] {*} }
token backslash:sym<t> { $<sym>=[<[tT]>] {*} }
token backslash:sym<v> { $<sym>=[<[vV]>] {*} }
token backslash:sym<w> { $<sym>=[<[dswnDSWN]>] }
token backslash:sym<b> { $<sym>=[<[bB]>] }
token backslash:sym<e> { $<sym>=[<[eE]>] }
token backslash:sym<f> { $<sym>=[<[fF]>] }
token backslash:sym<h> { $<sym>=[<[hH]>] }
token backslash:sym<r> { $<sym>=[<[rR]>] }
token backslash:sym<t> { $<sym>=[<[tT]>] }
token backslash:sym<v> { $<sym>=[<[vV]>] }
token backslash:sym<A> { 'A' <.obs: '\\A as beginning-of-string matcher;^'> }
token backslash:sym<z> { 'z' <.obs: '\\z as end-of-string matcher;$'> }
token backslash:sym<Z> { 'Z' <.obs: '\\Z as end-of-string matcher;\\n?$'> }
token backslash:sym<Q> { 'Q' <.obs: '\\Q as quotemeta;quotes or literal variable match'> }
token backslash:sym<misc> { \W {*} }
token backslash:sym<misc> { \W }

# proto token assertion { <...> }

# assertion '?' and '!': the sign character followed by either a nested
# <assertion> or nothing, the latter only when the next character is '>'.
# NOTE(review): each variant appears twice (with and without a trailing
# {*}); these look like the before/after lines of a rendered diff -- confirm
# which set is live.
token assertion:sym<?> { '?' [ <?before '>' > | <assertion> ] {*} }
token assertion:sym<!> { '!' [ <?before '>' > | <assertion> ] {*} }
token assertion:sym<?> { '?' [ <?before '>' > | <assertion> ] }
token assertion:sym<!> { '!' [ <?before '>' > | <assertion> ] }

# assertion method: a '.' followed by a nested <assertion>.
# NOTE(review): the two body lines differ only by the trailing {*}; they
# look like the before/after lines of a rendered diff. Taken literally the
# body would require '.' <assertion> twice -- confirm which line is live.
token assertion:sym<method> {
'.' <assertion> {*}
'.' <assertion>
}

token assertion:sym<name> {
Expand All @@ -136,10 +126,9 @@ grammar Regex::P6Regex::Grammar;
| ':' <arglist>
| <.normspace> <nibbler>
]?
{*}
}

# assertion '[': when the next character is '[', '+' or '-', match one or
# more <cclass_elem>.
# NOTE(review): the two definitions below differ only by the trailing {*};
# they look like the before/after lines of a rendered diff -- confirm which
# one is live.
token assertion:sym<[> { <?before '['|'+'|'-'> <cclass_elem>+ {*} }
token assertion:sym<[> { <?before '['|'+'|'-'> <cclass_elem>+ }

token cclass_elem {
$<sign>=['+'|'-'|<?>]
Expand All @@ -153,15 +142,13 @@ grammar Regex::P6Regex::Grammar;
| $<name>=[\w+]
]
<.normspace>?
{*}
}

# mod_internal: an in-pattern modifier introduced by ':'. Two forms:
#   ':' $<n> <mod_ident> »            -- $<n> is '!' or a run of digits,
#                                        written with a **1 repetition
#   ':' <mod_ident> [ '(' digits ')' ]? -- optional parenthesised digits,
#                                        captured as $<n>
# NOTE(review): the standalone {*} line is presumably one removed by this
# commit -- confirm.
token mod_internal {
[
| ':' $<n>=('!' | \d+)**1 <mod_ident> »
| ':' <mod_ident> [ '(' $<n>=[\d+] ')' ]?
]
{*}
}

# mod_ident ignorecase: 'i' (captured as $<sym>) optionally followed by
# 'gnorecase', so both the short and the spelled-out name match.
token mod_ident:sym<ignorecase> { $<sym>=[i] 'gnorecase'? }
Expand Down

0 comments on commit 180f18e

Please sign in to comment.