This repository has been archived by the owner on Feb 3, 2021. It is now read-only.
/
Grammar.pm
119 lines (102 loc) · 3.31 KB
/
Grammar.pm
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
# Declares the grammar Regex::P6Regex::Grammar, derived from PCT::Grammar.
# The tokens and rules that follow belong to this grammar.
grammar Regex::P6Regex::Grammar is PCT::Grammar;
# Insignificant text between regex elements: zero or more runs of whitespace
# and/or '#' comments.  A comment extends to the end of its line.  The line
# terminator is intentionally NOT required as part of the comment, so that a
# comment on the final line of a pattern (no trailing newline) is still
# consumed; when a newline is present it is picked up by \s+ on the next
# iteration of the loop.  Each alternative consumes at least one character,
# so the loop always makes progress.
token ws { [ \s+ | '#' \N* ]* }
# Entry point of the grammar.  Parses a single <nibbler>, then requires that
# nothing but <.ws> remains before end of input ($).  If anything else is left
# over, calls panic with the message "Syntax error".
# NOTE(review): {*} presumably dispatches to the corresponding action method --
# confirm against the actions class used with this grammar.
token TOP {
<nibbler>
[ <.ws> $ || <.panic: "Syntax error"> ]
{*}
}
# A list of <termish> alternatives.
#   * An optional leading '||', '|', '&&' or '&' is accepted before the first
#     <termish>.
#   * Further <termish> items are separated by '||' or '|'.
#   * A separator that is not followed by a <termish> calls panic with
#     "Null pattern not allowed".
# This is declared as a `rule`, so whitespace inside the pattern body is
# significant; comments are kept outside the body for that reason.
# NOTE(review): '&&' and '&' are accepted only in the leading position, not as
# separators between terms -- confirm whether that is intended.
rule nibbler {
['||'|'|'|'&&'|'&']?
<termish>
[ ['||'|'|']
[ <termish> || <.panic: "Null pattern not allowed"> ]
]*
{*}
}
# One term of an alternation: optional leading <.ws>, then one or more
# quantified atoms.  Each <quantified_atom> match is captured under the
# name `noun`.
token termish {
<.ws>
<noun=quantified_atom>+
{*}
}
# An <atom>, followed by <.ws>, optionally followed by a <quantifier> and
# further <.ws>.  The quantifier is optional, so a bare atom also matches.
token quantified_atom {
<atom>
<.ws>
[ <quantifier> <.ws> ]?
{*}
}
# The smallest matchable unit: either literal word characters or a <metachar>.
#
# Literal branch: one \w, optionally extended by more word characters.  The
# extension uses `\w+!` (backtracking enabled for this quantifier) together
# with the lookahead <?before \w>, so the run always stops while at least one
# word character is still ahead.  The effect is that the last word character
# of a run is left to be matched as its own atom, so a following quantifier
# applies to that single character rather than to the whole run.
#
# NOTE(review): :dba('regex atom') presumably supplies a descriptive name for
# diagnostics -- confirm against the regex compiler in use.
token atom {
:dba('regex atom')
[
| \w [ \w+! <?before \w> ]?
| <metachar>
]
{*}
}
# proto token quantifier { <...> }
# '*' quantifier: captures the literal '*' as $<sym>, followed by <quantmod>.
token quantifier:sym<*> { $<sym>=['*'] <quantmod> {*} }
# '+' quantifier: captures the literal '+' as $<sym>, followed by <quantmod>.
token quantifier:sym<+> { $<sym>=['+'] <quantmod> {*} }
# '?' quantifier: captures the literal '?' as $<sym>, followed by <quantmod>.
token quantifier:sym<?> { $<sym>=['?'] <quantmod> {*} }
# '**' quantifier with an explicit count or range.
# Captures '**' as $<sym>, then <quantmod>, then a required count:
#   $<min>  one or more digits
#   $<max>  optional; introduced by '..' and is either digits or '*'
# No whitespace is matched between '**', the modifier and the count.
token quantifier:sym<**> {
$<sym>=['**'] <quantmod>
[
| $<min>=[\d+] [ '..' $<max>=[\d+|'*'] ]?
]
{*}
}
# Quantifier modifier: an optional ':' followed by an optional '?', '!' or '+'.
# Every part is optional, so this token can match the empty string.
token quantmod { ':'? [ '?' | '!' | '+' ]? {*} }
# proto token metachar { <...> }
# Bracketed group: '[' followed by a nested <nibbler> and a closing ']'.
token metachar:sym<[ ]> { '[' <nibbler> ']' {*} }
# The '.' metacharacter, captured as $<sym>.
token metachar:sym<.> { $<sym>=['.'] {*} }
# The '^' metacharacter, captured as $<sym>.
token metachar:sym<^> { $<sym>=['^'] {*} }
# The '^^' metacharacter, captured as $<sym>.
token metachar:sym<^^> { $<sym>=['^^'] {*} }
# The '$' metacharacter, captured as $<sym>.
token metachar:sym<$> { $<sym>=['$'] {*} }
# The '$$' metacharacter, captured as $<sym>.
token metachar:sym<$$> { $<sym>=['$$'] {*} }
# Matches '<<' or its single-character form '«', captured as $<sym>.
# NOTE(review): the name `lwb` presumably stands for "left word boundary" --
# confirm against the actions class.
token metachar:sym<lwb> { $<sym>=['<<'|'«'] {*} }
# Matches '>>' or its single-character form '»', captured as $<sym>.
# NOTE(review): the name `rwb` presumably stands for "right word boundary" --
# confirm against the actions class.
token metachar:sym<rwb> { $<sym>=['>>'|'»'] {*} }
# Backslash sequence: a literal backslash followed by a <backslash> token.
token metachar:sym<bs> { \\ <backslash> {*} }
# Angle-bracket assertion: '<' followed by an <assertion> and a closing '>'.
# If the closing '>' is missing, calls panic with
# "regex assertion not terminated by angle bracket".
token metachar:sym<assert> {
'<' <assertion>
[ '>' || <.panic: "regex assertion not terminated by angle bracket"> ]
{*}
}
# proto token backslash { <...> }
# Backslash letters d, s, w, n and their uppercase forms D, S, W, N; the
# matched letter is captured as $<sym>.  All eight share this one token,
# despite the sym name being just `w`.
token backslash:sym<w> { $<sym>=[<[dswnDSWN]>] {*} }
# Backslash letter 'b' or 'B', captured as $<sym>.
token backslash:sym<b> { $<sym>=[<[bB]>] {*} }
# Backslash letter 'e' or 'E', captured as $<sym>.
token backslash:sym<e> { $<sym>=[<[eE]>] {*} }
# Backslash letter 'f' or 'F', captured as $<sym>.
token backslash:sym<f> { $<sym>=[<[fF]>] {*} }
# Backslash letter 'h' or 'H', captured as $<sym>.
token backslash:sym<h> { $<sym>=[<[hH]>] {*} }
# Backslash letter 'r' or 'R', captured as $<sym>.
token backslash:sym<r> { $<sym>=[<[rR]>] {*} }
# Backslash letter 't' or 'T', captured as $<sym>.
token backslash:sym<t> { $<sym>=[<[tT]>] {*} }
# Backslash letter 'v' or 'V', captured as $<sym>.
token backslash:sym<v> { $<sym>=[<[vV]>] {*} }
# Backslash letter 'A': matches it and calls obs with a message naming '^' as
# the replacement.  There is no {*} in this token.
# NOTE(review): obs presumably reports an obsolete construct and aborts the
# parse -- confirm in the parent grammar.
token backslash:sym<A> { 'A' <.obs: '\\A as beginning-of-string matcher;^'> }
# Backslash letter 'z': matches it and calls obs with a message naming '$' as
# the replacement.  There is no {*} in this token.
# NOTE(review): obs presumably reports an obsolete construct and aborts the
# parse -- confirm in the parent grammar.
token backslash:sym<z> { 'z' <.obs: '\\z as end-of-string matcher;$'> }
# Backslash letter 'Z': matches it and calls obs with a message naming '\n?$'
# as the replacement.  There is no {*} in this token.
# NOTE(review): obs presumably reports an obsolete construct and aborts the
# parse -- confirm in the parent grammar.
token backslash:sym<Z> { 'Z' <.obs: '\\Z as end-of-string matcher;\\n?$'> }
# Backslash letter 'Q': matches it and calls obs with a message suggesting
# quotes or a literal variable match instead.  There is no {*} in this token.
# NOTE(review): obs presumably reports an obsolete construct and aborts the
# parse -- confirm in the parent grammar.
token backslash:sym<Q> { 'Q' <.obs: '\\Q as quotemeta;quotes or literal variable match'> }
# Catch-all for a backslash followed by any single non-word character (\W).
# The character is not captured under a name.
token backslash:sym<misc> { \W {*} }
# proto token assertion { <...> }
# '?' assertion: a '?' that is either immediately followed by '>' (checked by
# lookahead, not consumed) or followed by a nested <assertion>.
# NOTE(review): this token has no {*}, unlike most others -- confirm whether
# an action is intended here.
token assertion:sym<?> { '?' [ <?before '>' > | <assertion> ] }
# '!' assertion: a '!' that is either immediately followed by '>' (checked by
# lookahead, not consumed) or followed by a nested <assertion>.
# NOTE(review): this token has no {*}, unlike most others -- confirm whether
# an action is intended here.
token assertion:sym<!> { '!' [ <?before '>' > | <assertion> ] }
# '.' assertion: a '.' followed by a nested <assertion>.
# NOTE(review): this token has no {*}, unlike most others -- confirm whether
# an action is intended here.
token assertion:sym<method> {
'.' <assertion>
}
# Named assertion: one or more word characters captured as $<longname>,
# optionally followed by one of:
#   * a lookahead confirming that '>' comes next (nothing is consumed), or
#   * '=' and a nested <assertion>.
# The whole trailing group is optional, so the name alone also matches.
token assertion:sym<name> {
$<longname>=[\w+]
[
| <?before '>'>
| '=' <assertion>
]?
{*}
}
# Character-class assertion: applies only when the next character is '[', '+'
# or '-' (lookahead), then matches one or more <cclass_elem>.
token assertion:sym<[> { <?before '['|'+'|'-'> <cclass_elem>+ {*} }
# One element of a character-class assertion.
#   $<sign>      '+', '-', or empty (<?> matches the null string, so the sign
#                is effectively optional)
# followed by either:
#   * an enumerated class in square brackets, made of zero or more
#     $<charspec> items and a closing ']'.  Each item is either
#       - a bare '-', which calls obs with
#         "hyphen in enumerated character class;..", or
#       - a single character -- a backslash followed by any character, or any
#         character other than ']' -- optionally followed by '..' and one
#         more character.
#     The characters are captured positionally by the inner parentheses.
#   * or a name: one or more word characters captured as $<name>.
token cclass_elem {
$<sign>=['+'|'-'|<?>]
[
| '[' $<charspec>=(
| '-' <.obs: "hyphen in enumerated character class;..">
| [ \\ (.) | (<-[\]]>) ] [ '..' (.) ]?
)*
']'
| $<name>=[\w+]
]
{*}
}