Skip to content

Commit

Permalink
Fix PCRE error -8 for hotstrings such as ::a::Send % SubStr("a", 1)
Browse files Browse the repository at this point in the history
  • Loading branch information
Lexikos committed Jan 20, 2024
1 parent c815be3 commit 3c20468
Show file tree
Hide file tree
Showing 3 changed files with 20 additions and 12 deletions.
2 changes: 1 addition & 1 deletion inc/identify_regex.ahk
@@ -1,4 +1,4 @@
get_identify_regex() => '
(
(?(DEFINE)(?<line_comment>(?<![^ `t`r`n]);.*)(?<block_comment>(?m:^[ `t]*/\*(?:.*\R?)+?(?:[ `t]*\*/|.*\Z)))(?<eol>(?=[ `t]*+(?&line_comment)?(?m:$)))(?<tosol>(?:(?&eol).*\R|(?&block_comment))++)(?<toeol>(?:[^ `t`r`n]++|[ `t]*+(?!(?&eol)))*+)(?<contsec>[ `t]*+\((?i:Join[^ `t`r`n]*+|(?&line_comment)|[^ `t`r`n()]++|[ `t]++)*+\R(?:[ `t]*+(?!\)).*\R)*+[ `t]*+\))(?<solcont>[ `t]*+(?:,(?!::| +& )|[<>=/|^,?:\.+\-*&!~](?![^"'`r`n]*?(?:".*?::(?!.*?")|'.*?::(?!.*?')|::))|(?i:AND|OR)(?=[ `t])))(?<eolcont>(?&eol)(?:(?<ec_bad>(?<=:=)|(?<=[:,]))|(?<=[<>=/|^,?:\.+\-*&!~](?<!\+\+|--))|(?<=(?<![\w[:^ascii:]\.])(?i:OR|IS|AS|IN))|(?<=(?<![\w[:^ascii:]\.])(?i:AND|NOT))|(?<=(?<![\w[:^ascii:]\.])(?i:CONTAINS)))(?&tosol)(?:(?&contsec)|(?(ec_bad)|(*:v2-cle))))(?<v1_cont>(?&tosol)(?:(?&solcont)(?&subexp)|[ `t]*+,[ `t]*+(?=%)(?&pct)|(?&contsec)(?&ambig)))(?<v1_fin>(?:.*+(?&v1_cont))*.*+)(?<ambig>(?:(?&exp)|(?&v1_cont)|.*+)++(*:~))(?<pct>(?=%[ `t])(?:(?&subexp)(?&exp)|(?&v1_fin)(*:v1-pct)))(?<expm>(*:exp)(?&exp))(?<v1_lines>(?&toeol)(?:(?&tosol)(?:(?&solcont)|(?&contsec))(?&v1_lines))?)(?<otb>(?<![<>=/|^,?:\.*&!~])(?<!(?<!\+)\+)(?<!(?<!\-)\-)[ `t]*+\{(?&eol))(?<enclf>\R(?:(?&contsec)|(?!(?&solcont))(*:v2-cbe)|))(?<encex>(?:[, `t]++|(?&enclf)|(?&subexp)|(?&line_comment))*+)(?<v2_exm>%(?:[^,`r`n;\[\]{}()"%']*+|,(?![ `t]*+%)|(?&subexp))*+%(*:v2-pct)|=>(*:v2-fat))(?<subexp>(?:(?!(?&otb))(?&eolcont)?[ `t]*+(?:[^ `t;,`r`n=\[\]{}()"%']++|\((?&encex)\)|\[(?&encex)\]|\{(?&encex)\}|(?>"(?>[^"``\r\n]|``["'``])*+"|'(?>[^'``\r\n]|``["'``])*+'(*:v2-sq))|'(?&tosol)(?&contsec)'(*:v2-sq)|(?<!\.)%[\w[:^ascii:]]++%|(?&v2_exm)|=|(?&v1_cont)))++)(?<exp>(?:(?&subexp)|[ `t]*+,|(?&eol))++(?&otb)?))(?:[ `t]*+(?&line_comment)(*SKIP)(?!)|(?m:^)[ `t{}]*(?:(?m:^[ `t]*/\*(?:.*\R?)+?(?:[ `t]*\*/|.*\Z))(*SKIP)(?!)|(?:[<>*~$!^+#]*(?>\w+|[^ `t`r`n])|~?(?>\w+|[^ `t`r`n]) & ~?(?>\w+|[^ `t`r`n]))(?i:[ `t]+up)?::(?:[<>*~$!^+#]*(?>\w+|[^ `t`r`n])(?&eol)(*:remap?)|(?&eol)(?!(?&tosol)[ 
`t]*+(?:[\{#]|.*?::|[\w[:^ascii:]]++\())(*:v1-hk)|(*:hotkey))|(?(?=:[^\:`r`n]*[xX]):[[:alnum:]\?\*\- ]*:([^```r`n]+|``.)*?::|:[[:alnum:]\?\*\- ]*:([^```r`n]+|``.)*?::(?:(?&v1_cont)|.*))(*:hotstring)|[\w[:^ascii:]]++:(?=[ `t]*+(?:(?<![^ `t`r`n]);.*)?(?m:$))(*:label)|[^ ,```r`n]+(?<!:):(?=[ `t]*+(?:(?<![^ `t`r`n]);.*)?(?m:$))(*:v1-lbl)|#(?:\w+,|(?i:NoEnv|If|CommentFlag|Delimiter|DerefChar|EscapeChar))(?&v1_fin)(*:v1-dir)|#(?i:HotIf)(*:v2-dir)(?&exp)?|#(?i:Include(?:Again)?)[ `t]+(?&v1_fin)(*:dir)|#\w+(?![^ `t`r`n])(?&ambig)(*:dir?)|(?<=[{}])(*SKIP)(?!))|[ `t]*+(?:(?!(?:[\w[:^ascii:]<>=/|^,?:\.+\-*&!~ `t()\[\]{}%]++|(?>"(?>[^"``\r\n]|``["'``])*+"|'(?>[^'``\r\n]|``["'``])*+'(*:v2-sq))|['"].*)*+(?=[ `t]*+(?:(?<![^ `t`r`n]);.*)?(?m:$)))(?&v1_fin)(*:v1-char)|(?i:else|try|finally)(?![^ `t`r`n])[ `t]*+\{?(*SKIP)(?!)|(?i:return|for|while|until|throw|switch)[ `t]++(?&expm)|(?i:if|while|return|until|loop|goto)(?=\()(?&expm)|(?i:local|global|static)(?![\w[:^ascii:]#@$])(?:[ `t]++[\w[:^ascii:]]++(?:(?=\()(?&exp)(*:v2-kw)|[ `t]*+\{(?&eol)(*:v2-kw)|(?![ `t]*+=)(?&expm))|(?&eol)(*:assume)|(?&v1_fin)(*:v1-kw))|(?i:if)[ `t]++(?:(?>[\w[:^ascii:]#@$]++|%[\w[:^ascii:]#@$]++%)++(?:[ `t]++(?i:not[ `t]++)?(?i:in|contains|between)[ `t]++(?&v1_fin)(*:v1-if)|[ `t]*+(?:[<>]=?|!?=)(?&ambig))|(?&expm))|[\w[:^ascii:]]++(?:[ `t]*+=(?:>(?&ambig)|.*?\?.+?:.*(?&ambig)|(?&v1_fin)(*:v1-ass))|[\(\[](?=.*[\)\]][ `t`r`n]*\{)(?:[ `t]*+[\w[:^ascii:]]++[ `t]*+(?::=(?&subexp))?[ `t]*+,)*+[ `t]*+(?:(?i:ByRef)[ `t]++[\w[:^ascii:]](*:v1-ref)|&(*:v2-ref)|[\w[:^ascii:]]++[ `t]*+=(*:v1-def)|\*[ `t]*+[\)\]](*:v2-vfn)).*|(?=[\(\[\.\?]|[ `t]*+(?>[\:\+\-\*/\.\|&\^]|<<|>>|//)=)(?&expm)|,(?&v1_fin)(*:v1-cmd)|(?&eol)(?&ambig)|[ `t]++(?:[ `t]*+(?:\^|(?:(?!\{)[\w[:^ascii:]<>=/|^,?:\.+\-*&!~ `t()\[\]{}%]|(?>"(?>[^"``\r\n]|``["'``])*+"|'(?>[^'``\r\n]|``["'``])*+'(*:v2-sq)))*+\{[ `t]*+(?:\w+|.)(?:[ `t]++\w+)?[ `t]*+\})(?&v1_fin)(*:v1-send)|(?:[^`r`n,\[\]{}()"%']*+,[ 
`t]*+)*+(?&pct)|(?&ambig)(*:cmd?)))|(?:\+\+|--)(?&expm)|.(?&ambig)(*:!!)))
(?(DEFINE)(?<line_comment>(?<![^ `t`r`n]);.*)(?<block_comment>(?m:^[ `t]*/\*(?:.*\R?)+?(?:[ `t]*\*/|.*\Z)))(?<eol>(?=[ `t]*+(?&line_comment)?(?m:$)))(?<tosol>(?:(?&eol).*\R|(?&block_comment))++)(?<toeol>(?:[^ `t`r`n]++|[ `t]*+(?!(?&eol)))*+)(?<contsec>[ `t]*+\((?i:Join[^ `t`r`n]*+|(?&line_comment)|[^ `t`r`n()]++|[ `t]++)*+\R(?:[ `t]*+(?!\)).*\R)*+[ `t]*+\))(?<solcont>[ `t]*+(?:,(?!::| +& )|[<>=/|^,?:\.+\-*&!~](?![^"'`r`n]*?(?:".*?::(?!.*?")|'.*?::(?!.*?')|::))|(?i:AND|OR)(?=[ `t])))(?<eolcont>(?&eol)(?:(?<ec_bad>(?<=:=)|(?<=[:,]))|(?<=[<>=/|^,?:\.+\-*&!~](?<!\+\+|--))|(?<=(?<![\w[:^ascii:]\.])(?i:OR|IS|AS|IN))|(?<=(?<![\w[:^ascii:]\.])(?i:AND|NOT))|(?<=(?<![\w[:^ascii:]\.])(?i:CONTAINS)))(?&tosol)(?:(?&contsec)|(?(ec_bad)|(*:v2-cle))))(?<v1_cont>(?&tosol)(?:(?&solcont)(?&subexp)|[ `t]*+,[ `t]*+(?=%)(?&pct)|(?&contsec)(?&ambig)))(?<v1_fin>(?:.*+(?&v1_cont))*.*+)(?<ambig>(?:(?&exp)|(?&v1_cont)|.*+)++(*:~))(?<pct>(?=%[ `t])(?:(?&subexp)(?&exp)|(?&v1_fin)(*:v1-pct)))(?<expm>(*:exp)(?&exp))(?<v1_lines>(?&toeol)(?:(?&tosol)(?:(?&solcont)|(?&contsec))(?&v1_lines))?)(?<otb>(?<![<>=/|^,?:\.*&!~])(?<!(?<!\+)\+)(?<!(?<!\-)\-)[ `t]*+\{(?&eol))(?<enclf>\R(?:(?&contsec)|(?!(?&solcont))(*:v2-cbe)|))(?<encex>(?:[, `t]++|(?&enclf)|(?&subexp)|(?&line_comment))*+)(?<v2_exm>%(?:[^,`r`n;\[\]{}()"%']*+|,(?![ `t]*+%)|(?&subexp))*+%(*:v2-pct)|=>(*:v2-fat))(?<subexp>(?:(?!(?&otb))(?&eolcont)?[ `t]*+(?:[^ `t;,`r`n=\[\]{}()"%']++|\((?&encex)\)|\[(?&encex)\]|\{(?&encex)\}|(?>"(?>[^"``\r\n]|``["'``])*+"|'(?>[^'``\r\n]|``["'``])*+'(*:v2-sq))|'(?&tosol)(?&contsec)'(*:v2-sq)|(?<!\.)%[\w[:^ascii:]]++%|(?&v2_exm)|=|(?&v1_cont)))++)(?<exp>(?:(?&subexp)|[ `t]*+,|(?&eol))++(?&otb)?))(?:[ `t]*+(?&line_comment)(*SKIP)(?!)|(?m:^)[ `t{}]*(?:(?m:^[ `t]*/\*(?:.*\R?)+?(?:[ `t]*\*/|.*\Z))(*SKIP)(?!)|(?:[<>*~$!^+#]*(?>\w+|[^ `t`r`n])|~?(?>\w+|[^ `t`r`n]) & ~?(?>\w+|[^ `t`r`n]))(?i:[ `t]+up)?::(?:[<>*~$!^+#]*(?>\w+|[^ `t`r`n])(?&eol)(*:remap?)|(?&eol)(?!(?&tosol)[ 
`t]*+(?:[\{#]|.*?::|[\w[:^ascii:]]++\())(*:v1-hk)|(*:hotkey))|(?(?=:[^\:`r`n]*[xX]):[[:alnum:]\?\*\- ]*:([^```r`n:]++|``.|:(?!:))*?::|:[[:alnum:]\?\*\- ]*:([^```r`n:]++|``.|:(?!:))*?::(?:(?&v1_cont)|.*))(*:hotstring)|[\w[:^ascii:]]++:(?=[ `t]*+(?:(?<![^ `t`r`n]);.*)?(?m:$))(*:label)|[^ ,```r`n]+(?<!:):(?=[ `t]*+(?:(?<![^ `t`r`n]);.*)?(?m:$))(*:v1-lbl)|#(?:\w+,|(?i:NoEnv|If|CommentFlag|Delimiter|DerefChar|EscapeChar))(?&v1_fin)(*:v1-dir)|#(?i:HotIf)(*:v2-dir)(?&exp)?|#(?i:Include(?:Again)?)[ `t]+(?&v1_fin)(*:dir)|#\w+(?![^ `t`r`n])(?&ambig)(*:dir?)|(?<=[{}])(*SKIP)(?!))|[ `t]*+(?:(?!(?:[\w[:^ascii:]<>=/|^,?:\.+\-*&!~ `t()\[\]{}%]++|(?>"(?>[^"``\r\n]|``["'``])*+"|'(?>[^'``\r\n]|``["'``])*+'(*:v2-sq))|['"].*)*+(?=[ `t]*+(?:(?<![^ `t`r`n]);.*)?(?m:$)))(?&v1_fin)(*:v1-char)|(?i:else|try|finally)(?![^ `t`r`n])[ `t]*+\{?(*SKIP)(?!)|(?i:return|for|while|until|throw|switch)[ `t]++(?&expm)|(?i:if|while|return|until|loop|goto)(?=\()(?&expm)|(?i:local|global|static)(?![\w[:^ascii:]#@$])(?:[ `t]++[\w[:^ascii:]]++(?:(?=\()(?&exp)(*:v2-kw)|[ `t]*+\{(?&eol)(*:v2-kw)|(?![ `t]*+=)(?&expm))|(?&eol)(*:assume)|(?&v1_fin)(*:v1-kw))|(?i:if)[ `t]++(?:(?>[\w[:^ascii:]#@$]++|%[\w[:^ascii:]#@$]++%)++(?:[ `t]++(?i:not[ `t]++)?(?i:in|contains|between)[ `t]++(?&v1_fin)(*:v1-if)|[ `t]*+(?:[<>]=?|!?=)(?&ambig))|(?&expm))|[\w[:^ascii:]]++(?:[ `t]*+=(?:>(?&ambig)|.*?\?.+?:.*(?&ambig)|(?&v1_fin)(*:v1-ass))|[\(\[](?=.*[\)\]][ `t`r`n]*\{)(?:[ `t]*+[\w[:^ascii:]]++[ `t]*+(?::=(?&subexp))?[ `t]*+,)*+[ `t]*+(?:(?i:ByRef)[ `t]++[\w[:^ascii:]](*:v1-ref)|&(*:v2-ref)|[\w[:^ascii:]]++[ `t]*+=(*:v1-def)|\*[ `t]*+[\)\]](*:v2-vfn)).*|(?=[\(\[\.\?]|[ `t]*+(?>[\:\+\-\*/\.\|&\^]|<<|>>|//)=)(?&expm)|,(?&v1_fin)(*:v1-cmd)|(?&eol)(?&ambig)|[ `t]++(?:[ `t]*+(?:\^|(?:(?!\{)[\w[:^ascii:]<>=/|^,?:\.+\-*&!~ `t()\[\]{}%]|(?>"(?>[^"``\r\n]|``["'``])*+"|'(?>[^'``\r\n]|``["'``])*+'(*:v2-sq)))*+\{[ `t]*+(?:\w+|.)(?:[ `t]++\w+)?[ `t]*+\})(?&v1_fin)(*:v1-send)|(?:[^`r`n,\[\]{}()"%']*+,[ 
`t]*+)*+(?&pct)|(?&ambig)(*:cmd?)))|(?:\+\+|--)(?&expm)|.(?&ambig)(*:!!)))
)'
6 changes: 3 additions & 3 deletions tools/identify-build.ahk
Expand Up @@ -57,14 +57,14 @@ eol := '(?=' ws0 '(?:' line_comment ')?(?m:$))'
; interpreted as continuation, but instead as a hotstring even with invalid options.
; This doesn't take into account that v2 takes the first unescaped :: while v1 takes
; the last :: (except that in a sequence of 3+ colons, it ignores the last : if odd).
hs_label := ':[[:alnum:]\?\*\- ]*:([^```r`n]+|``.)*?::'
; hs_label matches a hotstring label up to and including the "::" that ends it:
;   :            opening colon
;   [options]*   alphanumerics, ?, *, - and space
;   :            colon that ends the options
;   trigger      lazily repeated, each item being one of
;                  - a possessive run of characters other than backtick, CR, LF or colon
;                  - a backtick followed by any one character (an escape sequence)
;                  - a single colon that is not followed by another colon
;   ::           end of the label
; Each alternative in the trigger begins with a different character (ordinary
; character, backtick, colon), so a given position can only be consumed one way.
hs_label := ':[[:alnum:]\?\*\- ]*:([^```r`n:]++|``.|:(?!:))*?::'
; Matches the opening colon followed by non-colon, non-newline characters ending
; in x or X, i.e. an options section that contains the letter X
; (presumably the hotstring "execute" option -- confirm against the AutoHotkey docs).
hs_label_is_x := ':[^\:`r`n]*[xX]'
; Builds a pattern that matches just the label when the lookahead hs_label_is_x
; succeeds, and otherwise the label followed by the sub-pattern r wrapped in a
; non-capturing group.
; NOTE(review): iif is defined outside this hunk; judging by the generated pattern
; in inc/identify_regex.ahk it emits a PCRE conditional group (?(cond)yes|no) -- confirm.
hs_label_or_autoreplace(r) => iif('?=' hs_label_is_x, hs_label, hs_label '(?:' r ')')
;#region tests
assert_match hs_label, ':*:btw::by the way', ':*:btw::'
assert_match hs_label, ':B0*:abbrev::iation', ':B0*:abbrev::'
assert_match hs_label, '::foo`:::bar', '::foo:::'
assert_match hs_label, '::foo:::bar', '::foo:::'
assert_match hs_label, '::foo``:::bar', '::foo``:::'
assert_match hs_label, '::foo:::bar', '::foo::'
assert_match hs_label, '::a````::b', '::a````::'
assert_match 'm)^' hs_label_or_autoreplace('.*'), '
(
Expand Down
24 changes: 16 additions & 8 deletions tools/identify-tests.ahk
Expand Up @@ -232,6 +232,10 @@ tests := [
]},
{v: 0, t: ['::x::`n(`n``:``*``:EntryShortcutHere``:``:EntryHere`n)']},
{v: 0, t: ['::a````::b']},
{v: 0, t: ['::b::Send % SubStr("a", 1)']}, ; this is autoreplace
{v: 0, t: ['::b::Send SubStr("a", 1)']},
{v: 1, t: [':x:b::Send % SubStr("a", 1)']},
{v: 0, t: ['::x::Send SubStr("a", 1)']},
]

run_tests
Expand Down Expand Up @@ -266,16 +270,20 @@ run_tests() {
test_v(test, v) {
if test.HasProp('t')
for str in test.t {
for m in matches(str, classification_regex) {
if v && InStr(m.Mark, v)
continue 2
else if SubStr(m.Mark,1,1) = 'v' {
test_failed('expected v' v ', got ' m.Mark, str)
continue 2
try {
for m in matches(str, classification_regex) {
if v && InStr(m.Mark, v)
continue 2
else if SubStr(m.Mark,1,1) = 'v' {
test_failed('expected v' v ', got ' m.Mark, str)
continue 2
}
}
if v
test_failed('expected v' v, str)
}
if v
test_failed('expected v' v, str)
catch as e
test_failed(type(e) ': ' e.Message ' ' e.Extra, str)
}
if test.HasProp('f')
for str in test.f {
Expand Down

0 comments on commit 3c20468

Please sign in to comment.