Skip to content

Commit

Permalink
Deprecate literal unescaped "{" in regexes.
Browse files Browse the repository at this point in the history
We are deprecating literal left braces in regular expressions.  The 5.16
delta announced that this is coming.

This commit causes a warning to be raised when a literal "{" is
encountered.  However, it does not do this if the left brace is at the
beginning of a construct.  Such a brace does not cause problems for us
for our future use of it for other purposes, as, for example in things
like \b{w}, and there were a large number of core tests that failed
without this condition.

I didn't mention this exception in the diagnostic.  We may choose to
forbid it everywhere, and we certainly want to discourage its use
everywhere.  But this commit gets the essential components in early in
5.17, and we can tighten it up later if we decide to.
  • Loading branch information
Karl Williamson committed May 25, 2012
1 parent 9a54da5 commit 2a53d33
Show file tree
Hide file tree
Showing 9 changed files with 47 additions and 36 deletions.
6 changes: 3 additions & 3 deletions lib/diagnostics.pm
Expand Up @@ -186,7 +186,7 @@ use 5.009001;
use Carp;
$Carp::Internal{__PACKAGE__.""}++;

our $VERSION = '1.28';
our $VERSION = '1.29';
our $DEBUG;
our $VERBOSE;
our $PRETTY;
Expand Down Expand Up @@ -435,11 +435,11 @@ my %msg;
}
my $lhs = join( '', @toks );
$transfmt{$header}{pat} =
" s{^$lhs}\n {\Q$header\E}s\n\t&& return 1;\n";
" s^$lhs\Q$header\Es\n\t&& return 1;\n";
$transfmt{$header}{len} = $conlen;
} else {
$transfmt{$header}{pat} =
" m{^\Q$header\E} && return 1;\n";
" m^\Q$header\E && return 1;\n";
$transfmt{$header}{len} = length( $header );
}

Expand Down
11 changes: 11 additions & 0 deletions pod/perldiag.pod
Expand Up @@ -4928,6 +4928,17 @@ C<undef *foo>.
(A) You've accidentally run your script through B<csh> instead of Perl.
Check the #! line, or manually feed your script into Perl yourself.

=item Unescaped left brace in regex is deprecated, passed through

(D) You used a literal C<"{"> character in a regular expression pattern.
You should change to use C<"\{"> instead, because a future version of
Perl (tentatively v5.20) will consider this to be a syntax error. If
the pattern delimiters are also braces, any matching right brace
(C<"}">) should also be escaped to avoid confusing the parser, for
example,

qr{abc\{def\}ghi}

=item unexec of %s into %s failed!

(F) The unexec() routine failed for some reason. See your local FSF
Expand Down
29 changes: 12 additions & 17 deletions regcomp.c
Expand Up @@ -9077,12 +9077,6 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
vFAIL("Internal urp");
/* Supposed to be caught earlier. */
break;
case '{':
if (!regcurly(RExC_parse)) {
RExC_parse++;
goto defchar;
}
/* FALL THROUGH */
case '?':
case '+':
case '*':
Expand Down Expand Up @@ -9208,9 +9202,6 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
ret = reg_node(pRExC_state, op);
FLAGS(ret) = get_regex_charset(RExC_flags);
*flagp |= SIMPLE;
if (! SIZE_ONLY && (U8) *(RExC_parse + 1) == '{') {
ckWARNregdep(RExC_parse, "\"\\b{\" is deprecated; use \"\\b\\{\" instead");
}
goto finish_meta_pat;
case 'B':
RExC_seen_zerolen++;
Expand All @@ -9235,9 +9226,6 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
ret = reg_node(pRExC_state, op);
FLAGS(ret) = get_regex_charset(RExC_flags);
*flagp |= SIMPLE;
if (! SIZE_ONLY && (U8) *(RExC_parse + 1) == '{') {
ckWARNregdep(RExC_parse, "\"\\B{\" is deprecated; use \"\\B\\{\" instead");
}
goto finish_meta_pat;
case 's':
switch (get_regex_charset(RExC_flags)) {
Expand Down Expand Up @@ -9744,15 +9732,22 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
/* FALL THROUGH */
default:
if (!SIZE_ONLY&& isALPHA(*p)) {
/* Include any { following the alpha to emphasize
* that it could be part of an escape at some point
* in the future */
int len = (*(p + 1) == '{') ? 2 : 1;
ckWARN3reg(p + len, "Unrecognized escape \\%.*s passed through", len, p);
ckWARN2reg(p + 1, "Unrecognized escape \\%.1s passed through", p);
}
goto normal_default;
}
break;
case '{':
/* Currently we don't warn when the lbrace is at the start
* of a construct. This catches it in the middle of a
* literal string, or when its the first thing after
* something like "\b" */
if (! SIZE_ONLY
&& (len || (p > RExC_start && isALPHA_A(*(p -1)))))
{
ckWARNregdep(p + 1, "Unescaped left brace in regex is deprecated, passed through");
}
/*FALLTHROUGH*/
default:
normal_default:
if (UTF8_IS_START(*p) && UTF) {
Expand Down
6 changes: 3 additions & 3 deletions t/comp/parser.t
Expand Up @@ -125,11 +125,11 @@ is( $@, '', 'PL_lex_brackstack' );
is("${a}[", "A[", "interpolation, qq//");
my @b=("B");
is("@{b}{", "B{", "interpolation, qq//");
is(qr/${a}{/, '(?^:A{)', "interpolation, qr//");
is(qr/${a}\{/, '(?^:A\{)', "interpolation, qr//");
my $c = "A{";
$c =~ /${a}{/;
$c =~ /${a}\{/;
is($&, 'A{', "interpolation, m//");
$c =~ s/${a}{/foo/;
$c =~ s/${a}\{/foo/;
is($c, 'foo', "interpolation, s/...//");
$c =~ s/foo/${a}{/;
is($c, 'A{', "interpolation, s//.../");
Expand Down
4 changes: 2 additions & 2 deletions t/io/open.t
Expand Up @@ -267,7 +267,7 @@ SKIP: {

open($fh1{k}, "TEST");
gimme($fh1{k});
like($@, qr/<\$fh1{...}> line 1\./, "autoviv fh package helem");
like($@, qr/<\$fh1\{...}> line 1\./, "autoviv fh package helem");

my @fh2;
open($fh2[0], "TEST");
Expand All @@ -277,7 +277,7 @@ SKIP: {
my %fh3;
open($fh3{k}, "TEST");
gimme($fh3{k});
like($@, qr/<\$fh3{...}> line 1\./, "autoviv fh lexical helem");
like($@, qr/<\$fh3\{...}> line 1\./, "autoviv fh lexical helem");
}

SKIP: {
Expand Down
10 changes: 7 additions & 3 deletions t/lib/warnings/regcomp
Expand Up @@ -57,20 +57,24 @@ Unrecognized escape \m passed through in regex; marked by <-- HERE in m/a\m <--
use warnings 'regexp';
"foo" =~ /\q/;
"foo" =~ /\q{/;
"foo" =~ /\w{/;
"foo" =~ /a\b{cde/;
"foo" =~ /a\B{cde/;
"bar" =~ /\_/;
no warnings 'regexp';
"foo" =~ /\q/;
"foo" =~ /\q{/;
"foo" =~ /\w{/;
"foo" =~ /a\b{cde/;
"foo" =~ /a\B{cde/;
"bar" =~ /\_/;
EXPECT
Unrecognized escape \q passed through in regex; marked by <-- HERE in m/\q <-- HERE / at - line 4.
Unrecognized escape \q{ passed through in regex; marked by <-- HERE in m/\q{ <-- HERE / at - line 5.
"\b{" is deprecated; use "\b\{" instead in regex; marked by <-- HERE in m/a\ <-- HERE b{cde/ at - line 6.
"\B{" is deprecated; use "\B\{" instead in regex; marked by <-- HERE in m/a\ <-- HERE B{cde/ at - line 7.
Unrecognized escape \q passed through in regex; marked by <-- HERE in m/\q <-- HERE {/ at - line 5.
Unescaped left brace in regex is deprecated, passed through in regex; marked by <-- HERE in m/\q{ <-- HERE / at - line 5.
Unescaped left brace in regex is deprecated, passed through in regex; marked by <-- HERE in m/\w{ <-- HERE / at - line 6.
Unescaped left brace in regex is deprecated, passed through in regex; marked by <-- HERE in m/a\b{ <-- HERE cde/ at - line 7.
Unescaped left brace in regex is deprecated, passed through in regex; marked by <-- HERE in m/a\B{ <-- HERE cde/ at - line 8.
########
# regcomp.c [S_regpposixcc S_checkposixcc]
#
Expand Down
12 changes: 6 additions & 6 deletions t/op/taint.t
Expand Up @@ -152,7 +152,7 @@ my $TEST = 'TEST';
while (my $v = $vars[0]) {
local $ENV{$v} = $TAINT;
last if eval { `$echo 1` };
last unless $@ =~ /^Insecure \$ENV{$v}/;
last unless $@ =~ /^Insecure \$ENV\{$v}/;
shift @vars;
}
is("@vars", "");
Expand All @@ -163,7 +163,7 @@ my $TEST = 'TEST';
is(eval { `$echo 1` }, "1\n");
$ENV{TERM} = 'e=mc2' . $TAINT;
is(eval { `$echo 1` }, undef);
like($@, qr/^Insecure \$ENV{TERM}/);
like($@, qr/^Insecure \$ENV\{TERM}/);
}

my $tmp;
Expand All @@ -182,22 +182,22 @@ my $TEST = 'TEST';

local $ENV{PATH} = $tmp;
is(eval { `$echo 1` }, undef);
like($@, qr/^Insecure directory in \$ENV{PATH}/);
like($@, qr/^Insecure directory in \$ENV\{PATH}/);
}

SKIP: {
skip "This is not VMS", 4 unless $Is_VMS;

$ENV{'DCL$PATH'} = $TAINT;
is(eval { `$echo 1` }, undef);
like($@, qr/^Insecure \$ENV{DCL\$PATH}/);
like($@, qr/^Insecure \$ENV\{DCL\$PATH}/);
SKIP: {
skip q[can't find world-writeable directory to test DCL$PATH], 2
unless $tmp;

$ENV{'DCL$PATH'} = $tmp;
is(eval { `$echo 1` }, undef);
like($@, qr/^Insecure directory in \$ENV{DCL\$PATH}/);
like($@, qr/^Insecure directory in \$ENV\{DCL\$PATH}/);
}
$ENV{'DCL$PATH'} = '';
}
Expand Down Expand Up @@ -2112,7 +2112,7 @@ end
ok("A" =~ /\p{$prop}/, "user-defined property: non-tainted case");
$prop = "IsA$TAINT";
eval { "A" =~ /\p{$prop}/};
like($@, qr/Insecure user-defined property \\p{main::IsA}/,
like($@, qr/Insecure user-defined property \\p\{main::IsA}/,
"user-defined property: tainted case");
}

Expand Down
2 changes: 1 addition & 1 deletion t/re/pat.t
Expand Up @@ -152,7 +152,7 @@ sub run_tests {

{
$_ = 'now is the {time for all} good men to come to.';
/ {([^}]*)}/;
/ \{([^}]*)}/;
is($1, 'time for all', "Match braces");
}

Expand Down
3 changes: 2 additions & 1 deletion t/re/pat_advanced.t
Expand Up @@ -986,7 +986,7 @@ sub run_tests {
my $w;
local $SIG {__WARN__} = sub {$w .= "@_"};
eval 'q(xxWxx) =~ /[\N{WARN}]/';
ok $w && $w =~ /Using just the first character returned by \\N{} in character class/,
ok $w && $w =~ /Using just the first character returned by \\N\{} in character class/,
"single character in [\\N{}] warning";

undef $w;
Expand Down Expand Up @@ -1137,6 +1137,7 @@ sub run_tests {

{
# \, breaks {3,4}
no warnings qw{deprecated regexp};
ok "xaaay" !~ /xa{3\,4}y/, '\, in a pattern';
ok "xa{3,4}y" =~ /xa{3\,4}y/, '\, in a pattern';

Expand Down

0 comments on commit 2a53d33

Please sign in to comment.