Skip to content

Commit

Permalink
[perl #131211] fixup File::Glob degenerate matching
Browse files Browse the repository at this point in the history
The old code would go quadratic with recursion and backtracking
when doing patterns like "a*a*a*a*a*a*a*x" on a file like
"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa".

This patch changes the code to not recurse, and to not backtrack,
as per this article from Russ Cox: https://research.swtch.com/glob

It also adds a micro-optimisation for M_ONE and M_SET under the new code.

Thanks to Avar and Russ Cox for helping with this patch, along with
Jilles Tjoelker and the rest of the FreeBSD community.
  • Loading branch information
demerphq authored and xsawyerx committed Jun 1, 2017
1 parent 1b51016 commit 0db967b
Show file tree
Hide file tree
Showing 3 changed files with 144 additions and 15 deletions.
1 change: 1 addition & 0 deletions MANIFEST
Expand Up @@ -3948,6 +3948,7 @@ ext/File-Glob/t/basic.t See if File::Glob works
ext/File-Glob/t/case.t See if File::Glob works
ext/File-Glob/t/global.t See if File::Glob works
ext/File-Glob/t/rt114984.t See if File::Glob works
ext/File-Glob/t/rt131211.t See if File::Glob works
ext/File-Glob/t/taint.t See if File::Glob works
ext/File-Glob/t/threads.t See if File::Glob + threads works
ext/File-Glob/TODO File::Glob extension todo list
Expand Down
64 changes: 49 additions & 15 deletions ext/File-Glob/bsd_glob.c
Expand Up @@ -563,8 +563,12 @@ glob0(const Char *pattern, glob_t *pglob)
break;
case BG_STAR:
pglob->gl_flags |= GLOB_MAGCHAR;
/* collapse adjacent stars to one,
* to avoid exponential behavior
/* Collapse adjacent stars to one.
* This is required to ensure that a pattern like
* "a**" matches a name like "a", as without this
* check when the first star matched everything it would
* cause the second star to return a match fail.
* As long ** is folded here this does not happen.
*/
if (bufnext == patbuf || bufnext[-1] != M_ALL)
*bufnext++ = M_ALL;
Expand Down Expand Up @@ -909,35 +913,56 @@ globextend(const Char *path, glob_t *pglob, size_t *limitp)


/*
* pattern matching function for filenames. Each occurrence of the *
* pattern causes a recursion level.
* pattern matching function for filenames using state machine to avoid
* recursion. We maintain a "nextp" and "nextn" to allow us to backtrack
* without additional callframes, and to do cleanly prune the backtracking
* state when multiple '*' (start) matches are included in the patter.
*
* Thanks to Russ Cox for the improved state machine logic to avoid quadratic
* matching on failure.
*
* https://research.swtch.com/glob
*
* An example would be a pattern
* ("a*" x 100) . "y"
* against a file name like
* ("a" x 100) . "x"
*
*/
static int
match(Char *name, Char *pat, Char *patend, int nocase)
{
int ok, negate_range;
Char c, k;
Char *nextp = NULL;
Char *nextn = NULL;

loop:
while (pat < patend) {
c = *pat++;
switch (c & M_MASK) {
case M_ALL:
if (pat == patend)
return(1);
do
if (match(name, pat, patend, nocase))
return(1);
while (*name++ != BG_EOS)
;
return(0);
if (*name == BG_EOS)
return 0;
nextn = name + 1;
nextp = pat - 1;
break;
case M_ONE:
/* since * matches leftmost-shortest first *
* if we encounter the EOS then backtracking *
* will not help, so we can exit early here. */
if (*name++ == BG_EOS)
return(0);
return 0;
break;
case M_SET:
ok = 0;
/* since * matches leftmost-shortest first *
* if we encounter the EOS then backtracking *
* will not help, so we can exit early here. */
if ((k = *name++) == BG_EOS)
return(0);
return 0;
if ((negate_range = ((*pat & M_MASK) == M_NOT)) != BG_EOS)
++pat;
while (((c = *pat++) & M_MASK) != M_END)
Expand All @@ -953,16 +978,25 @@ match(Char *name, Char *pat, Char *patend, int nocase)
} else if (nocase ? (tolower(c) == tolower(k)) : (c == k))
ok = 1;
if (ok == negate_range)
return(0);
goto fail;
break;
default:
k = *name++;
if (nocase ? (tolower(k) != tolower(c)) : (k != c))
return(0);
goto fail;
break;
}
}
return(*name == BG_EOS);
if (*name == BG_EOS)
return 1;

fail:
if (nextn) {
pat = nextp;
name = nextn;
goto loop;
}
return 0;
}

/* Free allocated data belonging to a glob_t structure. */
Expand Down
94 changes: 94 additions & 0 deletions ext/File-Glob/t/rt131211.t
@@ -0,0 +1,94 @@
use strict;
use warnings;
use v5.16.0;
use File::Temp 'tempdir';
use File::Spec::Functions;
use Test::More;
use Time::HiRes qw(time);

plan tests => 13;

my $path = tempdir uc cleanup => 1;
my @files= (
"x".("a" x 50)."b", # 0
"abbbbbbbbbbbbc", # 1
"abbbbbbbbbbbbd", # 2
"aaabaaaabaaaabc", # 3
"pq", # 4
"r", # 5
"rttiiiiiii", # 6
"wewewewewewe", # 7
"weeeweeeweee", # 8
"weewweewweew", # 9
"wewewewewewewewewewewewewewewewewq", # 10
"wtttttttetttttttwr", # 11
);


foreach (@files) {
open(my $f, ">", catfile $path, $_);
}

my $elapsed_fail= 0;
my $elapsed_match= 0;
my @got_files;
my @no_files;
my $count = 0;

while (++$count < 10) {
$elapsed_match -= time;
@got_files= glob catfile $path, "x".("a*" x $count) . "b";
$elapsed_match += time;

$elapsed_fail -= time;
@no_files= glob catfile $path, "x".("a*" x $count) . "c";
$elapsed_fail += time;
last if $elapsed_fail > $elapsed_match * 100;
}

is $count,10,
"tried all the patterns without bailing out";

cmp_ok $elapsed_fail/$elapsed_match,"<",2,
"time to fail less than twice the time to match";
is "@got_files", catfile($path, $files[0]),
"only got the expected file for xa*..b";
is "@no_files", "", "shouldnt have files for xa*..c";


@got_files= glob catfile $path, "a*b*b*b*bc";
is "@got_files", catfile($path, $files[1]),
"only got the expected file for a*b*b*b*bc";

@got_files= sort glob catfile $path, "a*b*b*bc";
is "@got_files", catfile($path, $files[3])." ".catfile($path,$files[1]),
"got the expected two files for a*b*b*bc";

@got_files= sort glob catfile $path, "p*";
is "@got_files", catfile($path, $files[4]),
"p* matches pq";

@got_files= sort glob catfile $path, "r*???????";
is "@got_files", catfile($path, $files[6]),
"r*??????? works as expected";

@got_files= sort glob catfile $path, "w*e*w??e";
is "@got_files", join(" ", sort map { catfile($path, $files[$_]) } (7,8)),
"w*e*w??e works as expected";

@got_files= sort glob catfile $path, "w*e*we??";
is "@got_files", join(" ", sort map { catfile($path, $files[$_]) } (7,8,9,10)),
"w*e*we?? works as expected";

@got_files= sort glob catfile $path, "w**e**w";
is "@got_files", join(" ", sort map { catfile($path, $files[$_]) } (9)),
"w**e**w works as expected";

@got_files= sort glob catfile $path, "*wee*";
is "@got_files", join(" ", sort map { catfile($path, $files[$_]) } (8,9)),
"*wee* works as expected";

@got_files= sort glob catfile $path, "we*";
is "@got_files", join(" ", sort map { catfile($path, $files[$_]) } (7,8,9,10)),
"we* works as expected";

0 comments on commit 0db967b

Please sign in to comment.