Skip to content

Commit

Permalink
Merge pull request #1040 from blackwhale/regex-bug9211
Browse files Browse the repository at this point in the history
Regex bugfix (issue 9211). I assume FreeBSD's failure is unrelated.
  • Loading branch information
andralex committed Dec 29, 2012
2 parents 901500b + 15879f4 commit efb7a7b
Showing 1 changed file with 121 additions and 108 deletions.
229 changes: 121 additions & 108 deletions std/regex.d
Expand Up @@ -576,12 +576,14 @@ static assert(Bytecode.sizeof == 4);
{
put(sink,`"`);
int i=0;
do{
do
{
put(sink,cast(char[])([cast(dchar)irb[i].data]));
++i;
} while(i<irb.length && irb[i].code==IR.Char);
put(sink,"\"");
if (pc<i){
if(pc<i)
{
put(sink,"\n");
for (int ii=indent+pc+1;ii>0;++ii)
put(sink,"=");
Expand Down Expand Up @@ -2163,7 +2165,7 @@ bool startOfLine(dchar back, bool seenNl)
}

//Test if bytecode starting at pc in program 're' can match given codepoint
//Returns: length of matched atom if test is positive, 0 - can't tell, -1 if doesn't match
//Returns: 0 - can't tell, -1 if doesn't match
int quickTestFwd(RegEx)(uint pc, dchar front, const ref RegEx re)
{
static assert(IRL!(IR.OrChar) == 1);//used in code processing IR.OrChar
Expand Down Expand Up @@ -2199,7 +2201,7 @@ int quickTestFwd(RegEx)(uint pc, dchar front, const ref RegEx re)
break;
case IR.Trie:
if(re.tries[re.ir[pc].data][front])
return IRL!(IR.Trie);
return 0;
else
return -1;
default:
Expand Down Expand Up @@ -3042,7 +3044,8 @@ struct StreamTester(Char)
refStream=Input!(Char)(input,splits);
stream=new StreamCBuf!(Char)();
pos=0;
if (splits.length) {
if (splits.length)
{
stream.addChunk(allStr);
stream.hasEnd=true;
}
Expand Down Expand Up @@ -5042,7 +5045,8 @@ enum OneShot { Fwd, Bwd };
if(!searchFn())
break;
}
else if(!next()){
else if(!next())
{
if (!atEnd) return false;
exhausted = true;
break;
Expand Down Expand Up @@ -5097,7 +5101,7 @@ enum OneShot { Fwd, Bwd };
void eval(bool withInput)(Thread!DataIndex* t, Group!DataIndex[] matches)
{
ThreadList!DataIndex worklist;
debug(fred_matching) writeln("Evaluating thread");
debug(fred_matching) writeln("---- Evaluating thread");
for(;;)
{
debug(fred_matching)
Expand Down Expand Up @@ -5312,12 +5316,7 @@ enum OneShot { Fwd, Bwd };
static if(withInput)
{
int test = quickTestFwd(pc1, front, re);
if(test > 0)
{
nlist.insertBack(fork(t, pc1 + test, t.counter));
t.pc = pc2;
}
else if(test == 0)
if(test >= 0)
{
worklist.insertFront(fork(t, pc2, t.counter));
t.pc = pc1;
Expand Down Expand Up @@ -5479,90 +5478,89 @@ enum OneShot { Fwd, Bwd };
case IR.Nop:
t.pc += IRL!(IR.Nop);
break;
static if(withInput)
{
case IR.OrChar:
uint len = re.ir[t.pc].sequence;
uint end = t.pc + len;
static assert(IRL!(IR.OrChar) == 1);
for(; t.pc<end; t.pc++)
if(re.ir[t.pc].data == front)
break;
if(t.pc != end)
{
t.pc = end;
nlist.insertBack(t);
}
else
recycle(t);
t = worklist.fetch();
if(!t)
return;
break;
case IR.Char:
if(front == re.ir[t.pc].data)
{
t.pc += IRL!(IR.Char);
nlist.insertBack(t);
}
else
recycle(t);
t = worklist.fetch();
if(!t)
return;
break;
case IR.Any:
t.pc += IRL!(IR.Any);
if(!(re.flags & RegexOption.singleline)
&& (front == '\r' || front == '\n'))
recycle(t);
else
nlist.insertBack(t);
t = worklist.fetch();
if(!t)
return;
break;
case IR.CodepointSet:
if(re.charsets[re.ir[t.pc].data].scanFor(front))
{
t.pc += IRL!(IR.CodepointSet);
nlist.insertBack(t);
}
else
{
recycle(t);
}
t = worklist.fetch();
if(!t)
return;
break;
case IR.Trie:
if(re.tries[re.ir[t.pc].data][front])
{
t.pc += IRL!(IR.Trie);
nlist.insertBack(t);
}
else
{

static if(withInput)
{
case IR.OrChar:
uint len = re.ir[t.pc].sequence;
uint end = t.pc + len;
static assert(IRL!(IR.OrChar) == 1);
for(; t.pc<end; t.pc++)
if(re.ir[t.pc].data == front)
break;
if(t.pc != end)
{
t.pc = end;
nlist.insertBack(t);
}
else
recycle(t);
t = worklist.fetch();
if(!t)
return;
break;
case IR.Char:
if(front == re.ir[t.pc].data)
{
t.pc += IRL!(IR.Char);
nlist.insertBack(t);
}
else
recycle(t);
t = worklist.fetch();
if(!t)
return;
break;
case IR.Any:
t.pc += IRL!(IR.Any);
if(!(re.flags & RegexOption.singleline)
&& (front == '\r' || front == '\n'))
recycle(t);
else
nlist.insertBack(t);
t = worklist.fetch();
if(!t)
return;
break;
case IR.CodepointSet:
if(re.charsets[re.ir[t.pc].data].scanFor(front))
{
t.pc += IRL!(IR.CodepointSet);
nlist.insertBack(t);
}
else
{
recycle(t);
}
t = worklist.fetch();
if(!t)
return;
break;
case IR.Trie:
if(re.tries[re.ir[t.pc].data][front])
{
t.pc += IRL!(IR.Trie);
nlist.insertBack(t);
}
else
{
recycle(t);
}
t = worklist.fetch();
if(!t)
return;
break;
default:
assert(0, "Unrecognized instruction " ~ re.ir[t.pc].mnemonic);
}
else
{
default:
recycle(t);
}
t = worklist.fetch();
if(!t)
return;
break;
default:
assert(0, "Unrecognized instruction " ~ re.ir[t.pc].mnemonic);
}
else
{
default:
recycle(t);
t = worklist.fetch();
if(t)
break;
else
return;
}
t = worklist.fetch();
if(!t)
return;
}
}
}

Expand All @@ -5588,26 +5586,28 @@ enum OneShot { Fwd, Bwd };
startPc = cast(uint)re.ir.length-IRL!(IR.LookbehindEnd);
if(!atEnd)//if no char
{
if (startPc!=RestartPc){
debug(fred_matching)
{
static if(direction == OneShot.Fwd)
writefln("-- Threaded matching (forward) threads at %s", s[index..s.lastIndex]);
else
writefln("-- Threaded matching (backward) threads at %s", retro(s[index..s.lastIndex]));
}
if(startPc!=RestartPc)
{
auto startT = createStart(index, startPc);
genCounter++;
evalFn!true(startT, matches);
}
for(;;)
{
genCounter++;
debug(fred_matching) writeln("\n-- Started iteration of main cycle");
genCounter++;
debug(fred_matching)
{
static if(direction == OneShot.Fwd)
writefln("Threaded matching (forward) threads at %s", s[index..s.lastIndex]);
else
writefln("Threaded matching (backward) threads at %s", retro(s[index..s.lastIndex]));
foreach(t; clist[])
{
assert(t);
writef("pc=%s ",t.pc);
write(t.matches);
writeln();
}
}
for(Thread!DataIndex* t = clist.fetch(); t; t = clist.fetch())
Expand All @@ -5621,14 +5621,16 @@ enum OneShot { Fwd, Bwd };
}
clist = nlist;
nlist = (ThreadList!DataIndex).init;
if(!next()){
if(!next())
{
if (!atEnd) return MatchResult.PartialMatch;
break;
}
debug(fred_matching) writeln("-- Ended iteration of main cycle\n");
}
}
genCounter++; //increment also on each end
debug(fred_matching) writefln("Threaded matching (%s) threads at end",
debug(fred_matching) writefln("-- Threaded matching (%s) threads at end",
direction == OneShot.Fwd ? "forward" : "backward");
//try out all zero-width posibilities
for(Thread!DataIndex* t = clist.fetch(); t; t = clist.fetch())
Expand All @@ -5647,7 +5649,7 @@ enum OneShot { Fwd, Bwd };
void evalBack(bool withInput)(Thread!DataIndex* t, Group!DataIndex[] matches)
{
ThreadList!DataIndex worklist;
debug(fred_matching) writeln("Evaluating thread backwards");
debug(fred_matching) writeln("---- Evaluating thread backwards");
do
{
debug(fred_matching)
Expand Down Expand Up @@ -7629,6 +7631,17 @@ else
//note that the regex pattern itself is probably bogus
assert(match(r"\>wgEncode-blah-Tfbs.narrow</a>", peakRegex));
}

// bugzilla 9211
unittest
{
auto rx_1 = regex(r"^(\w)*(\d)");
auto m = match("1234", rx_1);
assert(equal(m.front, ["1234", "3", "4"]));
auto rx_2 = regex(r"^([0-9])*(\d)");
auto m2 = match("1234", rx_2);
assert(equal(m2.front, ["1234", "3", "4"]));
}
}

}//version(unittest)

0 comments on commit efb7a7b

Please sign in to comment.