Skip to content

Commit

Permalink
Merge pull request #4147 from DmitryOlshansky/faster-ct-bt
Browse files — browse the repository at this point in the history
[std.regex] Even faster C-T regex
  • Loading branch information
schveiguy committed Apr 8, 2016
2 parents d4b1150 + 7f8b615 commit 635c9aa
Showing 1 changed file with 33 additions and 11 deletions.
44 changes: 33 additions & 11 deletions std/regex/internal/backtracking.d
Expand Up @@ -59,9 +59,12 @@ template BacktrackingMatcher(bool CTregex)
mask |= 1UL<<d;
return p != 0;
}
-offset = idx;
-mask = 1;
-return false;
+else
+{
+offset = idx;
+mask = 1;
+return false;
+}
}
}
//local slice of matches, global for backref
Expand Down Expand Up @@ -635,16 +638,19 @@ template BacktrackingMatcher(bool CTregex)

bool prevStack()
{
-import core.stdc.stdlib;
size_t* prev = memory.ptr-1;
prev = cast(size_t*)*prev;//take out hidden pointer
if(!prev)
return false;
-free(memory.ptr);//last segment is freed in RegexMatch
-immutable size = initialStack*(stateSize + 2*re.ngroup);
-memory = prev[0..size];
-lastState = size;
-return true;
+else
+{
+import core.stdc.stdlib;
+free(memory.ptr);//last segment is freed in RegexMatch
+immutable size = initialStack*(stateSize + 2*re.ngroup);
+memory = prev[0..size];
+lastState = size;
+return true;
+}
}

void stackPush(T)(T val)
Expand Down Expand Up @@ -775,10 +781,11 @@ struct CtContext
//to mark the portion of matches to save
int match, total_matches;
int reserved;
+CodepointSet[] charsets;


//state of codegenerator
-struct CtState
+static struct CtState
{
string code;
int addr;
Expand All @@ -789,6 +796,7 @@ struct CtContext
match = 1;
reserved = 1; //first match is skipped
total_matches = re.ngroup;
+charsets = re.charsets;
}

CtContext lookaround(uint s, uint e)
Expand Down Expand Up @@ -1248,13 +1256,27 @@ struct CtContext
$$`, bailOut, addr >= 0 ? "next();" :"",nextInstr);
break;
case IR.CodepointSet:
-code ~= ctSub( `
+if(charsets.length)
+{
+string name = `func_`~to!string(addr+1);
+string funcCode = charsets[ir[0].data].toSourceCode(name);
+code ~= ctSub( `
+static $$
+if(atEnd || !$$(front))
+$$
+$$
+$$`, funcCode, name, bailOut, addr >= 0 ? "next();" :"", nextInstr);
+}
+else
+code ~= ctSub( `
if(atEnd || !re.charsets[$$].scanFor(front))
$$
$$
$$`, ir[0].data, bailOut, addr >= 0 ? "next();" :"", nextInstr);
break;
case IR.Trie:
+if(charsets.length && charsets[ir[0].data].byInterval.length <= 8)
+goto case IR.CodepointSet;
code ~= ctSub( `
if(atEnd || !re.matchers[$$][front])
$$
Expand Down

0 comments on commit 635c9aa

Please sign in to comment.