Skip to content

Commit

Permalink
do reverse of a bytecode in lookbehinds
Browse files Browse the repository at this point in the history
  • Loading branch information
DmitryOlshansky committed Aug 17, 2013
1 parent 525ef30 commit a7b5756
Showing 1 changed file with 43 additions and 6 deletions.
49 changes: 43 additions & 6 deletions std/regex.d
Expand Up @@ -580,6 +580,13 @@ static assert(Bytecode.sizeof == 4);
return output.data;
}

//disassemble the whole chunk
@trusted void printBytecode(in Bytecode[] slice, in NamedGroup[] dict=[])
{
import std.stdio;
for(size_t pc=0; pc<slice.length; pc += slice[pc].length)
writeln(disassemble(slice, pc, dict));
}
//index entry structure for name --> number of submatch
struct NamedGroup
{
Expand All @@ -599,6 +606,31 @@ struct Group(DataIndex)
}
}

void reverseBytecode(Bytecode[] code)
{
Bytecode[] rev = new Bytecode[code.length];
uint revPc = rev.length;
for(uint pc = 0; pc < code.length; )
{
uint len = code[pc].length;
if(code[pc].isAtom)
{
rev[revPc - len .. revPc] = code[pc .. pc + len];
revPc -= len;
}
else if(code[pc].isStart || code[pc].isEnd)
{
uint second = code[pc].indexOfPair(pc);
uint secLen = code[second].length;
rev[revPc - secLen .. revPc] = code[second .. second + secLen];
revPc -= secLen;
}
pc += len;
}
assert(revPc == 0);
code[] = rev[];
}

//Regular expression engine/parser options:
// global - search all nonoverlapping matches in input
// casefold - case insensitive matching, do casefolding on match in unicode mode
Expand Down Expand Up @@ -1046,7 +1078,6 @@ struct Parser(R)
assert(lookaroundNest);
fixLookaround(fix);
lookaroundNest--;
put(ir[fix].paired);
break;
case IR.Option: //| xxx )
//two fixups: last option + full OR
Expand All @@ -1064,7 +1095,6 @@ struct Parser(R)
lookaroundNest--;
fix = fixupStack.pop();
fixLookaround(fix);
put(ir[fix].paired);
break;
default://(?:xxx)
fixupStack.pop();
Expand Down Expand Up @@ -1327,7 +1357,7 @@ struct Parser(R)
"maximum lookaround depth is exceeded");
}

//fixup lookaround with start at offset fix
//fixup lookaround with start at offset fix and append a proper *-End opcode
void fixLookaround(uint fix)
{
ir[fix] = Bytecode(ir[fix].code,
Expand All @@ -1338,6 +1368,16 @@ struct Parser(R)
//groups are cumulative across lookarounds
ir[fix+2] = Bytecode.fromRaw(groupStack.top+g);
groupStack.top += g;
if(ir[fix].code == IR.LookbehindStart || ir[fix].code == IR.NeglookbehindStart)
{
reverseBytecode(ir[fix + IRL!(IR.LookbehindStart) .. $]);
}
put(ir[fix].paired);
debug(fred_parser)
{
//writeln("After reverse:");
printBytecode(ir[fix..$]);
}
}

//CodepointSet operations relatively in order of priority
Expand Down Expand Up @@ -2127,9 +2167,6 @@ private:
//print out disassembly a program's IR
@trusted debug(std_regex_parser) void print() const
{//@@@BUG@@@ write is system
writefln("PC\tINST\n");
prettyPrint(delegate void(const(char)[] s){ write(s); },ir);
writefln("\n");
for(uint i = 0; i < ir.length; i += ir[i].length)
{
writefln("%d\t%s ", i, disassemble(ir, i, dict));
Expand Down

0 comments on commit a7b5756

Please sign in to comment.