From d127c093bed9367399ee3502ed896fc37b3dedc4 Mon Sep 17 00:00:00 2001 From: Nick Gammon Date: Sun, 26 Jul 2015 11:02:06 +1000 Subject: [PATCH] Updated lpeg (Lua Parsing Expression Grammar) from version 0.10 to 0.12 --- scripting/lpeg.c | 2433 -------------------------------------- scripting/lpeg.h | 42 - scripting/lpeg/lpcap.c | 544 +++++++++ scripting/lpeg/lpcap.h | 43 + scripting/lpeg/lpcode.c | 993 ++++++++++++++++ scripting/lpeg/lpcode.h | 40 + scripting/lpeg/lpprint.h | 35 + scripting/lpeg/lptree.c | 1307 ++++++++++++++++++++ scripting/lpeg/lptree.h | 77 ++ scripting/lpeg/lptypes.h | 161 +++ scripting/lpeg/lpvm.c | 363 ++++++ scripting/lpeg/lpvm.h | 58 + 12 files changed, 3621 insertions(+), 2475 deletions(-) delete mode 100644 scripting/lpeg.c delete mode 100644 scripting/lpeg.h create mode 100644 scripting/lpeg/lpcap.c create mode 100644 scripting/lpeg/lpcap.h create mode 100644 scripting/lpeg/lpcode.c create mode 100644 scripting/lpeg/lpcode.h create mode 100644 scripting/lpeg/lpprint.h create mode 100644 scripting/lpeg/lptree.c create mode 100644 scripting/lpeg/lptree.h create mode 100644 scripting/lpeg/lptypes.h create mode 100644 scripting/lpeg/lpvm.c create mode 100644 scripting/lpeg/lpvm.h diff --git a/scripting/lpeg.c b/scripting/lpeg.c deleted file mode 100644 index ab6432e1..00000000 --- a/scripting/lpeg.c +++ /dev/null @@ -1,2433 +0,0 @@ -#pragma warning( disable : 4244) // conversion from 'int ' to 'short ', possible loss of data -#pragma warning( disable : 4505) // unreferenced local function has been removed - -// Implements: - -// lpeg.B -// lpeg.C -// lpeg.Carg -// lpeg.Cb -// lpeg.Cc -// lpeg.Cf -// lpeg.Cg -// lpeg.Cmt -// lpeg.Cp -// lpeg.Cs -// lpeg.Ct -// lpeg.P -// lpeg.R -// lpeg.S -// lpeg.V -// lpeg.locale -// lpeg.match -// lpeg.print -// lpeg.setmaxstack -// lpeg.span -// lpeg.type -// lpeg.version - -/* -** $Id: lpeg.c,v 1.112 2010/11/03 17:07:50 roberto Exp $ -** LPeg - PEG pattern matching for Lua -** Copyright 2007, Lua.org & 
PUC-Rio (see 'lpeg.html' for license) -** written by Roberto Ierusalimschy -*/ - - -#include -#include -#include -#include -#include - -#ifdef LUA_52 - #include "..\..\lua52\src\lua.h" - #include "..\..\lua52\src\lauxlib.h" -#else - #include "..\lua.h" - #include "..\lauxlib.h" -#endif - -#include "lpeg.h" - - -#define VERSION "0.10" -#define PATTERN_T "lpeg-pattern" -#define MAXSTACKIDX "lpeg-maxstack" - - -/* -** compatibility with Lua 5.2 -*/ -#if (LUA_VERSION_NUM == 502) - -#undef lua_equal -#define lua_equal(L,idx1,idx2) lua_compare(L,(idx1),(idx2),LUA_OPEQ) - -#undef lua_getfenv -#define lua_getfenv lua_getuservalue -#undef lua_setfenv -#define lua_setfenv lua_setuservalue - -#undef lua_objlen -#define lua_objlen lua_rawlen - -#undef luaL_register -#define luaL_register(L,n,f) \ - { if ((n) == NULL) luaL_setfuncs(L,f,0); else luaL_newlib(L,f); } - -#endif - - - -/* initial size for call/backtrack stack */ -#define INITBACK 100 - -/* default maximum size for call/backtrack stack */ -#define MAXBACK INITBACK - -/* size for call/backtrack stack for verifier */ -#define MAXBACKVER 200 - -/* initial size for capture's list */ -#define INITCAPSIZE 32 - - -/* index, on Lua stack, for subject */ -#define SUBJIDX 2 - -/* number of fixed arguments to 'match' (before capture arguments) */ -#define FIXEDARGS 3 - -/* index, on Lua stack, for substitution value cache */ -#define subscache(cs) ((cs)->ptop + 1) - -/* index, on Lua stack, for capture list */ -#define caplistidx(ptop) ((ptop) + 2) - -/* index, on Lua stack, for pattern's fenv */ -#define penvidx(ptop) ((ptop) + 3) - -/* index, on Lua stack, for backtracking stack */ -#define stackidx(ptop) ((ptop) + 4) - - - -typedef unsigned char byte; - - -#define CHARSETSIZE ((UCHAR_MAX/CHAR_BIT) + 1) - - -typedef byte Charset[CHARSETSIZE]; - - -/* Virtual Machine's instructions */ -typedef enum Opcode { - IAny, IChar, ISet, ISpan, - IBack, - IRet, IEnd, - IChoice, IJmp, ICall, IOpenCall, - ICommit, IPartialCommit, 
IBackCommit, IFailTwice, IFail, IGiveup, - IFunc, - IFullCapture, IEmptyCapture, IEmptyCaptureIdx, - IOpenCapture, ICloseCapture, ICloseRunTime -} Opcode; - - -#define ISJMP 0x1 -#define ISCHECK 0x2 -#define ISFIXCHECK 0x4 -#define ISNOFAIL 0x8 -#define ISCAPTURE 0x10 -#define ISMOVABLE 0x20 -#define ISFENVOFF 0x40 - -static const int opproperties[] = { - /* IAny */ ISCHECK | ISFIXCHECK | ISJMP, - /* IChar */ ISCHECK | ISFIXCHECK | ISJMP, - /* ISet */ ISCHECK | ISFIXCHECK | ISJMP, - /* ISpan */ ISNOFAIL, - /* IBack */ 0, - /* IRet */ 0, - /* IEnd */ 0, - /* IChoice */ ISJMP, - /* IJmp */ ISJMP | ISNOFAIL, - /* ICall */ ISJMP, - /* IOpenCall */ ISFENVOFF, - /* ICommit */ ISJMP, - /* IPartialCommit */ ISJMP, - /* IBackCommit */ ISJMP, - /* IFailTwice */ 0, - /* IFail */ 0, - /* IGiveup */ 0, - /* IFunc */ ISCHECK | ISJMP, - /* IFullCapture */ ISCAPTURE | ISNOFAIL | ISFENVOFF, - /* IEmptyCapture */ ISCAPTURE | ISNOFAIL | ISMOVABLE, - /* IEmptyCaptureIdx */ISCAPTURE | ISNOFAIL | ISMOVABLE | ISFENVOFF, - /* IOpenCapture */ ISCAPTURE | ISNOFAIL | ISMOVABLE | ISFENVOFF, - /* ICloseCapture */ ISCAPTURE | ISNOFAIL | ISMOVABLE | ISFENVOFF, - /* ICloseRunTime */ ISCAPTURE | ISFENVOFF -}; - - -typedef union Instruction { - struct Inst { - byte code; - byte aux; - short offset; - } i; - PattFunc f; - int iv; - byte buff[1]; -} Instruction; - -static const Instruction giveup = {{IGiveup, 0, 0}}; - -#define getkind(op) ((op)->i.aux & 0xF) -#define getoff(op) (((op)->i.aux >> 4) & 0xF) - -#define dest(p,x) ((x) + ((p)+(x))->i.offset) - -#define MAXOFF 0xF -#define MAXAUX 0xFF - -/* maximum size (in elements) for a pattern */ -#define MAXPATTSIZE (SHRT_MAX - 10) - - -#define isprop(op,p) (opproperties[(op)->i.code] & (p)) -#define isjmp(op) (isprop(op, ISJMP) && (op)->i.offset != 0) -#define iscapture(op) isprop(op, ISCAPTURE) -#define ischeck(op) (isprop(op, ISCHECK) && (op)->i.offset == 0) -#define isfixcheck(op) (isprop(op, ISFIXCHECK) && (op)->i.offset == 0) -#define istest(op) 
(isprop(op, ISCHECK) && (op)->i.offset != 0) -#define isnofail(op) isprop(op, ISNOFAIL) -#define ismovable(op) isprop(op, ISMOVABLE) -#define isfenvoff(op) isprop(op, ISFENVOFF) - - -/* kinds of captures */ -typedef enum CapKind { - Cclose, Cposition, Cconst, Cbackref, Carg, Csimple, Ctable, Cfunction, - Cquery, Cstring, Csubst, Cfold, Cruntime, Cgroup -} CapKind; - -#define iscapnosize(k) ((k) == Cposition || (k) == Cconst) - - -typedef struct Capture { - const char *s; /* position */ - short idx; - byte kind; - byte siz; -} Capture; - - -/* size (in elements) for an instruction plus extra l bytes */ -#define instsize(l) (((l) + sizeof(Instruction) - 1)/sizeof(Instruction) + 1) - - -/* size (in elements) for a ISet instruction */ -#define CHARSETINSTSIZE instsize(CHARSETSIZE) - -/* size (in elements) for a IFunc instruction */ -#define funcinstsize(p) ((p)->i.aux + 2) - - -#define loopset(v,b) { int v; for (v = 0; v < CHARSETSIZE; v++) b; } - - -#define testchar(st,c) (((int)(st)[((c) >> 3)] & (1 << ((c) & 7)))) -#define setchar(st,c) ((st)[(c) >> 3] |= (1 << ((c) & 7))) - - - -static int sizei (const Instruction *i) { - switch((Opcode)i->i.code) { - case ISet: case ISpan: return CHARSETINSTSIZE; - case IFunc: return funcinstsize(i); - default: return 1; - } -} - - -static const char *val2str (lua_State *L, int idx) { - const char *k = lua_tostring(L, idx); - if (k != NULL) - return lua_pushfstring(L, "rule '%s'", k); - else - return lua_pushfstring(L, "rule ", luaL_typename(L, idx)); -} - - -static int getposition (lua_State *L, int t, int i) { - int res; - lua_getfenv(L, -1); - lua_rawgeti(L, -1, i); /* get key from pattern's environment */ - lua_gettable(L, t); /* get position from positions table */ - res = lua_tointeger(L, -1); - if (res == 0) { /* key has no registered position? 
*/ - lua_rawgeti(L, -2, i); /* get key again */ - return luaL_error(L, "%s is not defined in given grammar", val2str(L, -1)); - } - lua_pop(L, 2); /* remove environment and position */ - return res; -} - - -/* -** {====================================================== -** Printing patterns -** ======================================================= -*/ - - -static void printcharset (const Charset st) { - int i; - printf("["); - for (i = 0; i <= UCHAR_MAX; i++) { - int first = i; - while (testchar(st, i) && i <= UCHAR_MAX) i++; - if (i - 1 == first) /* unary range? */ - printf("(%02x)", first); - else if (i - 1 > first) /* non-empty range? */ - printf("(%02x-%02x)", first, i - 1); - } - printf("]"); -} - - -static void printcapkind (int kind) { - const char *const modes[] = { - "close", "position", "constant", "backref", - "argument", "simple", "table", "function", - "query", "string", "substitution", "fold", - "runtime", "group"}; - printf("%s", modes[kind]); -} - - -static void printjmp (const Instruction *op, const Instruction *p) { - printf("-> "); - if (p->i.offset == 0) printf("FAIL"); - else printf("%d", (int)(dest(0, p) - op)); -} - - -static void printinst (const Instruction *op, const Instruction *p) { - const char *const names[] = { - "any", "char", "set", "span", "back", - "ret", "end", - "choice", "jmp", "call", "open_call", - "commit", "partial_commit", "back_commit", "failtwice", "fail", "giveup", - "func", - "fullcapture", "emptycapture", "emptycaptureidx", "opencapture", - "closecapture", "closeruntime" - }; - printf("%02ld: %s ", (long)(p - op), names[p->i.code]); - switch ((Opcode)p->i.code) { - case IChar: { - printf("'%c'", p->i.aux); - printjmp(op, p); - break; - } - case IAny: { - printf("* %d", p->i.aux); - printjmp(op, p); - break; - } - case IFullCapture: case IOpenCapture: - case IEmptyCapture: case IEmptyCaptureIdx: - case ICloseCapture: case ICloseRunTime: { - printcapkind(getkind(p)); - printf("(n = %d) (off = %d)", getoff(p), 
p->i.offset); - break; - } - case ISet: { - printcharset((p+1)->buff); - printjmp(op, p); - break; - } - case ISpan: { - printcharset((p+1)->buff); - break; - } - case IOpenCall: { - printf("-> %d", p->i.offset); - break; - } - case IChoice: { - printjmp(op, p); - printf(" (%d)", p->i.aux); - break; - } - case IJmp: case ICall: case ICommit: - case IPartialCommit: case IBackCommit: { - printjmp(op, p); - break; - } - default: break; - } - printf("\n"); -} - - -static void printpatt (Instruction *p) { - Instruction *op = p; - for (;;) { - printinst(op, p); - if ((Opcode)p->i.code == IEnd) break; - p += sizei(p); - } -} - - -static void printcap (Capture *cap) { - printcapkind(cap->kind); - printf(" (idx: %d - size: %d) -> %p\n", cap->idx, cap->siz, cap->s); -} - - -static void printcaplist (Capture *cap) { - for (; cap->s; cap++) printcap(cap); -} - -/* }====================================================== */ - - -/* -** {====================================================== -** Virtual Machine -** ======================================================= -*/ - - -typedef struct Stack { - const char *s; - const Instruction *p; - int caplevel; -} Stack; - - -#define getstackbase(L, ptop) ((Stack *)lua_touserdata(L, stackidx(ptop))) - - -static int runtimecap (lua_State *L, Capture *close, Capture *ocap, - const char *o, const char *s, int ptop); - - -static Capture *doublecap (lua_State *L, Capture *cap, int captop, int ptop) { - Capture *newc; - if (captop >= INT_MAX/((int)sizeof(Capture) * 2)) - luaL_error(L, "too many captures"); - newc = (Capture *)lua_newuserdata(L, captop * 2 * sizeof(Capture)); - memcpy(newc, cap, captop * sizeof(Capture)); - lua_replace(L, caplistidx(ptop)); - return newc; -} - - -static Stack *doublestack (lua_State *L, Stack **stacklimit, int ptop) { - Stack *stack = getstackbase(L, ptop); - Stack *newstack; - int n = *stacklimit - stack; - int max, newn; - lua_getfield(L, LUA_REGISTRYINDEX, MAXSTACKIDX); - max = lua_tointeger(L, -1); - 
lua_pop(L, 1); - if (n >= max) - luaL_error(L, "too many pending calls/choices"); - newn = 2*n; if (newn > max) newn = max; - newstack = (Stack *)lua_newuserdata(L, newn * sizeof(Stack)); - memcpy(newstack, stack, n * sizeof(Stack)); - lua_replace(L, stackidx(ptop)); - *stacklimit = newstack + newn; - return newstack + n; - -} - - -static void adddyncaptures (const char *s, Capture *base, int n, int fd) { - int i; - assert(base[0].kind == Cruntime && base[0].siz == 0); - base[0].idx = fd; /* first returned capture */ - for (i = 1; i < n; i++) { /* add extra captures */ - base[i].siz = 1; /* mark it as closed */ - base[i].s = s; - base[i].kind = Cruntime; - base[i].idx = fd + i; /* stack index */ - } - base[n].kind = Cclose; /* add closing entry */ - base[n].siz = 1; - base[n].s = s; -} - - -#define condfailed(p) { int f = p->i.offset; if (f) p+=f; else goto fail; } - -static const char *match (lua_State *L, - const char *o, const char *s, const char *e, - Instruction *op, Capture *capture, int ptop) { - Stack stackbase[INITBACK]; - Stack *stacklimit = stackbase + INITBACK; - Stack *stack = stackbase; /* point to first empty slot in stack */ - int capsize = INITCAPSIZE; - int captop = 0; /* point to first empty slot in captures */ - const Instruction *p = op; - stack->p = &giveup; stack->s = s; stack->caplevel = 0; stack++; - lua_pushlightuserdata(L, stackbase); - for (;;) { -#if defined(DEBUG) - printf("s: |%s| stck: %d c: %d ", - s, stack - getstackbase(L, ptop), captop); - printinst(op, p); -#endif - switch ((Opcode)p->i.code) { - case IEnd: { - assert(stack == getstackbase(L, ptop) + 1); - capture[captop].kind = Cclose; - capture[captop].s = NULL; - return s; - } - case IGiveup: { - assert(stack == getstackbase(L, ptop)); - return NULL; - } - case IRet: { - assert(stack > getstackbase(L, ptop) && (stack - 1)->s == NULL); - p = (--stack)->p; - continue; - } - case IAny: { - int n = p->i.aux; - if (n <= e - s) { p++; s += n; } - else condfailed(p); - continue; - } 
- case IChar: { - if ((byte)*s == p->i.aux && s < e) { p++; s++; } - else condfailed(p); - continue; - } - case ISet: { - int c = (byte)*s; - if (testchar((p+1)->buff, c) && s < e) - { p += CHARSETINSTSIZE; s++; } - else condfailed(p); - continue; - } - case IBack: { - int n = p->i.aux; - if (n > s - o) goto fail; - s -= n; p++; - continue; - } - case ISpan: { - for (; s < e; s++) { - int c = (byte)*s; - if (!testchar((p+1)->buff, c)) break; - } - p += CHARSETINSTSIZE; - continue; - } - case IFunc: { - const char *r = (p+1)->f(s, e, o, (p+2)->buff); - if (r != NULL) { s = r; p += funcinstsize(p); } - else condfailed(p); - continue; - } - case IJmp: { - p += p->i.offset; - continue; - } - case IChoice: { - if (stack == stacklimit) - stack = doublestack(L, &stacklimit, ptop); - stack->p = dest(0, p); - stack->s = s - p->i.aux; - stack->caplevel = captop; - stack++; - p++; - continue; - } - case ICall: { - if (stack == stacklimit) - stack = doublestack(L, &stacklimit, ptop); - stack->s = NULL; - stack->p = p + 1; /* save return address */ - stack++; - p += p->i.offset; - continue; - } - case ICommit: { - assert(stack > getstackbase(L, ptop) && (stack - 1)->s != NULL); - stack--; - p += p->i.offset; - continue; - } - case IPartialCommit: { - assert(stack > getstackbase(L, ptop) && (stack - 1)->s != NULL); - (stack - 1)->s = s; - (stack - 1)->caplevel = captop; - p += p->i.offset; - continue; - } - case IBackCommit: { - assert(stack > getstackbase(L, ptop) && (stack - 1)->s != NULL); - s = (--stack)->s; - captop = stack->caplevel; - p += p->i.offset; - continue; - } - case IFailTwice: - assert(stack > getstackbase(L, ptop)); - stack--; - /* go through */ - case IFail: - fail: { /* pattern failed: try to backtrack */ - do { /* remove pending calls */ - assert(stack > getstackbase(L, ptop)); - s = (--stack)->s; - } while (s == NULL); - captop = stack->caplevel; - p = stack->p; - continue; - } - case ICloseRunTime: { - int fr = lua_gettop(L) + 1; /* stack index of first 
result */ - int ncap = runtimecap(L, capture + captop, capture, o, s, ptop); - lua_Integer res = lua_tointeger(L, fr) - 1; /* offset */ - int n = lua_gettop(L) - fr; /* number of new captures */ - if (res == -1) { /* may not be a number */ - if (!lua_toboolean(L, fr)) { /* false value? */ - lua_settop(L, fr - 1); /* remove results */ - goto fail; /* and fail */ - } - else if (lua_isboolean(L, fr)) /* true? */ - res = s - o; /* keep current position */ - } - if (res < s - o || res > e - o) - luaL_error(L, "invalid position returned by match-time capture"); - s = o + res; /* update current position */ - captop -= ncap; /* remove nested captures */ - lua_remove(L, fr); /* remove first result (offset) */ - if (n > 0) { /* captures? */ - if ((captop += n + 1) >= capsize) { - capture = doublecap(L, capture, captop, ptop); - capsize = 2 * captop; - } - adddyncaptures(s, capture + captop - n - 1, n, fr); - } - p++; - continue; - } - case ICloseCapture: { - const char *s1 = s - getoff(p); - assert(captop > 0); - if (capture[captop - 1].siz == 0 && - s1 - capture[captop - 1].s < UCHAR_MAX) { - capture[captop - 1].siz = s1 - capture[captop - 1].s + 1; - p++; - continue; - } - else { - capture[captop].siz = 1; /* mark entry as closed */ - goto capture; - } - } - case IEmptyCapture: case IEmptyCaptureIdx: - capture[captop].siz = 1; /* mark entry as closed */ - goto capture; - case IOpenCapture: - capture[captop].siz = 0; /* mark entry as open */ - goto capture; - case IFullCapture: - capture[captop].siz = getoff(p) + 1; /* save capture size */ - capture: { - capture[captop].s = s - getoff(p); - capture[captop].idx = p->i.offset; - capture[captop].kind = getkind(p); - if (++captop >= capsize) { - capture = doublecap(L, capture, captop, ptop); - capsize = 2 * captop; - } - p++; - continue; - } - case IOpenCall: { - lua_rawgeti(L, penvidx(ptop), p->i.offset); - luaL_error(L, "reference to %s outside a grammar", val2str(L, -1)); - } - default: assert(0); return NULL; - } - } -} - 
-/* }====================================================== */ - - -/* -** {====================================================== -** Verifier -** ======================================================= -*/ - - -/* -** check whether pattern may go from 'p' to 'e' without consuming any -** input. Raises an error if it detects a left recursion. 'op' points -** the beginning of the pattern. If pattern belongs to a grammar, -** 'rule' is the stack index where is its corresponding key (only for -** error messages) and 'posttable' is the stack index with a table -** mapping rule keys to the position of their code in the pattern. -*/ -static int verify (lua_State *L, Instruction *op, const Instruction *p, - Instruction *e, int postable, int rule) { - static const char dummy[] = ""; - Stack back[MAXBACKVER]; - int backtop = 0; /* point to first empty slot in back */ - while (p != e) { - switch ((Opcode)p->i.code) { - case IRet: { - p = back[--backtop].p; - continue; - } - case IChoice: { - if (backtop >= MAXBACKVER) - return luaL_error(L, "too many pending calls/choices"); - back[backtop].p = dest(0, p); - back[backtop++].s = dummy; - p++; - continue; - } - case ICall: { - assert((p + 1)->i.code != IRet); /* no tail call */ - if (backtop >= MAXBACKVER) - return luaL_error(L, "too many pending calls/choices"); - back[backtop].s = NULL; - back[backtop++].p = p + 1; - goto dojmp; - } - case IOpenCall: { - int i; - if (postable == 0) /* grammar still not fixed? 
*/ - goto fail; /* to be verified later */ - for (i = 0; i < backtop; i++) { - if (back[i].s == NULL && back[i].p == p + 1) - return luaL_error(L, "%s is left recursive", val2str(L, rule)); - } - if (backtop >= MAXBACKVER) - return luaL_error(L, "too many pending calls/choices"); - back[backtop].s = NULL; - back[backtop++].p = p + 1; - p = op + getposition(L, postable, p->i.offset); - continue; - } - case IBackCommit: - case ICommit: { - assert(backtop > 0 && p->i.offset > 0); - backtop--; - goto dojmp; - } - case IPartialCommit: { - assert(backtop > 0); - if (p->i.offset > 0) goto dojmp; /* forward jump */ - else { /* loop will be detected when checking corresponding rule */ - assert(postable != 0); - backtop--; - p++; /* just go on now */ - continue; - } - } - case IBack: { - if (p->i.aux == 1 && isfixcheck(p + 1)) { /* char test? */ - p++; /* skip back instruction */ - p += sizei(p); /* skip char test */ - } - else { /* standard lookbehind code */ - assert((Opcode)(p - 1)->i.code == IChoice); /* look behind */ - backtop--; - p += (p - 1)->i.offset; - assert((Opcode)(p - 1)->i.code == IFail); /* look behind */ - } - continue; - } - case IAny: - case IChar: - case ISet: { - const Instruction *next = p + sizei(p); - if ((Opcode)next->i.code == IBack) - p = next + 1; /* continue after the back instruction */ - else if (p->i.offset == 0) goto fail; - else /* jump */ - p += p->i.offset; - continue; - } - case IJmp: - dojmp: { - p += p->i.offset; - continue; - } - case IFailTwice: /* 'not' predicate */ - goto fail; /* body could have failed; try to backtrack it */ - case IFail: { - if (p > op && (p - 1)->i.code == IBackCommit) { /* 'and' predicate? 
*/ - p++; /* pretend it succeeded and go ahead */ - continue; - } - /* else failed: go through */ - } - fail: { /* pattern failed: try to backtrack */ - do { - if (backtop-- == 0) - return 1; /* no more backtracking */ - } while (back[backtop].s == NULL); - p = back[backtop].p; - continue; - } - case ISpan: - case IOpenCapture: case ICloseCapture: - case IEmptyCapture: case IEmptyCaptureIdx: - case IFullCapture: { - p += sizei(p); - continue; - } - case ICloseRunTime: { - goto fail; /* be liberal in this case */ - } - case IFunc: { - const char *r = (p+1)->f(dummy, dummy, dummy, (p+2)->buff); - if (r != NULL) { p += funcinstsize(p); } - else condfailed(p); - continue; - } - case IEnd: /* cannot happen (should stop before it) */ - default: assert(0); return 0; - } - } - assert(backtop == 0); - return 0; -} - - -static void checkrule (lua_State *L, Instruction *op, int from, int to, - int postable, int rule) { - int i; - int lastopen = 0; /* more recent OpenCall seen in the code */ - for (i = from; i < to; i += sizei(op + i)) { - if (op[i].i.code == IPartialCommit && op[i].i.offset < 0) { /* loop? */ - int start = dest(op, i); - assert(op[start - 1].i.code == IChoice && dest(op, start - 1) == i + 1); - if (start <= lastopen) { /* loop does contain an open call? 
*/ - if (!verify(L, op, op + start, op + i, postable, rule)) /* check body */ - luaL_error(L, "possible infinite loop in %s", val2str(L, rule)); - } - } - else if (op[i].i.code == IOpenCall) - lastopen = i; - } - assert(op[i - 1].i.code == IRet); - verify(L, op, op + from, op + to - 1, postable, rule); -} - - - - -/* }====================================================== */ - - -/* -** {====================================================== -** Building Patterns -** ======================================================= -*/ - -enum charsetanswer { NOINFO, ISCHARSET, VALIDSTARTS }; - -typedef struct CharsetTag { - enum charsetanswer tag; - Charset cs; -} CharsetTag; - - -static Instruction *getpatt (lua_State *L, int idx, int *size); - - -static void check2test (Instruction *p, int n) { - assert(ischeck(p) && n != 0); - p->i.offset = n; -} - - -/* -** invert array slice p[0]-p[e] (both inclusive) -*/ -static void invert (Instruction *p, int e) { - int i; - for (i = 0; i < e; i++, e--) { - Instruction temp = p[i]; - p[i] = p[e]; - p[e] = temp; - } -} - - -/* -** rotate array slice p[0]-p[e] (both inclusive) 'n' steps -** to the 'left' -*/ -static void rotate (Instruction *p, int e, int n) { - invert(p, n - 1); - invert(p + n, e - n); - invert(p, e); -} - - -#define op_step(p) ((p)->i.code == IAny ? 
(p)->i.aux : 1) - - -static int skipchecks (Instruction *p, int up, int *pn) { - int i, n = 0; - for (i = 0; isfixcheck(p + i); i += sizei(p + i)) { - int st = op_step(p + i); - if (n + st > MAXOFF - up) break; - n += st; - } - *pn = n; - return i; -} - - -#define ismovablecap(op) (ismovable(op) && getoff(op) < MAXOFF) - -static void optimizecaptures (Instruction *p) { - int i; - int limit = 0; - for (i = 0; p[i].i.code != IEnd; i += sizei(p + i)) { - if (isjmp(p + i) && dest(p, i) >= limit) - limit = dest(p, i) + 1; /* do not optimize jump targets */ - else if (i >= limit && ismovablecap(p + i) && isfixcheck(p + i + 1)) { - int end, n, j; /* found a border capture|check */ - int maxoff = getoff(p + i); - int start = i; - /* find first capture in the group */ - while (start > limit && ismovablecap(p + start - 1)) { - start--; - if (getoff(p + start) > maxoff) maxoff = getoff(p + start); - } - end = skipchecks(p + i + 1, maxoff, &n) + i; /* find last check */ - if (n == 0) continue; /* first check is too big to move across */ - assert(n <= MAXOFF && start <= i && i < end); - for (j = start; j <= i; j++) - p[j].i.aux += (n << 4); /* correct offset of captures to be moved */ - rotate(p + start, end - start, i - start + 1); /* move them up */ - i = end; - assert(isfixcheck(p + start) && iscapture(p + i)); - } - } -} - - -static int target (Instruction *p, int i) { - while (p[i].i.code == IJmp) i += p[i].i.offset; - return i; -} - - -static void optimizejumps (Instruction *p) { - int i; - for (i = 0; p[i].i.code != IEnd; i += sizei(p + i)) { - if (isjmp(p + i)) - p[i].i.offset = target(p, dest(p, i)) - i; - } -} - - -static void optimizechoice (Instruction *p) { - assert(p->i.code == IChoice); - if (isfixcheck(p + 1)) { - int lc = sizei(p + 1); - rotate(p, lc, 1); - assert(isfixcheck(p) && (p + lc)->i.code == IChoice); - (p + lc)->i.aux = op_step(p); - check2test(p, (p + lc)->i.offset); - (p + lc)->i.offset -= lc; - } -} - - -/* -** A 'headfail' pattern is a pattern 
that can only fails in its first -** instruction, which must be a check. -*/ -static int isheadfail (Instruction *p) { - if (!ischeck(p)) return 0; - /* check that other operations cannot fail */ - for (p += sizei(p); p->i.code != IEnd; p += sizei(p)) - if (!isnofail(p)) return 0; - return 1; -} - - -#define checkpattern(L, idx) ((Instruction *)luaL_checkudata(L, idx, PATTERN_T)) - - -/* -** Return the number of elements in the ktable of a pattern. -** in Lua 5.2, default "environment" for patterns is nil, not -** a table. Treat it as an empty table. -*/ -static int ktablelen (lua_State *L, int idx) { - if (!lua_istable(L, idx)) return 0; - else return lua_objlen(L, idx); -} - - -/* -** join the elements of the ktable from pattern 'p1' into the ktable of -** the pattern at the top of the stack ('p'). If 'p1' has no elements, -** 'p' keeps its original ktable. If 'p' has no elements, it shares -** 'p1' ktable. Otherwise, this function creates a new ktable for 'p'. -** Return the offset of original 'p' elements in the new ktable. 
-*/ -static int jointable (lua_State *L, int p1) { - int n, n1, i; - lua_getfenv(L, p1); - n1 = ktablelen(L, -1); /* number of elements in p1's env */ - lua_getfenv(L, -2); - if (n1 == 0 || lua_equal(L, -2, -1)) { - lua_pop(L, 2); - return 0; /* no need to change anything */ - } - n = ktablelen(L, -1); /* number of elements in p's env */ - if (n == 0) { - lua_pop(L, 1); /* removes p env */ - lua_setfenv(L, -2); /* p now shares p1's env */ - return 0; /* no need to correct anything */ - } - lua_createtable(L, n + n1, 0); - /* stack: p; p1 env; p env; new p env */ - for (i = 1; i <= n; i++) { - lua_rawgeti(L, -2, i); - lua_rawseti(L, -2, i); - } - for (i = 1; i <= n1; i++) { - lua_rawgeti(L, -3, i); - lua_rawseti(L, -2, n + i); - } - lua_setfenv(L, -4); /* new table becomes p env */ - lua_pop(L, 2); /* remove p1 env and old p env */ - return n; -} - - -#define copypatt(p1,p2,sz) memcpy(p1, p2, (sz) * sizeof(Instruction)); - -#define pattsize(L,idx) (lua_objlen(L, idx)/sizeof(Instruction) - 1) - - -static int addpatt (lua_State *L, Instruction *p, int p1idx) { - Instruction *p1 = (Instruction *)lua_touserdata(L, p1idx); - int sz = pattsize(L, p1idx); - int corr = jointable(L, p1idx); - copypatt(p, p1, sz + 1); - if (corr != 0) { - Instruction *px; - for (px = p; px < p + sz; px += sizei(px)) { - if (isfenvoff(px) && px->i.offset != 0) - px->i.offset += corr; - } - } - return sz; -} - - -static void setinstaux (Instruction *i, Opcode op, int offset, int aux) { - assert(aux <= MAXAUX); - i->i.code = op; - i->i.offset = offset; - i->i.aux = aux; -} - -#define setinst(i,op,off) setinstaux(i,op,off,0) - -#define setinstcap(i,op,idx,k,n) setinstaux(i,op,idx,((k) | ((n) << 4))) - - -/* -** create a new ktable for pattern at the stack top, mapping -** '1' to the value at stack position 'vidx'. 
-*/ -static int value2fenv (lua_State *L, int vidx) { - lua_createtable(L, 1, 0); - lua_pushvalue(L, vidx); - lua_rawseti(L, -2, 1); - lua_setfenv(L, -2); - return 1; -} - - -static Instruction *newpatt (lua_State *L, size_t n) { - Instruction *p; - if (n >= MAXPATTSIZE - 1) - luaL_error(L, "pattern too big"); - p = (Instruction *)lua_newuserdata(L, (n + 1) * sizeof(Instruction)); - luaL_getmetatable(L, PATTERN_T); - lua_setmetatable(L, -2); - setinst(p + n, IEnd, 0); - return p; -} - - -static void fillcharset (Instruction *p, Charset cs) { - switch (p[0].i.code) { - case ISet: { - loopset(i, cs[i] = p[1].buff[i]); - break; - } - case IChar: { - loopset(i, cs[i] = 0); - setchar(cs, p[0].i.aux); - break; - } - default: { /* any char may start unhandled instructions */ - loopset(i, cs[i] = 0xff); - break; - } - } -} - - -/* -** Function 'tocharset' gets information about which chars may be a -** valid start for a pattern. -*/ - -static enum charsetanswer tocharset (Instruction *p, CharsetTag *c) { - if (isfixcheck(p)) { - fillcharset(p, c->cs); - if ((p + sizei(p))->i.code == IEnd && op_step(p) == 1) - c->tag = ISCHARSET; - else - c->tag = VALIDSTARTS; - } - else - c->tag = NOINFO; - return c->tag; -} - - -static int exclusiveset (Charset c1, Charset c2) { - /* non-empty intersection? 
*/ - loopset(i, {if ((c1[i] & c2[i]) != 0) return 0;}); - return 1; /* no intersection */ -} - - -static int exclusive (CharsetTag *c1, CharsetTag *c2) { - if (c1->tag == NOINFO || c2->tag == NOINFO) - return 0; /* one of them is not filled */ - else return exclusiveset(c1->cs, c2->cs); -} - - -static Instruction *newcharset (lua_State *L) { - Instruction *p = newpatt(L, CHARSETINSTSIZE); - p[0].i.code = ISet; - p[0].i.offset = 0; - loopset(i, p[1].buff[i] = 0); - return p; -} - - -static int set_l (lua_State *L) { - size_t l; - const char *s = luaL_checklstring(L, 1, &l); - if (l == 1) - getpatt(L, 1, NULL); /* a unit set is equivalent to a literal */ - else { - Instruction *p = newcharset(L); - while (l--) { - setchar(p[1].buff, (byte)(*s)); - s++; - } - } - return 1; -} - - -static int range_l (lua_State *L) { - int arg; - int top = lua_gettop(L); - Instruction *p = newcharset(L); - for (arg = 1; arg <= top; arg++) { - int c; - size_t l; - const char *r = luaL_checklstring(L, arg, &l); - luaL_argcheck(L, l == 2, arg, "range must have two characters"); - for (c = (byte)r[0]; c <= (byte)r[1]; c++) - setchar(p[1].buff, c); - } - return 1; -} - - -static int nter_l (lua_State *L) { - Instruction *p; - luaL_argcheck(L, !lua_isnoneornil(L, 1), 1, "non-nil value expected"); - p = newpatt(L, 1); - setinst(p, IOpenCall, value2fenv(L, 1)); - return 1; -} - - - -static int testpattern (lua_State *L, int idx) { - if (lua_touserdata(L, idx)) { /* value is a userdata? */ - if (lua_getmetatable(L, idx)) { /* does it have a metatable? */ - luaL_getmetatable(L, PATTERN_T); - if (lua_rawequal(L, -1, -2)) { /* does it have the correct mt? 
*/ - lua_pop(L, 2); /* remove both metatables */ - return 1; - } - } - } - return 0; -} - - -static Instruction *fix_l (lua_State *L, int t) { - Instruction *p; - int i; - int totalsize = 2; /* include initial call and jump */ - int n = 0; /* number of rules */ - int base = lua_gettop(L); - lua_newtable(L); /* to store relative positions of each rule */ - lua_pushinteger(L, 1); /* default initial rule */ - /* collect patterns and compute sizes */ - lua_pushnil(L); - while (lua_next(L, t) != 0) { - int l; - if (lua_tonumber(L, -2) == 1 && lua_isstring(L, -1)) { - lua_replace(L, base + 2); /* use this value as initial rule */ - continue; - } - if (!testpattern(L, -1)) - luaL_error(L, "%s is not a pattern", val2str(L, -2)); - l = pattsize(L, -1) + 1; /* space for pattern + ret */ - if (totalsize >= MAXPATTSIZE - l) - luaL_error(L, "grammar too large"); - luaL_checkstack(L, LUA_MINSTACK, "grammar has too many rules"); - lua_insert(L, -2); /* put key on top */ - lua_pushvalue(L, -1); /* duplicate key (for lua_next) */ - lua_pushvalue(L, -1); /* duplicate key (to index positions table)) */ - lua_pushinteger(L, totalsize); /* position for this rule */ - lua_settable(L, base + 1); /* store key=>position in positions table */ - totalsize += l; - n++; - } - luaL_argcheck(L, n > 0, t, "empty grammar"); - p = newpatt(L, totalsize); /* create new pattern */ - p++; /* save space for call */ - setinst(p++, IJmp, totalsize - 1); /* after call, jumps to the end */ - for (i = 1; i <= n; i++) { /* copy all rules into new pattern */ - p += addpatt(L, p, base + 1 + i*2); - setinst(p++, IRet, 0); - } - p -= totalsize; /* back to first position */ - totalsize = 2; /* go through each rule's position */ - for (i = 1; i <= n; i++) { /* check all rules */ - int l = pattsize(L, base + 1 + i*2) + 1; - checkrule(L, p, totalsize, totalsize + l, base + 1, base + 2 + i*2); - totalsize += l; - } - lua_pushvalue(L, base + 2); /* get initial rule */ - lua_gettable(L, base + 1); /* get its position in 
postions table */ - i = lua_tonumber(L, -1); /* convert to number */ - lua_pop(L, 1); - if (i == 0) /* is it defined? */ - luaL_error(L, "initial rule not defined in given grammar"); - setinst(p, ICall, i); /* first instruction calls initial rule */ - /* correct calls */ - for (i = 0; i < totalsize; i += sizei(p + i)) { - if (p[i].i.code == IOpenCall) { - int pos = getposition(L, base + 1, p[i].i.offset); - p[i].i.code = (p[target(p, i + 1)].i.code == IRet) ? IJmp : ICall; - p[i].i.offset = pos - i; - } - } - optimizejumps(p); - lua_replace(L, t); /* put new pattern in old's position */ - lua_settop(L, base); /* remove rules and positions table */ - return p; -} - - -static Instruction *any (lua_State *L, int n, int extra, int *offsetp) { - int offset = offsetp ? *offsetp : 0; - Instruction *p = newpatt(L, (n - 1)/UCHAR_MAX + extra + 1); - Instruction *p1 = p + offset; - for (; n > UCHAR_MAX; n -= UCHAR_MAX) - setinstaux(p1++, IAny, 0, UCHAR_MAX); - setinstaux(p1++, IAny, 0, n); - if (offsetp) *offsetp = p1 - p; - return p; -} - - -static Instruction *getpatt (lua_State *L, int idx, int *size) { - Instruction *p; - switch (lua_type(L, idx)) { - case LUA_TSTRING: { - size_t i, len; - const char *s = lua_tolstring(L, idx, &len); - p = newpatt(L, len); - for (i = 0; i < len; i++) - setinstaux(p + i, IChar, 0, (byte)s[i]); - lua_replace(L, idx); - break; - } - case LUA_TNUMBER: { - int n = lua_tointeger(L, idx); - if (n == 0) /* empty pattern? */ - p = newpatt(L, 0); - else if (n > 0) - p = any(L, n, 0, NULL); - else if (-n <= UCHAR_MAX) { - p = newpatt(L, 2); - setinstaux(p, IAny, 2, -n); - setinst(p + 1, IFail, 0); - } - else { - int offset = 2; /* space for ITestAny & IChoice */ - p = any(L, -n - UCHAR_MAX, 3, &offset); - setinstaux(p, IAny, offset + 1, UCHAR_MAX); - setinstaux(p + 1, IChoice, offset, UCHAR_MAX); - setinst(p + offset, IFailTwice, 0); - } - lua_replace(L, idx); - break; - } - case LUA_TBOOLEAN: { - if (lua_toboolean(L, idx)) /* true? 
*/ - p = newpatt(L, 0); /* empty pattern (always succeeds) */ - else { - p = newpatt(L, 1); - setinst(p, IFail, 0); - } - lua_replace(L, idx); - break; - } - case LUA_TTABLE: { - p = fix_l(L, idx); - break; - } - case LUA_TFUNCTION: { - p = newpatt(L, 2); - setinstcap(p, IOpenCapture, value2fenv(L, idx), Cruntime, 0); - setinstcap(p + 1, ICloseRunTime, 0, Cclose, 0); - lua_replace(L, idx); - break; - } - default: { - p = checkpattern(L, idx); - break; - } - } - if (size) *size = pattsize(L, idx); - return p; -} - - -static int getpattl (lua_State *L, int idx) { - int size; - getpatt(L, idx, &size); - return size; -} - - -static int pattern_l (lua_State *L) { - lua_settop(L, 1); - getpatt(L, 1, NULL); - return 1; -} - - -#define isany(p) ((p)->i.code == IAny && ((p) + 1)->i.code == IEnd) -#define isfail(p) ((p)->i.code == IFail) -#define issucc(p) ((p)->i.code == IEnd) - -static int concat_l (lua_State *L) { - /* p1; p2; */ - int l1, l2; - Instruction *p1 = getpatt(L, 1, &l1); - Instruction *p2 = getpatt(L, 2, &l2); - if (isfail(p1) || issucc(p2)) - lua_pushvalue(L, 1); /* fail * x == fail; x * true == x */ - else if (isfail(p2) || issucc(p1)) - lua_pushvalue(L, 2); /* x * fail == fail; true * x == x */ - else if (isany(p1) && isany(p2)) - any(L, p1->i.aux + p2->i.aux, 0, NULL); - else { - Instruction *op = newpatt(L, l1 + l2); - Instruction *p = op + addpatt(L, op, 1); - addpatt(L, p, 2); - optimizecaptures(op); - } - return 1; -} - - -static int diff_l (lua_State *L) { - int l1, l2; - Instruction *p1 = getpatt(L, 1, &l1); - Instruction *p2 = getpatt(L, 2, &l2); - CharsetTag st1, st2; - if (tocharset(p1, &st1) == ISCHARSET && tocharset(p2, &st2) == ISCHARSET) { - Instruction *p = newcharset(L); - loopset(i, p[1].buff[i] = st1.cs[i] & ~st2.cs[i]); - } - else if (isheadfail(p2)) { - Instruction *p = newpatt(L, l2 + 1 + l1); - p += addpatt(L, p, 2); - check2test(p - l2, l2 + 1); - setinst(p++, IFail, 0); - addpatt(L, p, 1); - } - else { /* !e2 . 
e1 */ - /* !e -> choice L1; e; failtwice; L1: ... */ - Instruction *p = newpatt(L, 1 + l2 + 1 + l1); - Instruction *pi = p; - setinst(p++, IChoice, 1 + l2 + 1); - p += addpatt(L, p, 2); - setinst(p++, IFailTwice, 0); - addpatt(L, p, 1); - optimizechoice(pi); - } - return 1; -} - - -static int unm_l (lua_State *L) { - Instruction *p = getpatt(L, 1, NULL); - if (isfail(p)) { /* -false? */ - newpatt(L, 0); /* true */ - return 1; - } - else if (issucc(p)) { /* -true? */ - Instruction *p1 = newpatt(L, 1); /* false */ - setinst(p1, IFail, 0); - return 1; - } - else { /* -A == '' - A */ - lua_pushliteral(L, ""); - lua_insert(L, 1); - return diff_l(L); - } -} - - -static int pattand_l (lua_State *L) { - int l1; - CharsetTag st1; - Instruction *p1 = getpatt(L, 1, &l1); - if (isfail(p1) || issucc(p1)) - lua_pushvalue(L, 1); /* &fail == fail; &true == true */ - else if (tocharset(p1, &st1) == ISCHARSET) { - Instruction *p = newpatt(L, l1 + 1); - copypatt(p, p1, l1); p += l1; - setinstaux(p, IBack, 0, 1); - } - else { /* Choice L1; p1; BackCommit L2; L1: Fail; L2: */ - Instruction *p = newpatt(L, 1 + l1 + 2); - setinst(p++, IChoice, 1 + l1 + 1); - p += addpatt(L, p, 1); - setinst(p++, IBackCommit, 2); - setinst(p, IFail, 0); - } - return 1; -} - - -static int nocalls (const Instruction *p) { - for (; (Opcode)p->i.code != IEnd; p += sizei(p)) - if ((Opcode)p->i.code == IOpenCall) return 0; - return 1; -} - - -static int pattbehind (lua_State *L) { - int l1; - CharsetTag st1; - Instruction *p1 = getpatt(L, 1, &l1); - int n = luaL_optint(L, 2, 1); - luaL_argcheck(L, n <= MAXAUX, 2, "lookbehind delta too large"); - if (!nocalls(p1)) - luaL_error(L, "lookbehind pattern cannot contain non terminals"); - if (isfail(p1) || issucc(p1)) - lua_pushvalue(L, 1); /* tag == NOINFO) return 1; - assert(p1->i.offset != 0); - switch (p1->i.code) { - case IChar: return testchar(st2->cs, p1->i.aux); - case ISet: return !exclusiveset(st2->cs, (p1 + 1)->buff); - default: assert(p1->i.code == IAny); 
return 1; - } -} - - -static Instruction *basicUnion (lua_State *L, Instruction *p1, int l1, - int l2, int *size, CharsetTag *st2) { - Instruction *op; - CharsetTag st1; - tocharset(p1, &st1); - if (st1.tag == ISCHARSET && st2->tag == ISCHARSET) { - Instruction *p = auxnew(L, &op, size, CHARSETINSTSIZE); - setinst(p, ISet, 0); - loopset(i, p[1].buff[i] = st1.cs[i] | st2->cs[i]); - } - else if (exclusive(&st1, st2) || isheadfail(p1)) { - Instruction *p = auxnew(L, &op, size, l1 + 1 + l2); - copypatt(p, p1, l1); - check2test(p, l1 + 1); - p += l1; - setinst(p++, IJmp, l2 + 1); - addpatt(L, p, 2); - } - else { - /* choice L1; e1; commit L2; L1: e2; L2: ... */ - Instruction *p = auxnew(L, &op, size, 1 + l1 + 1 + l2); - setinst(p++, IChoice, 1 + l1 + 1); - copypatt(p, p1, l1); p += l1; - setinst(p++, ICommit, 1 + l2); - addpatt(L, p, 2); - optimizechoice(p - (1 + l1 + 1)); - } - return op; -} - - -static Instruction *separateparts (lua_State *L, Instruction *p1, int l1, - int l2, int *size, CharsetTag *st2) { - int sp = firstpart(p1, l1); - if (sp == 0) /* first part is entire p1? 
*/ - return basicUnion(L, p1, l1, l2, size, st2); - else if ((p1 + sp - 1)->i.code == ICommit || !interfere(p1, sp, st2)) { - Instruction *p; - int init = *size; - int end = init + sp; - *size = end; - p = separateparts(L, p1 + sp, l1 - sp, l2, size, st2); - copypatt(p + init, p1, sp); - (p + end - 1)->i.offset = *size - (end - 1); - return p; - } - else { /* must change back to non-optimized choice */ - Instruction *p; - int init = *size; - int end = init + sp + 1; /* needs one extra instruction (choice) */ - int sizefirst = sizei(p1); /* size of p1's first instruction (the test) */ - *size = end; - p = separateparts(L, p1 + sp, l1 - sp, l2, size, st2); - copypatt(p + init, p1, sizefirst); /* copy the test */ - (p + init)->i.offset++; /* correct jump (because of new instruction) */ - init += sizefirst; - setinstaux(p + init, IChoice, sp - sizefirst + 1, 1); init++; - copypatt(p + init, p1 + sizefirst, sp - sizefirst - 1); - init += sp - sizefirst - 1; - setinst(p + init, ICommit, *size - (end - 1)); - return p; - } -} - - -static int union_l (lua_State *L) { - int l1, l2; - int size = 0; - Instruction *p1 = getpatt(L, 1, &l1); - Instruction *p2 = getpatt(L, 2, &l2); - CharsetTag st2; - if (isfail(p1)) /* check for simple identities */ - lua_pushvalue(L, 2); /* fail / a == a */ - else if (isfail(p2) || issucc(p1)) - lua_pushvalue(L, 1); /* a / fail == a; true / a == true */ - else { - tocharset(p2, &st2); - separateparts(L, p1, l1, l2, &size, &st2); - } - return 1; -} - - -static int repeatcharset (lua_State *L, Charset cs, int l1, int n) { - /* e; ...; e; span; */ - int i; - Instruction *p = newpatt(L, n*l1 + CHARSETINSTSIZE); - for (i = 0; i < n; i++) { - p += addpatt(L, p, 1); - } - setinst(p, ISpan, 0); - loopset(k, p[1].buff[k] = cs[k]); - return 1; -} - - -static Instruction *repeatheadfail (lua_State *L, int l1, int n) { - /* e; ...; e; L2: e'(L1); jump L2; L1: ... 
*/ - int i; - Instruction *p = newpatt(L, (n + 1)*l1 + 1); - Instruction *op = p; - for (i = 0; i < n; i++) { - p += addpatt(L, p, 1); - } - p += addpatt(L, p, 1); - check2test(p - l1, l1 + 1); - setinst(p, IJmp, -l1); - return op; -} - - -static Instruction *repeats (lua_State *L, Instruction *p1, int l1, int n) { - /* e; ...; e; choice L1; L2: e; partialcommit L2; L1: ... */ - int i; - Instruction *op = newpatt(L, (n + 1)*l1 + 2); - Instruction *p = op; - if (!verify(L, p1, p1, p1 + l1, 0, 0)) - luaL_error(L, "loop body may accept empty string"); - for (i = 0; i < n; i++) { - p += addpatt(L, p, 1); - } - setinst(p++, IChoice, 1 + l1 + 1); - p += addpatt(L, p, 1); - setinst(p, IPartialCommit, -l1); - return op; -} - - -static void optionalheadfail (lua_State *L, int l1, int n) { - Instruction *op = newpatt(L, n * l1); - Instruction *p = op; - int i; - for (i = 0; i < n; i++) { - p += addpatt(L, p, 1); - check2test(p - l1, (n - i)*l1); - } -} - - -static void optionals (lua_State *L, int l1, int n) { - /* choice L1; e; partialcommit L2; L2: ... e; L1: commit L3; L3: ... 
*/ - int i; - Instruction *op = newpatt(L, n*(l1 + 1) + 1); - Instruction *p = op; - setinst(p++, IChoice, 1 + n*(l1 + 1)); - for (i = 0; i < n; i++) { - p += addpatt(L, p, 1); - setinst(p++, IPartialCommit, 1); - } - setinst(p - 1, ICommit, 1); /* correct last commit */ - optimizechoice(op); -} - - -static int star_l (lua_State *L) { - int l1; - int n = luaL_checkint(L, 2); - Instruction *p1 = getpatt(L, 1, &l1); - if (n >= 0) { - CharsetTag st; - Instruction *op; - if (tocharset(p1, &st) == ISCHARSET) - return repeatcharset(L, st.cs, l1, n); - if (isheadfail(p1)) - op = repeatheadfail(L, l1, n); - else - op = repeats(L, p1, l1, n); - optimizecaptures(op); - optimizejumps(op); - } - else { - if (isheadfail(p1)) - optionalheadfail(L, l1, -n); - else - optionals(L, l1, -n); - } - return 1; -} - - -static int getlabel (lua_State *L, int labelidx) { - if (labelidx == 0) return 0; - else return value2fenv(L, labelidx); -} - - -static int capture_aux (lua_State *L, int kind, int labelidx) { - int l1, n; - Instruction *p1 = getpatt(L, 1, &l1); - int lc = skipchecks(p1, 0, &n); - if (lc == l1) { /* got whole pattern? 
*/ - /* may use a IFullCapture instruction at its end */ - Instruction *p = newpatt(L, l1 + 1); - int label = getlabel(L, labelidx); - p += addpatt(L, p, 1); - setinstcap(p, IFullCapture, label, kind, n); - } - else { /* must use open-close pair */ - Instruction *op = newpatt(L, 1 + l1 + 1); - Instruction *p = op; - setinstcap(p++, IOpenCapture, getlabel(L, labelidx), kind, 0); - p += addpatt(L, p, 1); - setinstcap(p, ICloseCapture, 0, Cclose, 0); - optimizecaptures(op); - } - return 1; -} - - -static int capture_l (lua_State *L) { return capture_aux(L, Csimple, 0); } -static int tcapture_l (lua_State *L) { return capture_aux(L, Ctable, 0); } -static int capsubst_l (lua_State *L) { return capture_aux(L, Csubst, 0); } - -static int rcapture_l (lua_State *L) { - switch (lua_type(L, 2)) { - case LUA_TFUNCTION: return capture_aux(L, Cfunction, 2); - case LUA_TTABLE: return capture_aux(L, Cquery, 2); - case LUA_TSTRING: return capture_aux(L, Cstring, 2); - default: return luaL_argerror(L, 2, "invalid replacement value"); - } -} - - -static int fold_l (lua_State *L) { - luaL_checktype(L, 2, LUA_TFUNCTION); - return capture_aux(L, Cfold, 2); -} - - -static int group_l (lua_State *L) { - if (lua_isnoneornil(L, 2)) - return capture_aux(L, Cgroup, 0); - else { - luaL_checkstring(L, 2); - return capture_aux(L, Cgroup, 2); - } -} - - -static int position_l (lua_State *L) { - Instruction *p = newpatt(L, 1); - setinstcap(p, IEmptyCapture, 0, Cposition, 0); - return 1; -} - - -static int backref_l (lua_State *L) { - Instruction *p = newpatt(L, 1); - int n = getlabel(L, 1); - setinstcap(p, IEmptyCaptureIdx, n, Cbackref, 0); - return 1; -} - - -static int argcap_l (lua_State *L) { - int n = luaL_checkint(L, 1); - Instruction *p = newpatt(L, 1); - luaL_argcheck(L, 0 < n && n <= SHRT_MAX, 1, "invalid argument index"); - setinstcap(p, IEmptyCapture, n, Carg, 0); - return 1; -} - - -static int matchtime_l (lua_State *L) { - int l1 = getpattl(L, 1); - Instruction *op = newpatt(L, 1 + l1 
+ 1); - Instruction *p = op; - luaL_checktype(L, 2, LUA_TFUNCTION); - setinstcap(p++, IOpenCapture, value2fenv(L, 2), Cruntime, 0); - p += addpatt(L, p, 1); - setinstcap(p, ICloseRunTime, 0, Cclose, 0); - optimizecaptures(op); - return 1; -} - - -static int capconst_l (lua_State *L) { - int i, j; - int n = lua_gettop(L); - Instruction *p = newpatt(L, n > 1 ? n + 2 : n); - lua_createtable(L, n, 0); /* new environment for the new pattern */ - if (n > 1) setinstcap(p++, IOpenCapture, 0, Cgroup, 0); - for (i = j = 1; i <= n; i++) { - if (lua_isnil(L, i)) - setinstcap(p++, IEmptyCaptureIdx, 0, Cconst, 0); - else { - setinstcap(p++, IEmptyCaptureIdx, j, Cconst, 0); - lua_pushvalue(L, i); - lua_rawseti(L, -2, j++); - } - } - if (n > 1) setinstcap(p++, ICloseCapture, 0, Cclose, 0); - lua_setfenv(L, -2); /* set environment */ - return 1; -} - - -/* }====================================================== */ - - -/* -** {====================================================== -** User-Defined Patterns -** ======================================================= -*/ - -static void l_newpf (lua_State *L, PattFunc f, const void *ud, size_t l) { - int n = instsize(l) + 1; - Instruction *p = newpatt(L, n); - if (n > MAXAUX) luaL_error(L, "pattern data too long"); - p[0].i.code = IFunc; - p[0].i.aux = n - 2; - p[0].i.offset = 0; - p[1].f = f; - memcpy(p[2].buff, ud, l); -} - -/* }====================================================== */ - - -/* -** {====================================================== -** Captures -** ======================================================= -*/ - - -typedef struct CapState { - Capture *cap; /* current capture */ - Capture *ocap; /* (original) capture list */ - lua_State *L; - int ptop; /* index of last argument to 'match' */ - const char *s; /* original string */ - int valuecached; /* value stored in cache slot */ -} CapState; - - -#define captype(cap) ((cap)->kind) - -#define isclosecap(cap) (captype(cap) == Cclose) - -#define closeaddr(c) ((c)->s 
+ (c)->siz - 1) - -#define isfullcap(cap) ((cap)->siz != 0) - -#define getfromenv(cs,v) lua_rawgeti((cs)->L, penvidx((cs)->ptop), v) -#define pushluaval(cs) getfromenv(cs, (cs)->cap->idx) - -#define pushsubject(cs, c) lua_pushlstring((cs)->L, (c)->s, (c)->siz - 1) - - -#define updatecache(cs,v) { if ((v) != (cs)->valuecached) updatecache_(cs,v); } - - -static void updatecache_ (CapState *cs, int v) { - getfromenv(cs, v); - lua_replace(cs->L, subscache(cs)); - cs->valuecached = v; -} - - -static int pushcapture (CapState *cs); - - -static Capture *findopen (Capture *cap) { - int n = 0; - for (;;) { - cap--; - if (isclosecap(cap)) n++; - else if (!isfullcap(cap)) - if (n-- == 0) return cap; - } -} - - -static Capture *nextcap (Capture *cap) { - if (isfullcap(cap)) return cap + 1; - else { - int n = 0; - for (;;) { - cap++; - if (isclosecap(cap)) { - if (n-- == 0) return cap + 1; - } - else if (!isfullcap(cap)) n++; - } - } -} - - -static int pushallvalues (CapState *cs, int addextra) { - Capture *co = cs->cap; - int n = 0; - if (isfullcap(cs->cap++)) { - pushsubject(cs, co); /* push whole match */ - return 1; - } - while (!isclosecap(cs->cap)) - n += pushcapture(cs); - if (addextra || n == 0) { /* need extra? */ - lua_pushlstring(cs->L, co->s, cs->cap->s - co->s); /* push whole match */ - n++; - } - cs->cap++; /* skip close entry */ - return n; -} - - -static Capture *findback (CapState *cs, Capture *cap) { - lua_State *L = cs->L; - for (;;) { - if (cap == cs->ocap) { /* not found */ - const char *s = lua_tostring(L, -1); - if (s == NULL) s = lua_pushfstring(L, "(a %s)", luaL_typename(L, -1)); - luaL_error(L, "back reference '%s' not found", s); - } - cap--; - if (isclosecap(cap)) - cap = findopen(cap); - else if (!isfullcap(cap)) - continue; /* opening an enclosing capture: skip and get previous */ - if (captype(cap) == Cgroup) { - getfromenv(cs, cap->idx); /* get group name */ - if (lua_equal(L, -2, -1)) { /* right group? 
*/ - lua_pop(L, 2); /* remove reference name and group name */ - return cap; - } - else lua_pop(L, 1); /* remove group name */ - } - } -} - - -static int backrefcap (CapState *cs) { - int n; - Capture *curr = cs->cap; - pushluaval(cs); /* reference name */ - cs->cap = findback(cs, curr); - n = pushallvalues(cs, 0); - cs->cap = curr + 1; - return n; -} - - -static int tablecap (CapState *cs) { - lua_State *L = cs->L; - int n = 0; - lua_newtable(L); - if (isfullcap(cs->cap++)) - return 1; /* table is empty */ - while (!isclosecap(cs->cap)) { - if (captype(cs->cap) == Cgroup && cs->cap->idx != 0) { /* named group? */ - int k; - pushluaval(cs); /* push group name */ - k = pushallvalues(cs, 0); - if (k == 0) { /* no value? */ - lua_pop(L, 1); /* remove group name */ - continue; /* and go on */ - } - else if (k > 1) - lua_pop(L, k - 1); /* keep just one value */ - lua_settable(L, -3); - } - else { - int i; - int k = pushcapture(cs); - for (i = k; i > 0; i--) - lua_rawseti(L, -(i + 1), n + i); - n += k; - } - } - cs->cap++; /* skip close entry */ - return 1; -} - - -static int querycap (CapState *cs) { - int idx = cs->cap->idx; - int n = pushallvalues(cs, 0); - if (n > 1) /* extra captures? 
*/ - lua_pop(cs->L, n - 1); /* throw them away */ - updatecache(cs, idx); - lua_gettable(cs->L, subscache(cs)); - if (!lua_isnil(cs->L, -1)) - return 1; - else { - lua_pop(cs->L, 1); /* remove value */ - return 0; - } -} - - -static int foldcap (CapState *cs) { - int n; - lua_State *L = cs->L; - int idx = cs->cap->idx; - if (isfullcap(cs->cap++) || isclosecap(cs->cap) || (n = pushcapture(cs)) == 0) - return luaL_error(L, "no initial value for fold capture"); - if (n > 1) - lua_pop(L, n - 1); /* leave only one result */ - while (!isclosecap(cs->cap)) { - updatecache(cs, idx); - lua_pushvalue(L, subscache(cs)); /* get folding function */ - lua_insert(L, -2); /* put it before accumulator */ - n = pushcapture(cs); /* get other captures */ - lua_call(L, n + 1, 1); /* call folding function */ - } - cs->cap++; /* skip close entry */ - return 1; -} - - -static int functioncap (CapState *cs) { - int n; - int top = lua_gettop(cs->L); - pushluaval(cs); - n = pushallvalues(cs, 0); - lua_call(cs->L, n, LUA_MULTRET); - return lua_gettop(cs->L) - top; -} - - -static int runtimecap (lua_State *L, Capture *close, Capture *ocap, - const char *o, const char *s, int ptop) { - CapState cs; - int n; - Capture *open = findopen(close); - assert(captype(open) == Cruntime); - close->kind = Cclose; - close->s = s; - cs.ocap = ocap; cs.cap = open; cs.L = L; - cs.s = o; cs.valuecached = 0; cs.ptop = ptop; - luaL_checkstack(L, 4, "too many runtime captures"); - pushluaval(&cs); - lua_pushvalue(L, SUBJIDX); /* push original subject */ - lua_pushinteger(L, s - o + 1); /* current position */ - n = pushallvalues(&cs, 0); - lua_call(L, n + 2, LUA_MULTRET); - return close - open; -} - - - -typedef struct StrAux { - int isstring; - union { - Capture *cp; - struct { - const char *s; - const char *e; - } s; - } u; -} StrAux; - -#define MAXSTRCAPS 10 - -static int getstrcaps (CapState *cs, StrAux *cps, int n) { - int k = n++; - cps[k].isstring = 1; - cps[k].u.s.s = cs->cap->s; - if 
(!isfullcap(cs->cap++)) { - while (!isclosecap(cs->cap)) { - if (n >= MAXSTRCAPS) /* too many captures? */ - cs->cap = nextcap(cs->cap); /* skip it */ - else if (captype(cs->cap) == Csimple) - n = getstrcaps(cs, cps, n); - else { - cps[n].isstring = 0; - cps[n].u.cp = cs->cap; - cs->cap = nextcap(cs->cap); - n++; - } - } - cs->cap++; /* skip close */ - } - cps[k].u.s.e = closeaddr(cs->cap - 1); - return n; -} - - -/* -** add next capture (which should be a string) to buffer -*/ -static int addonestring (luaL_Buffer *b, CapState *cs, const char *what); - - -static void stringcap (luaL_Buffer *b, CapState *cs) { - StrAux cps[MAXSTRCAPS]; - int n; - size_t len, i; - const char *c; - updatecache(cs, cs->cap->idx); - c = lua_tolstring(cs->L, subscache(cs), &len); - n = getstrcaps(cs, cps, 0) - 1; - for (i = 0; i < len; i++) { - if (c[i] != '%' || c[++i] < '0' || c[i] > '9') - luaL_addchar(b, c[i]); - else { - int l = c[i] - '0'; - if (l > n) - luaL_error(cs->L, "invalid capture index (%d)", l); - else if (cps[l].isstring) - luaL_addlstring(b, cps[l].u.s.s, cps[l].u.s.e - cps[l].u.s.s); - else { - Capture *curr = cs->cap; - cs->cap = cps[l].u.cp; - if (addonestring(b, cs, "capture") == 0) - luaL_error(cs->L, "no values in capture index %d", l); - cs->cap = curr; - } - } - } -} - - -static void substcap (luaL_Buffer *b, CapState *cs) { - const char *curr = cs->cap->s; - if (isfullcap(cs->cap)) /* no nested captures? */ - luaL_addlstring(b, curr, cs->cap->siz - 1); /* keep original text */ - else { - cs->cap++; - while (!isclosecap(cs->cap)) { - const char *next = cs->cap->s; - luaL_addlstring(b, curr, next - curr); /* add text up to capture */ - if (addonestring(b, cs, "replacement") == 0) /* no capture value? 
*/ - curr = next; /* keep original text in final result */ - else - curr = closeaddr(cs->cap - 1); /* continue after match */ - } - luaL_addlstring(b, curr, cs->cap->s - curr); /* add last piece of text */ - } - cs->cap++; /* go to next capture */ -} - - -static int addonestring (luaL_Buffer *b, CapState *cs, const char *what) { - switch (captype(cs->cap)) { - case Cstring: - stringcap(b, cs); /* add capture directly to buffer */ - return 1; - case Csubst: - substcap(b, cs); /* add capture directly to buffer */ - return 1; - default: { - lua_State *L = cs->L; - int n = pushcapture(cs); - if (n > 0) { - if (n > 1) lua_pop(L, n - 1); /* only one result */ - if (!lua_isstring(L, -1)) - luaL_error(L, "invalid %s value (a %s)", what, luaL_typename(L, -1)); - luaL_addvalue(b); - } - return n; - } - } -} - - -static int pushcapture (CapState *cs) { - luaL_checkstack(cs->L, 4, "too many captures"); - switch (captype(cs->cap)) { - case Cposition: { - lua_pushinteger(cs->L, cs->cap->s - cs->s + 1); - cs->cap++; - return 1; - } - case Cconst: { - pushluaval(cs); - cs->cap++; - return 1; - } - case Carg: { - int arg = (cs->cap++)->idx; - if (arg + FIXEDARGS > cs->ptop) - return luaL_error(cs->L, "reference to absent argument #%d", arg); - lua_pushvalue(cs->L, arg + FIXEDARGS); - return 1; - } - case Csimple: { - int k = pushallvalues(cs, 1); - if (k > 1) - lua_insert(cs->L, -k); /* whole match is first result */ - return k; - } - case Cruntime: { - int n = 0; - while (!isclosecap(cs->cap++)) { - luaL_checkstack(cs->L, 4, "too many captures"); - lua_pushvalue(cs->L, (cs->cap - 1)->idx); - n++; - } - return n; - } - case Cstring: { - luaL_Buffer b; - luaL_buffinit(cs->L, &b); - stringcap(&b, cs); - luaL_pushresult(&b); - return 1; - } - case Csubst: { - luaL_Buffer b; - luaL_buffinit(cs->L, &b); - substcap(&b, cs); - luaL_pushresult(&b); - return 1; - } - case Cgroup: { - if (cs->cap->idx == 0) /* anonymous group? 
*/ - return pushallvalues(cs, 0); /* add all nested values */ - else { /* named group: add no values */ - cs->cap = nextcap(cs->cap); /* skip capture */ - return 0; - } - } - case Cbackref: return backrefcap(cs); - case Ctable: return tablecap(cs); - case Cfunction: return functioncap(cs); - case Cquery: return querycap(cs); - case Cfold: return foldcap(cs); - default: assert(0); return 0; - } -} - - -static int getcaptures (lua_State *L, const char *s, const char *r, int ptop) { - Capture *capture = (Capture *)lua_touserdata(L, caplistidx(ptop)); - int n = 0; - if (!isclosecap(capture)) { /* is there any capture? */ - CapState cs; - cs.ocap = cs.cap = capture; cs.L = L; - cs.s = s; cs.valuecached = 0; cs.ptop = ptop; - do { /* collect their values */ - n += pushcapture(&cs); - } while (!isclosecap(cs.cap)); - } - if (n == 0) { /* no capture values? */ - lua_pushinteger(L, r - s + 1); /* return only end position */ - n = 1; - } - return n; -} - -/* }====================================================== */ - - -static int version_l (lua_State *L) { - lua_pushstring(L, VERSION); - return 1; -} - - -static int type_l (lua_State *L) { - if (testpattern(L, 1)) - lua_pushliteral(L, "pattern"); - else - lua_pushnil(L); - return 1; -} - - -static void createcat (lua_State *L, const char *catname, int (catf) (int)) { - Instruction *p = newcharset(L); - int i; - for (i = 0; i < CHAR_MAX; i++) - if (catf(i)) setchar(p[1].buff, i); - lua_setfield(L, -2, catname); -} - - -static int locale_l (lua_State *L) { - if (lua_isnoneornil(L, 1)) { - lua_settop(L, 0); - lua_createtable(L, 0, 12); - } - else { - luaL_checktype(L, 1, LUA_TTABLE); - lua_settop(L, 1); - } - createcat(L, "alnum", isalnum); - createcat(L, "alpha", isalpha); - createcat(L, "cntrl", iscntrl); - createcat(L, "digit", isdigit); - createcat(L, "graph", isgraph); - createcat(L, "lower", islower); - createcat(L, "print", isprint); - createcat(L, "punct", ispunct); - createcat(L, "space", isspace); - createcat(L, 
"upper", isupper); - createcat(L, "xdigit", isxdigit); - return 1; -} - - -static int setmax (lua_State *L) { - luaL_optinteger(L, 1, -1); - lua_settop(L, 1); - lua_setfield(L, LUA_REGISTRYINDEX, MAXSTACKIDX); - return 0; -} - - -static int printpat_l (lua_State *L) { - Instruction *p = getpatt(L, 1, NULL); - int n, i; - lua_getfenv(L, 1); - n = ktablelen(L, -1); - printf("["); - for (i = 1; i <= n; i++) { - printf("%d = ", i); - lua_rawgeti(L, -1, i); - if (lua_isstring(L, -1)) - printf("%s ", lua_tostring(L, -1)); - else - printf("%s ", lua_typename(L, lua_type(L, -1))); - lua_pop(L, 1); - } - printf("]\n"); - printpatt(p); - return 0; -} - - -static int matchl (lua_State *L) { - Capture capture[INITCAPSIZE]; - const char *r; - size_t l; - Instruction *p = getpatt(L, 1, NULL); - const char *s = luaL_checklstring(L, SUBJIDX, &l); - int ptop = lua_gettop(L); - lua_Integer ii = luaL_optinteger(L, 3, 1); - size_t i = (ii > 0) ? - (((size_t)ii <= l) ? (size_t)ii - 1 : l) : - (((size_t)-ii <= l) ? 
l - ((size_t)-ii) : 0); - lua_pushnil(L); /* subscache */ - lua_pushlightuserdata(L, capture); /* caplistidx */ - lua_getfenv(L, 1); /* penvidx */ - r = match(L, s, s + i, s + l, p, capture, ptop); - if (r == NULL) { - lua_pushnil(L); - return 1; - } - return getcaptures(L, s, r, ptop); -} - - -static struct luaL_Reg pattreg[] = { - {"match", matchl}, - {"print", printpat_l}, - {"locale", locale_l}, - {"setmaxstack", setmax}, - {"B", pattbehind}, - {"C", capture_l}, - {"Cf", fold_l}, - {"Cc", capconst_l}, - {"Cg", group_l}, - {"Cp", position_l}, - {"Cb", backref_l}, - {"Carg", argcap_l}, - {"Cmt", matchtime_l}, - {"Cs", capsubst_l}, - {"Ct", tcapture_l}, - {"P", pattern_l}, - {"R", range_l}, - {"S", set_l}, - {"V", nter_l}, - {"type", type_l}, - {"version", version_l}, - {NULL, NULL} -}; - - -static struct luaL_Reg metapattreg[] = { - {"__add", union_l}, - {"__pow", star_l}, - {"__sub", diff_l}, - {"__mul", concat_l}, - {"__div", rcapture_l}, - {"__unm", unm_l}, - {"__len", pattand_l}, - {NULL, NULL} -}; - - -int luaopen_lpeg (lua_State *L); -int luaopen_lpeg (lua_State *L) { - lua_pushcfunction(L, (lua_CFunction)&l_newpf); /* new-pattern function */ - lua_setfield(L, LUA_REGISTRYINDEX, KEYNEWPATT); /* register it */ - luaL_newmetatable(L, PATTERN_T); - lua_pushnumber(L, MAXBACK); - lua_setfield(L, LUA_REGISTRYINDEX, MAXSTACKIDX); - luaL_register(L, NULL, metapattreg); - luaL_register(L, "lpeg", pattreg); - lua_pushliteral(L, "__index"); - lua_pushvalue(L, -2); - lua_settable(L, -4); - return 1; -} - diff --git a/scripting/lpeg.h b/scripting/lpeg.h deleted file mode 100644 index 1902ab74..00000000 --- a/scripting/lpeg.h +++ /dev/null @@ -1,42 +0,0 @@ -/* -** $Id: lpeg.h,v 1.1 2009/12/23 16:15:36 roberto Exp $ -** LPeg - PEG pattern matching for Lua -** Copyright 2009, Lua.org & PUC-Rio (see 'lpeg.html' for license) -** written by Roberto Ierusalimschy -*/ - -#ifndef lpeg_h -#define lpeg_h - -#ifdef LUA_52 - #include "..\..\lua52\src\lua.h" -#else - #include 
"..\lua.h" -#endif - - -#define KEYNEWPATT "lpeg.newpf" - - -/* -** type of extension functions that define new "patterns" for LPEG -** It should return the new current position or NULL if match fails -*/ -typedef const char *(*PattFunc) (const char *s, /* current position */ - const char *e, /* string end */ - const char *o, /* string start */ - const void *ud); /* user data */ - -/* -** function to create new patterns based on 'PattFunc' functions. -** This function is available at *registry[KEYNEWPATT]. (Notice -** the extra indirection; the userdata at the registry points to -** a variable that points to the function. In ANSI C a void* cannot -** point to a function.) -*/ -typedef void (*Newpf) (lua_State *L, - PattFunc f, /* pattern */ - const void *ud, /* (user) data to be passed to 'f' */ - size_t l); /* size of data to be passed to 'f' */ - -#endif diff --git a/scripting/lpeg/lpcap.c b/scripting/lpeg/lpcap.c new file mode 100644 index 00000000..15d8936b --- /dev/null +++ b/scripting/lpeg/lpcap.c @@ -0,0 +1,544 @@ +#pragma warning( disable : 4244) // conversion from 'int ' to 'short ', possible loss of data + +/* +** $Id: lpcap.c,v 1.5 2014/12/12 16:58:47 roberto Exp $ +** Copyright 2007, Lua.org & PUC-Rio (see 'lpeg.html' for license) +*/ + +#ifdef LUA_52 + #include "..\..\..\lua52\src\lua.h" + #include "..\..\..\lua52\src\lauxlib.h" +#else + #include "..\..\lua.h" + #include "..\..\lauxlib.h" +#endif + +#include "lpcap.h" +#include "lptypes.h" + + +#define captype(cap) ((cap)->kind) + +#define isclosecap(cap) (captype(cap) == Cclose) + +#define closeaddr(c) ((c)->s + (c)->siz - 1) + +#define isfullcap(cap) ((cap)->siz != 0) + +#define getfromktable(cs,v) lua_rawgeti((cs)->L, ktableidx((cs)->ptop), v) + +#define pushluaval(cs) getfromktable(cs, (cs)->cap->idx) + + + +/* +** Put at the cache for Lua values the value indexed by 'v' in ktable +** of the running pattern (if it is not there yet); returns its index. 
+*/ +static int updatecache (CapState *cs, int v) { + int idx = cs->ptop + 1; /* stack index of cache for Lua values */ + if (v != cs->valuecached) { /* not there? */ + getfromktable(cs, v); /* get value from 'ktable' */ + lua_replace(cs->L, idx); /* put it at reserved stack position */ + cs->valuecached = v; /* keep track of what is there */ + } + return idx; +} + + +static int pushcapture (CapState *cs); /* forward declaration: the helpers below call it and it calls them */ + + +/* +** Goes back in a list of captures looking for an open capture +** corresponding to a close (the scan has no lower bound, so a matching open entry must exist) +*/ +static Capture *findopen (Capture *cap) { + int n = 0; /* number of closes waiting an open */ + for (;;) { + cap--; + if (isclosecap(cap)) n++; /* one more open to skip */ + else if (!isfullcap(cap)) + if (n-- == 0) return cap; /* open entry: it is ours when no closes are pending */ + } +} + + +/* +** Go to the next capture (skips the current capture together with everything nested in it) +*/ +static void nextcap (CapState *cs) { + Capture *cap = cs->cap; + if (!isfullcap(cap)) { /* not a single capture? */ + int n = 0; /* number of opens waiting a close */ + for (;;) { /* look for corresponding close */ + cap++; + if (isclosecap(cap)) { + if (n-- == 0) break; + } + else if (!isfullcap(cap)) n++; + } + } + cs->cap = cap + 1; /* + 1 to skip last close (or entire single capture) */ +} + + +/* +** Push on the Lua stack all values generated by nested captures inside +** the current capture. Returns number of values pushed. 'addextra' +** makes it push the entire match after all captured values. The +** entire match is pushed also if there are no other nested values, +** so the function never returns zero. +*/ +static int pushnestedvalues (CapState *cs, int addextra) { + Capture *co = cs->cap; /* remember the opening entry; 'cs->cap' moves on */ + if (isfullcap(cs->cap++)) { /* no nested captures? */ + lua_pushlstring(cs->L, co->s, co->siz - 1); /* push whole match */ + return 1; /* that is it */ + } + else { + int n = 0; + while (!isclosecap(cs->cap)) /* repeat for all nested patterns */ + n += pushcapture(cs); + if (addextra || n == 0) { /* need extra? 
*/ + lua_pushlstring(cs->L, co->s, cs->cap->s - co->s); /* push whole match */ + n++; + } + cs->cap++; /* skip close entry */ + return n; + } +} + + +/* +** Push only the first value generated by nested captures +*/ +static void pushonenestedvalue (CapState *cs) { + int n = pushnestedvalues(cs, 0); /* always at least 1 */ + if (n > 1) + lua_pop(cs->L, n - 1); /* pop extra values */ +} + + +/* +** Try to find a named group capture with the name given at the top of +** the stack; goes backward from 'cap'. On a hit, the name is popped and the group's entry returned; otherwise a Lua error is raised. +*/ +static Capture *findback (CapState *cs, Capture *cap) { + lua_State *L = cs->L; + while (cap-- > cs->ocap) { /* repeat until end of list */ + if (isclosecap(cap)) + cap = findopen(cap); /* skip nested captures */ + else if (!isfullcap(cap)) + continue; /* opening an enclosing capture: skip and get previous */ + if (captype(cap) == Cgroup) { + getfromktable(cs, cap->idx); /* get group name */ + if (lua_equal(L, -2, -1)) { /* right group? */ + lua_pop(L, 2); /* remove reference name and group name */ + return cap; + } + else lua_pop(L, 1); /* remove group name */ + } + } + luaL_error(L, "back reference '%s' not found", lua_tostring(L, -1)); + return NULL; /* to avoid warnings */ +} + + +/* +** Back-reference capture. Return number of values pushed. +*/ +static int backrefcap (CapState *cs) { + int n; + Capture *curr = cs->cap; + pushluaval(cs); /* reference name */ + cs->cap = findback(cs, curr); /* find corresponding group */ + n = pushnestedvalues(cs, 0); /* push group's values */ + cs->cap = curr + 1; /* resume right after the back-reference entry */ + return n; +} + + +/* +** Table capture: creates a new table and populates it with nested +** captures. +*/ +static int tablecap (CapState *cs) { + lua_State *L = cs->L; + int n = 0; /* number of positional values stored so far */ + lua_newtable(L); + if (isfullcap(cs->cap++)) + return 1; /* table is empty */ + while (!isclosecap(cs->cap)) { + if (captype(cs->cap) == Cgroup && cs->cap->idx != 0) { /* named group? 
*/ + pushluaval(cs); /* push group name */ + pushonenestedvalue(cs); + lua_settable(L, -3); /* table[name] = first value of the group */ + } + else { /* not a named group */ + int i; + int k = pushcapture(cs); + for (i = k; i > 0; i--) /* store all values into table */ + lua_rawseti(L, -(i + 1), n + i); /* pops the top value; values are stored last one first */ + n += k; + } + } + cs->cap++; /* skip close entry */ + return 1; /* number of values pushed (only the table) */ +} + + +/* +** Table-query capture: indexes the table kept in the ktable with the first nested value; pushes the result unless it is nil +*/ +static int querycap (CapState *cs) { + int idx = cs->cap->idx; /* read before 'cs->cap' advances */ + pushonenestedvalue(cs); /* get nested capture */ + lua_gettable(cs->L, updatecache(cs, idx)); /* query cap. value at table */ + if (!lua_isnil(cs->L, -1)) + return 1; + else { /* no value */ + lua_pop(cs->L, 1); /* remove nil */ + return 0; + } +} + + +/* +** Fold capture: the first nested capture gives the initial accumulator; each later one is folded in by calling the function kept in the ktable +*/ +static int foldcap (CapState *cs) { + int n; + lua_State *L = cs->L; + int idx = cs->cap->idx; + if (isfullcap(cs->cap++) || /* no nested captures? */ + isclosecap(cs->cap) || /* no nested captures (large subject)? */ + (n = pushcapture(cs)) == 0) /* nested captures with no values? 
*/ + return luaL_error(L, "no initial value for fold capture"); + if (n > 1) + lua_pop(L, n - 1); /* leave only one result for accumulator */ + while (!isclosecap(cs->cap)) { + lua_pushvalue(L, updatecache(cs, idx)); /* get folding function */ + lua_insert(L, -2); /* put it before accumulator */ + n = pushcapture(cs); /* get next capture's values */ + lua_call(L, n + 1, 1); /* call folding function */ + } + cs->cap++; /* skip close entry */ + return 1; /* only accumulator left on the stack */ +} + + +/* +** Function capture: calls the ktable function with the nested values; returns the number of results it left on the stack +*/ +static int functioncap (CapState *cs) { + int n; + int top = lua_gettop(cs->L); + pushluaval(cs); /* push function */ + n = pushnestedvalues(cs, 0); /* push nested captures */ + lua_call(cs->L, n, LUA_MULTRET); /* call function */ + return lua_gettop(cs->L) - top; /* return function's results */ +} + + +/* +** Select capture: keeps only the 'idx'-th nested value (no value at all when 'idx' is 0) +*/ +static int numcap (CapState *cs) { + int idx = cs->cap->idx; /* value to select */ + if (idx == 0) { /* no values? */ + nextcap(cs); /* skip entire capture */ + return 0; /* no value produced */ + } + else { + int n = pushnestedvalues(cs, 0); + if (n < idx) /* invalid index? */ + return luaL_error(cs->L, "no capture '%d'", idx); + else { + lua_pushvalue(cs->L, -(n - idx + 1)); /* get selected capture */ + lua_replace(cs->L, -(n + 1)); /* put it in place of 1st capture */ + lua_pop(cs->L, n - 1); /* remove other captures */ + return 1; + } + } +} + + +/* +** Return the stack index of the first runtime capture in the given +** list of captures (or zero if no runtime captures) +*/ +int finddyncap (Capture *cap, Capture *last) { + for (; cap < last; cap++) { /* 'last' itself is not examined */ + if (cap->kind == Cruntime) + return cap->idx; /* stack position of first capture */ + } + return 0; /* no dynamic captures in this segment */ +} + + +/* +** Calls a runtime capture. Returns number of captures removed by +** the call, including the initial Cgroup. (Captures to be added are +** on the Lua stack.) 
+*/ +int runtimecap (CapState *cs, Capture *close, const char *s, int *rem) { + int n, id; + lua_State *L = cs->L; + int otop = lua_gettop(L); /* stack top before anything is pushed */ + Capture *open = findopen(close); + assert(captype(open) == Cgroup); + id = finddyncap(open, close); /* get first dynamic capture argument */ + close->kind = Cclose; /* closes the group */ + close->s = s; + cs->cap = open; cs->valuecached = 0; /* prepare capture state */ + luaL_checkstack(L, 4, "too many runtime captures"); + pushluaval(cs); /* push function to be called */ + lua_pushvalue(L, SUBJIDX); /* push original subject */ + lua_pushinteger(L, s - cs->s + 1); /* push current position */ + n = pushnestedvalues(cs, 0); /* push nested captures */ + lua_call(L, n + 2, LUA_MULTRET); /* call dynamic function */ + if (id > 0) { /* are there old dynamic captures to be removed? */ + int i; + for (i = id; i <= otop; i++) + lua_remove(L, id); /* remove old dynamic captures (always index 'id': the values above shift down) */ + *rem = otop - id + 1; /* total number of dynamic captures removed */ + } + else + *rem = 0; /* no dynamic captures removed */ + return close - open; /* number of captures of all kinds removed */ +} + + +/* +** Auxiliary structure for substitution and string captures: keep +** information about nested captures for future use, to avoid pushing +** string results into Lua +*/ +typedef struct StrAux { + int isstring; /* whether capture is a string */ + union { + Capture *cp; /* if not a string, respective capture */ + struct { /* if it is a string... */ + const char *s; /* ... starts here */ + const char *e; /* ... ends here */ + } s; + } u; +} StrAux; + +#define MAXSTRCAPS 10 /* size of the 'StrAux' array used by string captures */ + +/* +** Collect values from current capture into array 'cps'. Current +** capture must be Cstring (first call) or Csimple (recursive calls). +** (In first call, fills %0 with whole match for Cstring.) +** Returns number of elements in the array that were filled. 
+*/ +static int getstrcaps (CapState *cs, StrAux *cps, int n) { + int k = n++; /* slot 'k' describes this capture's own match */ + cps[k].isstring = 1; /* get string value */ + cps[k].u.s.s = cs->cap->s; /* starts here */ + if (!isfullcap(cs->cap++)) { /* nested captures? */ + while (!isclosecap(cs->cap)) { /* traverse them */ + if (n >= MAXSTRCAPS) /* too many captures? */ + nextcap(cs); /* skip extra captures (will not need them) */ + else if (captype(cs->cap) == Csimple) /* string? */ + n = getstrcaps(cs, cps, n); /* put info. into array */ + else { + cps[n].isstring = 0; /* not a string */ + cps[n].u.cp = cs->cap; /* keep original capture */ + nextcap(cs); + n++; + } + } + cs->cap++; /* skip close */ + } + cps[k].u.s.e = closeaddr(cs->cap - 1); /* ends here */ + return n; +} + + +/* +** add next capture value (which should be a string) to buffer 'b' +*/ +static int addonestring (luaL_Buffer *b, CapState *cs, const char *what); + + +/* +** String capture: add result to buffer 'b' (instead of pushing +** it into the stack) +*/ +static void stringcap (luaL_Buffer *b, CapState *cs) { + StrAux cps[MAXSTRCAPS]; + int n; + size_t len, i; + const char *fmt; /* format string */ + fmt = lua_tolstring(cs->L, updatecache(cs, cs->cap->idx), &len); + n = getstrcaps(cs, cps, 0) - 1; /* collect nested captures; 'n' is the highest valid index */ + for (i = 0; i < len; i++) { /* traverse the format string */ + if (fmt[i] != '%') /* not an escape? */ + luaL_addchar(b, fmt[i]); /* add it to buffer */ + else if (fmt[++i] < '0' || fmt[i] > '9') /* not followed by a digit? 
*/ + luaL_addchar(b, fmt[i]); /* add to buffer */ + else { + int l = fmt[i] - '0'; /* capture index */ + if (l > n) + luaL_error(cs->L, "invalid capture index (%d)", l); + else if (cps[l].isstring) + luaL_addlstring(b, cps[l].u.s.s, cps[l].u.s.e - cps[l].u.s.s); /* copy the matched text */ + else { + Capture *curr = cs->cap; + cs->cap = cps[l].u.cp; /* go back to evaluate that nested capture */ + if (!addonestring(b, cs, "capture")) + luaL_error(cs->L, "no values in capture index %d", l); + cs->cap = curr; /* continue from where it stopped */ + } + } + } +} + + +/* +** Substitution capture: add result to buffer 'b' +*/ +static void substcap (luaL_Buffer *b, CapState *cs) { + const char *curr = cs->cap->s; /* start of the subject text not yet copied */ + if (isfullcap(cs->cap)) /* no nested captures? */ + luaL_addlstring(b, curr, cs->cap->siz - 1); /* keep original text */ + else { + cs->cap++; /* skip open entry */ + while (!isclosecap(cs->cap)) { /* traverse nested captures */ + const char *next = cs->cap->s; + luaL_addlstring(b, curr, next - curr); /* add text up to capture */ + if (addonestring(b, cs, "replacement")) + curr = closeaddr(cs->cap - 1); /* continue after match */ + else /* no capture value */ + curr = next; /* keep original text in final result */ + } + luaL_addlstring(b, curr, cs->cap->s - curr); /* add last piece of text */ + } + cs->cap++; /* go to next capture */ +} + + +/* +** Evaluates a capture and adds its first value to buffer 'b'; returns +** whether there was a value +*/ +static int addonestring (luaL_Buffer *b, CapState *cs, const char *what) { + switch (captype(cs->cap)) { + case Cstring: + stringcap(b, cs); /* add capture directly to buffer */ + return 1; + case Csubst: + substcap(b, cs); /* add capture directly to buffer */ + return 1; + default: { + lua_State *L = cs->L; + int n = pushcapture(cs); + if (n > 0) { + if (n > 1) lua_pop(L, n - 1); /* only one result */ + if (!lua_isstring(L, -1)) + luaL_error(L, "invalid %s value (a %s)", what, luaL_typename(L, -1)); + luaL_addvalue(b); /* moves the value on top of the stack into the buffer */ + } + return n; + } + } 
+} + + +/* +** Push all values of the current capture into the stack; returns +** number of values pushed +*/ +static int pushcapture (CapState *cs) { + lua_State *L = cs->L; + luaL_checkstack(L, 4, "too many captures"); + switch (captype(cs->cap)) { + case Cposition: { + lua_pushinteger(L, cs->cap->s - cs->s + 1); /* 1-based position in the subject */ + cs->cap++; + return 1; + } + case Cconst: { + pushluaval(cs); /* the constant is kept in the ktable */ + cs->cap++; + return 1; + } + case Carg: { + int arg = (cs->cap++)->idx; + if (arg + FIXEDARGS > cs->ptop) + return luaL_error(L, "reference to absent extra argument #%d", arg); + lua_pushvalue(L, arg + FIXEDARGS); + return 1; + } + case Csimple: { + int k = pushnestedvalues(cs, 1); + lua_insert(L, -k); /* make whole match be first result */ + return k; + } + case Cruntime: { + lua_pushvalue(L, (cs->cap++)->idx); /* value is in the stack */ + return 1; + } + case Cstring: { + luaL_Buffer b; + luaL_buffinit(L, &b); + stringcap(&b, cs); + luaL_pushresult(&b); + return 1; + } + case Csubst: { + luaL_Buffer b; + luaL_buffinit(L, &b); + substcap(&b, cs); + luaL_pushresult(&b); + return 1; + } + case Cgroup: { + if (cs->cap->idx == 0) /* anonymous group? */ + return pushnestedvalues(cs, 0); /* add all nested values */ + else { /* named group: add no values */ + nextcap(cs); /* skip capture */ + return 0; + } + } + case Cbackref: return backrefcap(cs); + case Ctable: return tablecap(cs); + case Cfunction: return functioncap(cs); + case Cnum: return numcap(cs); + case Cquery: return querycap(cs); + case Cfold: return foldcap(cs); + default: assert(0); return 0; + } +} + + +/* +** Prepare a CapState structure and traverse the entire list of +** captures in the stack pushing its results. 's' is the subject +** string, 'r' is the final position of the match, and 'ptop' +** the index in the stack where some useful values were pushed. +** Returns the number of results pushed. (If the list produces no +** results, push the final position of the match.) 
+*/ +int getcaptures (lua_State *L, const char *s, const char *r, int ptop) { + Capture *capture = (Capture *)lua_touserdata(L, caplistidx(ptop)); + int n = 0; /* number of values pushed so far */ + if (!isclosecap(capture)) { /* is there any capture? */ + CapState cs; + cs.ocap = cs.cap = capture; cs.L = L; + cs.s = s; cs.valuecached = 0; cs.ptop = ptop; + do { /* collect their values */ + n += pushcapture(&cs); + } while (!isclosecap(cs.cap)); + } + if (n == 0) { /* no capture values? */ + lua_pushinteger(L, r - s + 1); /* return only end position */ + n = 1; + } + return n; +} + + diff --git a/scripting/lpeg/lpcap.h b/scripting/lpeg/lpcap.h new file mode 100644 index 00000000..d762fdcf --- /dev/null +++ b/scripting/lpeg/lpcap.h @@ -0,0 +1,43 @@ +/* +** $Id: lpcap.h,v 1.2 2015/02/27 17:13:17 roberto Exp $ +*/ + +#if !defined(lpcap_h) +#define lpcap_h + + +#include "lptypes.h" + + +/* kinds of captures ('Cclose' ends an open capture and also terminates the capture list) */ +typedef enum CapKind { + Cclose, Cposition, Cconst, Cbackref, Carg, Csimple, Ctable, Cfunction, + Cquery, Cstring, Cnum, Csubst, Cfold, Cruntime, Cgroup +} CapKind; + + +typedef struct Capture { + const char *s; /* subject position */ + unsigned short idx; /* extra info (group name, arg index, etc.) 
*/ + byte kind; /* kind of capture */ + byte siz; /* size of full capture + 1 (0 = not a full capture) */ +} Capture; + + +typedef struct CapState { + Capture *cap; /* current capture */ + Capture *ocap; /* (original) capture list */ + lua_State *L; /* Lua state where capture values are pushed */ + int ptop; /* index of last argument to 'match' */ + const char *s; /* original string */ + int valuecached; /* value stored in cache slot */ +} CapState; + + +int runtimecap (CapState *cs, Capture *close, const char *s, int *rem); +int getcaptures (lua_State *L, const char *s, const char *r, int ptop); +int finddyncap (Capture *cap, Capture *last); + +#endif + + diff --git a/scripting/lpeg/lpcode.c b/scripting/lpeg/lpcode.c new file mode 100644 index 00000000..09ecbeef --- /dev/null +++ b/scripting/lpeg/lpcode.c @@ -0,0 +1,993 @@ +#pragma warning( disable : 4244) // conversion from 'int ' to 'short ', possible loss of data + +/* +** $Id: lpcode.c,v 1.21 2014/12/12 17:01:29 roberto Exp $ +** Copyright 2007, Lua.org & PUC-Rio (see 'lpeg.html' for license) +*/ + +#include /* NOTE(review): header name is missing here (angle-bracket text looks stripped); UCHAR_MAX is used below, so presumably <limits.h> - confirm against upstream */ + + +#ifdef LUA_52 + #include "..\..\..\lua52\src\lua.h" + #include "..\..\..\lua52\src\lauxlib.h" +#else + #include "..\..\lua.h" + #include "..\..\lauxlib.h" +#endif + +#include "lptypes.h" +#include "lpcode.h" + + +/* signals a "no-instruction" */ +#define NOINST -1 + + + +static const Charset fullset_ = + {{0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF}}; + +static const Charset *fullset = &fullset_; /* set with every bit on, used as the "anything may follow" set */ + +/* +** {====================================================== +** Analysis and some optimizations +** ======================================================= +*/ + +/* +** Check whether a charset is empty (returns IFail), singleton (IChar), +** full (IAny), or none of those (ISet). When singleton, '*c' returns +** which character it is. 
(When generic set, the set was the input, +** so there is no need to return it.) +*/ +static Opcode charsettype (const byte *cs, int *c) { + int count = 0; /* number of characters in the set */ + int i; + int candidate = -1; /* candidate position for the singleton char */ + for (i = 0; i < CHARSETSIZE; i++) { /* for each byte */ + int b = cs[i]; + if (b == 0) { /* is byte empty? */ + if (count > 1) /* was set neither empty nor singleton? */ + return ISet; /* neither full nor empty nor singleton */ + /* else set is still empty or singleton */ + } + else if (b == 0xFF) { /* is byte full? */ + if (count < (i * BITSPERCHAR)) /* was set not full? */ + return ISet; /* neither full nor empty nor singleton */ + else count += BITSPERCHAR; /* set is still full */ + } + else if ((b & (b - 1)) == 0) { /* has byte only one bit? */ + if (count > 0) /* was set not empty? */ + return ISet; /* neither full nor empty nor singleton */ + else { /* set has only one char till now; track it */ + count++; + candidate = i; + } + } + else return ISet; /* byte is neither empty, full, nor singleton */ + } + switch (count) { + case 0: return IFail; /* empty set */ + case 1: { /* singleton; find character bit inside byte */ + int b = cs[candidate]; + *c = candidate * BITSPERCHAR; /* first character covered by that byte */ + if ((b & 0xF0) != 0) { *c += 4; b >>= 4; } /* binary search for the single bit: upper half of the byte? */ + if ((b & 0x0C) != 0) { *c += 2; b >>= 2; } + if ((b & 0x02) != 0) { *c += 1; } + return IChar; + } + default: { + assert(count == CHARSETSIZE * BITSPERCHAR); /* full set */ + return IAny; + } + } +} + + +/* +** A few basic operations on Charsets (in-place complement, equality, empty-intersection test) +*/ +static void cs_complement (Charset *cs) { + loopset(i, cs->cs[i] = ~cs->cs[i]); +} + +static int cs_equal (const byte *cs1, const byte *cs2) { + loopset(i, if (cs1[i] != cs2[i]) return 0); + return 1; +} + +static int cs_disjoint (const Charset *cs1, const Charset *cs2) { + loopset(i, if ((cs1->cs[i] & cs2->cs[i]) != 0) return 0;) + return 1; +} + + +/* +** If 'tree' is a 'char' pattern (TSet, TChar, TAny), convert it into a 
+** charset and return 1; else return 0. +*/ +int tocharset (TTree *tree, Charset *cs) { + switch (tree->tag) { + case TSet: { /* copy set */ + loopset(i, cs->cs[i] = treebuffer(tree)[i]); + return 1; + } + case TChar: { /* only one char */ + assert(0 <= tree->u.n && tree->u.n <= UCHAR_MAX); + loopset(i, cs->cs[i] = 0); /* erase all chars */ + setchar(cs->cs, tree->u.n); /* add that one */ + return 1; + } + case TAny: { + loopset(i, cs->cs[i] = 0xFF); /* add all characters to the set */ + return 1; + } + default: return 0; + } +} + + +/* +** Check whether a pattern tree has captures +*/ +int hascaptures (TTree *tree) { + tailcall: + switch (tree->tag) { + case TCapture: case TRunTime: + return 1; + case TCall: + tree = sib2(tree); goto tailcall; /* return hascaptures(sib2(tree)); */ + case TOpenCall: assert(0); /* not expected here; with assertions disabled, control falls into 'default' */ + default: { + switch (numsiblings[tree->tag]) { + case 1: /* return hascaptures(sib1(tree)); */ + tree = sib1(tree); goto tailcall; + case 2: + if (hascaptures(sib1(tree))) return 1; + /* else return hascaptures(sib2(tree)); */ + tree = sib2(tree); goto tailcall; + default: assert(numsiblings[tree->tag] == 0); return 0; + } + } + } +} + + +/* +** Checks how a pattern behaves regarding the empty string, +** in one of two different ways: +** A pattern is *nullable* if it can match without consuming any character; +** A pattern is *nofail* if it never fails for any string +** (including the empty string). +** The difference is only for predicates and run-time captures; +** for other patterns, the two properties are equivalent. +** (With predicates, &'a' is nullable but not nofail. Of course, +** nofail => nullable.) +** These functions are all conservative in the following way: +** p is nullable => nullable(p) +** nofail(p) => p cannot fail +** The function assumes that TOpenCall is not nullable; +** this will be checked again when the grammar is fixed. +** Run-time captures can do whatever they want, so the result +** is conservative. 
+*/ +int checkaux (TTree *tree, int pred) { + tailcall: + switch (tree->tag) { + case TChar: case TSet: case TAny: + case TFalse: case TOpenCall: + return 0; /* not nullable */ + case TRep: case TTrue: + return 1; /* no fail */ + case TNot: case TBehind: /* can match empty, but can fail */ + if (pred == PEnofail) return 0; + else return 1; /* PEnullable */ + case TAnd: /* can match empty; fail iff body does */ + if (pred == PEnullable) return 1; + /* else return checkaux(sib1(tree), pred); */ + tree = sib1(tree); goto tailcall; + case TRunTime: /* can fail; match empty iff body does */ + if (pred == PEnofail) return 0; + /* else return checkaux(sib1(tree), pred); */ + tree = sib1(tree); goto tailcall; + case TSeq: /* both parts must have the property */ + if (!checkaux(sib1(tree), pred)) return 0; + /* else return checkaux(sib2(tree), pred); */ + tree = sib2(tree); goto tailcall; + case TChoice: /* one alternative having the property is enough */ + if (checkaux(sib2(tree), pred)) return 1; + /* else return checkaux(sib1(tree), pred); */ + tree = sib1(tree); goto tailcall; + case TCapture: case TGrammar: case TRule: + /* return checkaux(sib1(tree), pred); */ + tree = sib1(tree); goto tailcall; + case TCall: /* return checkaux(sib2(tree), pred); */ + tree = sib2(tree); goto tailcall; + default: assert(0); return 0; + } +} + + +/* +** number of characters to match a pattern (or -1 if variable) +** ('count' avoids infinite loops for grammars; 'len' is the length accumulated so far) +*/ +int fixedlenx (TTree *tree, int count, int len) { + tailcall: + switch (tree->tag) { + case TChar: case TSet: case TAny: + return len + 1; + case TFalse: case TTrue: case TNot: case TAnd: case TBehind: + return len; + case TRep: case TRunTime: case TOpenCall: + return -1; + case TCapture: case TRule: case TGrammar: + /* return fixedlenx(sib1(tree), count, len); */ + tree = sib1(tree); goto tailcall; + case TCall: + if (count++ >= MAXRULES) + return -1; /* may be a loop */ + /* else return fixedlenx(sib2(tree), count, len); */ + tree = sib2(tree); goto tailcall; + case TSeq: { + len = fixedlenx(sib1(tree), count, len); + if 
(len < 0) return -1; + /* else return fixedlenx(sib2(tree), count, len); */ + tree = sib2(tree); goto tailcall; + } + case TChoice: { + int n1, n2; + n1 = fixedlenx(sib1(tree), count, len); + if (n1 < 0) return -1; + n2 = fixedlenx(sib2(tree), count, len); + if (n1 == n2) return n1; /* fixed only when both alternatives have the same length */ + else return -1; + } + default: assert(0); return 0; + }; +} + + +/* +** Computes the 'first set' of a pattern. +** The result is a conservative approximation: +** match p ax -> x (for some x) ==> a belongs to first(p) +** or +** a not in first(p) ==> match p ax -> fail (for all x) +** +** The set 'follow' is the first set of what follows the +** pattern (full set if nothing follows it). +** +** The function returns 0 when this resulting set can be used for +** test instructions that avoid the pattern altogether. +** A non-zero return can happen for two reasons: +** 1) match p '' -> '' ==> return has bit 1 set +** (tests cannot be used because they would always fail for an empty input); +** 2) there is a match-time capture ==> return has bit 2 set +** (optimizations should not bypass match-time captures). 
+*/ +static int getfirst (TTree *tree, const Charset *follow, Charset *firstset) { + tailcall: + switch (tree->tag) { + case TChar: case TSet: case TAny: { + tocharset(tree, firstset); + return 0; + } + case TTrue: { + loopset(i, firstset->cs[i] = follow->cs[i]); + return 1; /* accepts the empty string */ + } + case TFalse: { + loopset(i, firstset->cs[i] = 0); + return 0; + } + case TChoice: { + Charset csaux; + int e1 = getfirst(sib1(tree), follow, firstset); + int e2 = getfirst(sib2(tree), follow, &csaux); + loopset(i, firstset->cs[i] |= csaux.cs[i]); /* union of both alternatives */ + return e1 | e2; + } + case TSeq: { + if (!nullable(sib1(tree))) { + /* when p1 is not nullable, p2 has nothing to contribute; + return getfirst(sib1(tree), fullset, firstset); */ + tree = sib1(tree); follow = fullset; goto tailcall; + } + else { /* FIRST(p1 p2, fl) = FIRST(p1, FIRST(p2, fl)) */ + Charset csaux; + int e2 = getfirst(sib2(tree), follow, &csaux); + int e1 = getfirst(sib1(tree), &csaux, firstset); + if (e1 == 0) return 0; /* 'e1' ensures that first can be used */ + else if ((e1 | e2) & 2) /* one of the children has a matchtime? */ + return 2; /* pattern has a matchtime capture */ + else return e2; /* else depends on 'e2' */ + } + } + case TRep: { + getfirst(sib1(tree), follow, firstset); + loopset(i, firstset->cs[i] |= follow->cs[i]); + return 1; /* accept the empty string */ + } + case TCapture: case TGrammar: case TRule: { + /* return getfirst(sib1(tree), follow, firstset); */ + tree = sib1(tree); goto tailcall; + } + case TRunTime: { /* function invalidates any follow info. */ + int e = getfirst(sib1(tree), fullset, firstset); + if (e) return 2; /* function is not "protected"? 
*/ + else return 0; /* pattern inside capture ensures first can be used */ + } + case TCall: { + /* return getfirst(sib2(tree), follow, firstset); */ + tree = sib2(tree); goto tailcall; + } + case TAnd: { + int e = getfirst(sib1(tree), follow, firstset); + loopset(i, firstset->cs[i] &= follow->cs[i]); + return e; + } + case TNot: { + if (tocharset(sib1(tree), firstset)) { + cs_complement(firstset); + return 1; + } + /* else fall through to TBehind */ + } + case TBehind: { /* instruction gives no new information */ + /* call 'getfirst' only to check for match-time captures */ + int e = getfirst(sib1(tree), follow, firstset); + loopset(i, firstset->cs[i] = follow->cs[i]); /* uses follow */ + return e | 1; /* always can accept the empty string */ + } + default: assert(0); return 0; + } +} + + +/* +** If 'headfail(tree)' is true, then 'tree' can fail only depending on the +** next character of the subject. +*/ +static int headfail (TTree *tree) { + tailcall: + switch (tree->tag) { + case TChar: case TSet: case TAny: case TFalse: + return 1; + case TTrue: case TRep: case TRunTime: case TNot: + case TBehind: + return 0; + case TCapture: case TGrammar: case TRule: case TAnd: + tree = sib1(tree); goto tailcall; /* return headfail(sib1(tree)); */ + case TCall: + tree = sib2(tree); goto tailcall; /* return headfail(sib2(tree)); */ + case TSeq: + if (!nofail(sib2(tree))) return 0; /* second part could fail after the first character */ + /* else return headfail(sib1(tree)); */ + tree = sib1(tree); goto tailcall; + case TChoice: + if (!headfail(sib1(tree))) return 0; + /* else return headfail(sib2(tree)); */ + tree = sib2(tree); goto tailcall; + default: assert(0); return 0; + } +} + + +/* +** Check whether the code generation for the given tree can benefit +** from a follow set (to avoid computing the follow set when it is +** not needed) +*/ +static int needfollow (TTree *tree) { + tailcall: + switch (tree->tag) { + case TChar: case TSet: case TAny: + case TFalse: case TTrue: case TAnd: case TNot: + case TRunTime: case TGrammar: case TCall: case 
TBehind: + return 0; + case TChoice: case TRep: + return 1; + case TCapture: + tree = sib1(tree); goto tailcall; + case TSeq: + tree = sib2(tree); goto tailcall; + default: assert(0); return 0; + } +} + +/* }====================================================== */ + + + +/* +** {====================================================== +** Code generation +** ======================================================= +*/ + + +/* +** size of an instruction, counted in 'Instruction' slots +*/ +int sizei (const Instruction *i) { + switch((Opcode)i->i.code) { + case ISet: case ISpan: return CHARSETINSTSIZE; + case ITestSet: return CHARSETINSTSIZE + 1; /* set instruction plus one slot for the offset */ + case ITestChar: case ITestAny: case IChoice: case IJmp: case ICall: + case IOpenCall: case ICommit: case IPartialCommit: case IBackCommit: + return 2; /* opcode slot plus offset slot */ + default: return 1; + } +} + + +/* +** state for the compiler +*/ +typedef struct CompileState { + Pattern *p; /* pattern being compiled */ + int ncode; /* next position in p->code to be filled */ + lua_State *L; +} CompileState; + + +/* +** code generation is recursive; 'opt' indicates that the code is +** being generated under a 'IChoice' operator jumping to its end +** (that is, the match is "optional"). +** 'tt' points to a previous test protecting this code. 'fl' is +** the follow set of the pattern. 
+*/ +static void codegen (CompileState *compst, TTree *tree, int opt, int tt, + const Charset *fl); + + +void realloccode (lua_State *L, Pattern *p, int nsize) { + void *ud; + lua_Alloc f = lua_getallocf(L, &ud); /* resize with the allocator of the Lua state */ + void *newblock = f(ud, p->code, p->codesize * sizeof(Instruction), + nsize * sizeof(Instruction)); + if (newblock == NULL && nsize > 0) + luaL_error(L, "not enough memory"); + p->code = (Instruction *)newblock; + p->codesize = nsize; +} + + +static int nextinstruction (CompileState *compst) { + int size = compst->p->codesize; + if (compst->ncode >= size) + realloccode(compst->L, compst->p, size * 2); /* buffer is full: double its size */ + return compst->ncode++; /* index of the slot just reserved */ +} + + +#define getinstr(cs,i) ((cs)->p->code[i]) /* instruction slot at index 'i' of the pattern being compiled */ + + +static int addinstruction (CompileState *compst, Opcode op, int aux) { + int i = nextinstruction(compst); + getinstr(compst, i).i.code = op; + getinstr(compst, i).i.aux = aux; + return i; /* position of the new instruction */ +} + + +/* +** Add an instruction followed by space for an offset (to be set later) +*/ +static int addoffsetinst (CompileState *compst, Opcode op) { + int i = addinstruction(compst, op, 0); /* instruction */ + addinstruction(compst, (Opcode)0, 0); /* open space for offset */ + assert(op == ITestSet || sizei(&getinstr(compst, i)) == 2); + return i; +} + + +/* +** Set the offset of an instruction +*/ +static void setoffset (CompileState *compst, int instruction, int offset) { + getinstr(compst, instruction + 1).offset = offset; /* the offset lives in the slot after the opcode */ +} + + +/* +** Add a capture instruction: +** 'op' is the capture instruction; 'cap' the capture kind; +** 'key' the key into ktable; 'aux' is the optional capture offset +** +*/ +static int addinstcap (CompileState *compst, Opcode op, int cap, int key, + int aux) { + int i = addinstruction(compst, op, joinkindoff(cap, aux)); + getinstr(compst, i).i.key = key; + return i; +} + + +#define gethere(compst) ((compst)->ncode) /* index where the next instruction will go */ + +#define target(code,i) ((i) + code[i + 1].offset) /* absolute destination of the jump at 'i' (offsets are relative to 'i') */ + + +/* +** Patch 'instruction' to jump to 'target' +*/ +static void jumptothere (CompileState 
*compst, int instruction, int target) { + if (instruction >= 0) /* a negative index (NOINST) means there is nothing to patch */ + setoffset(compst, instruction, target - instruction); +} + + +/* +** Patch 'instruction' to jump to current position +*/ +static void jumptohere (CompileState *compst, int instruction) { + jumptothere(compst, instruction, gethere(compst)); +} + + +/* +** Code an IChar instruction, or IAny if there is an equivalent +** test dominating it +*/ +static void codechar (CompileState *compst, int c, int tt) { + if (tt >= 0 && getinstr(compst, tt).i.code == ITestChar && + getinstr(compst, tt).i.aux == c) + addinstruction(compst, IAny, 0); + else + addinstruction(compst, IChar, c); +} + + +/* +** Add a charset postfix to an instruction +*/ +static void addcharset (CompileState *compst, const byte *cs) { + int p = gethere(compst); + int i; + for (i = 0; i < (int)CHARSETINSTSIZE - 1; i++) + nextinstruction(compst); /* space for buffer */ + /* fill buffer with charset */ + loopset(j, getinstr(compst, p).buff[j] = cs[j]); +} + + +/* +** code a char set, optimizing unit sets for IChar, "complete" +** sets for IAny, and empty sets for IFail; also use an IAny +** when instruction is dominated by an equivalent test. +*/ +static void codecharset (CompileState *compst, const byte *cs, int tt) { + int c = 0; /* (=) to avoid warnings */ + Opcode op = charsettype(cs, &c); + switch (op) { + case IChar: codechar(compst, c, tt); break; + case ISet: { /* non-trivial set? */ + if (tt >= 0 && getinstr(compst, tt).i.code == ITestSet && + cs_equal(cs, getinstr(compst, tt + 2).buff)) /* the test's set starts after its opcode and offset slots */ + addinstruction(compst, IAny, 0); + else { + addinstruction(compst, ISet, 0); + addcharset(compst, cs); + } + break; + } + default: addinstruction(compst, op, c); break; + } +} + + +/* +** code a test set, optimizing unit sets for ITestChar, "complete" +** sets for ITestAny, and empty sets for IJmp (always fails). +** 'e' is true iff test should accept the empty string. (Test +** instructions in the current VM never accept the empty string.) 
+*/ +static int codetestset (CompileState *compst, Charset *cs, int e) { + if (e) return NOINST; /* no test */ + else { + int c = 0; + Opcode op = charsettype(cs->cs, &c); + switch (op) { + case IFail: return addoffsetinst(compst, IJmp); /* always jump */ + case IAny: return addoffsetinst(compst, ITestAny); + case IChar: { + int i = addoffsetinst(compst, ITestChar); + getinstr(compst, i).i.aux = c; + return i; + } + case ISet: { + int i = addoffsetinst(compst, ITestSet); + addcharset(compst, cs->cs); + return i; + } + default: assert(0); return 0; + } + } +} + + +/* +** Find the final destination of a sequence of jumps +*/ +static int finaltarget (Instruction *code, int i) { + while (code[i].i.code == IJmp) + i = target(code, i); + return i; +} + + +/* +** final label (after traversing any jumps) +*/ +static int finallabel (Instruction *code, int i) { + return finaltarget(code, target(code, i)); +} + + +/* +** == behind n;
<p>
(where n = fixedlen(p)) +*/ +static void codebehind (CompileState *compst, TTree *tree) { + if (tree->u.n > 0) + addinstruction(compst, IBehind, tree->u.n); + codegen(compst, sib1(tree), 0, NOINST, fullset); +} + + +/* +** Choice; optimizations: +** - when p1 is headfail +** - when first(p1) and first(p2) are disjoint; than +** a character not in first(p1) cannot go to p1, and a character +** in first(p1) cannot go to p2 (at it is not in first(p2)). +** (The optimization is not valid if p1 accepts the empty string, +** as then there is no character at all...) +** - when p2 is empty and opt is true; a IPartialCommit can resuse +** the Choice already active in the stack. +*/ +static void codechoice (CompileState *compst, TTree *p1, TTree *p2, int opt, + const Charset *fl) { + int emptyp2 = (p2->tag == TTrue); + Charset cs1, cs2; + int e1 = getfirst(p1, fullset, &cs1); + if (headfail(p1) || + (!e1 && (getfirst(p2, fl, &cs2), cs_disjoint(&cs1, &cs2)))) { + /* == test (fail(p1)) -> L1 ; p1 ; jmp L2; L1: p2; L2: */ + int test = codetestset(compst, &cs1, 0); + int jmp = NOINST; + codegen(compst, p1, 0, test, fl); + if (!emptyp2) + jmp = addoffsetinst(compst, IJmp); + jumptohere(compst, test); + codegen(compst, p2, opt, NOINST, fl); + jumptohere(compst, jmp); + } + else if (opt && emptyp2) { + /* p1? == IPartialCommit; p1 */ + jumptohere(compst, addoffsetinst(compst, IPartialCommit)); + codegen(compst, p1, 1, NOINST, fullset); + } + else { + /* == + test(fail(p1)) -> L1; choice L1; ; commit L2; L1: ; L2: */ + int pcommit; + int test = codetestset(compst, &cs1, e1); + int pchoice = addoffsetinst(compst, IChoice); + codegen(compst, p1, emptyp2, test, fullset); + pcommit = addoffsetinst(compst, ICommit); + jumptohere(compst, pchoice); + jumptohere(compst, test); + codegen(compst, p2, opt, NOINST, fl); + jumptohere(compst, pcommit); + } +} + + +/* +** And predicate +** optimization: fixedlen(p) = n ==> <&p> ==
<p>
; behind n +** (valid only when 'p' has no captures) +*/ +static void codeand (CompileState *compst, TTree *tree, int tt) { + int n = fixedlen(tree); + if (n >= 0 && n <= MAXBEHIND && !hascaptures(tree)) { + codegen(compst, tree, 0, tt, fullset); + if (n > 0) + addinstruction(compst, IBehind, n); + } + else { /* default: Choice L1; p1; BackCommit L2; L1: Fail; L2: */ + int pcommit; + int pchoice = addoffsetinst(compst, IChoice); + codegen(compst, tree, 0, tt, fullset); + pcommit = addoffsetinst(compst, IBackCommit); + jumptohere(compst, pchoice); + addinstruction(compst, IFail, 0); + jumptohere(compst, pcommit); + } +} + + +/* +** Captures: if pattern has fixed (and not too big) length, use +** a single IFullCapture instruction after the match; otherwise, +** enclose the pattern with OpenCapture - CloseCapture. +*/ +static void codecapture (CompileState *compst, TTree *tree, int tt, + const Charset *fl) { + int len = fixedlen(sib1(tree)); + if (len >= 0 && len <= MAXOFF && !hascaptures(sib1(tree))) { + codegen(compst, sib1(tree), 0, tt, fl); + addinstcap(compst, IFullCapture, tree->cap, tree->key, len); + } + else { + addinstcap(compst, IOpenCapture, tree->cap, tree->key, 0); + codegen(compst, sib1(tree), 0, tt, fl); + addinstcap(compst, ICloseCapture, Cclose, 0, 0); + } +} + + +static void coderuntime (CompileState *compst, TTree *tree, int tt) { + addinstcap(compst, IOpenCapture, Cgroup, tree->key, 0); + codegen(compst, sib1(tree), 0, tt, fullset); + addinstcap(compst, ICloseRunTime, Cclose, 0, 0); +} + + +/* +** Repetion; optimizations: +** When pattern is a charset, can use special instruction ISpan. +** When pattern is head fail, or if it starts with characters that +** are disjoint from what follows the repetions, a simple test +** is enough (a fail inside the repetition would backtrack to fail +** again in the following pattern, so there is no need for a choice). +** When 'opt' is true, the repetion can reuse the Choice already +** active in the stack. 
+*/ +static void coderep (CompileState *compst, TTree *tree, int opt, + const Charset *fl) { + Charset st; + if (tocharset(tree, &st)) { + addinstruction(compst, ISpan, 0); + addcharset(compst, st.cs); + } + else { + int e1 = getfirst(tree, fullset, &st); + if (headfail(tree) || (!e1 && cs_disjoint(&st, fl))) { + /* L1: test (fail(p1)) -> L2;
<p>
; jmp L1; L2: */ + int jmp; + int test = codetestset(compst, &st, 0); + codegen(compst, tree, opt, test, fullset); + jmp = addoffsetinst(compst, IJmp); + jumptohere(compst, test); + jumptothere(compst, jmp, test); + } + else { + /* test(fail(p1)) -> L2; choice L2; L1:
<p>
; partialcommit L1; L2: */ + /* or (if 'opt'): partialcommit L1; L1:
<p>
; partialcommit L1; */ + int commit, l2; + int test = codetestset(compst, &st, e1); + int pchoice = NOINST; + if (opt) + jumptohere(compst, addoffsetinst(compst, IPartialCommit)); + else + pchoice = addoffsetinst(compst, IChoice); + l2 = gethere(compst); + codegen(compst, tree, 0, NOINST, fullset); + commit = addoffsetinst(compst, IPartialCommit); + jumptothere(compst, commit, l2); + jumptohere(compst, pchoice); + jumptohere(compst, test); + } + } +} + + +/* +** Not predicate; optimizations: +** In any case, if first test fails, 'not' succeeds, so it can jump to +** the end. If pattern is headfail, that is all (it cannot fail +** in other parts); this case includes 'not' of simple sets. Otherwise, +** use the default code (a choice plus a failtwice). +*/ +static void codenot (CompileState *compst, TTree *tree) { + Charset st; + int e = getfirst(tree, fullset, &st); + int test = codetestset(compst, &st, e); + if (headfail(tree)) /* test (fail(p1)) -> L1; fail; L1: */ + addinstruction(compst, IFail, 0); + else { + /* test(fail(p))-> L1; choice L1;
<p>
; failtwice; L1: */ + int pchoice = addoffsetinst(compst, IChoice); + codegen(compst, tree, 0, NOINST, fullset); + addinstruction(compst, IFailTwice, 0); + jumptohere(compst, pchoice); + } + jumptohere(compst, test); +} + + +/* +** change open calls to calls, using list 'positions' to find +** correct offsets; also optimize tail calls +*/ +static void correctcalls (CompileState *compst, int *positions, + int from, int to) { + int i; + Instruction *code = compst->p->code; + for (i = from; i < to; i += sizei(&code[i])) { + if (code[i].i.code == IOpenCall) { + int n = code[i].i.key; /* rule number */ + int rule = positions[n]; /* rule position */ + assert(rule == from || code[rule - 1].i.code == IRet); + if (code[finaltarget(code, i + 2)].i.code == IRet) /* call; ret ? */ + code[i].i.code = IJmp; /* tail call */ + else + code[i].i.code = ICall; + jumptothere(compst, i, rule); /* call jumps to respective rule */ + } + } + assert(i == to); +} + + +/* +** Code for a grammar: +** call L1; jmp L2; L1: rule 1; ret; rule 2; ret; ...; L2: +*/ +static void codegrammar (CompileState *compst, TTree *grammar) { + int positions[MAXRULES]; + int rulenumber = 0; + TTree *rule; + int firstcall = addoffsetinst(compst, ICall); /* call initial rule */ + int jumptoend = addoffsetinst(compst, IJmp); /* jump to the end */ + int start = gethere(compst); /* here starts the initial rule */ + jumptohere(compst, firstcall); + for (rule = sib1(grammar); rule->tag == TRule; rule = sib2(rule)) { + positions[rulenumber++] = gethere(compst); /* save rule position */ + codegen(compst, sib1(rule), 0, NOINST, fullset); /* code rule */ + addinstruction(compst, IRet, 0); + } + assert(rule->tag == TTrue); + jumptohere(compst, jumptoend); + correctcalls(compst, positions, start, gethere(compst)); +} + + +static void codecall (CompileState *compst, TTree *call) { + int c = addoffsetinst(compst, IOpenCall); /* to be corrected later */ + getinstr(compst, c).i.key = sib2(call)->cap; /* rule number */ + 
assert(sib2(call)->tag == TRule); +} + + +/* +** Code first child of a sequence +** (second child is called in-place to allow tail call) +** Return 'tt' for second child +*/ +static int codeseq1 (CompileState *compst, TTree *p1, TTree *p2, + int tt, const Charset *fl) { + if (needfollow(p1)) { + Charset fl1; + getfirst(p2, fl, &fl1); /* p1 follow is p2 first */ + codegen(compst, p1, 0, tt, &fl1); + } + else /* use 'fullset' as follow */ + codegen(compst, p1, 0, tt, fullset); + if (fixedlen(p1) != 0) /* can 'p1' consume anything? */ + return NOINST; /* invalidate test */ + else return tt; /* else 'tt' still protects sib2 */ +} + + +/* +** Main code-generation function: dispatch to auxiliar functions +** according to kind of tree. ('needfollow' should return true +** only for consructions that use 'fl'.) +*/ +static void codegen (CompileState *compst, TTree *tree, int opt, int tt, + const Charset *fl) { + tailcall: + switch (tree->tag) { + case TChar: codechar(compst, tree->u.n, tt); break; + case TAny: addinstruction(compst, IAny, 0); break; + case TSet: codecharset(compst, treebuffer(tree), tt); break; + case TTrue: break; + case TFalse: addinstruction(compst, IFail, 0); break; + case TChoice: codechoice(compst, sib1(tree), sib2(tree), opt, fl); break; + case TRep: coderep(compst, sib1(tree), opt, fl); break; + case TBehind: codebehind(compst, tree); break; + case TNot: codenot(compst, sib1(tree)); break; + case TAnd: codeand(compst, sib1(tree), tt); break; + case TCapture: codecapture(compst, tree, tt, fl); break; + case TRunTime: coderuntime(compst, tree, tt); break; + case TGrammar: codegrammar(compst, tree); break; + case TCall: codecall(compst, tree); break; + case TSeq: { + tt = codeseq1(compst, sib1(tree), sib2(tree), tt, fl); /* code 'p1' */ + /* codegen(compst, p2, opt, tt, fl); */ + tree = sib2(tree); goto tailcall; + } + default: assert(0); + } +} + + +/* +** Optimize jumps and other jump-like instructions. 
+** * Update labels of instructions with labels to their final +** destinations (e.g., choice L1; ... L1: jmp L2: becomes +** choice L2) +** * Jumps to other instructions that do jumps become those +** instructions (e.g., jump to return becomes a return; jump +** to commit becomes a commit) +*/ +static void peephole (CompileState *compst) { + Instruction *code = compst->p->code; + int i; + for (i = 0; i < compst->ncode; i += sizei(&code[i])) { + redo: + switch (code[i].i.code) { + case IChoice: case ICall: case ICommit: case IPartialCommit: + case IBackCommit: case ITestChar: case ITestSet: + case ITestAny: { /* instructions with labels */ + jumptothere(compst, i, finallabel(code, i)); /* optimize label */ + break; + } + case IJmp: { + int ft = finaltarget(code, i); + switch (code[ft].i.code) { /* jumping to what? */ + case IRet: case IFail: case IFailTwice: + case IEnd: { /* instructions with unconditional implicit jumps */ + code[i] = code[ft]; /* jump becomes that instruction */ + code[i + 1].i.code = IAny; /* 'no-op' for target position */ + break; + } + case ICommit: case IPartialCommit: + case IBackCommit: { /* inst. with unconditional explicit jumps */ + int fft = finallabel(code, ft); + code[i] = code[ft]; /* jump becomes that instruction... 
*/ + jumptothere(compst, i, fft); /* but must correct its offset */ + goto redo; /* reoptimize its label */ + } + default: { + jumptothere(compst, i, ft); /* optimize label */ + break; + } + } + break; + } + default: break; + } + } + assert(code[i - 1].i.code == IEnd); +} + + +/* +** Compile a pattern +*/ +Instruction *compile (lua_State *L, Pattern *p) { + CompileState compst; + compst.p = p; compst.ncode = 0; compst.L = L; + realloccode(L, p, 2); /* minimum initial size */ + codegen(&compst, p->tree, 0, NOINST, fullset); + addinstruction(&compst, IEnd, 0); + realloccode(L, p, compst.ncode); /* set final size */ + peephole(&compst); + return p->code; +} + + +/* }====================================================== */ + diff --git a/scripting/lpeg/lpcode.h b/scripting/lpeg/lpcode.h new file mode 100644 index 00000000..808abc27 --- /dev/null +++ b/scripting/lpeg/lpcode.h @@ -0,0 +1,40 @@ +/* +** $Id: lpcode.h,v 1.6 2013/11/28 14:56:02 roberto Exp $ +*/ + +#if !defined(lpcode_h) +#define lpcode_h + +#ifdef LUA_52 + #include "..\..\..\lua52\src\lua.h" + #include "..\..\..\lua52\src\lauxlib.h" +#else + #include "..\..\lua.h" + #include "..\..\lauxlib.h" +#endif + +#include "lptypes.h" +#include "lptree.h" +#include "lpvm.h" + +int tocharset (TTree *tree, Charset *cs); +int checkaux (TTree *tree, int pred); +int fixedlenx (TTree *tree, int count, int len); +int hascaptures (TTree *tree); +int lp_gc (lua_State *L); +Instruction *compile (lua_State *L, Pattern *p); +void realloccode (lua_State *L, Pattern *p, int nsize); +int sizei (const Instruction *i); + + +#define PEnullable 0 +#define PEnofail 1 + +#define nofail(t) checkaux(t, PEnofail) +#define nullable(t) checkaux(t, PEnullable) + +#define fixedlen(t) fixedlenx(t, 0, 0) + + + +#endif diff --git a/scripting/lpeg/lpprint.h b/scripting/lpeg/lpprint.h new file mode 100644 index 00000000..e640f744 --- /dev/null +++ b/scripting/lpeg/lpprint.h @@ -0,0 +1,35 @@ +/* +** $Id: lpprint.h,v 1.1 2013/03/21 20:25:12 roberto 
Exp $ +*/ + + +#if !defined(lpprint_h) +#define lpprint_h + + +#include "lptree.h" +#include "lpvm.h" + + +#if defined(LPEG_DEBUG) + +void printpatt (Instruction *p, int n); +void printtree (TTree *tree, int ident); +void printktable (lua_State *L, int idx); +void printcharset (const byte *st); +void printcaplist (Capture *cap, Capture *limit); + +#else + +#define printktable(L,idx) \ + luaL_error(L, "function only implemented in debug mode") +#define printtree(tree,i) \ + luaL_error(L, "function only implemented in debug mode") +#define printpatt(p,n) \ + luaL_error(L, "function only implemented in debug mode") + +#endif + + +#endif + diff --git a/scripting/lpeg/lptree.c b/scripting/lpeg/lptree.c new file mode 100644 index 00000000..65b51bfb --- /dev/null +++ b/scripting/lpeg/lptree.c @@ -0,0 +1,1307 @@ +#pragma warning( disable : 4244) // conversion from 'int ' to 'short ', possible loss of data + +/* +** $Id: lptree.c,v 1.15 2015/03/04 17:23:00 roberto Exp $ +** Copyright 2013, Lua.org & PUC-Rio (see 'lpeg.html' for license) +*/ + +#include +#include +#include + + +#ifdef LUA_52 + #include "..\..\..\lua52\src\lua.h" + #include "..\..\..\lua52\src\lauxlib.h" +#else + #include "..\..\lua.h" + #include "..\..\lauxlib.h" +#endif + + +#include "lptypes.h" +#include "lpcap.h" +#include "lpcode.h" +#include "lpprint.h" +#include "lptree.h" + + +/* number of siblings for each tree */ +const byte numsiblings[] = { + 0, 0, 0, /* char, set, any */ + 0, 0, /* true, false */ + 1, /* rep */ + 2, 2, /* seq, choice */ + 1, 1, /* not, and */ + 0, 0, 2, 1, /* call, opencall, rule, grammar */ + 1, /* behind */ + 1, 1 /* capture, runtime capture */ +}; + + +static TTree *newgrammar (lua_State *L, int arg); + + +/* +** returns a reasonable name for value at index 'idx' on the stack +*/ +static const char *val2str (lua_State *L, int idx) { + const char *k = lua_tostring(L, idx); + if (k != NULL) + return lua_pushfstring(L, "%s", k); + else + return lua_pushfstring(L, "(a %s)", 
luaL_typename(L, idx)); +} + + +/* +** Fix a TOpenCall into a TCall node, using table 'postable' to +** translate a key to its rule address in the tree. Raises an +** error if key does not exist. +*/ +static void fixonecall (lua_State *L, int postable, TTree *g, TTree *t) { + int n; + lua_rawgeti(L, -1, t->key); /* get rule's name */ + lua_gettable(L, postable); /* query name in position table */ + n = lua_tonumber(L, -1); /* get (absolute) position */ + lua_pop(L, 1); /* remove position */ + if (n == 0) { /* no position? */ + lua_rawgeti(L, -1, t->key); /* get rule's name again */ + luaL_error(L, "rule '%s' undefined in given grammar", val2str(L, -1)); + } + t->tag = TCall; + t->u.ps = n - (t - g); /* position relative to node */ + assert(sib2(t)->tag == TRule); + sib2(t)->key = t->key; +} + + +/* +** Transform left associative constructions into right +** associative ones, for sequence and choice; that is: +** (t11 + t12) + t2 => t11 + (t12 + t2) +** (t11 * t12) * t2 => t11 * (t12 * t2) +** (that is, Op (Op t11 t12) t2 => Op t11 (Op t12 t2)) +*/ +static void correctassociativity (TTree *tree) { + TTree *t1 = sib1(tree); + assert(tree->tag == TChoice || tree->tag == TSeq); + while (t1->tag == tree->tag) { + int n1size = tree->u.ps - 1; /* t1 == Op t11 t12 */ + int n11size = t1->u.ps - 1; + int n12size = n1size - n11size - 1; + memmove(sib1(tree), sib1(t1), n11size * sizeof(TTree)); /* move t11 */ + tree->u.ps = n11size + 1; + sib2(tree)->tag = tree->tag; + sib2(tree)->u.ps = n12size + 1; + } +} + + +/* +** Make final adjustments in a tree. Fix open calls in tree 't', +** making them refer to their respective rules or raising appropriate +** errors (if not inside a grammar). Correct associativity of associative +** constructions (making them right associative). Assume that tree's +** ktable is at the top of the stack (for error messages). 
+*/ +static void finalfix (lua_State *L, int postable, TTree *g, TTree *t) { + tailcall: + switch (t->tag) { + case TGrammar: /* subgrammars were already fixed */ + return; + case TOpenCall: { + if (g != NULL) /* inside a grammar? */ + fixonecall(L, postable, g, t); + else { /* open call outside grammar */ + lua_rawgeti(L, -1, t->key); + luaL_error(L, "rule '%s' used outside a grammar", val2str(L, -1)); + } + break; + } + case TSeq: case TChoice: + correctassociativity(t); + break; + } + switch (numsiblings[t->tag]) { + case 1: /* finalfix(L, postable, g, sib1(t)); */ + t = sib1(t); goto tailcall; + case 2: + finalfix(L, postable, g, sib1(t)); + t = sib2(t); goto tailcall; /* finalfix(L, postable, g, sib2(t)); */ + default: assert(numsiblings[t->tag] == 0); break; + } +} + + + +/* +** {=================================================================== +** KTable manipulation +** +** - The ktable of a pattern 'p' can be shared by other patterns that +** contain 'p' and no other constants. Because of this sharing, we +** should not add elements to a 'ktable' unless it was freshly created +** for the new pattern. +** +** - The maximum index in a ktable is USHRT_MAX, because trees and +** patterns use unsigned shorts to store those indices. +** ==================================================================== +*/ + +/* +** Create a new 'ktable' to the pattern at the top of the stack. +*/ +static void newktable (lua_State *L, int n) { + lua_createtable(L, n, 0); /* create a fresh table */ + lua_setfenv(L, -2); /* set it as 'ktable' for pattern */ +} + + +/* +** Add element 'idx' to 'ktable' of pattern at the top of the stack; +** Return index of new element. +** If new element is nil, does not add it to table (as it would be +** useless) and returns 0, as ktable[0] is always nil. +*/ +static int addtoktable (lua_State *L, int idx) { + if (lua_isnil(L, idx)) /* nil value? 
*/ + return 0; + else { + int n; + lua_getfenv(L, -1); /* get ktable from pattern */ + n = lua_objlen(L, -1); + if (n >= USHRT_MAX) + luaL_error(L, "too many Lua values in pattern"); + lua_pushvalue(L, idx); /* element to be added */ + lua_rawseti(L, -2, ++n); + lua_pop(L, 1); /* remove 'ktable' */ + return n; + } +} + + +/* +** Return the number of elements in the ktable at 'idx'. +** In Lua 5.2/5.3, default "environment" for patterns is nil, not +** a table. Treat it as an empty table. In Lua 5.1, assumes that +** the environment has no numeric indices (len == 0) +*/ +static int ktablelen (lua_State *L, int idx) { + if (!lua_istable(L, idx)) return 0; + else return lua_objlen(L, idx); +} + + +/* +** Concatentate the contents of table 'idx1' into table 'idx2'. +** (Assume that both indices are negative.) +** Return the original length of table 'idx2' (or 0, if no +** element was added, as there is no need to correct any index). +*/ +static int concattable (lua_State *L, int idx1, int idx2) { + int i; + int n1 = ktablelen(L, idx1); + int n2 = ktablelen(L, idx2); + if (n1 + n2 > USHRT_MAX) + luaL_error(L, "too many Lua values in pattern"); + if (n1 == 0) return 0; /* nothing to correct */ + for (i = 1; i <= n1; i++) { + lua_rawgeti(L, idx1, i); + lua_rawseti(L, idx2 - 1, n2 + i); /* correct 'idx2' */ + } + return n2; +} + + +/* +** When joining 'ktables', constants from one of the subpatterns must +** be renumbered; 'correctkeys' corrects their indices (adding 'n' +** to each of them) +*/ +static void correctkeys (TTree *tree, int n) { + if (n == 0) return; /* no correction? 
*/ + tailcall: + switch (tree->tag) { + case TOpenCall: case TCall: case TRunTime: case TRule: { + if (tree->key > 0) + tree->key += n; + break; + } + case TCapture: { + if (tree->key > 0 && tree->cap != Carg && tree->cap != Cnum) + tree->key += n; + break; + } + default: break; + } + switch (numsiblings[tree->tag]) { + case 1: /* correctkeys(sib1(tree), n); */ + tree = sib1(tree); goto tailcall; + case 2: + correctkeys(sib1(tree), n); + tree = sib2(tree); goto tailcall; /* correctkeys(sib2(tree), n); */ + default: assert(numsiblings[tree->tag] == 0); break; + } +} + + +/* +** Join the ktables from p1 and p2 the ktable for the new pattern at the +** top of the stack, reusing them when possible. +*/ +static void joinktables (lua_State *L, int p1, TTree *t2, int p2) { + int n1, n2; + lua_getfenv(L, p1); /* get ktables */ + lua_getfenv(L, p2); + n1 = ktablelen(L, -2); + n2 = ktablelen(L, -1); + if (n1 == 0 && n2 == 0) /* are both tables empty? */ + lua_pop(L, 2); /* nothing to be done; pop tables */ + else if (n2 == 0 || lua_equal(L, -2, -1)) { /* 2nd table empty or equal? */ + lua_pop(L, 1); /* pop 2nd table */ + lua_setfenv(L, -2); /* set 1st ktable into new pattern */ + } + else if (n1 == 0) { /* first table is empty? 
*/ + lua_setfenv(L, -3); /* set 2nd table into new pattern */ + lua_pop(L, 1); /* pop 1st table */ + } + else { + lua_createtable(L, n1 + n2, 0); /* create ktable for new pattern */ + /* stack: new p; ktable p1; ktable p2; new ktable */ + concattable(L, -3, -1); /* from p1 into new ktable */ + concattable(L, -2, -1); /* from p2 into new ktable */ + lua_setfenv(L, -4); /* new ktable becomes 'p' environment */ + lua_pop(L, 2); /* pop other ktables */ + correctkeys(t2, n1); /* correction for indices from p2 */ + } +} + + +/* +** copy 'ktable' of element 'idx' to new tree (on top of stack) +*/ +static void copyktable (lua_State *L, int idx) { + lua_getfenv(L, idx); + lua_setfenv(L, -2); +} + + +/* +** merge 'ktable' from 'stree' at stack index 'idx' into 'ktable' +** from tree at the top of the stack, and correct corresponding +** tree. +*/ +static void mergektable (lua_State *L, int idx, TTree *stree) { + int n; + lua_getfenv(L, -1); /* get ktables */ + lua_getfenv(L, idx); + n = concattable(L, -1, -2); + lua_pop(L, 2); /* remove both ktables */ + correctkeys(stree, n); +} + + +/* +** Create a new 'ktable' to the pattern at the top of the stack, adding +** all elements from pattern 'p' (if not 0) plus element 'idx' to it. +** Return index of new element. +*/ +static int addtonewktable (lua_State *L, int p, int idx) { + newktable(L, 1); + if (p) + mergektable(L, p, NULL); + return addtoktable(L, idx); +} + +/* }====================================================== */ + + +/* +** {====================================================== +** Tree generation +** ======================================================= +*/ + +/* +** In 5.2, could use 'luaL_testudata'... +*/ +static int testpattern (lua_State *L, int idx) { + if (lua_touserdata(L, idx)) { /* value is a userdata? */ + if (lua_getmetatable(L, idx)) { /* does it have a metatable? */ + luaL_getmetatable(L, PATTERN_T); + if (lua_rawequal(L, -1, -2)) { /* does it have the correct mt? 
*/ + lua_pop(L, 2); /* remove both metatables */ + return 1; + } + } + } + return 0; +} + + +static Pattern *getpattern (lua_State *L, int idx) { + return (Pattern *)luaL_checkudata(L, idx, PATTERN_T); +} + + +static int getsize (lua_State *L, int idx) { + return (lua_objlen(L, idx) - sizeof(Pattern)) / sizeof(TTree) + 1; +} + + +static TTree *gettree (lua_State *L, int idx, int *len) { + Pattern *p = getpattern(L, idx); + if (len) + *len = getsize(L, idx); + return p->tree; +} + + +/* +** create a pattern +*/ +static TTree *newtree (lua_State *L, int len) { + size_t size = (len - 1) * sizeof(TTree) + sizeof(Pattern); + Pattern *p = (Pattern *)lua_newuserdata(L, size); + luaL_getmetatable(L, PATTERN_T); + lua_setmetatable(L, -2); + p->code = NULL; p->codesize = 0; + return p->tree; +} + + +static TTree *newleaf (lua_State *L, int tag) { + TTree *tree = newtree(L, 1); + tree->tag = tag; + return tree; +} + + +static TTree *newcharset (lua_State *L) { + TTree *tree = newtree(L, bytes2slots(CHARSETSIZE) + 1); + tree->tag = TSet; + loopset(i, treebuffer(tree)[i] = 0); + return tree; +} + + +/* +** add to tree a sequence where first sibling is 'sib' (with size +** 'sibsize'); returns position for second sibling +*/ +static TTree *seqaux (TTree *tree, TTree *sib, int sibsize) { + tree->tag = TSeq; tree->u.ps = sibsize + 1; + memcpy(sib1(tree), sib, sibsize * sizeof(TTree)); + return sib2(tree); +} + + +/* +** Build a sequence of 'n' nodes, each with tag 'tag' and 'u.n' got +** from the array 's' (or 0 if array is NULL). (TSeq is binary, so it +** must build a sequence of sequence of sequence...) +*/ +static void fillseq (TTree *tree, int tag, int n, const char *s) { + int i; + for (i = 0; i < n - 1; i++) { /* initial n-1 copies of Seq tag; Seq ... */ + tree->tag = TSeq; tree->u.ps = 2; + sib1(tree)->tag = tag; + sib1(tree)->u.n = s ? (byte)s[i] : 0; + tree = sib2(tree); + } + tree->tag = tag; /* last one does not need TSeq */ + tree->u.n = s ? 
(byte)s[i] : 0; +} + + +/* +** Numbers as patterns: +** 0 == true (always match); n == TAny repeated 'n' times; +** -n == not (TAny repeated 'n' times) +*/ +static TTree *numtree (lua_State *L, int n) { + if (n == 0) + return newleaf(L, TTrue); + else { + TTree *tree, *nd; + if (n > 0) + tree = nd = newtree(L, 2 * n - 1); + else { /* negative: code it as !(-n) */ + n = -n; + tree = newtree(L, 2 * n); + tree->tag = TNot; + nd = sib1(tree); + } + fillseq(nd, TAny, n, NULL); /* sequence of 'n' any's */ + return tree; + } +} + + +/* +** Convert value at index 'idx' to a pattern +*/ +static TTree *getpatt (lua_State *L, int idx, int *len) { + TTree *tree; + switch (lua_type(L, idx)) { + case LUA_TSTRING: { + size_t slen; + const char *s = lua_tolstring(L, idx, &slen); /* get string */ + if (slen == 0) /* empty? */ + tree = newleaf(L, TTrue); /* always match */ + else { + tree = newtree(L, 2 * (slen - 1) + 1); + fillseq(tree, TChar, slen, s); /* sequence of 'slen' chars */ + } + break; + } + case LUA_TNUMBER: { + int n = lua_tointeger(L, idx); + tree = numtree(L, n); + break; + } + case LUA_TBOOLEAN: { + tree = (lua_toboolean(L, idx) ? newleaf(L, TTrue) : newleaf(L, TFalse)); + break; + } + case LUA_TTABLE: { + tree = newgrammar(L, idx); + break; + } + case LUA_TFUNCTION: { + tree = newtree(L, 2); + tree->tag = TRunTime; + tree->key = addtonewktable(L, 0, idx); + sib1(tree)->tag = TTrue; + break; + } + default: { + return gettree(L, idx, len); + } + } + lua_replace(L, idx); /* put new tree into 'idx' slot */ + if (len) + *len = getsize(L, idx); + return tree; +} + + +/* +** create a new tree, whith a new root and one sibling. +** Sibling must be on the Lua stack, at index 1. 
+*/ +static TTree *newroot1sib (lua_State *L, int tag) { + int s1; + TTree *tree1 = getpatt(L, 1, &s1); + TTree *tree = newtree(L, 1 + s1); /* create new tree */ + tree->tag = tag; + memcpy(sib1(tree), tree1, s1 * sizeof(TTree)); + copyktable(L, 1); + return tree; +} + + +/* +** create a new tree, whith a new root and 2 siblings. +** Siblings must be on the Lua stack, first one at index 1. +*/ +static TTree *newroot2sib (lua_State *L, int tag) { + int s1, s2; + TTree *tree1 = getpatt(L, 1, &s1); + TTree *tree2 = getpatt(L, 2, &s2); + TTree *tree = newtree(L, 1 + s1 + s2); /* create new tree */ + tree->tag = tag; + tree->u.ps = 1 + s1; + memcpy(sib1(tree), tree1, s1 * sizeof(TTree)); + memcpy(sib2(tree), tree2, s2 * sizeof(TTree)); + joinktables(L, 1, sib2(tree), 2); + return tree; +} + + +static int lp_P (lua_State *L) { + luaL_checkany(L, 1); + getpatt(L, 1, NULL); + lua_settop(L, 1); + return 1; +} + + +/* +** sequence operator; optimizations: +** false x => false, x true => x, true x => x +** (cannot do x . false => false because x may have runtime captures) +*/ +static int lp_seq (lua_State *L) { + TTree *tree1 = getpatt(L, 1, NULL); + TTree *tree2 = getpatt(L, 2, NULL); + if (tree1->tag == TFalse || tree2->tag == TTrue) + lua_pushvalue(L, 1); /* false . x == false, x . true = x */ + else if (tree1->tag == TTrue) + lua_pushvalue(L, 2); /* true . 
x = x */ + else + newroot2sib(L, TSeq); + return 1; +} + + +/* +** choice operator; optimizations: +** charset / charset => charset +** true / x => true, x / false => x, false / x => x +** (x / true is not equivalent to true) +*/ +static int lp_choice (lua_State *L) { + Charset st1, st2; + TTree *t1 = getpatt(L, 1, NULL); + TTree *t2 = getpatt(L, 2, NULL); + if (tocharset(t1, &st1) && tocharset(t2, &st2)) { + TTree *t = newcharset(L); + loopset(i, treebuffer(t)[i] = st1.cs[i] | st2.cs[i]); + } + else if (nofail(t1) || t2->tag == TFalse) + lua_pushvalue(L, 1); /* true / x => true, x / false => x */ + else if (t1->tag == TFalse) + lua_pushvalue(L, 2); /* false / x => x */ + else + newroot2sib(L, TChoice); + return 1; +} + + +/* +** p^n +*/ +static int lp_star (lua_State *L) { + int size1; + int n = (int)luaL_checkinteger(L, 2); + TTree *tree1 = getpatt(L, 1, &size1); + if (n >= 0) { /* seq tree1 (seq tree1 ... (seq tree1 (rep tree1))) */ + TTree *tree = newtree(L, (n + 1) * (size1 + 1)); + if (nullable(tree1)) + luaL_error(L, "loop body may accept empty string"); + while (n--) /* repeat 'n' times */ + tree = seqaux(tree, tree1, size1); + tree->tag = TRep; + memcpy(sib1(tree), tree1, size1 * sizeof(TTree)); + } + else { /* choice (seq tree1 ... choice tree1 true ...) 
true */ + TTree *tree; + n = -n; + /* size = (choice + seq + tree1 + true) * n, but the last has no seq */ + tree = newtree(L, n * (size1 + 3) - 1); + for (; n > 1; n--) { /* repeat (n - 1) times */ + tree->tag = TChoice; tree->u.ps = n * (size1 + 3) - 2; + sib2(tree)->tag = TTrue; + tree = sib1(tree); + tree = seqaux(tree, tree1, size1); + } + tree->tag = TChoice; tree->u.ps = size1 + 1; + sib2(tree)->tag = TTrue; + memcpy(sib1(tree), tree1, size1 * sizeof(TTree)); + } + copyktable(L, 1); + return 1; +} + + +/* +** #p == &p +*/ +static int lp_and (lua_State *L) { + newroot1sib(L, TAnd); + return 1; +} + + +/* +** -p == !p +*/ +static int lp_not (lua_State *L) { + newroot1sib(L, TNot); + return 1; +} + + +/* +** [t1 - t2] == Seq (Not t2) t1 +** If t1 and t2 are charsets, make their difference. +*/ +static int lp_sub (lua_State *L) { + Charset st1, st2; + int s1, s2; + TTree *t1 = getpatt(L, 1, &s1); + TTree *t2 = getpatt(L, 2, &s2); + if (tocharset(t1, &st1) && tocharset(t2, &st2)) { + TTree *t = newcharset(L); + loopset(i, treebuffer(t)[i] = st1.cs[i] & ~st2.cs[i]); + } + else { + TTree *tree = newtree(L, 2 + s1 + s2); + tree->tag = TSeq; /* sequence of... */ + tree->u.ps = 2 + s2; + sib1(tree)->tag = TNot; /* ...not... */ + memcpy(sib1(sib1(tree)), t2, s2 * sizeof(TTree)); /* ...t2 */ + memcpy(sib2(tree), t1, s1 * sizeof(TTree)); /* ... 
and t1 */ + joinktables(L, 1, sib1(tree), 2); + } + return 1; +} + + +static int lp_set (lua_State *L) { + size_t l; + const char *s = luaL_checklstring(L, 1, &l); + TTree *tree = newcharset(L); + while (l--) { + setchar(treebuffer(tree), (byte)(*s)); + s++; + } + return 1; +} + + +static int lp_range (lua_State *L) { + int arg; + int top = lua_gettop(L); + TTree *tree = newcharset(L); + for (arg = 1; arg <= top; arg++) { + int c; + size_t l; + const char *r = luaL_checklstring(L, arg, &l); + luaL_argcheck(L, l == 2, arg, "range must have two characters"); + for (c = (byte)r[0]; c <= (byte)r[1]; c++) + setchar(treebuffer(tree), c); + } + return 1; +} + + +/* +** Look-behind predicate +*/ +static int lp_behind (lua_State *L) { + TTree *tree; + TTree *tree1 = getpatt(L, 1, NULL); + int n = fixedlen(tree1); + luaL_argcheck(L, n > 0, 1, "pattern may not have fixed length"); + luaL_argcheck(L, !hascaptures(tree1), 1, "pattern have captures"); + luaL_argcheck(L, n <= MAXBEHIND, 1, "pattern too long to look behind"); + tree = newroot1sib(L, TBehind); + tree->u.n = n; + return 1; +} + + +/* +** Create a non-terminal +*/ +static int lp_V (lua_State *L) { + TTree *tree = newleaf(L, TOpenCall); + luaL_argcheck(L, !lua_isnoneornil(L, 1), 1, "non-nil value expected"); + tree->key = addtonewktable(L, 0, 1); + return 1; +} + + +/* +** Create a tree for a non-empty capture, with a body and +** optionally with an associated Lua value (at index 'labelidx' in the +** stack) +*/ +static int capture_aux (lua_State *L, int cap, int labelidx) { + TTree *tree = newroot1sib(L, TCapture); + tree->cap = cap; + tree->key = (labelidx == 0) ? 0 : addtonewktable(L, 1, labelidx); + return 1; +} + + +/* +** Fill a tree with an empty capture, using an empty (TTrue) sibling. 
+*/
+static TTree *auxemptycap (TTree *tree, int cap) {
+  tree->tag = TCapture;
+  tree->cap = cap;
+  sib1(tree)->tag = TTrue;
+  return tree;
+}
+
+
+/*
+** Create a tree for an empty capture
+*/
+static TTree *newemptycap (lua_State *L, int cap) {
+  return auxemptycap(newtree(L, 2), cap);
+}
+
+
+/*
+** Create a tree for an empty capture with an associated Lua value
+*/
+static TTree *newemptycapkey (lua_State *L, int cap, int idx) {
+  TTree *tree = auxemptycap(newtree(L, 2), cap);
+  tree->key = addtonewktable(L, 0, idx);
+  return tree;
+}
+
+
+/*
+** Captures with syntax p / v
+** (function capture, query capture, string capture, or number capture)
+** The kind of capture is selected by the Lua type of 'v' (argument 2);
+** any other type raises an argument error.
+** For a number capture the value is stored directly in the tree's
+** 'key' field, so it must fit in [0, SHRT_MAX].
+*/
+static int lp_divcapture (lua_State *L) {
+  switch (lua_type(L, 2)) {
+    case LUA_TFUNCTION: return capture_aux(L, Cfunction, 2);
+    case LUA_TTABLE: return capture_aux(L, Cquery, 2);
+    case LUA_TSTRING: return capture_aux(L, Cstring, 2);
+    case LUA_TNUMBER: {
+      /* keep the full lua_Integer width: narrowing to 'int' before the
+         range check could wrap an out-of-range value into range */
+      lua_Integer n = lua_tointeger(L, 2);
+      TTree *tree = newroot1sib(L, TCapture);
+      /* the number is argument 2, so report it as such */
+      luaL_argcheck(L, 0 <= n && n <= SHRT_MAX, 2, "invalid number");
+      tree->cap = Cnum;
+      tree->key = (unsigned short)n;  /* checked above: fits */
+      return 1;
+    }
+    default: return luaL_argerror(L, 2, "invalid replacement value");
+  }
+}
+
+
+static int lp_substcapture (lua_State *L) {
+  return capture_aux(L, Csubst, 0);
+}
+
+
+static int lp_tablecapture (lua_State *L) {
+  return capture_aux(L, Ctable, 0);
+}
+
+
+static int lp_groupcapture (lua_State *L) {
+  if (lua_isnoneornil(L, 2))
+    return capture_aux(L, Cgroup, 0);
+  else {
+    luaL_checkstring(L, 2);
+    return capture_aux(L, Cgroup, 2);
+  }
+}
+
+
+static int lp_foldcapture (lua_State *L) {
+  luaL_checktype(L, 2, LUA_TFUNCTION);
+  return capture_aux(L, Cfold, 2);
+}
+
+
+static int lp_simplecapture (lua_State *L) {
+  return capture_aux(L, Csimple, 0);
+}
+
+
+static int lp_poscapture (lua_State *L) {
+  newemptycap(L, Cposition);
+  return 1;
+}
+
+
+static int lp_argcapture (lua_State *L) {
+  int n = (int)luaL_checkinteger(L, 1);
+  TTree *tree = newemptycap(L, Carg);
+  tree->key
= n; + luaL_argcheck(L, 0 < n && n <= SHRT_MAX, 1, "invalid argument index"); + return 1; +} + + +static int lp_backref (lua_State *L) { + luaL_checkstring(L, 1); + newemptycapkey(L, Cbackref, 1); + return 1; +} + + +/* +** Constant capture +*/ +static int lp_constcapture (lua_State *L) { + int i; + int n = lua_gettop(L); /* number of values */ + if (n == 0) /* no values? */ + newleaf(L, TTrue); /* no capture */ + else if (n == 1) + newemptycapkey(L, Cconst, 1); /* single constant capture */ + else { /* create a group capture with all values */ + TTree *tree = newtree(L, 1 + 3 * (n - 1) + 2); + newktable(L, n); /* create a 'ktable' for new tree */ + tree->tag = TCapture; + tree->cap = Cgroup; + tree->key = 0; + tree = sib1(tree); + for (i = 1; i <= n - 1; i++) { + tree->tag = TSeq; + tree->u.ps = 3; /* skip TCapture and its sibling */ + auxemptycap(sib1(tree), Cconst); + sib1(tree)->key = addtoktable(L, i); + tree = sib2(tree); + } + auxemptycap(tree, Cconst); + tree->key = addtoktable(L, i); + } + return 1; +} + + +static int lp_matchtime (lua_State *L) { + TTree *tree; + luaL_checktype(L, 2, LUA_TFUNCTION); + tree = newroot1sib(L, TRunTime); + tree->key = addtonewktable(L, 1, 2); + return 1; +} + +/* }====================================================== */ + + +/* +** {====================================================== +** Grammar - Tree generation +** ======================================================= +*/ + +/* +** push on the stack the index and the pattern for the +** initial rule of grammar at index 'arg' in the stack; +** also add that index into position table. +*/ +static void getfirstrule (lua_State *L, int arg, int postab) { + lua_rawgeti(L, arg, 1); /* access first element */ + if (lua_isstring(L, -1)) { /* is it the name of initial rule? 
*/ + lua_pushvalue(L, -1); /* duplicate it to use as key */ + lua_gettable(L, arg); /* get associated rule */ + } + else { + lua_pushinteger(L, 1); /* key for initial rule */ + lua_insert(L, -2); /* put it before rule */ + } + if (!testpattern(L, -1)) { /* initial rule not a pattern? */ + if (lua_isnil(L, -1)) + luaL_error(L, "grammar has no initial rule"); + else + luaL_error(L, "initial rule '%s' is not a pattern", lua_tostring(L, -2)); + } + lua_pushvalue(L, -2); /* push key */ + lua_pushinteger(L, 1); /* push rule position (after TGrammar) */ + lua_settable(L, postab); /* insert pair at position table */ +} + +/* +** traverse grammar at index 'arg', pushing all its keys and patterns +** into the stack. Create a new table (before all pairs key-pattern) to +** collect all keys and their associated positions in the final tree +** (the "position table"). +** Return the number of rules and (in 'totalsize') the total size +** for the new tree. +*/ +static int collectrules (lua_State *L, int arg, int *totalsize) { + int n = 1; /* to count number of rules */ + int postab = lua_gettop(L) + 1; /* index of position table */ + int size; /* accumulator for total size */ + lua_newtable(L); /* create position table */ + getfirstrule(L, arg, postab); + size = 2 + getsize(L, postab + 2); /* TGrammar + TRule + rule */ + lua_pushnil(L); /* prepare to traverse grammar table */ + while (lua_next(L, arg) != 0) { + if (lua_tonumber(L, -2) == 1 || + lua_equal(L, -2, postab + 1)) { /* initial rule? */ + lua_pop(L, 1); /* remove value (keep key for lua_next) */ + continue; + } + if (!testpattern(L, -1)) /* value is not a pattern? 
*/ + luaL_error(L, "rule '%s' is not a pattern", val2str(L, -2)); + luaL_checkstack(L, LUA_MINSTACK, "grammar has too many rules"); + lua_pushvalue(L, -2); /* push key (to insert into position table) */ + lua_pushinteger(L, size); + lua_settable(L, postab); + size += 1 + getsize(L, -1); /* update size */ + lua_pushvalue(L, -2); /* push key (for next lua_next) */ + n++; + } + *totalsize = size + 1; /* TTrue to finish list of rules */ + return n; +} + + +static void buildgrammar (lua_State *L, TTree *grammar, int frule, int n) { + int i; + TTree *nd = sib1(grammar); /* auxiliary pointer to traverse the tree */ + for (i = 0; i < n; i++) { /* add each rule into new tree */ + int ridx = frule + 2*i + 1; /* index of i-th rule */ + int rulesize; + TTree *rn = gettree(L, ridx, &rulesize); + nd->tag = TRule; + nd->key = 0; + nd->cap = i; /* rule number */ + nd->u.ps = rulesize + 1; /* point to next rule */ + memcpy(sib1(nd), rn, rulesize * sizeof(TTree)); /* copy rule */ + mergektable(L, ridx, sib1(nd)); /* merge its ktable into new one */ + nd = sib2(nd); /* move to next rule */ + } + nd->tag = TTrue; /* finish list of rules */ +} + + +/* +** Check whether a tree has potential infinite loops +*/ +static int checkloops (TTree *tree) { + tailcall: + if (tree->tag == TRep && nullable(sib1(tree))) + return 1; + else if (tree->tag == TGrammar) + return 0; /* sub-grammars already checked */ + else { + switch (numsiblings[tree->tag]) { + case 1: /* return checkloops(sib1(tree)); */ + tree = sib1(tree); goto tailcall; + case 2: + if (checkloops(sib1(tree))) return 1; + /* else return checkloops(sib2(tree)); */ + tree = sib2(tree); goto tailcall; + default: assert(numsiblings[tree->tag] == 0); return 0; + } + } +} + + +static int verifyerror (lua_State *L, int *passed, int npassed) { + int i, j; + for (i = npassed - 1; i >= 0; i--) { /* search for a repetition */ + for (j = i - 1; j >= 0; j--) { + if (passed[i] == passed[j]) { + lua_rawgeti(L, -1, passed[i]); /* get rule's key */ + 
return luaL_error(L, "rule '%s' may be left recursive", val2str(L, -1)); + } + } + } + return luaL_error(L, "too many left calls in grammar"); +} + + +/* +** Check whether a rule can be left recursive; raise an error in that +** case; otherwise return 1 iff pattern is nullable. Assume ktable at +** the top of the stack. +*/ +static int verifyrule (lua_State *L, TTree *tree, int *passed, int npassed, + int nullable) { + tailcall: + switch (tree->tag) { + case TChar: case TSet: case TAny: + case TFalse: + return nullable; /* cannot pass from here */ + case TTrue: + case TBehind: /* look-behind cannot have calls */ + return 1; + case TNot: case TAnd: case TRep: + /* return verifyrule(L, sib1(tree), passed, npassed, 1); */ + tree = sib1(tree); nullable = 1; goto tailcall; + case TCapture: case TRunTime: + /* return verifyrule(L, sib1(tree), passed, npassed); */ + tree = sib1(tree); goto tailcall; + case TCall: + /* return verifyrule(L, sib2(tree), passed, npassed); */ + tree = sib2(tree); goto tailcall; + case TSeq: /* only check 2nd child if first is nullable */ + if (!verifyrule(L, sib1(tree), passed, npassed, 0)) + return nullable; + /* else return verifyrule(L, sib2(tree), passed, npassed); */ + tree = sib2(tree); goto tailcall; + case TChoice: /* must check both children */ + nullable = verifyrule(L, sib1(tree), passed, npassed, nullable); + /* return verifyrule(L, sib2(tree), passed, npassed, nullable); */ + tree = sib2(tree); goto tailcall; + case TRule: + if (npassed >= MAXRULES) + return verifyerror(L, passed, npassed); + else { + passed[npassed++] = tree->key; + /* return verifyrule(L, sib1(tree), passed, npassed); */ + tree = sib1(tree); goto tailcall; + } + case TGrammar: + return nullable(tree); /* sub-grammar cannot be left recursive */ + default: assert(0); return 0; + } +} + + +static void verifygrammar (lua_State *L, TTree *grammar) { + int passed[MAXRULES]; + TTree *rule; + /* check left-recursive rules */ + for (rule = sib1(grammar); rule->tag == 
TRule; rule = sib2(rule)) { + if (rule->key == 0) continue; /* unused rule */ + verifyrule(L, sib1(rule), passed, 0, 0); + } + assert(rule->tag == TTrue); + /* check infinite loops inside rules */ + for (rule = sib1(grammar); rule->tag == TRule; rule = sib2(rule)) { + if (rule->key == 0) continue; /* unused rule */ + if (checkloops(sib1(rule))) { + lua_rawgeti(L, -1, rule->key); /* get rule's key */ + luaL_error(L, "empty loop in rule '%s'", val2str(L, -1)); + } + } + assert(rule->tag == TTrue); +} + + +/* +** Give a name for the initial rule if it is not referenced +*/ +static void initialrulename (lua_State *L, TTree *grammar, int frule) { + if (sib1(grammar)->key == 0) { /* initial rule is not referenced? */ + int n = lua_objlen(L, -1) + 1; /* index for name */ + lua_pushvalue(L, frule); /* rule's name */ + lua_rawseti(L, -2, n); /* ktable was on the top of the stack */ + sib1(grammar)->key = n; + } +} + + +static TTree *newgrammar (lua_State *L, int arg) { + int treesize; + int frule = lua_gettop(L) + 2; /* position of first rule's key */ + int n = collectrules(L, arg, &treesize); + TTree *g = newtree(L, treesize); + luaL_argcheck(L, n <= MAXRULES, arg, "grammar has too many rules"); + g->tag = TGrammar; g->u.n = n; + lua_newtable(L); /* create 'ktable' */ + lua_setfenv(L, -2); + buildgrammar(L, g, frule, n); + lua_getfenv(L, -1); /* get 'ktable' for new tree */ + finalfix(L, frule - 1, g, sib1(g)); + initialrulename(L, g, frule); + verifygrammar(L, g); + lua_pop(L, 1); /* remove 'ktable' */ + lua_insert(L, -(n * 2 + 2)); /* move new table to proper position */ + lua_pop(L, n * 2 + 1); /* remove position table + rule pairs */ + return g; /* new table at the top of the stack */ +} + +/* }====================================================== */ + + +static Instruction *prepcompile (lua_State *L, Pattern *p, int idx) { + lua_getfenv(L, idx); /* push 'ktable' (may be used by 'finalfix') */ + finalfix(L, 0, NULL, p->tree); + lua_pop(L, 1); /* remove 'ktable' */ + 
return compile(L, p);
+}
+
+
+static int lp_printtree (lua_State *L) {
+  TTree *tree = getpatt(L, 1, NULL);
+  int c = lua_toboolean(L, 2);
+  if (c) {
+    lua_getfenv(L, 1);  /* push 'ktable' (may be used by 'finalfix') */
+    finalfix(L, 0, NULL, tree);
+    lua_pop(L, 1);  /* remove 'ktable' */
+  }
+  printktable(L, 1);
+  printtree(tree, 0);
+  return 0;
+}
+
+
+static int lp_printcode (lua_State *L) {
+  Pattern *p = getpattern(L, 1);
+  printktable(L, 1);
+  if (p->code == NULL)  /* not compiled yet? */
+    prepcompile(L, p, 1);
+  printpatt(p->code, p->codesize);
+  return 0;
+}
+
+
+/*
+** Get the initial position for the match, interpreting negative
+** values from the end of the subject.
+** Reads the optional integer at stack index 3 (default 1) and returns
+** a 0-based offset in [0, len]: positive values past the end are
+** cropped to 'len', negative values before the start are cropped to 0.
+*/
+static size_t initposition (lua_State *L, size_t len) {
+  lua_Integer ii = luaL_optinteger(L, 3, 1);
+  if (ii > 0) {  /* positive index? */
+    if ((size_t)ii <= len)  /* inside the string? */
+      return (size_t)ii - 1;  /* return it (corrected to 0-base) */
+    else return len;  /* crop at the end */
+  }
+  else {  /* negative index */
+    /* magnitude of 'ii' computed in unsigned arithmetic: negating the
+       minimum lua_Integer as a signed value is undefined behavior */
+    size_t back = (size_t)0 - (size_t)ii;
+    if (back <= len)  /* inside the string? */
+      return len - back;  /* return position from the end */
+    else return 0;  /* crop at the beginning */
+  }
+}
+
+
+/*
+** Main match function
+*/
+static int lp_match (lua_State *L) {
+  Capture capture[INITCAPSIZE];
+  const char *r;
+  size_t l;
+  Pattern *p = (getpatt(L, 1, NULL), getpattern(L, 1));
+  Instruction *code = (p->code != NULL) ?
p->code : prepcompile(L, p, 1); + const char *s = luaL_checklstring(L, SUBJIDX, &l); + size_t i = initposition(L, l); + int ptop = lua_gettop(L); + lua_pushnil(L); /* initialize subscache */ + lua_pushlightuserdata(L, capture); /* initialize caplistidx */ + lua_getfenv(L, 1); /* initialize penvidx */ + r = match(L, s, s + i, s + l, code, capture, ptop); + if (r == NULL) { + lua_pushnil(L); + return 1; + } + return getcaptures(L, s, r, ptop); +} + + + +/* +** {====================================================== +** Library creation and functions not related to matching +** ======================================================= +*/ + +static int lp_setmax (lua_State *L) { + luaL_optinteger(L, 1, -1); + lua_settop(L, 1); + lua_setfield(L, LUA_REGISTRYINDEX, MAXSTACKIDX); + return 0; +} + + +static int lp_version (lua_State *L) { + lua_pushstring(L, VERSION); + return 1; +} + + +static int lp_type (lua_State *L) { + if (testpattern(L, 1)) + lua_pushliteral(L, "pattern"); + else + lua_pushnil(L); + return 1; +} + + +int lp_gc (lua_State *L) { + Pattern *p = getpattern(L, 1); + if (p->codesize > 0) + realloccode(L, p, 0); + return 0; +} + + +static void createcat (lua_State *L, const char *catname, int (catf) (int)) { + TTree *t = newcharset(L); + int i; + for (i = 0; i <= UCHAR_MAX; i++) + if (catf(i)) + setchar(treebuffer(t), i); + lua_setfield(L, -2, catname); +} + +#include + +static int lp_locale (lua_State *L) { + // added these lines about the locale otherwise the re test fails + // not sure why, maybe the Lua DLL and the main code + // have different locales. 
+ char old_locale[256]; + strcpy(old_locale, setlocale(LC_CTYPE, NULL)); /* store the locale */ + setlocale (LC_CTYPE, "C"); // fudge by Nick Gammon + + if (lua_isnoneornil(L, 1)) { + lua_settop(L, 0); + lua_createtable(L, 0, 12); + } + else { + luaL_checktype(L, 1, LUA_TTABLE); + lua_settop(L, 1); + } + + createcat(L, "alnum", isalnum); + createcat(L, "alpha", isalpha); + createcat(L, "cntrl", iscntrl); + createcat(L, "digit", isdigit); + createcat(L, "graph", isgraph); + createcat(L, "lower", islower); + createcat(L, "print", isprint); + createcat(L, "punct", ispunct); + createcat(L, "space", isspace); + createcat(L, "upper", isupper); + createcat(L, "xdigit", isxdigit); + + setlocale(LC_CTYPE, old_locale); /* restore the old locale */ + return 1; +} + + +static struct luaL_Reg pattreg[] = { + {"ptree", lp_printtree}, + {"pcode", lp_printcode}, + {"match", lp_match}, + {"B", lp_behind}, + {"V", lp_V}, + {"C", lp_simplecapture}, + {"Cc", lp_constcapture}, + {"Cmt", lp_matchtime}, + {"Cb", lp_backref}, + {"Carg", lp_argcapture}, + {"Cp", lp_poscapture}, + {"Cs", lp_substcapture}, + {"Ct", lp_tablecapture}, + {"Cf", lp_foldcapture}, + {"Cg", lp_groupcapture}, + {"P", lp_P}, + {"S", lp_set}, + {"R", lp_range}, + {"locale", lp_locale}, + {"version", lp_version}, + {"setmaxstack", lp_setmax}, + {"type", lp_type}, + {NULL, NULL} +}; + + +static struct luaL_Reg metareg[] = { + {"__mul", lp_seq}, + {"__add", lp_choice}, + {"__pow", lp_star}, + {"__gc", lp_gc}, + {"__len", lp_and}, + {"__div", lp_divcapture}, + {"__unm", lp_not}, + {"__sub", lp_sub}, + {NULL, NULL} +}; + + +int luaopen_lpeg (lua_State *L); +int luaopen_lpeg (lua_State *L) { + luaL_newmetatable(L, PATTERN_T); + lua_pushnumber(L, MAXBACK); /* initialize maximum backtracking */ + lua_setfield(L, LUA_REGISTRYINDEX, MAXSTACKIDX); + luaL_register(L, NULL, metareg); + luaL_register(L, "lpeg", pattreg); + lua_pushvalue(L, -1); + lua_setfield(L, -3, "__index"); + return 1; +} + +/* 
}====================================================== */ diff --git a/scripting/lpeg/lptree.h b/scripting/lpeg/lptree.h new file mode 100644 index 00000000..b69528a6 --- /dev/null +++ b/scripting/lpeg/lptree.h @@ -0,0 +1,77 @@ +/* +** $Id: lptree.h,v 1.2 2013/03/24 13:51:12 roberto Exp $ +*/ + +#if !defined(lptree_h) +#define lptree_h + + +#include "lptypes.h" + + +/* +** types of trees +*/ +typedef enum TTag { + TChar = 0, TSet, TAny, /* standard PEG elements */ + TTrue, TFalse, + TRep, + TSeq, TChoice, + TNot, TAnd, + TCall, + TOpenCall, + TRule, /* sib1 is rule's pattern, sib2 is 'next' rule */ + TGrammar, /* sib1 is initial (and first) rule */ + TBehind, /* match behind */ + TCapture, /* regular capture */ + TRunTime /* run-time capture */ +} TTag; + +/* number of siblings for each tree */ +extern const byte numsiblings[]; + + +/* +** Tree trees +** The first sibling of a tree (if there is one) is immediately after +** the tree. A reference to a second sibling (ps) is its position +** relative to the position of the tree itself. A key in ktable +** uses the (unique) address of the original tree that created that +** entry. NULL means no data. 
+*/ +typedef struct TTree { + byte tag; + byte cap; /* kind of capture (if it is a capture) */ + unsigned short key; /* key in ktable for Lua data (0 if no key) */ + union { + int ps; /* occasional second sibling */ + int n; /* occasional counter */ + } u; +} TTree; + + +/* +** A complete pattern has its tree plus, if already compiled, +** its corresponding code +*/ +typedef struct Pattern { + union Instruction *code; + int codesize; + TTree tree[1]; +} Pattern; + + +/* number of siblings for each tree */ +extern const byte numsiblings[]; + +/* access to siblings */ +#define sib1(t) ((t) + 1) +#define sib2(t) ((t) + (t)->u.ps) + + + + + + +#endif + diff --git a/scripting/lpeg/lptypes.h b/scripting/lpeg/lptypes.h new file mode 100644 index 00000000..0ee76cdc --- /dev/null +++ b/scripting/lpeg/lptypes.h @@ -0,0 +1,161 @@ +/* +** $Id: lptypes.h,v 1.11 2015/03/04 16:38:00 roberto Exp $ +** LPeg - PEG pattern matching for Lua +** Copyright 2007-2014, Lua.org & PUC-Rio (see 'lpeg.html' for license) +** written by Roberto Ierusalimschy +*/ + +#if !defined(lptypes_h) +#define lptypes_h + + +/* + + // Omitted by NJG: Not used + +#if !defined(LPEG_DEBUG) +#define NDEBUG +#endif + +*/ + +#include +#include + +#ifdef LUA_52 + #include "..\..\..\lua52\src\lua.h" + #include "..\..\..\lua52\src\lauxlib.h" +#else + #include "..\..\lua.h" + #include "..\..\lauxlib.h" +#endif + + +#define VERSION "0.12.2" + + +#define PATTERN_T "lpeg-pattern" +#define MAXSTACKIDX "lpeg-maxstack" + + +/* +** compatibility with Lua 5.2 +*/ +#if (LUA_VERSION_NUM >= 502) + +#undef lua_equal +#define lua_equal(L,idx1,idx2) lua_compare(L,(idx1),(idx2),LUA_OPEQ) + +#undef lua_getfenv +#define lua_getfenv lua_getuservalue +#undef lua_setfenv +#define lua_setfenv lua_setuservalue + +#undef lua_objlen +#define lua_objlen lua_rawlen + +#undef luaL_register +#define luaL_register(L,n,f) \ + { if ((n) == NULL) luaL_setfuncs(L,f,0); else luaL_newlib(L,f); } + +#endif + + +/* default maximum size for 
call/backtrack stack */ +#if !defined(MAXBACK) +#define MAXBACK 100 +#endif + + +/* maximum number of rules in a grammar */ +#if !defined(MAXRULES) +#define MAXRULES 1000 +#endif + + + +/* initial size for capture's list */ +#define INITCAPSIZE 32 + + +/* index, on Lua stack, for subject */ +#define SUBJIDX 2 + +/* number of fixed arguments to 'match' (before capture arguments) */ +#define FIXEDARGS 3 + +/* index, on Lua stack, for capture list */ +#define caplistidx(ptop) ((ptop) + 2) + +/* index, on Lua stack, for pattern's ktable */ +#define ktableidx(ptop) ((ptop) + 3) + +/* index, on Lua stack, for backtracking stack */ +#define stackidx(ptop) ((ptop) + 4) + + + +typedef unsigned char byte; + + +#define BITSPERCHAR 8 + +#define CHARSETSIZE ((UCHAR_MAX/BITSPERCHAR) + 1) + + + +typedef struct Charset { + byte cs[CHARSETSIZE]; +} Charset; + + + +#define loopset(v,b) { int v; for (v = 0; v < CHARSETSIZE; v++) {b;} } + +/* access to charset */ +#define treebuffer(t) ((byte *)((t) + 1)) + +/* number of slots needed for 'n' bytes */ +#define bytes2slots(n) (((n) - 1) / sizeof(TTree) + 1) + +/* set 'b' bit in charset 'cs' */ +#define setchar(cs,b) ((cs)[(b) >> 3] |= (1 << ((b) & 7))) + + +/* +** in capture instructions, 'kind' of capture and its offset are +** packed in field 'aux', 4 bits for each +*/ +#define getkind(op) ((op)->i.aux & 0xF) +#define getoff(op) (((op)->i.aux >> 4) & 0xF) +#define joinkindoff(k,o) ((k) | ((o) << 4)) + +#define MAXOFF 0xF +#define MAXAUX 0xFF + + +/* maximum number of bytes to look behind */ +#define MAXBEHIND MAXAUX + + +/* maximum size (in elements) for a pattern */ +#define MAXPATTSIZE (SHRT_MAX - 10) + + +/* size (in elements) for an instruction plus extra l bytes */ +#define instsize(l) (((l) + sizeof(Instruction) - 1)/sizeof(Instruction) + 1) + + +/* size (in elements) for a ISet instruction */ +#define CHARSETINSTSIZE instsize(CHARSETSIZE) + +/* size (in elements) for a IFunc instruction */ +#define funcinstsize(p) ((p)->i.aux + 
2) + + + +#define testchar(st,c) (((int)(st)[((c) >> 3)] & (1 << ((c) & 7)))) + + +#endif + diff --git a/scripting/lpeg/lpvm.c b/scripting/lpeg/lpvm.c new file mode 100644 index 00000000..e45df1cf --- /dev/null +++ b/scripting/lpeg/lpvm.c @@ -0,0 +1,363 @@ +#pragma warning( disable : 4244) // conversion from 'int ' to 'short ', possible loss of data + +/* +** $Id: lpvm.c,v 1.5 2013/04/12 16:29:49 roberto Exp $ +** Copyright 2007, Lua.org & PUC-Rio (see 'lpeg.html' for license) +*/ + +#include +#include + + +#ifdef LUA_52 + #include "..\..\..\lua52\src\lua.h" + #include "..\..\..\lua52\src\lauxlib.h" +#else + #include "..\..\lua.h" + #include "..\..\lauxlib.h" +#endif + + +#include "lpcap.h" +#include "lptypes.h" +#include "lpvm.h" +#include "lpprint.h" + + +/* initial size for call/backtrack stack */ +#if !defined(INITBACK) +#define INITBACK 100 +#endif + + +#define getoffset(p) (((p) + 1)->offset) + +static const Instruction giveup = {{IGiveup, 0, 0}}; + + +/* +** {====================================================== +** Virtual Machine +** ======================================================= +*/ + + +typedef struct Stack { + const char *s; /* saved position (or NULL for calls) */ + const Instruction *p; /* next instruction */ + int caplevel; +} Stack; + + +#define getstackbase(L, ptop) ((Stack *)lua_touserdata(L, stackidx(ptop))) + + +/* +** Double the size of the array of captures +*/ +static Capture *doublecap (lua_State *L, Capture *cap, int captop, int ptop) { + Capture *newc; + if (captop >= INT_MAX/((int)sizeof(Capture) * 2)) + luaL_error(L, "too many captures"); + newc = (Capture *)lua_newuserdata(L, captop * 2 * sizeof(Capture)); + memcpy(newc, cap, captop * sizeof(Capture)); + lua_replace(L, caplistidx(ptop)); + return newc; +} + + +/* +** Double the size of the stack +*/ +static Stack *doublestack (lua_State *L, Stack **stacklimit, int ptop) { + Stack *stack = getstackbase(L, ptop); + Stack *newstack; + int n = *stacklimit - stack; /* current stack 
size */ + int max, newn; + lua_getfield(L, LUA_REGISTRYINDEX, MAXSTACKIDX); + max = lua_tointeger(L, -1); /* maximum allowed size */ + lua_pop(L, 1); + if (n >= max) /* already at maximum size? */ + luaL_error(L, "too many pending calls/choices"); + newn = 2 * n; /* new size */ + if (newn > max) newn = max; + newstack = (Stack *)lua_newuserdata(L, newn * sizeof(Stack)); + memcpy(newstack, stack, n * sizeof(Stack)); + lua_replace(L, stackidx(ptop)); + *stacklimit = newstack + newn; + return newstack + n; /* return next position */ +} + + +/* +** Interpret the result of a dynamic capture: false -> fail; +** true -> keep current position; number -> next position. +** Return new subject position. 'fr' is stack index where +** is the result; 'curr' is current subject position; 'limit' +** is subject's size. +*/ +static int resdyncaptures (lua_State *L, int fr, int curr, int limit) { + lua_Integer res; + if (!lua_toboolean(L, fr)) { /* false value? */ + lua_settop(L, fr - 1); /* remove results */ + return -1; /* and fail */ + } + else if (lua_isboolean(L, fr)) /* true? */ + res = curr; /* keep current position */ + else { + res = lua_tointeger(L, fr) - 1; /* new position */ + if (res < curr || res > limit) + luaL_error(L, "invalid position returned by match-time capture"); + } + lua_remove(L, fr); /* remove first result (offset) */ + return res; +} + + +/* +** Add capture values returned by a dynamic capture to the capture list +** 'base', nested inside a group capture. 'fd' indexes the first capture +** value, 'n' is the number of values (at least 1). 
+*/ +static void adddyncaptures (const char *s, Capture *base, int n, int fd) { + int i; + /* Cgroup capture is already there */ + assert(base[0].kind == Cgroup && base[0].siz == 0); + base[0].idx = 0; /* make it an anonymous group */ + for (i = 1; i <= n; i++) { /* add runtime captures */ + base[i].kind = Cruntime; + base[i].siz = 1; /* mark it as closed */ + base[i].idx = fd + i - 1; /* stack index of capture value */ + base[i].s = s; + } + base[i].kind = Cclose; /* close group */ + base[i].siz = 1; + base[i].s = s; +} + + +/* +** Remove dynamic captures from the Lua stack (called in case of failure) +*/ +static int removedyncap (lua_State *L, Capture *capture, + int level, int last) { + int id = finddyncap(capture + level, capture + last); /* index of 1st cap. */ + int top = lua_gettop(L); + if (id == 0) return 0; /* no dynamic captures? */ + lua_settop(L, id - 1); /* remove captures */ + return top - id + 1; /* number of values removed */ +} + + +/* +** Opcode interpreter +*/ +const char *match (lua_State *L, const char *o, const char *s, const char *e, + Instruction *op, Capture *capture, int ptop) { + Stack stackbase[INITBACK]; + Stack *stacklimit = stackbase + INITBACK; + Stack *stack = stackbase; /* point to first empty slot in stack */ + int capsize = INITCAPSIZE; + int captop = 0; /* point to first empty slot in captures */ + int ndyncap = 0; /* number of dynamic captures (in Lua stack) */ + const Instruction *p = op; /* current instruction */ + stack->p = &giveup; stack->s = s; stack->caplevel = 0; stack++; + lua_pushlightuserdata(L, stackbase); + for (;;) { +#if defined(DEBUG) + printf("s: |%s| stck:%d, dyncaps:%d, caps:%d ", + s, stack - getstackbase(L, ptop), ndyncap, captop); + printinst(op, p); + printcaplist(capture, capture + captop); +#endif + assert(stackidx(ptop) + ndyncap == lua_gettop(L) && ndyncap <= captop); + switch ((Opcode)p->i.code) { + case IEnd: { + assert(stack == getstackbase(L, ptop) + 1); + capture[captop].kind = Cclose; + 
capture[captop].s = NULL; + return s; + } + case IGiveup: { + assert(stack == getstackbase(L, ptop)); + return NULL; + } + case IRet: { + assert(stack > getstackbase(L, ptop) && (stack - 1)->s == NULL); + p = (--stack)->p; + continue; + } + case IAny: { + if (s < e) { p++; s++; } + else goto fail; + continue; + } + case ITestAny: { + if (s < e) p += 2; + else p += getoffset(p); + continue; + } + case IChar: { + if ((byte)*s == p->i.aux && s < e) { p++; s++; } + else goto fail; + continue; + } + case ITestChar: { + if ((byte)*s == p->i.aux && s < e) p += 2; + else p += getoffset(p); + continue; + } + case ISet: { + int c = (byte)*s; + if (testchar((p+1)->buff, c) && s < e) + { p += CHARSETINSTSIZE; s++; } + else goto fail; + continue; + } + case ITestSet: { + int c = (byte)*s; + if (testchar((p + 2)->buff, c) && s < e) + p += 1 + CHARSETINSTSIZE; + else p += getoffset(p); + continue; + } + case IBehind: { + int n = p->i.aux; + if (n > s - o) goto fail; + s -= n; p++; + continue; + } + case ISpan: { + for (; s < e; s++) { + int c = (byte)*s; + if (!testchar((p+1)->buff, c)) break; + } + p += CHARSETINSTSIZE; + continue; + } + case IJmp: { + p += getoffset(p); + continue; + } + case IChoice: { + if (stack == stacklimit) + stack = doublestack(L, &stacklimit, ptop); + stack->p = p + getoffset(p); + stack->s = s; + stack->caplevel = captop; + stack++; + p += 2; + continue; + } + case ICall: { + if (stack == stacklimit) + stack = doublestack(L, &stacklimit, ptop); + stack->s = NULL; + stack->p = p + 2; /* save return address */ + stack++; + p += getoffset(p); + continue; + } + case ICommit: { + assert(stack > getstackbase(L, ptop) && (stack - 1)->s != NULL); + stack--; + p += getoffset(p); + continue; + } + case IPartialCommit: { + assert(stack > getstackbase(L, ptop) && (stack - 1)->s != NULL); + (stack - 1)->s = s; + (stack - 1)->caplevel = captop; + p += getoffset(p); + continue; + } + case IBackCommit: { + assert(stack > getstackbase(L, ptop) && (stack - 1)->s != 
NULL); + s = (--stack)->s; + captop = stack->caplevel; + p += getoffset(p); + continue; + } + case IFailTwice: + assert(stack > getstackbase(L, ptop)); + stack--; + /* go through */ + case IFail: + fail: { /* pattern failed: try to backtrack */ + do { /* remove pending calls */ + assert(stack > getstackbase(L, ptop)); + s = (--stack)->s; + } while (s == NULL); + if (ndyncap > 0) /* is there matchtime captures? */ + ndyncap -= removedyncap(L, capture, stack->caplevel, captop); + captop = stack->caplevel; + p = stack->p; + continue; + } + case ICloseRunTime: { + CapState cs; + int rem, res, n; + int fr = lua_gettop(L) + 1; /* stack index of first result */ + cs.s = o; cs.L = L; cs.ocap = capture; cs.ptop = ptop; + n = runtimecap(&cs, capture + captop, s, &rem); /* call function */ + captop -= n; /* remove nested captures */ + fr -= rem; /* 'rem' items were popped from Lua stack */ + res = resdyncaptures(L, fr, s - o, e - o); /* get result */ + if (res == -1) /* fail? */ + goto fail; + s = o + res; /* else update current position */ + n = lua_gettop(L) - fr + 1; /* number of new captures */ + ndyncap += n - rem; /* update number of dynamic captures */ + if (n > 0) { /* any new capture? 
*/ + if ((captop += n + 2) >= capsize) { + capture = doublecap(L, capture, captop, ptop); + capsize = 2 * captop; + } + /* add new captures to 'capture' list */ + adddyncaptures(s, capture + captop - n - 2, n, fr); + } + p++; + continue; + } + case ICloseCapture: { + const char *s1 = s; + assert(captop > 0); + /* if possible, turn capture into a full capture */ + if (capture[captop - 1].siz == 0 && + s1 - capture[captop - 1].s < UCHAR_MAX) { + capture[captop - 1].siz = s1 - capture[captop - 1].s + 1; + p++; + continue; + } + else { + capture[captop].siz = 1; /* mark entry as closed */ + capture[captop].s = s; + goto pushcapture; + } + } + case IOpenCapture: + capture[captop].siz = 0; /* mark entry as open */ + capture[captop].s = s; + goto pushcapture; + case IFullCapture: + capture[captop].siz = getoff(p) + 1; /* save capture size */ + capture[captop].s = s - getoff(p); + /* goto pushcapture; */ + pushcapture: { + capture[captop].idx = p->i.key; + capture[captop].kind = getkind(p); + if (++captop >= capsize) { + capture = doublecap(L, capture, captop, ptop); + capsize = 2 * captop; + } + p++; + continue; + } + default: assert(0); return NULL; + } + } +} + +/* }====================================================== */ + + diff --git a/scripting/lpeg/lpvm.h b/scripting/lpeg/lpvm.h new file mode 100644 index 00000000..757b9e13 --- /dev/null +++ b/scripting/lpeg/lpvm.h @@ -0,0 +1,58 @@ +/* +** $Id: lpvm.h,v 1.3 2014/02/21 13:06:41 roberto Exp $ +*/ + +#if !defined(lpvm_h) +#define lpvm_h + +#include "lpcap.h" + + +/* Virtual Machine's instructions */ +typedef enum Opcode { + IAny, /* if no char, fail */ + IChar, /* if char != aux, fail */ + ISet, /* if char not in buff, fail */ + ITestAny, /* in no char, jump to 'offset' */ + ITestChar, /* if char != aux, jump to 'offset' */ + ITestSet, /* if char not in buff, jump to 'offset' */ + ISpan, /* read a span of chars in buff */ + IBehind, /* walk back 'aux' characters (fail if not possible) */ + IRet, /* return from a rule 
*/ + IEnd, /* end of pattern */ + IChoice, /* stack a choice; next fail will jump to 'offset' */ + IJmp, /* jump to 'offset' */ + ICall, /* call rule at 'offset' */ + IOpenCall, /* call rule number 'key' (must be closed to a ICall) */ + ICommit, /* pop choice and jump to 'offset' */ + IPartialCommit, /* update top choice to current position and jump */ + IBackCommit, /* "fails" but jump to its own 'offset' */ + IFailTwice, /* pop one choice and then fail */ + IFail, /* go back to saved state on choice and jump to saved offset */ + IGiveup, /* internal use */ + IFullCapture, /* complete capture of last 'off' chars */ + IOpenCapture, /* start a capture */ + ICloseCapture, + ICloseRunTime +} Opcode; + + + +typedef union Instruction { + struct Inst { + byte code; + byte aux; + short key; + } i; + int offset; + byte buff[1]; +} Instruction; + + +void printpatt (Instruction *p, int n); +const char *match (lua_State *L, const char *o, const char *s, const char *e, + Instruction *op, Capture *capture, int ptop); + + +#endif +