Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with
or
.
Download ZIP
Browse files

Upgrade lpeg to version 0.11

  • Loading branch information...
commit 3912610ec9262abf920a3b49e21520bf105e18d6 1 parent de48b6b
@mkottman mkottman authored
View
2  CMakeLists.txt
@@ -9,7 +9,7 @@ cmake_minimum_required ( VERSION 2.8 )
include ( cmake/dist.cmake )
include ( lua )
-install_lua_module ( lpeg lpeg.c )
+install_lua_module ( lpeg lpvm.c lpcap.c lptree.c lpcode.c lpprint.c )
install_lua_module ( re re.lua )
install_doc ( lpeg.html re.html lpeg-128.gif )
install_data ( HISTORY )
View
7 HISTORY
@@ -1,5 +1,12 @@
HISTORY for LPeg 0.10
+* Changes from version 0.10 to 0.11
+ -------------------------------
+ + complete reimplementation of the code generator
+ + new syntax for table captures
+ + new functions in module 're'
+ + other small improvements
+
* Changes from version 0.9 to 0.10
-------------------------------
+ backtrack stack has configurable size
View
2  dist.info
@@ -1,7 +1,7 @@
--- This file is part of LuaDist project
name = "lpeg"
-version = "0.10.2"
+version = "0.11"
desc = "Parsing Expression Grammars For Lua"
author = "Roberto Ierusalimschy"
View
537 lpcap.c
@@ -0,0 +1,537 @@
+/*
+** $Id: lpcap.c,v 1.4 2013/03/21 20:25:12 roberto Exp $
+** Copyright 2007, Lua.org & PUC-Rio (see 'lpeg.html' for license)
+*/
+
+#include "lua.h"
+#include "lauxlib.h"
+
+#include "lpcap.h"
+#include "lptypes.h"
+
+
+#define captype(cap) ((cap)->kind) /* kind tag of a capture entry */
+
+#define isclosecap(cap) (captype(cap) == Cclose) /* entry is a close marker? */
+
+#define closeaddr(c) ((c)->s + (c)->siz - 1) /* end of a full capture ('siz' is size + 1) */
+
+#define isfullcap(cap) ((cap)->siz != 0) /* full capture (non-zero 'siz')? */
+
+#define getfromktable(cs,v) lua_rawgeti((cs)->L, ktableidx((cs)->ptop), v) /* push ktable[v] */
+
+#define pushluaval(cs) getfromktable(cs, (cs)->cap->idx) /* push ktable value of current capture */
+
+
+
+/*
+** Make sure the cache slot for Lua values (stack index 'ptop + 1')
+** holds entry 'v' of the running pattern's ktable, fetching it only
+** when a different entry is currently cached. Returns the stack
+** index of that slot.
+*/
+static int updatecache (CapState *cs, int v) {
+  const int slot = cs->ptop + 1;  /* reserved stack position of the cache */
+  if (cs->valuecached == v)
+    return slot;  /* already there; nothing to do */
+  getfromktable(cs, v);  /* push ktable[v]... */
+  lua_replace(cs->L, slot);  /* ...and move it into the cache slot */
+  cs->valuecached = v;  /* remember which entry is cached */
+  return slot;
+}
+
+
+static int pushcapture (CapState *cs);
+
+
+/*
+** Walk backward from a close entry until reaching the open capture
+** that it closes, skipping full captures and balanced open/close
+** pairs found on the way.
+*/
+static Capture *findopen (Capture *cap) {
+  int pending = 0;  /* closes seen whose opens still have to be skipped */
+  for (cap--; ; cap--) {
+    if (isclosecap(cap))
+      pending++;  /* a nested close: its open must be skipped too */
+    else if (isfullcap(cap))
+      continue;  /* full captures are self-contained */
+    else if (pending == 0)
+      return cap;  /* this open matches the original close */
+    else
+      pending--;  /* open of a nested capture */
+  }
+}
+
+
+/*
+** Advance 'cs->cap' to the entry that follows the current capture,
+** jumping over everything nested inside it.
+*/
+static void nextcap (CapState *cs) {
+  Capture *p = cs->cap;
+  if (!isfullcap(p)) {  /* open capture: must find its close */
+    int depth = 0;  /* nested opens still waiting for a close */
+    for (p++; ; p++) {
+      if (isclosecap(p)) {
+        if (depth == 0)
+          break;  /* close of the original capture */
+        depth--;
+      }
+      else if (!isfullcap(p))
+        depth++;
+    }
+  }
+  cs->cap = p + 1;  /* step over the close (or over the full capture) */
+}
+
+
+/*
+** Push on the Lua stack every value produced by the captures nested
+** inside the current one and return how many were pushed. When
+** 'addextra' is true the whole match is pushed after those values;
+** the whole match is also pushed when nothing else was produced, so
+** the result is always at least 1. Leaves 'cs->cap' after the capture.
+*/
+static int pushnestedvalues (CapState *cs, int addextra) {
+  lua_State *L = cs->L;
+  Capture *open = cs->cap++;  /* capture being expanded */
+  int count = 0;
+  if (isfullcap(open)) {  /* nothing nested: value is the match itself */
+    lua_pushlstring(L, open->s, open->siz - 1);
+    return 1;
+  }
+  while (!isclosecap(cs->cap))  /* evaluate each nested capture */
+    count += pushcapture(cs);
+  if (addextra || count == 0) {  /* whole match wanted (or needed)? */
+    lua_pushlstring(L, open->s, cs->cap->s - open->s);
+    count++;
+  }
+  cs->cap++;  /* step over the close entry */
+  return count;
+}
+
+
+/*
+** Evaluate the nested captures of the current capture but keep only
+** the first value produced on the stack.
+*/
+static void pushonenestedvalue (CapState *cs) {
+  int extra = pushnestedvalues(cs, 0) - 1;  /* values beyond the first */
+  if (extra > 0)
+    lua_pop(cs->L, extra);
+}
+
+
+/*
+** Try to find a named group capture with the name given at the top of
+** the stack; goes backward from 'cap' down to the start of the capture
+** list ('cs->ocap'). On success, pops the name and returns the group's
+** capture entry; otherwise raises a Lua error (the name is still on
+** the stack, where the error message reads it).
+*/
+static Capture *findback (CapState *cs, Capture *cap) {
+ lua_State *L = cs->L;
+ while (cap-- > cs->ocap) { /* repeat until end of list */
+ if (isclosecap(cap))
+ cap = findopen(cap); /* skip nested captures */
+ else if (!isfullcap(cap))
+ continue; /* opening an enclosing capture: skip and get previous */
+ if (captype(cap) == Cgroup) {
+ getfromktable(cs, cap->idx); /* get group name */
+ if (lua_equal(L, -2, -1)) { /* right group? */
+ lua_pop(L, 2); /* remove reference name and group name */
+ return cap;
+ }
+ else lua_pop(L, 1); /* remove group name */
+ }
+ }
+ luaL_error(L, "back reference '%s' not found", lua_tostring(L, -1));
+ return NULL; /* to avoid warnings */
+}
+
+
+/*
+** Back-reference capture: pushes the values of the previous group
+** whose name is stored in the current capture. Returns the number of
+** values pushed and leaves 'cs->cap' right after the back reference.
+*/
+static int backrefcap (CapState *cs) {
+  Capture *here = cs->cap;  /* the back-reference entry itself */
+  int pushed;
+  pushluaval(cs);  /* name being referenced */
+  cs->cap = findback(cs, here);  /* jump to the group with that name */
+  pushed = pushnestedvalues(cs, 0);  /* produce the group's values */
+  cs->cap = here + 1;  /* resume after the back reference */
+  return pushed;
+}
+
+
+/*
+** Table capture: creates a new table and populates it with nested
+** captures. A named group (Cgroup with non-zero 'idx') is stored under
+** its name, using only its first value; the values of every other
+** nested capture are stored at consecutive integer keys starting at 1.
+** Always leaves exactly one value (the table) on the stack.
+*/
+static int tablecap (CapState *cs) {
+ lua_State *L = cs->L;
+ int n = 0; /* number of values already stored at integer keys */
+ lua_newtable(L);
+ if (isfullcap(cs->cap++))
+ return 1; /* table is empty */
+ while (!isclosecap(cs->cap)) {
+ if (captype(cs->cap) == Cgroup && cs->cap->idx != 0) { /* named group? */
+ pushluaval(cs); /* push group name */
+ pushonenestedvalue(cs);
+ lua_settable(L, -3); /* table[name] = value */
+ }
+ else { /* not a named group */
+ int i;
+ int k = pushcapture(cs);
+ for (i = k; i > 0; i--) /* store all values into table */
+ lua_rawseti(L, -(i + 1), n + i); /* top value goes to key n + i */
+ n += k;
+ }
+ }
+ cs->cap++; /* skip close entry */
+ return 1; /* number of values pushed (only the table) */
+}
+
+
+/*
+** Table-query capture: uses the first value of the nested captures as
+** a key into the table stored in the ktable at the capture's 'idx'.
+** Produces the value found, or no value at all when it is nil.
+*/
+static int querycap (CapState *cs) {
+  lua_State *L = cs->L;
+  int key = cs->cap->idx;  /* ktable entry holding the table */
+  pushonenestedvalue(cs);  /* key for the query */
+  lua_gettable(L, updatecache(cs, key));  /* replace key by table[key] */
+  if (lua_isnil(L, -1)) {  /* nothing there? */
+    lua_pop(L, 1);  /* drop the nil */
+    return 0;
+  }
+  return 1;
+}
+
+
+/*
+** Fold capture: the first nested capture provides the initial
+** accumulator (only its first value is kept). Then, for each remaining
+** nested capture, the folding function (ktable entry 'idx') is called
+** with the accumulator followed by that capture's values, and its
+** single result becomes the new accumulator. Raises an error when
+** there is no initial value.
+*/
+static int foldcap (CapState *cs) {
+ int n;
+ lua_State *L = cs->L;
+ int idx = cs->cap->idx; /* ktable index of the folding function */
+ if (isfullcap(cs->cap++) || /* no nested captures? */
+ isclosecap(cs->cap) || /* no nested captures (large subject)? */
+ (n = pushcapture(cs)) == 0) /* nested captures with no values? */
+ return luaL_error(L, "no initial value for fold capture");
+ if (n > 1)
+ lua_pop(L, n - 1); /* leave only one result for accumulator */
+ while (!isclosecap(cs->cap)) {
+ lua_pushvalue(L, updatecache(cs, idx)); /* get folding function */
+ lua_insert(L, -2); /* put it before accumulator */
+ n = pushcapture(cs); /* get next capture's values */
+ lua_call(L, n + 1, 1); /* call folding function */
+ }
+ cs->cap++; /* skip close entry */
+ return 1; /* only accumulator left on the stack */
+}
+
+
+/*
+** Function capture: calls the function stored in the ktable with the
+** values of the nested captures as arguments; whatever the function
+** returns becomes the values of this capture.
+*/
+static int functioncap (CapState *cs) {
+  lua_State *L = cs->L;
+  int base = lua_gettop(L);  /* stack level before the call */
+  int nargs;
+  pushluaval(cs);  /* function to be called */
+  nargs = pushnestedvalues(cs, 0);  /* its arguments */
+  lua_call(L, nargs, LUA_MULTRET);
+  return lua_gettop(L) - base;  /* everything above 'base' is a result */
+}
+
+
+/*
+** Select capture: keeps only the value numbered 'idx' among those
+** produced by the nested captures. Index 0 produces no value; an
+** index larger than the number of values raises an error.
+*/
+static int numcap (CapState *cs) {
+  lua_State *L = cs->L;
+  int which = cs->cap->idx;  /* value to select */
+  int n;
+  if (which == 0) {  /* select nothing? */
+    nextcap(cs);  /* skip entire capture */
+    return 0;
+  }
+  n = pushnestedvalues(cs, 0);
+  if (n < which)  /* not enough values? */
+    return luaL_error(L, "no capture '%d'", which);
+  lua_pushvalue(L, -(n - which + 1));  /* copy the selected value... */
+  lua_replace(L, -(n + 1));  /* ...over the first one... */
+  lua_pop(L, n - 1);  /* ...and discard all the others */
+  return 1;
+}
+
+
+/*
+** Scan the captures in ['cap', 'last') and return the stack index
+** stored in the first runtime capture found, or zero when the range
+** has no runtime capture.
+*/
+int finddyncap (Capture *cap, Capture *last) {
+  while (cap < last) {
+    if (cap->kind == Cruntime)
+      return cap->idx;  /* stack position of first capture */
+    cap++;
+  }
+  return 0;  /* no dynamic captures in this segment */
+}
+
+
+/*
+** Calls a runtime capture. Returns number of captures removed by
+** the call, including the initial Cgroup. (Captures to be added are
+** on the Lua stack.)
+** 'close' is the entry that gets turned into the Cclose of the group,
+** 's' is the current subject position, and '*rem' receives the number
+** of values of old dynamic captures removed from the Lua stack. The
+** values returned by the called function are left on the stack.
+*/
+int runtimecap (CapState *cs, Capture *close, const char *s, int *rem) {
+ int n, id;
+ lua_State *L = cs->L;
+ int otop = lua_gettop(L); /* stack top before the call */
+ Capture *open = findopen(close);
+ assert(captype(open) == Cgroup);
+ id = finddyncap(open, close); /* get first dynamic capture argument */
+ close->kind = Cclose; /* closes the group */
+ close->s = s;
+ cs->cap = open; cs->valuecached = 0; /* prepare capture state */
+ luaL_checkstack(L, 4, "too many runtime captures");
+ pushluaval(cs); /* push function to be called */
+ lua_pushvalue(L, SUBJIDX); /* push original subject */
+ lua_pushinteger(L, s - cs->s + 1); /* push current position */
+ n = pushnestedvalues(cs, 0); /* push nested captures */
+ lua_call(L, n + 2, LUA_MULTRET); /* call dynamic function */
+ if (id > 0) { /* are there old dynamic captures to be removed? */
+ int i;
+ for (i = id; i <= otop; i++)
+ lua_remove(L, id); /* remove old dynamic captures */
+ *rem = otop - id + 1; /* total number of dynamic captures removed */
+ }
+ else
+ *rem = 0; /* no dynamic captures removed */
+ return close - open; /* number of captures of all kinds removed */
+}
+
+
+/*
+** Auxiliary structure for substitution and string captures: keeps
+** information about nested captures for future use, so that string
+** results do not have to be pushed into Lua
+*/
+typedef struct StrAux {
+ int isstring; /* whether capture is a string */
+ union {
+ Capture *cp; /* if not a string, respective capture */
+ struct { /* if it is a string... */
+ const char *s; /* ... starts here */
+ const char *e; /* ... ends here */
+ } s;
+ } u;
+} StrAux;
+
+#define MAXSTRCAPS 10 /* size of the 'StrAux' arrays (slots 0 to 9) */
+
+/*
+** Collect values from current capture into array 'cps'. Current
+** capture must be Cstring (first call) or Csimple (recursive calls).
+** (In first call, fills %0 with whole match for Cstring.)
+** 'n' is the first free slot of 'cps'. Nested Csimple captures are
+** collected recursively into the following slots; nested captures of
+** other kinds are only recorded (by pointer) for later evaluation.
+** Once MAXSTRCAPS slots are in use, remaining captures are skipped.
+** Returns number of elements in the array that were filled.
+*/
+static int getstrcaps (CapState *cs, StrAux *cps, int n) {
+ int k = n++; /* slot for this capture's own match */
+ cps[k].isstring = 1; /* get string value */
+ cps[k].u.s.s = cs->cap->s; /* starts here */
+ if (!isfullcap(cs->cap++)) { /* nested captures? */
+ while (!isclosecap(cs->cap)) { /* traverse them */
+ if (n >= MAXSTRCAPS) /* too many captures? */
+ nextcap(cs); /* skip extra captures (will not need them) */
+ else if (captype(cs->cap) == Csimple) /* string? */
+ n = getstrcaps(cs, cps, n); /* put info. into array */
+ else {
+ cps[n].isstring = 0; /* not a string */
+ cps[n].u.cp = cs->cap; /* keep original capture */
+ nextcap(cs);
+ n++;
+ }
+ }
+ cs->cap++; /* skip close */
+ }
+ cps[k].u.s.e = closeaddr(cs->cap - 1); /* ends here */
+ return n;
+}
+
+
+/*
+** add next capture value (which should be a string) to buffer 'b'
+*/
+static int addonestring (luaL_Buffer *b, CapState *cs, const char *what);
+
+
+/*
+** String capture: add result to buffer 'b' (instead of pushing
+** it into the stack). The format string comes from the ktable; in it,
+** '%' followed by a digit d is replaced by the value of nested capture
+** d (with 0 being the whole match), and '%' followed by any other
+** character adds that character itself.
+** NOTE(review): when '%' is the last character of the format,
+** 'fmt[++i]' reads index 'len' -- presumably the terminating '\0'
+** guaranteed by lua_tolstring -- and that byte is added to the buffer;
+** confirm this is intended.
+*/
+static void stringcap (luaL_Buffer *b, CapState *cs) {
+ StrAux cps[MAXSTRCAPS];
+ int n;
+ size_t len, i;
+ const char *fmt; /* format string */
+ fmt = lua_tolstring(cs->L, updatecache(cs, cs->cap->idx), &len);
+ n = getstrcaps(cs, cps, 0) - 1; /* collect nested captures */
+ for (i = 0; i < len; i++) { /* traverse format string */
+ if (fmt[i] != '%') /* not an escape? */
+ luaL_addchar(b, fmt[i]); /* add it to buffer */
+ else if (fmt[++i] < '0' || fmt[i] > '9') /* not followed by a digit? */
+ luaL_addchar(b, fmt[i]); /* add to buffer */
+ else {
+ int l = fmt[i] - '0'; /* capture index */
+ if (l > n)
+ luaL_error(cs->L, "invalid capture index (%d)", l);
+ else if (cps[l].isstring)
+ luaL_addlstring(b, cps[l].u.s.s, cps[l].u.s.e - cps[l].u.s.s);
+ else {
+ Capture *curr = cs->cap;
+ cs->cap = cps[l].u.cp; /* go back to evaluate that nested capture */
+ if (!addonestring(b, cs, "capture"))
+ luaL_error(cs->L, "no values in capture index %d", l);
+ cs->cap = curr; /* continue from where it stopped */
+ }
+ }
+ }
+}
+
+
+/*
+** Substitution capture: add result to buffer 'b'. The result is the
+** text of the match in which the text matched by each nested capture
+** is replaced by that capture's first value; a nested capture that
+** produces no value keeps its original text.
+*/
+static void substcap (luaL_Buffer *b, CapState *cs) {
+ const char *curr = cs->cap->s; /* start of text not yet copied */
+ if (isfullcap(cs->cap)) /* no nested captures? */
+ luaL_addlstring(b, curr, cs->cap->siz - 1); /* keep original text */
+ else {
+ cs->cap++; /* skip open entry */
+ while (!isclosecap(cs->cap)) { /* traverse nested captures */
+ const char *next = cs->cap->s;
+ luaL_addlstring(b, curr, next - curr); /* add text up to capture */
+ if (addonestring(b, cs, "replacement"))
+ curr = closeaddr(cs->cap - 1); /* continue after match */
+ else /* no capture value */
+ curr = next; /* keep original text in final result */
+ }
+ luaL_addlstring(b, curr, cs->cap->s - curr); /* add last piece of text */
+ }
+ cs->cap++; /* go to next capture */
+}
+
+
+/*
+** Evaluates a capture and adds its first value to buffer 'b'; returns
+** whether there was a value (for captures evaluated through
+** 'pushcapture', the result is the number of values it produced).
+** 'what' describes the role of the value in the error message raised
+** when the value is not a string (as tested by lua_isstring).
+*/
+static int addonestring (luaL_Buffer *b, CapState *cs, const char *what) {
+ switch (captype(cs->cap)) {
+ case Cstring:
+ stringcap(b, cs); /* add capture directly to buffer */
+ return 1;
+ case Csubst:
+ substcap(b, cs); /* add capture directly to buffer */
+ return 1;
+ default: {
+ lua_State *L = cs->L;
+ int n = pushcapture(cs);
+ if (n > 0) {
+ if (n > 1) lua_pop(L, n - 1); /* only one result */
+ if (!lua_isstring(L, -1))
+ luaL_error(L, "invalid %s value (a %s)", what, luaL_typename(L, -1));
+ luaL_addvalue(b); /* move value from the stack into the buffer */
+ }
+ return n;
+ }
+ }
+}
+
+
+/*
+** Push all values of the current capture into the stack; returns
+** number of values pushed. Dispatches on the kind of the capture;
+** every case either advances 'cs->cap' itself or delegates to a
+** helper that does it.
+*/
+static int pushcapture (CapState *cs) {
+ lua_State *L = cs->L;
+ luaL_checkstack(L, 4, "too many captures");
+ switch (captype(cs->cap)) {
+ case Cposition: { /* value is a 1-based position in the subject */
+ lua_pushinteger(L, cs->cap->s - cs->s + 1);
+ cs->cap++;
+ return 1;
+ }
+ case Cconst: { /* value comes from the ktable */
+ pushluaval(cs);
+ cs->cap++;
+ return 1;
+ }
+ case Carg: { /* value is an extra argument given to the match */
+ int arg = (cs->cap++)->idx;
+ if (arg + FIXEDARGS > cs->ptop)
+ return luaL_error(L, "reference to absent argument #%d", arg);
+ lua_pushvalue(L, arg + FIXEDARGS);
+ return 1;
+ }
+ case Csimple: {
+ int k = pushnestedvalues(cs, 1);
+ lua_insert(L, -k); /* make whole match be first result */
+ return k;
+ }
+ case Cruntime: {
+ lua_pushvalue(L, (cs->cap++)->idx); /* value is in the stack */
+ return 1;
+ }
+ case Cstring: { /* build the string in a buffer and push it */
+ luaL_Buffer b;
+ luaL_buffinit(L, &b);
+ stringcap(&b, cs);
+ luaL_pushresult(&b);
+ return 1;
+ }
+ case Csubst: { /* build the string in a buffer and push it */
+ luaL_Buffer b;
+ luaL_buffinit(L, &b);
+ substcap(&b, cs);
+ luaL_pushresult(&b);
+ return 1;
+ }
+ case Cgroup: {
+ if (cs->cap->idx == 0) /* anonymous group? */
+ return pushnestedvalues(cs, 0); /* add all nested values */
+ else { /* named group: add no values */
+ nextcap(cs); /* skip capture */
+ return 0;
+ }
+ }
+ case Cbackref: return backrefcap(cs);
+ case Ctable: return tablecap(cs);
+ case Cfunction: return functioncap(cs);
+ case Cnum: return numcap(cs);
+ case Cquery: return querycap(cs);
+ case Cfold: return foldcap(cs);
+ default: assert(0); return 0;
+ }
+}
+
+
+/*
+** Evaluate the whole list of captures kept in the stack (at the slot
+** given by 'caplistidx(ptop)') and push the resulting values. 's' is
+** the subject, 'r' the position where the match ended, and 'ptop' the
+** stack index from which the auxiliary slots are located. Returns the
+** number of values pushed; when the captures produce no value, the
+** end position of the match is pushed as the single result.
+*/
+int getcaptures (lua_State *L, const char *s, const char *r, int ptop) {
+  Capture *list = (Capture *)lua_touserdata(L, caplistidx(ptop));
+  int total = 0;
+  if (!isclosecap(list)) {  /* list is not empty? */
+    CapState cs;
+    cs.L = L;
+    cs.s = s;
+    cs.ptop = ptop;
+    cs.valuecached = 0;
+    cs.ocap = cs.cap = list;
+    do {  /* evaluate top-level captures one by one */
+      total += pushcapture(&cs);
+    } while (!isclosecap(cs.cap));
+  }
+  if (total > 0)
+    return total;
+  lua_pushinteger(L, r - s + 1);  /* no values: result is end position */
+  return 1;
+}
+
+
View
43 lpcap.h
@@ -0,0 +1,43 @@
+/*
+** $Id: lpcap.h,v 1.1 2013/03/21 20:25:12 roberto Exp $
+*/
+
+#if !defined(lpcap_h)
+#define lpcap_h
+
+
+#include "lptypes.h"
+
+
+/*
+** kinds of captures ('Cclose' comes first, so its value is zero)
+*/
+typedef enum CapKind {
+ Cclose, Cposition, Cconst, Cbackref, Carg, Csimple, Ctable, Cfunction,
+ Cquery, Cstring, Cnum, Csubst, Cfold, Cruntime, Cgroup
+} CapKind;
+
+
+typedef struct Capture { /* one entry of a capture list */
+ const char *s; /* subject position */
+ short idx; /* extra info about capture (group name, arg index, etc.) */
+ byte kind; /* kind of capture (a 'CapKind' value stored in one byte) */
+ byte siz; /* size of full capture + 1 (0 = not a full capture) */
+} Capture;
+
+
+typedef struct CapState { /* state kept while evaluating a capture list */
+ Capture *cap; /* current capture */
+ Capture *ocap; /* (original) capture list */
+ lua_State *L; /* Lua state where values are pushed */
+ int ptop; /* index of last argument to 'match' */
+ const char *s; /* original string */
+ int valuecached; /* value stored in cache slot */
+} CapState;
+
+
+int runtimecap (CapState *cs, Capture *close, const char *s, int *rem);
+int getcaptures (lua_State *L, const char *s, const char *r, int ptop);
+int finddyncap (Capture *cap, Capture *last);
+
+#endif
+
+
View
901 lpcode.c
@@ -0,0 +1,901 @@
+/*
+** $Id: lpcode.c,v 1.10 2013/03/27 15:49:17 roberto Exp $
+** Copyright 2007, Lua.org & PUC-Rio (see 'lpeg.html' for license)
+*/
+
+#include "lua.h"
+#include "lauxlib.h"
+
+#include "lptypes.h"
+#include "lpcode.h"
+
+
+/* signals a "no-instruction" */
+#define NOINST -1
+
+
+static const Charset fullset =
+ {0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+ 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+ 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+ 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF}; /* all bits set: the full set */
+
+/*
+** {======================================================
+** Analysis and some optimizations
+** =======================================================
+*/
+
+/*
+** Check whether a charset is empty (IFail), singleton (IChar),
+** full (IAny), or none of those (ISet).
+** For a singleton, '*c' receives the single character of the set.
+** 'count' is the number of characters seen so far; it is tracked only
+** while the set can still be empty, a singleton, or full.
+*/
+static int charsettype (const Charset cs, int *c) {
+ int count = 0;
+ int i;
+ int candidate = -1; /* candidate position for a char */
+ for (i = 0; i < CHARSETSIZE; i++) {
+ int b = cs[i];
+ if (b == 0) {
+ if (count > 1) return ISet; /* else set is still empty */
+ }
+ else if (b == 0xFF) {
+ if (count < (i * BITSPERCHAR)) /* some previous byte was not full? */
+ return ISet;
+ else count += BITSPERCHAR; /* set is still full */
+ }
+ else if ((b & (b - 1)) == 0) { /* byte has only one bit? */
+ if (count > 0)
+ return ISet; /* set is neither full nor empty */
+ else { /* set has only one char till now; track it */
+ count++;
+ candidate = i;
+ }
+ }
+ else return ISet; /* byte is neither empty, full, nor singleton */
+ }
+ switch (count) {
+ case 0: return IFail; /* empty set */
+ case 1: { /* singleton; find character bit inside byte */
+ int b = cs[candidate];
+ *c = candidate * BITSPERCHAR;
+ if ((b & 0xF0) != 0) { *c += 4; b >>= 4; } /* bit is in upper half? */
+ if ((b & 0x0C) != 0) { *c += 2; b >>= 2; } /* bit is in upper pair? */
+ if ((b & 0x02) != 0) { *c += 1; } /* bit is the upper one of the pair? */
+ return IChar;
+ }
+ case (CHARSETSIZE * BITSPERCHAR): return IAny; /* full set */
+ default: assert(0); return 0; /* should have returned by now */
+ }
+}
+
+/*
+** A few basic operations on Charsets
+*/
+static void cs_complement (Charset cs) { /* complement set 'cs' in place */
+ loopset(i, cs[i] = ~cs[i]); /* presumably 'loopset' runs 'i' over all bytes of a set */
+}
+
+
+static int cs_equal (const Charset cs1, const Charset cs2) { /* same sets? */
+ loopset(i, if (cs1[i] != cs2[i]) return 0); /* any differing byte => not equal */
+ return 1;
+}
+
+
+/*
+** computes whether sets st1 and st2 are disjoint (returns 1 when no
+** byte of one set shares a bit with the corresponding byte of the
+** other, 0 otherwise)
+*/
+static int cs_disjoint (const Charset st1, const Charset st2) {
+ loopset(i, if ((st1[i] & st2[i]) != 0) return 0;)
+ return 1;
+}
+
+
+/*
+** Convert a 'char' pattern (TSet, TChar, TAny) to a charset.
+** Returns 1 after filling 'cs', or 0 (without touching 'cs') when
+** the tree is not one of those patterns.
+*/
+int tocharset (TTree *tree, byte *cs) {
+ switch (tree->tag) {
+ case TSet: { /* copy set */
+ loopset(i, cs[i] = treebuffer(tree)[i]);
+ return 1;
+ }
+ case TChar: { /* only one char */
+ loopset(i, cs[i] = 0); /* erase all chars */
+ setchar(cs, tree->u.n); /* add that one */
+ return 1;
+ }
+ case TAny: {
+ loopset(i, cs[i] = 0xFF); /* add all to the set */
+ return 1;
+ }
+ default: return 0;
+ }
+}
+
+
+/*
+** checks whether a pattern has captures: returns 1 when the tree
+** contains a TCapture or TRunTime node, 0 otherwise. Other nodes are
+** traversed through their siblings (as counted by 'numsiblings'); the
+** last sibling is handled with a jump instead of a recursive call.
+*/
+int hascaptures (TTree *tree) {
+ tailcall:
+ switch (tree->tag) {
+ case TCapture: case TRunTime:
+ return 1;
+ default: {
+ switch (numsiblings[tree->tag]) {
+ case 0: return 0;
+ case 1: /* return hascaptures(sib1(tree)); */
+ tree = sib1(tree); goto tailcall;
+ case 2:
+ if (hascaptures(sib1(tree))) return 1;
+ /* else return hascaptures(sib2(tree)); */
+ tree = sib2(tree); goto tailcall;
+ default: assert(0); return 0;
+ }
+ }
+ }
+}
+
+
+/*
+** Checks how a pattern behaves regarding the empty string,
+** in one of two different ways:
+** A pattern is *nullable* if it can match without consuming any character;
+** A pattern is *nofail* if it never fails for any string
+** (including the empty string).
+** The difference is only for predicates; for patterns without
+** predicates, the two properties are equivalent.
+** (With predicates, &'a' is nullable but not nofail. Of course,
+** nofail => nullable.)
+** These functions are all conservative in the following way:
+** p is nullable => nullable(p)
+** nofail(p) => p cannot fail
+** (The function assumes that TOpenCall and TRunTime are not nullable:
+** TOpenCall must be checked again when the grammar is fixed;
+** TRunTime is an arbitrary choice.)
+** 'pred' selects the property being checked (PEnullable or PEnofail).
+*/
+int checkaux (TTree *tree, int pred) {
+ tailcall:
+ switch (tree->tag) {
+ case TChar: case TSet: case TAny:
+ case TFalse: case TOpenCall: case TRunTime:
+ return 0; /* not nullable */
+ case TRep: case TTrue:
+ return 1; /* no fail */
+ case TNot: case TBehind:
+ /* can match empty, but may fail */
+ if (pred == PEnofail) return 0;
+ else return 1; /* PEnullable */
+ case TAnd:
+ /* can match empty; fail iff body does */
+ if (pred == PEnullable) return 1;
+ /* else return checkaux(sib1(tree), pred); */
+ tree = sib1(tree); goto tailcall;
+ case TSeq: /* both parts must have the property */
+ if (!checkaux(sib1(tree), pred)) return 0;
+ /* else return checkaux(sib2(tree), pred); */
+ tree = sib2(tree); goto tailcall;
+ case TChoice: /* enough that one alternative has the property */
+ if (checkaux(sib2(tree), pred)) return 1;
+ /* else return checkaux(sib1(tree), pred); */
+ tree = sib1(tree); goto tailcall;
+ case TCapture: case TGrammar: case TRule:
+ /* return checkaux(sib1(tree), pred); */
+ tree = sib1(tree); goto tailcall;
+ case TCall: /* return checkaux(sib2(tree), pred); */
+ tree = sib2(tree); goto tailcall;
+ default: assert(0); return 0;
+ };
+}
+
+
+/*
+** number of characters to match a pattern (or -1 if variable)
+** ('count' avoids infinite loops for grammars)
+** 'len' is the length accumulated so far; the result for a
+** fixed-length pattern is 'len' plus the length of the pattern.
+*/
+int fixedlenx (TTree *tree, int count, int len) {
+ tailcall:
+ switch (tree->tag) {
+ case TChar: case TSet: case TAny:
+ return len + 1;
+ case TFalse: case TTrue: case TNot: case TAnd: case TBehind:
+ return len;
+ case TRep: case TRunTime: case TOpenCall:
+ return -1;
+ case TCapture: case TRule: case TGrammar:
+ /* return fixedlenx(sib1(tree), count, len); */
+ tree = sib1(tree); goto tailcall;
+ case TCall:
+ if (count++ >= MAXRULES)
+ return -1; /* may be a loop */
+ /* else return fixedlenx(sib2(tree), count, len); */
+ tree = sib2(tree); goto tailcall;
+ case TSeq: {
+ len = fixedlenx(sib1(tree), count, len);
+ if (len < 0) return -1;
+ /* else return fixedlenx(sib2(tree), count, len); */
+ tree = sib2(tree); goto tailcall;
+ }
+ case TChoice: { /* fixed only if both alternatives have the same length */
+ int n1, n2;
+ n1 = fixedlenx(sib1(tree), count, len);
+ if (n1 < 0) return -1;
+ n2 = fixedlenx(sib2(tree), count, len);
+ if (n1 == n2) return n1;
+ else return -1;
+ }
+ default: assert(0); return 0;
+ };
+}
+
+
+/*
+** Computes the 'first set' of a pattern.
+** The result is a conservative approximation:
+** match p ax -> x (for some x) ==> a in first(p).
+** match p '' -> '' ==> returns 1.
+** The set 'follow' is the first set of what follows the
+** pattern (full set if nothing follows it).
+** The first set is stored in 'firstset'.
+*/
+static int getfirst (TTree *tree, const Charset follow, Charset firstset) {
+ tailcall:
+ switch (tree->tag) {
+ case TChar: case TSet: case TAny: {
+ tocharset(tree, firstset);
+ return 0;
+ }
+ case TTrue: {
+ loopset(i, firstset[i] = follow[i]);
+ return 1;
+ }
+ case TFalse: {
+ loopset(i, firstset[i] = 0);
+ return 0;
+ }
+ case TChoice: { /* union of the first sets of both alternatives */
+ Charset csaux;
+ int e1 = getfirst(sib1(tree), follow, firstset);
+ int e2 = getfirst(sib2(tree), follow, csaux);
+ loopset(i, firstset[i] |= csaux[i]);
+ return e1 | e2;
+ }
+ case TSeq: {
+ if (!nullable(sib1(tree))) {
+ /* return getfirst(sib1(tree), follow, firstset); */
+ tree = sib1(tree); goto tailcall;
+ }
+ else { /* FIRST(p1 p2, fl) = FIRST(p1, FIRST(p2, fl)) */
+ Charset csaux;
+ int e2 = getfirst(sib2(tree), follow, csaux);
+ int e1 = getfirst(sib1(tree), csaux, firstset);
+ return e1 & e2;
+ }
+ }
+ case TRep: {
+ getfirst(sib1(tree), follow, firstset);
+ loopset(i, firstset[i] |= follow[i]);
+ return 1; /* accept the empty string */
+ }
+ case TCapture: case TGrammar: case TRule: {
+ /* return getfirst(sib1(tree), follow, firstset); */
+ tree = sib1(tree); goto tailcall;
+ }
+ case TRunTime: { /* function invalidates any follow info. */
+ /* return getfirst(sib1(tree), fullset, firstset); */
+ tree = sib1(tree); follow = fullset; goto tailcall;
+ }
+ case TCall: {
+ /* return getfirst(sib2(tree), follow, firstset); */
+ tree = sib2(tree); goto tailcall;
+ }
+ case TAnd: { /* first set of the body restricted to 'follow' */
+ int e = getfirst(sib1(tree), follow, firstset);
+ loopset(i, firstset[i] &= follow[i]);
+ return e;
+ }
+ case TNot: {
+ if (tocharset(sib1(tree), firstset)) {
+ cs_complement(firstset);
+ return 1;
+ }
+ /* else fall through */
+ }
+ case TBehind: { /* instruction gives no new information */
+ loopset(i, firstset[i] = follow[i]); /* uses follow */
+ return 1; /* can accept the empty string */
+ }
+ default: assert(0); return 0;
+ }
+}
+
+
+/*
+** If it returns true, then pattern can fail only depending on the next
+** character of the subject. (A sequence qualifies only when its
+** second part is 'nofail' and its first part is headfail; a choice
+** only when both alternatives are headfail.)
+*/
+static int headfail (TTree *tree) {
+ tailcall:
+ switch (tree->tag) {
+ case TChar: case TSet: case TAny: case TFalse:
+ return 1;
+ case TTrue: case TRep: case TRunTime: case TNot:
+ case TBehind:
+ return 0;
+ case TCapture: case TGrammar: case TRule: case TAnd:
+ tree = sib1(tree); goto tailcall; /* return headfail(sib1(tree)); */
+ case TCall:
+ tree = sib2(tree); goto tailcall; /* return headfail(sib2(tree)); */
+ case TSeq:
+ if (!nofail(sib2(tree))) return 0;
+ /* else return headfail(sib1(tree)); */
+ tree = sib1(tree); goto tailcall;
+ case TChoice:
+ if (!headfail(sib1(tree))) return 0;
+ /* else return headfail(sib2(tree)); */
+ tree = sib2(tree); goto tailcall;
+ default: assert(0); return 0;
+ }
+}
+
+
+/*
+** Check whether the code generation for the given tree can benefit
+** from a follow set (to avoid computing the follow set when it is
+** not needed). Only choices and repetitions answer true; captures
+** defer to their body and sequences to their second part.
+*/
+static int needfollow (TTree *tree) {
+ tailcall:
+ switch (tree->tag) {
+ case TChar: case TSet: case TAny:
+ case TFalse: case TTrue: case TAnd: case TNot:
+ case TRunTime: case TGrammar: case TCall: case TBehind:
+ return 0;
+ case TChoice: case TRep:
+ return 1;
+ case TCapture:
+ tree = sib1(tree); goto tailcall; /* return needfollow(sib1(tree)); */
+ case TSeq:
+ tree = sib2(tree); goto tailcall; /* return needfollow(sib2(tree)); */
+ default: assert(0); return 0;
+ }
+}
+
+/* }====================================================== */
+
+
+
+/*
+** {======================================================
+** Code generation
+** =======================================================
+*/
+
+/*
+** state for the compiler (shared by all code-generation functions)
+*/
+typedef struct CompileState {
+ Pattern *p; /* pattern being compiled */
+ int ncode; /* next position in p->code to be filled */
+ lua_State *L; /* Lua state used for memory allocation and errors */
+} CompileState;
+
+
+/*
+** code generation is recursive; 'opt' indicates that the code is
+** being generated under a 'IChoice' operator jumping to its end.
+** 'tt' points to a previous test protecting this code. 'fl' is
+** the follow set of the pattern.
+*/
+static void codegen (CompileState *compst, TTree *tree, int opt, int tt,
+ const Charset fl);
+
+
+/*
+** Resize the code array of pattern 'p' to hold 'nsize' instructions,
+** using the allocator of the Lua state. Raises a Lua error when a
+** non-empty block cannot be allocated.
+*/
+void reallocprog (lua_State *L, Pattern *p, int nsize) {
+  void *ud;
+  lua_Alloc allocf = lua_getallocf(L, &ud);
+  size_t oldbytes = p->codesize * sizeof(Instruction);
+  size_t newbytes = nsize * sizeof(Instruction);
+  void *block = allocf(ud, p->code, oldbytes, newbytes);
+  if (nsize > 0 && block == NULL)  /* allocation failed? */
+    luaL_error(L, "not enough memory");
+  p->code = (Instruction *)block;
+  p->codesize = nsize;
+}
+
+
+/*
+** Reserve the next instruction slot of the pattern being compiled and
+** return its index. When the code array is full it is doubled, but
+** never beyond MAXPATTSIZE; an array already at that limit raises
+** a "pattern too large" error.
+*/
+static int nextinstruction (CompileState *compst) {
+  Pattern *patt = compst->p;
+  int cap = patt->codesize;  /* current capacity */
+  if (compst->ncode >= cap) {  /* no free slot? */
+    if (cap >= MAXPATTSIZE)
+      luaL_error(compst->L, "pattern too large");
+    else if (cap >= (MAXPATTSIZE / 2))
+      reallocprog(compst->L, patt, MAXPATTSIZE);  /* clamp to the limit */
+    else
+      reallocprog(compst->L, patt, cap * 2);
+  }
+  return compst->ncode++;
+}
+
+
+#define getinstr(cs,i) ((cs)->p->code[i])
+
+
+/*
+** Append an instruction with opcode 'op' and auxiliary field 'aux';
+** returns the index of the new instruction.
+*/
+static int addinstruction (CompileState *compst, Opcode op, int aux) {
+  int pc = nextinstruction(compst);
+  Instruction *ins = &getinstr(compst, pc);  /* taken after any realloc */
+  ins->i.code = op;
+  ins->i.aux = aux;
+  return pc;
+}
+
+
+/*
+** Store 'offset' in the offset field of the given instruction.
+*/
+static void setoffset (CompileState *compst, int instruction, int offset) {
+  Instruction *ins = &getinstr(compst, instruction);
+  ins->i.offset = offset;
+}
+
+
+/*
+** Append a capture instruction and return its index.
+** 'op' is the capture instruction and 'cap' the capture kind; 'aux'
+** is an optional offset, packed together with 'cap' in the auxiliary
+** field; 'key' (the key into ktable) goes in the offset field.
+*/
+static int addinstcap (CompileState *compst, int op, int cap, int key,
+                       int aux) {
+  int pc = addinstruction(compst, op, joinkindoff(cap, aux));
+  getinstr(compst, pc).i.offset = key;
+  return pc;
+}
+
+
+#define gethere(compst) ((compst)->ncode)
+
+/*
+** Index of the instruction that instruction 'pc' of array 'code' jumps
+** to (the offset field is relative to the instruction itself). The
+** parameter is deliberately not named 'i': a parameter with that name
+** is also substituted into the '.i' member access, so the macro would
+** expand correctly only when its argument is spelled exactly 'i'.
+*/
+#define target(code,pc) ((pc) + (code)[pc].i.offset)
+
+
+/*
+** Make 'instruction' jump to position 'target' by storing the relative
+** offset in it. A negative 'instruction' (such as NOINST) is ignored.
+*/
+static void jumptothere (CompileState *compst, int instruction, int target) {
+  if (instruction < 0)
+    return;  /* no instruction to patch */
+  setoffset(compst, instruction, target - instruction);
+}
+
+
+/*
+** Make 'instruction' jump to the position of the next instruction
+** to be generated.
+*/
+static void jumptohere (CompileState *compst, int instruction) {
+  int here = gethere(compst);
+  jumptothere(compst, instruction, here);
+}
+
+
+/*
+** Code an instruction matching character 'c': a plain IAny when the
+** instruction at 'tt' is an ITestChar for that same character (the
+** test already checked it), an IChar otherwise. A negative 'tt'
+** means there is no such test.
+*/
+static void codechar (CompileState *compst, int c, int tt) {
+  int dominated = 0;  /* true when the test at 'tt' already checks 'c' */
+  if (tt >= 0) {
+    Instruction *test = &getinstr(compst, tt);
+    dominated = (test->i.code == ITestChar && test->i.aux == c);
+  }
+  if (dominated)
+    addinstruction(compst, IAny, 0);
+  else
+    addinstruction(compst, IChar, c);
+}
+
+
+/*
+** Code an ISet instruction, followed by the instruction slots that
+** hold a copy of charset 'cs' (the instruction plus its buffer take
+** CHARSETINSTSIZE slots). Returns the index of the ISet instruction.
+*/
+static int coderealcharset (CompileState *compst, byte *cs) {
+ int p = addinstruction(compst, ISet, 0);
+ int i;
+ for (i = 0; i < (int)CHARSETINSTSIZE - 1; i++)
+ nextinstruction(compst); /* space for buffer */
+ /* fill buffer with charset */
+ loopset(j, getinstr(compst, p + 1).buff[j] = cs[j]);
+ return p;
+}
+
+
+/*
+** code a char set, optimizing unit sets for IChar, "complete"
+** sets for IAny, and empty sets for IFail; also use an IAny
+** when instruction is dominated by an equivalent test.
+** 'tt' is the index of the dominating test instruction (a negative
+** value, such as NOINST, means there is none).
+*/
+static void codecharset (CompileState *compst, byte *cs, int tt) {
+ int c = 0; /* (=) to avoid warnings */
+ int op = charsettype(cs, &c);
+ switch (op) {
+ case IChar: codechar(compst, c, tt); break;
+ case ISet: { /* non-trivial set? */
+ if (tt >= 0 && getinstr(compst, tt).i.code == ITestSet &&
+ cs_equal(cs, getinstr(compst, tt + 1).buff))
+ addinstruction(compst, IAny, 0);
+ else
+ coderealcharset(compst, cs);
+ break;
+ }
+ default: addinstruction(compst, op, c); break; /* IFail or IAny */
+ }
+}
+
+
+/*
+** code a test set, optimizing unit sets for ITestChar, "complete"
+** sets for ITestAny, and empty sets for IJmp (always fails).
+** 'e' is true iff test should accept the empty string. (Test
+** instructions in the current VM never accept the empty string.)
+** Returns the position of the test instruction, or NOINST when no
+** test was coded.
+*/
+static int codetestset (CompileState *compst, byte *cs, int e) {
+ if (e) return NOINST; /* no test */
+ else {
+ Instruction *inst;
+ int pos = gethere(compst);
+ codecharset(compst, cs, NOINST); /* code the plain (non-test) instruction */
+ inst = &getinstr(compst, pos); /* taken only now: coding may realloc the array */
+ switch (inst->i.code) { /* turn it into the corresponding test */
+ case IFail: inst->i.code = IJmp; break; /* always jump */
+ case IAny: inst->i.code = ITestAny; break;
+ case IChar: inst->i.code = ITestChar; break;
+ case ISet: inst->i.code = ITestSet; break;
+ default: assert(0);
+ }
+ return pos;
+ }
+}
+
+
+/*
+** Find the final destination of a sequence of jumps
+*/
+static int finaltarget (Instruction *code, int i) {
+ while (code[i].i.code == IJmp)
+ i = target(code, i);
+ return i;
+}
+
+
+/*
+** final label (after traversing any jumps)
+*/
+static int finallabel (Instruction *code, int i) {
+ return finaltarget(code, target(code, i));
+}
+
+
+/*
+** <behind(p)> == behind n; <p> (where n = fixedlen(p))
+*/
+static void codebehind (CompileState *compst, TTree *tree) {
+ if (tree->u.n > 0)
+ addinstruction(compst, IBehind, tree->u.n);
+ codegen(compst, sib1(tree), 0, NOINST, fullset);
+}
+
+
+/*
+** Choice; optimizations:
+** - when p1 is headfail
+** - when first(p1) and first(p2) are disjoint; then
+** a character not in first(p1) cannot go to p1, and a character
+** in first(p1) cannot go to p2 (as it is not in first(p2)).
+** (The optimization is not valid if p1 accepts the empty string,
+** as then there is no character at all...)
+** - when p2 is empty and opt is true; an IPartialCommit can reuse
+** the Choice already active in the stack.
+*/
+static void codechoice (CompileState *compst, TTree *p1, TTree *p2, int opt,
+ const Charset fl) {
+ int emptyp2 = (p2->tag == TTrue);
+ Charset st1, st2;
+ int e1 = getfirst(p1, fullset, st1);
+ if (headfail(p1) ||
+ (!e1 && (getfirst(p2, fl, st2), cs_disjoint(st1, st2)))) {
+ /* <p1 / p2> == test (fail(p1)) -> L1 ; p1 ; jmp L2; L1: p2; L2: */
+ int test = codetestset(compst, st1, 0);
+ int jmp = NOINST;
+ codegen(compst, p1, 0, test, fl);
+ if (!emptyp2)
+ jmp = addinstruction(compst, IJmp, 0);
+ jumptohere(compst, test);
+ codegen(compst, p2, opt, NOINST, fl);
+ jumptohere(compst, jmp);
+ }
+ else if (opt && emptyp2) {
+ /* p1? == IPartialCommit; p1 */
+ jumptohere(compst, addinstruction(compst, IPartialCommit, 0));
+ codegen(compst, p1, 1, NOINST, fullset);
+ }
+ else {
+ /* <p1 / p2> ==
+ test(fail(p1)) -> L1; choice L1; <p1>; commit L2; L1: <p2>; L2: */
+ int pcommit;
+ int test = codetestset(compst, st1, e1);
+ int pchoice = addinstruction(compst, IChoice, 0);
+ codegen(compst, p1, emptyp2, test, fullset);
+ pcommit = addinstruction(compst, ICommit, 0);
+ jumptohere(compst, pchoice);
+ jumptohere(compst, test);
+ codegen(compst, p2, opt, NOINST, fl);
+ jumptohere(compst, pcommit);
+ }
+}
+
+
+/*
+** And predicate
+** optimization: fixedlen(p) = n ==> <&p> == <p>; behind n
+** (valid only when 'p' has no captures)
+*/
+static void codeand (CompileState *compst, TTree *tree, int tt) {
+ int n = fixedlen(tree);
+ if (n >= 0 && n <= MAXBEHIND && !hascaptures(tree)) {
+ codegen(compst, tree, 0, tt, fullset);
+ if (n > 0)
+ addinstruction(compst, IBehind, n);
+ }
+ else { /* default: Choice L1; p1; BackCommit L2; L1: Fail; L2: */
+ int pcommit;
+ int pchoice = addinstruction(compst, IChoice, 0);
+ codegen(compst, tree, 0, tt, fullset);
+ pcommit = addinstruction(compst, IBackCommit, 0);
+ jumptohere(compst, pchoice);
+ addinstruction(compst, IFail, 0);
+ jumptohere(compst, pcommit);
+ }
+}
+
+
+/*
+** Captures: if pattern has fixed (and not too big) length, use
+** a single IFullCapture instruction after the match; otherwise,
+** enclose the pattern with OpenCapture - CloseCapture.
+*/
+static void codecapture (CompileState *compst, TTree *tree, int tt,
+ const Charset fl) {
+ int len = fixedlen(sib1(tree));
+ if (len >= 0 && len <= MAXOFF && !hascaptures(sib1(tree))) {
+ codegen(compst, sib1(tree), 0, tt, fl);
+ addinstcap(compst, IFullCapture, tree->cap, tree->key, len);
+ }
+ else {
+ addinstcap(compst, IOpenCapture, tree->cap, tree->key, 0);
+ codegen(compst, sib1(tree), 0, tt, fl);
+ addinstcap(compst, ICloseCapture, Cclose, 0, 0);
+ }
+}
+
+
+static void coderuntime (CompileState *compst, TTree *tree, int tt) {
+ addinstcap(compst, IOpenCapture, Cgroup, tree->key, 0);
+ codegen(compst, sib1(tree), 0, tt, fullset);
+ addinstcap(compst, ICloseRunTime, Cclose, 0, 0);
+}
+
+
+/*
+** Repetition; optimizations:
+** When pattern is a charset, can use special instruction ISpan.
+** When pattern is head fail, or if it starts with characters that
+** are disjoint from what follows the repetitions, a simple test
+** is enough (a fail inside the repetition would backtrack to fail
+** again in the following pattern, so there is no need for a choice).
+** When 'opt' is true, the repetition can reuse the Choice already
+** active in the stack.
+*/
+static void coderep (CompileState *compst, TTree *tree, int opt,
+ const Charset fl) {
+ Charset st;
+ if (tocharset(tree, st)) {
+ int op = coderealcharset(compst, st);
+ getinstr(compst, op).i.code = ISpan;
+ }
+ else {
+ int e1 = getfirst(tree, fullset, st);
+ if (headfail(tree) || (!e1 && cs_disjoint(st, fl))) {
+ /* L1: test (fail(p1)) -> L2; <p>; jmp L1; L2: */
+ int jmp;
+ int test = codetestset(compst, st, 0);
+ codegen(compst, tree, opt, test, fullset);
+ jmp = addinstruction(compst, IJmp, 0);
+ jumptohere(compst, test);
+ jumptothere(compst, jmp, test);
+ }
+ else {
+ /* test(fail(p1)) -> L2; choice L2; L1: <p>; partialcommit L1; L2: */
+ /* or (if 'opt'): partialcommit L1; L1: <p>; partialcommit L1; */
+ int commit, l2;
+ int test = codetestset(compst, st, e1);
+ int pchoice = NOINST;
+ if (opt)
+ jumptohere(compst, addinstruction(compst, IPartialCommit, 0));
+ else
+ pchoice = addinstruction(compst, IChoice, 0);
+ l2 = gethere(compst);
+ codegen(compst, tree, 0, NOINST, fullset);
+ commit = addinstruction(compst, IPartialCommit, 0);
+ jumptothere(compst, commit, l2);
+ jumptohere(compst, pchoice);
+ jumptohere(compst, test);
+ }
+ }
+}
+
+
+/*
+** Not predicate; optimizations:
+** In any case, if first test fails, 'not' succeeds, so it can jump to
+** the end. If pattern is headfail, that is all (it cannot fail
+** in other parts); this case includes 'not' of simple sets. Otherwise,
+** use the default code (a choice plus a failtwice).
+*/
+static void codenot (CompileState *compst, TTree *tree) {
+ Charset st;
+ int e = getfirst(tree, fullset, st);
+ int test = codetestset(compst, st, e);
+ if (headfail(tree)) /* test (fail(p1)) -> L1; fail; L1: */
+ addinstruction(compst, IFail, 0);
+ else {
+ /* test(fail(p))-> L1; choice L1; <p>; failtwice; L1: */
+ int pchoice = addinstruction(compst, IChoice, 0);
+ codegen(compst, tree, 0, NOINST, fullset);
+ addinstruction(compst, IFailTwice, 0);
+ jumptohere(compst, pchoice);
+ }
+ jumptohere(compst, test);
+}
+
+
+/*
+** change open calls to calls, using list 'positions' to find
+** correct offsets; also optimize tail calls
+*/
+static void correctcalls (CompileState *compst, int *positions,
+ int from, int to) {
+ int i;
+ Instruction *code = compst->p->code;
+ for (i = from; i < to; i++) {
+ if (code[i].i.code == IOpenCall) {
+ int n = code[i].i.offset; /* rule number */
+ int rule = positions[n]; /* rule position */
+ assert(rule == from || code[rule - 1].i.code == IRet);
+ if (code[finaltarget(code, i + 1)].i.code == IRet) /* call; ret ? */
+ code[i].i.code = IJmp; /* tail call */
+ else
+ code[i].i.code = ICall;
+ jumptothere(compst, i, rule); /* call jumps to respective rule */
+ }
+ }
+}
+
+
+/*
+** Code for a grammar:
+** call L1; jmp L2; L1: rule 1; ret; rule 2; ret; ...; L2:
+*/
+static void codegrammar (CompileState *compst, TTree *grammar) {
+ int positions[MAXRULES];
+ int rulenumber = 0;
+ TTree *rule;
+ int firstcall = addinstruction(compst, ICall, 0); /* call initial rule */
+ int jumptoend = addinstruction(compst, IJmp, 0); /* jump to the end */
+ jumptohere(compst, firstcall); /* here starts the initial rule */
+ for (rule = sib1(grammar); rule->tag == TRule; rule = sib2(rule)) {
+ positions[rulenumber++] = gethere(compst); /* save rule position */
+ codegen(compst, sib1(rule), 0, NOINST, fullset); /* code rule */
+ addinstruction(compst, IRet, 0);
+ }
+ assert(rule->tag == TTrue);
+ jumptohere(compst, jumptoend);
+ correctcalls(compst, positions, firstcall + 2, gethere(compst));
+}
+
+
+static void codecall (CompileState *compst, TTree *call) {
+ int c = addinstruction(compst, IOpenCall, 0); /* to be corrected later */
+ assert(sib2(call)->tag == TRule);
+ setoffset(compst, c, sib2(call)->cap); /* offset = rule number */
+}
+
+
+static void codeseq (CompileState *compst, TTree *p1, TTree *p2,
+ int opt, int tt, const Charset fl) {
+ if (needfollow(p1)) {
+ Charset fl1;
+ getfirst(p2, fl, fl1); /* p1 follow is p2 first */
+ codegen(compst, p1, 0, tt, fl1);
+ }
+ else /* use 'fullset' as follow */
+ codegen(compst, p1, 0, tt, fullset);
+ if (fixedlen(p1) != 0) /* can p1 consume anything? */
+ tt = NOINST; /* invalidate test */
+ codegen(compst, p2, opt, tt, fl);
+}
+
+
+/*
+** Main code-generation function: dispatch to auxiliary functions
+** according to kind of tree
+*/
+static void codegen (CompileState *compst, TTree *tree, int opt, int tt,
+ const Charset fl) {
+ switch (tree->tag) {
+ case TChar: codechar(compst, tree->u.n, tt); break;
+ case TAny: addinstruction(compst, IAny, 0); break;
+ case TSet: codecharset(compst, treebuffer(tree), tt); break;
+ case TTrue: break;
+ case TFalse: addinstruction(compst, IFail, 0); break;
+ case TSeq: codeseq(compst, sib1(tree), sib2(tree), opt, tt, fl); break;
+ case TChoice: codechoice(compst, sib1(tree), sib2(tree), opt, fl); break;
+ case TRep: coderep(compst, sib1(tree), opt, fl); break;
+ case TBehind: codebehind(compst, tree); break;
+ case TNot: codenot(compst, sib1(tree)); break;
+ case TAnd: codeand(compst, sib1(tree), tt); break;
+ case TCapture: codecapture(compst, tree, tt, fl); break;
+ case TRunTime: coderuntime(compst, tree, tt); break;
+ case TGrammar: codegrammar(compst, tree); break;
+ case TCall: codecall(compst, tree); break;
+ default: assert(0);
+ }
+}
+
+
+/*
+** Optimize jumps and other jump-like instructions.
+** * Update labels of instructions with labels to their final
+** destinations (e.g., choice L1; ... L1: jmp L2 becomes
+** choice L2)
+** * Jumps to other instructions that do jumps become those
+** instructions (e.g., jump to return becomes a return; jump
+** to commit becomes a commit)
+*/
+static void peephole (CompileState *compst) {
+ Instruction *code = compst->p->code;
+ int i;
+ for (i = 0; i < compst->ncode; i++) {
+ switch (code[i].i.code) {
+ case IChoice: case ICall: case ICommit: case IPartialCommit:
+ case IBackCommit: case ITestChar: case ITestSet:
+ case ITestAny: { /* instructions with labels */
+ jumptothere(compst, i, finallabel(code, i)); /* optimize label */
+ break;
+ }
+ case IJmp: {
+ int ft = finaltarget(code, i);
+ switch (code[ft].i.code) { /* jumping to what? */
+ case IRet: case IFail: case IFailTwice:
+ case IEnd: { /* instructions with unconditional implicit jumps */
+ code[i] = code[ft]; /* jump becomes that instruction */
+ break;
+ }
+ case ICommit: case IPartialCommit:
+ case IBackCommit: { /* inst. with unconditional explicit jumps */
+ int fft = finallabel(code, ft);
+ code[i] = code[ft]; /* jump becomes that instruction... */
+ jumptothere(compst, i, fft); /* but must correct its offset */
+ i--; /* reoptimize its label */
+ break;
+ }
+ default: {
+ jumptothere(compst, i, ft); /* optimize label */
+ break;
+ }
+ }
+ break;
+ }
+ default: break;
+ }
+ }
+}
+
+
+/*
+** Compile a pattern
+*/
+Instruction *compile (lua_State *L, Pattern *p) {
+ CompileState compst;
+ compst.p = p; compst.ncode = 0; compst.L = L;
+ reallocprog(L, p, 2); /* minimum initial size */
+ codegen(&compst, p->tree, 0, NOINST, fullset);
+ addinstruction(&compst, IEnd, 0);
+ reallocprog(L, p, compst.ncode); /* set final size */
+ peephole(&compst);
+ return p->code;
+}
+
+
+/* }====================================================== */
+
View
33 lpcode.h
@@ -0,0 +1,33 @@
+/*
+** $Id: lpcode.h,v 1.2 2013/03/27 15:48:56 roberto Exp $
+*/
+
+#if !defined(lpcode_h)
+#define lpcode_h
+
+#include "lua.h"
+
+#include "lptypes.h"
+#include "lptree.h"
+#include "lpvm.h"
+
+int tocharset (TTree *tree, byte *cs);
+int checkaux (TTree *tree, int pred);
+int fixedlenx (TTree *tree, int count, int len);
+int hascaptures (TTree *tree);
+int lp_gc (lua_State *L);
+Instruction *compile (lua_State *L, Pattern *p);
+void reallocprog (lua_State *L, Pattern *p, int nsize);
+
+
+#define PEnullable 0
+#define PEnofail 2
+
+#define nofail(t) checkaux(t, PEnofail)
+#define nullable(t) checkaux(t, PEnullable)
+
+#define fixedlen(t) fixedlenx(t, 0, 0)
+
+
+
+#endif
View
2,405 lpeg.c
@@ -1,2405 +0,0 @@
-/*
-** $Id: lpeg.c,v 1.114 2011/02/16 15:02:20 roberto Exp $
-** LPeg - PEG pattern matching for Lua
-** Copyright 2007, Lua.org & PUC-Rio (see 'lpeg.html' for license)
-** written by Roberto Ierusalimschy
-*/
-
-
-#include <assert.h>
-#include <ctype.h>
-#include <limits.h>
-#include <stdio.h>
-#include <string.h>
-
-#include "lua.h"
-#include "lauxlib.h"
-
-#include "lpeg.h"
-
-
-#define VERSION "0.10"
-#define PATTERN_T "lpeg-pattern"
-#define MAXSTACKIDX "lpeg-maxstack"
-
-
-/*
-** compatibility with Lua 5.2
-*/
-#if (LUA_VERSION_NUM == 502)
-
-#undef lua_equal
-#define lua_equal(L,idx1,idx2) lua_compare(L,(idx1),(idx2),LUA_OPEQ)
-
-#undef lua_getfenv
-#define lua_getfenv lua_getuservalue
-#undef lua_setfenv
-#define lua_setfenv lua_setuservalue
-
-#undef lua_objlen
-#define lua_objlen lua_rawlen
-
-#undef luaL_register
-#define luaL_register(L,n,f) \
- { if ((n) == NULL) luaL_setfuncs(L,f,0); else luaL_newlib(L,f); }
-
-#endif
-
-
-
-/* initial size for call/backtrack stack */
-#define INITBACK 100
-
-/* default maximum size for call/backtrack stack */
-#define MAXBACK INITBACK
-
-/* size for call/backtrack stack for verifier */
-#define MAXBACKVER 200
-
-/* initial size for capture's list */
-#define INITCAPSIZE 32
-
-
-/* index, on Lua stack, for subject */
-#define SUBJIDX 2
-
-/* number of fixed arguments to 'match' (before capture arguments) */
-#define FIXEDARGS 3
-
-/* index, on Lua stack, for substitution value cache */
-#define subscache(cs) ((cs)->ptop + 1)
-
-/* index, on Lua stack, for capture list */
-#define caplistidx(ptop) ((ptop) + 2)
-
-/* index, on Lua stack, for pattern's fenv */
-#define penvidx(ptop) ((ptop) + 3)
-
-/* index, on Lua stack, for backtracking stack */
-#define stackidx(ptop) ((ptop) + 4)
-
-
-
-typedef unsigned char byte;
-
-
-#define CHARSETSIZE ((UCHAR_MAX/CHAR_BIT) + 1)
-
-
-typedef byte Charset[CHARSETSIZE];
-
-
-/* Virtual Machine's instructions */
-typedef enum Opcode {
- IAny, IChar, ISet, ISpan,
- IBack,
- IRet, IEnd,
- IChoice, IJmp, ICall, IOpenCall,
- ICommit, IPartialCommit, IBackCommit, IFailTwice, IFail, IGiveup,
- IFunc,
- IFullCapture, IEmptyCapture, IEmptyCaptureIdx,
- IOpenCapture, ICloseCapture, ICloseRunTime
-} Opcode;
-
-
-#define ISJMP 0x1
-#define ISCHECK 0x2
-#define ISFIXCHECK 0x4
-#define ISNOFAIL 0x8
-#define ISCAPTURE 0x10
-#define ISMOVABLE 0x20
-#define ISFENVOFF 0x40
-
-static const int opproperties[] = {
- /* IAny */ ISCHECK | ISFIXCHECK | ISJMP,
- /* IChar */ ISCHECK | ISFIXCHECK | ISJMP,
- /* ISet */ ISCHECK | ISFIXCHECK | ISJMP,
- /* ISpan */ ISNOFAIL,
- /* IBack */ 0,
- /* IRet */ 0,
- /* IEnd */ 0,
- /* IChoice */ ISJMP,
- /* IJmp */ ISJMP | ISNOFAIL,
- /* ICall */ ISJMP,
- /* IOpenCall */ ISFENVOFF,
- /* ICommit */ ISJMP,
- /* IPartialCommit */ ISJMP,
- /* IBackCommit */ ISJMP,
- /* IFailTwice */ 0,
- /* IFail */ 0,
- /* IGiveup */ 0,
- /* IFunc */ ISCHECK | ISJMP,
- /* IFullCapture */ ISCAPTURE | ISNOFAIL | ISFENVOFF,
- /* IEmptyCapture */ ISCAPTURE | ISNOFAIL | ISMOVABLE,
- /* IEmptyCaptureIdx */ISCAPTURE | ISNOFAIL | ISMOVABLE | ISFENVOFF,
- /* IOpenCapture */ ISCAPTURE | ISNOFAIL | ISMOVABLE | ISFENVOFF,
- /* ICloseCapture */ ISCAPTURE | ISNOFAIL | ISMOVABLE | ISFENVOFF,
- /* ICloseRunTime */ ISCAPTURE | ISFENVOFF
-};
-
-
-typedef union Instruction {
- struct Inst {
- byte code;
- byte aux;
- short offset;
- } i;
- PattFunc f;
- int iv;
- byte buff[1];
-} Instruction;
-
-static const Instruction giveup = {{IGiveup, 0, 0}};
-
-#define getkind(op) ((op)->i.aux & 0xF)
-#define getoff(op) (((op)->i.aux >> 4) & 0xF)
-
-#define dest(p,x) ((x) + ((p)+(x))->i.offset)
-
-#define MAXOFF 0xF
-#define MAXAUX 0xFF
-
-/* maximum size (in elements) for a pattern */
-#define MAXPATTSIZE (SHRT_MAX - 10)
-
-
-#define isprop(op,p) (opproperties[(op)->i.code] & (p))
-#define isjmp(op) (isprop(op, ISJMP) && (op)->i.offset != 0)
-#define iscapture(op) isprop(op, ISCAPTURE)
-#define ischeck(op) (isprop(op, ISCHECK) && (op)->i.offset == 0)
-#define isfixcheck(op) (isprop(op, ISFIXCHECK) && (op)->i.offset == 0)
-#define istest(op) (isprop(op, ISCHECK) && (op)->i.offset != 0)
-#define isnofail(op) isprop(op, ISNOFAIL)
-#define ismovable(op) isprop(op, ISMOVABLE)
-#define isfenvoff(op) isprop(op, ISFENVOFF)
-
-
-/* kinds of captures */
-typedef enum CapKind {
- Cclose, Cposition, Cconst, Cbackref, Carg, Csimple, Ctable, Cfunction,
- Cquery, Cstring, Csubst, Cfold, Cruntime, Cgroup
-} CapKind;
-
-#define iscapnosize(k) ((k) == Cposition || (k) == Cconst)
-
-
-typedef struct Capture {
- const char *s; /* position */
- short idx;
- byte kind;
- byte siz;
-} Capture;
-
-
-/* size (in elements) for an instruction plus extra l bytes */
-#define instsize(l) (((l) + sizeof(Instruction) - 1)/sizeof(Instruction) + 1)
-
-
-/* size (in elements) for a ISet instruction */
-#define CHARSETINSTSIZE instsize(CHARSETSIZE)
-
-/* size (in elements) for a IFunc instruction */
-#define funcinstsize(p) ((p)->i.aux + 2)
-
-
-#define loopset(v,b) { int v; for (v = 0; v < CHARSETSIZE; v++) b; }
-
-
-#define testchar(st,c) (((int)(st)[((c) >> 3)] & (1 << ((c) & 7))))
-#define setchar(st,c) ((st)[(c) >> 3] |= (1 << ((c) & 7)))
-
-
-static int target (Instruction *p, int i);
-
-
-static int sizei (const Instruction *i) {
- switch((Opcode)i->i.code) {
- case ISet: case ISpan: return CHARSETINSTSIZE;
- case IFunc: return funcinstsize(i);
- default: return 1;
- }
-}
-
-
-static const char *val2str (lua_State *L, int idx) {
- const char *k = lua_tostring(L, idx);
- if (k != NULL)
- return lua_pushfstring(L, "rule '%s'", k);
- else
- return lua_pushfstring(L, "rule <a %s>", luaL_typename(L, idx));
-}
-
-
-static int getposition (lua_State *L, int t, int i) {
- int res;
- lua_getfenv(L, -1);
- lua_rawgeti(L, -1, i); /* get key from pattern's environment */
- lua_gettable(L, t); /* get position from positions table */
- res = lua_tointeger(L, -1);
- if (res == 0) { /* key has no registered position? */
- lua_rawgeti(L, -2, i); /* get key again */
- return luaL_error(L, "%s is not defined in given grammar", val2str(L, -1));
- }
- lua_pop(L, 2); /* remove environment and position */
- return res;
-}
-
-
-/*
-** {======================================================
-** Printing patterns (for debugging)
-** =======================================================
-*/
-
-
-static void printcharset (const Charset st) {
- int i;
- printf("[");
- for (i = 0; i <= UCHAR_MAX; i++) {
- int first = i;
- while (testchar(st, i) && i <= UCHAR_MAX) i++;
- if (i - 1 == first) /* unary range? */
- printf("(%02x)", first);
- else if (i - 1 > first) /* non-empty range? */
- printf("(%02x-%02x)", first, i - 1);
- }
- printf("]");
-}
-
-
-static void printcapkind (int kind) {
- const char *const modes[] = {
- "close", "position", "constant", "backref",
- "argument", "simple", "table", "function",
- "query", "string", "substitution", "fold",
- "runtime", "group"};
- printf("%s", modes[kind]);
-}
-
-
-static void printjmp (const Instruction *op, const Instruction *p) {
- printf("-> ");
- if (p->i.offset == 0) printf("FAIL");
- else printf("%d", (int)(dest(0, p) - op));
-}
-
-
-static void printinst (const Instruction *op, const Instruction *p) {
- const char *const names[] = {
- "any", "char", "set", "span", "back",
- "ret", "end",
- "choice", "jmp", "call", "open_call",
- "commit", "partial_commit", "back_commit", "failtwice", "fail", "giveup",
- "func",
- "fullcapture", "emptycapture", "emptycaptureidx", "opencapture",
- "closecapture", "closeruntime"
- };
- printf("%02ld: %s ", (long)(p - op), names[p->i.code]);
- switch ((Opcode)p->i.code) {
- case IChar: {
- printf("'%c'", p->i.aux);
- printjmp(op, p);
- break;
- }
- case IAny: {
- printf("* %d", p->i.aux);
- printjmp(op, p);
- break;
- }
- case IFullCapture: case IOpenCapture:
- case IEmptyCapture: case IEmptyCaptureIdx:
- case ICloseCapture: case ICloseRunTime: {
- printcapkind(getkind(p));
- printf("(n = %d) (off = %d)", getoff(p), p->i.offset);
- break;
- }
- case ISet: {
- printcharset((p+1)->buff);
- printjmp(op, p);
- break;
- }
- case ISpan: {
- printcharset((p+1)->buff);
- break;
- }
- case IOpenCall: {
- printf("-> %d", p->i.offset);
- break;
- }
- case IChoice: {
- printjmp(op, p);
- printf(" (%d)", p->i.aux);
- break;
- }
- case IJmp: case ICall: case ICommit:
- case IPartialCommit: case IBackCommit: {
- printjmp(op, p);
- break;
- }
- default: break;
- }
- printf("\n");
-}
-
-
-static void printpatt (Instruction *p) {
- Instruction *op = p;
- for (;;) {
- printinst(op, p);
- if ((Opcode)p->i.code == IEnd) break;
- p += sizei(p);
- }
-}
-
-
-#if 0
-static void printcap (Capture *cap) {
- printcapkind(cap->kind);
- printf(" (idx: %d - size: %d) -> %p\n", cap->idx, cap->siz, cap->s);
-}
-
-
-static void printcaplist (Capture *cap) {
- for (; cap->s; cap++) printcap(cap);
-}
-#endif
-
-/* }====================================================== */
-
-
-/*
-** {======================================================
-** Virtual Machine
-** =======================================================
-*/
-
-
-typedef struct Stack {
- const char *s;
- const Instruction *p;
- int caplevel;
-} Stack;
-
-
-#define getstackbase(L, ptop) ((Stack *)lua_touserdata(L, stackidx(ptop)))
-
-
-static int runtimecap (lua_State *L, Capture *close, Capture *ocap,
- const char *o, const char *s, int ptop);
-
-
-static Capture *doublecap (lua_State *L, Capture *cap, int captop, int ptop) {
- Capture *newc;
- if (captop >= INT_MAX/((int)sizeof(Capture) * 2))
- luaL_error(L, "too many captures");
- newc = (Capture *)lua_newuserdata(L, captop * 2 * sizeof(Capture));
- memcpy(newc, cap, captop * sizeof(Capture));
- lua_replace(L, caplistidx(ptop));
- return newc;
-}
-
-
-static Stack *doublestack (lua_State *L, Stack **stacklimit, int ptop) {
- Stack *stack = getstackbase(L, ptop);
- Stack *newstack;
- int n = *stacklimit - stack;
- int max, newn;
- lua_getfield(L, LUA_REGISTRYINDEX, MAXSTACKIDX);
- max = lua_tointeger(L, -1);
- lua_pop(L, 1);
- if (n >= max)
- luaL_error(L, "too many pending calls/choices");
- newn = 2*n; if (newn > max) newn = max;
- newstack = (Stack *)lua_newuserdata(L, newn * sizeof(Stack));
- memcpy(newstack, stack, n * sizeof(Stack));
- lua_replace(L, stackidx(ptop));
- *stacklimit = newstack + newn;
- return newstack + n;
-
-}
-
-
-static void adddyncaptures (const char *s, Capture *base, int n, int fd) {
- int i;
- assert(base[0].kind == Cruntime && base[0].siz == 0);
- base[0].idx = fd; /* first returned capture */
- for (i = 1; i < n; i++) { /* add extra captures */
- base[i].siz = 1; /* mark it as closed */
- base[i].s = s;
- base[i].kind = Cruntime;
- base[i].idx = fd + i; /* stack index */
- }
- base[n].kind = Cclose; /* add closing entry */
- base[n].siz = 1;
- base[n].s = s;
-}
-
-
-#define condfailed(p) { int f = p->i.offset; if (f) p+=f; else goto fail; }
-
-static const char *match (lua_State *L,
- const char *o, const char *s, const char *e,
- Instruction *op, Capture *capture, int ptop) {
- Stack stackbase[INITBACK];
- Stack *stacklimit = stackbase + INITBACK;
- Stack *stack = stackbase; /* point to first empty slot in stack */
- int capsize = INITCAPSIZE;
- int captop = 0; /* point to first empty slot in captures */
- const Instruction *p = op;
- stack->p = &giveup; stack->s = s; stack->caplevel = 0; stack++;
- lua_pushlightuserdata(L, stackbase);
- for (;;) {
-#if defined(DEBUG)
- printf("s: |%s| stck: %d c: %d ",
- s, stack - getstackbase(L, ptop), captop);
- printinst(op, p);
-#endif
- switch ((Opcode)p->i.code) {
- case IEnd: {
- assert(stack == getstackbase(L, ptop) + 1);
- capture[captop].kind = Cclose;
- capture[captop].s = NULL;
- return s;
- }
- case IGiveup: {
- assert(stack == getstackbase(L, ptop));
- return NULL;
- }
- case IRet: {
- assert(stack > getstackbase(L, ptop) && (stack - 1)->s == NULL);
- p = (--stack)->p;
- continue;
- }
- case IAny: {
- int n = p->i.aux;
- if (n <= e - s) { p++; s += n; }
- else condfailed(p);
- continue;
- }
- case IChar: {
- if ((byte)*s == p->i.aux && s < e) { p++; s++; }
- else condfailed(p);
- continue;
- }
- case ISet: {
- int c = (byte)*s;
- if (testchar((p+1)->buff, c) && s < e)
- { p += CHARSETINSTSIZE; s++; }
- else condfailed(p);
- continue;
- }
- case IBack: {
- int n = p->i.aux;
- if (n > s - o) goto fail;
- s -= n; p++;
- continue;
- }
- case ISpan: {
- for (; s < e; s++) {
- int c = (byte)*s;
- if (!testchar((p+1)->buff, c)) break;
- }
- p += CHARSETINSTSIZE;
- continue;
- }
- case IFunc: {
- const char *r = (p+1)->f(s, e, o, (p+2)->buff);
- if (r != NULL) { s = r; p += funcinstsize(p); }
- else condfailed(p);
- continue;
- }
- case IJmp: {
- p += p->i.offset;
- continue;
- }
- case IChoice: {
- if (stack == stacklimit)
- stack = doublestack(L, &stacklimit, ptop);
- stack->p = dest(0, p);
- stack->s = s - p->i.aux;
- stack->caplevel = captop;
- stack++;
- p++;
- continue;
- }
- case ICall: {
- if (stack == stacklimit)
- stack = doublestack(L, &stacklimit, ptop);
- stack->s = NULL;
- stack->p = p + 1; /* save return address */
- stack++;
- p += p->i.offset;
- continue;
- }
- case ICommit: {
- assert(stack > getstackbase(L, ptop) && (stack - 1)->s != NULL);
- stack--;
- p += p->i.offset;
- continue;
- }
- case IPartialCommit: {
- assert(stack > getstackbase(L, ptop) && (stack - 1)->s != NULL);
- (stack - 1)->s = s;
- (stack - 1)->caplevel = captop;
- p += p->i.offset;
- continue;
- }
- case IBackCommit: {
- assert(stack > getstackbase(L, ptop) && (stack - 1)->s != NULL);
- s = (--stack)->s;
- captop = stack->caplevel;
- p += p->i.offset;
- continue;
- }
- case IFailTwice:
- assert(stack > getstackbase(L, ptop));
- stack--;
- /* go through */
- case IFail:
- fail: { /* pattern failed: try to backtrack */
- do { /* remove pending calls */
- assert(stack > getstackbase(L, ptop));
- s = (--stack)->s;
- } while (s == NULL);
- captop = stack->caplevel;
- p = stack->p;
- continue;
- }
- case ICloseRunTime: {
- int fr = lua_gettop(L) + 1; /* stack index of first result */
- int ncap = runtimecap(L, capture + captop, capture, o, s, ptop);
- lua_Integer res = lua_tointeger(L, fr) - 1; /* offset */
- int n = lua_gettop(L) - fr; /* number of new captures */
- if (res == -1) { /* may not be a number */
- if (!lua_toboolean(L, fr)) { /* false value? */
- lua_settop(L, fr - 1); /* remove results */
- goto fail; /* and fail */
- }
- else if (lua_isboolean(L, fr)) /* true? */
- res = s - o; /* keep current position */
- }
- if (res < s - o || res > e - o)
- luaL_error(L, "invalid position returned by match-time capture");
- s = o + res; /* update current position */
- captop -= ncap; /* remove nested captures */
- lua_remove(L, fr); /* remove first result (offset) */
- if (n > 0) { /* captures? */
- if ((captop += n + 1) >= capsize) {
- capture = doublecap(L, capture, captop, ptop);
- capsize = 2 * captop;
- }
- adddyncaptures(s, capture + captop - n - 1, n, fr);
- }
- p++;
- continue;
- }
- case ICloseCapture: {
- const char *s1 = s - getoff(p);
- assert(captop > 0);
- if (capture[captop - 1].siz == 0 &&
- s1 - capture[captop - 1].s < UCHAR_MAX) {
- capture[captop - 1].siz = s1 - capture[captop - 1].s + 1;
- p++;
- continue;
- }
- else {
- capture[captop].siz = 1; /* mark entry as closed */
- goto capture;
- }
- }
- case IEmptyCapture: case IEmptyCaptureIdx:
- capture[captop].siz = 1; /* mark entry as closed */
- goto capture;
- case IOpenCapture:
- capture[captop].siz = 0; /* mark entry as open */
- goto capture;
- case IFullCapture:
- capture[captop].siz = getoff(p) + 1; /* save capture size */
- capture: {
- capture[captop].s = s - getoff(p);
- capture[captop].idx = p->i.offset;
- capture[captop].kind = getkind(p);
- if (++captop >= capsize) {
- capture = doublecap(L, capture, captop, ptop);
- capsize = 2 * captop;
- }
- p++;
- continue;
- }
- case IOpenCall: {
- lua_rawgeti(L, penvidx(ptop), p->i.offset);
- luaL_error(L, "reference to %s outside a grammar", val2str(L, -1));
- }
- default: assert(0); return NULL;
- }
- }
-}
-
-/* }====================================================== */
-
-
-/*
-** {======================================================
-** Verifier
-** =======================================================
-*/
-
-
-/*
-** check whether pattern may go from 'p' to 'e' without consuming any
-** input. Raises an error if it detects a left recursion. 'op' points
-** the beginning of the pattern. If pattern belongs to a grammar,
-** 'rule' is the stack index where is its corresponding key (only for
-** error messages) and 'posttable' is the stack index with a table
-** mapping rule keys to the position of their code in the pattern.
-*/
-static int verify (lua_State *L, Instruction *op, const Instruction *p,
- Instruction *e, int postable, int rule) {
- static const char dummy[] = "";
- Stack back[MAXBACKVER];
- int backtop = 0; /* point to first empty slot in back */
- while (p != e) {
- switch ((Opcode)p->i.code) {
- case IRet: {
- p = back[--backtop].p;
- continue;
- }
- case IChoice: {
- if (backtop >= MAXBACKVER)
- return luaL_error(L, "too many pending calls/choices");
- back[backtop].p = dest(0, p);
- back[backtop++].s = dummy;
- p++;
- continue;
- }
- case ICall: {
- assert((p + 1)->i.code != IRet); /* no tail call */
- if (backtop >= MAXBACKVER)
- return luaL_error(L, "too many pending calls/choices");
- back[backtop].s = NULL;
- back[backtop++].p = p + 1;
- goto dojmp;
- }
- case IOpenCall: {
- int i;
- if (postable == 0) /* grammar still not fixed? */
- goto fail; /* to be verified later */
- for (i = 0; i < backtop; i++) {
- if (back[i].s == NULL && back[i].p == p + 1)
- return luaL_error(L, "%s is left recursive", val2str(L, rule));
- }
- if (backtop >= MAXBACKVER)
- return luaL_error(L, "too many pending calls/choices");
- back[backtop].s = NULL;
- back[backtop++].p = p + 1;
- p = op + getposition(L, postable, p->i.offset);
- continue;
- }
- case IBackCommit:
- case ICommit: {
- assert(backtop > 0 && p->i.offset > 0);
- backtop--;
- goto dojmp;
- }
- case IPartialCommit: {
- assert(backtop > 0);
- if (p->i.offset > 0) goto dojmp; /* forward jump */
- else { /* loop will be detected when checking corresponding rule */
- assert(postable != 0);
- backtop--;
- p++; /* just go on now */
- continue;
- }
- }
- case IBack: {
- if (p->i.aux == 1 && isfixcheck(p + 1)) { /* char test? */
- p++; /* skip back instruction */
- p += sizei(p); /* skip char test */
- }
- else { /* standard lookbehind code */
- assert((Opcode)(p - 1)->i.code == IChoice); /* look behind */
- backtop--;
- p += (p - 1)->i.offset;
- assert((Opcode)(p - 1)->i.code == IFail); /* look behind */
- }
- continue;
- }
- case IAny:
- case IChar:
- case ISet: {
- const Instruction *next = p + sizei(p);
- if ((Opcode)next->i.code == IBack)
- p = next + 1; /* continue after the back instruction */
- else if (p->i.offset == 0) goto fail;
- else /* jump */
- p += p->i.offset;
- continue;
- }
- case IJmp:
- dojmp: {
- p += p->i.offset;
- continue;
- }
- case IFailTwice: /* 'not' predicate */
- goto fail; /* body could have failed; try to backtrack it */
- case IFail: {
- if (p > op && (p - 1)->i.code == IBackCommit) { /* 'and' predicate? */
- p++; /* pretend it succeeded and go ahead */
- continue;
- }
- /* else failed: go through */
- }
- fail: { /* pattern failed: try to backtrack */
- do {
- if (backtop-- == 0)
- return 1; /* no more backtracking */
- } while (back[backtop].s == NULL);
- p = back[backtop].p;
- continue;
- }
- case ISpan:
- case IOpenCapture: case ICloseCapture:
- case IEmptyCapture: case IEmptyCaptureIdx:
- case IFullCapture: {
- p += sizei(p);
- continue;
- }
- case ICloseRunTime: {
- goto fail; /* be liberal in this case */
- }
- case IFunc: {
- const char *r = (p+1)->f(dummy, dummy, dummy, (p+2)->buff);
- if (r != NULL) { p += funcinstsize(p); }
- else condfailed(p);
- continue;
- }
- case IEnd: /* cannot happen (should stop before it) */
- default: assert(0); return 0;
- }
- }
- assert(backtop == 0);
- return 0;
-}
-
-
-/*
-** Check the code of one grammar rule, stored from op[from] up to (but
-** not including) op[to]. Every IPartialCommit with a negative offset
-** closes a loop; when such a loop starts at or before the most recent
-** IOpenCall seen, its body is handed to 'verify', and a Lua error is
-** raised if 'verify' returns 0 for it. Finally the range op + from ..
-** op + to - 1 goes through 'verify' as a whole.
-*/
-static void checkrule (lua_State *L, Instruction *op, int from, int to,
-                       int postable, int rule) {
-  int lastopen = 0;  /* index of the most recent IOpenCall seen so far */
-  int i = from;
-  while (i < to) {
-    if (op[i].i.code == IOpenCall)
-      lastopen = i;
-    else if (op[i].i.code == IPartialCommit && op[i].i.offset < 0) {
-      int start = dest(op, i);  /* a backward partial commit closes a loop */
-      assert(op[start - 1].i.code == IChoice &&
-             dest(op, start - 1) == target(op, i + 1));
-      /* only a loop that encloses an open call gets its body checked */
-      if (start <= lastopen &&
-          !verify(L, op, op + start, op + i, postable, rule))
-        luaL_error(L, "possible infinite loop in %s", val2str(L, rule));
-    }
-    i += sizei(op + i);
-  }
-  assert(op[i - 1].i.code == IRet);
-  verify(L, op, op + from, op + to - 1, postable, rule);
-}
-
-
-
-
-/* }====================================================== */
-
-
-/*
-** {======================================================
-** Building Patterns
-** =======================================================
-*/
-
-enum charsetanswer { NOINFO, ISCHARSET, VALIDSTARTS };  /* values for CharsetTag.tag */
-
-typedef struct CharsetTag {  /* pairs a Charset with a charsetanswer tag */
- enum charsetanswer tag;  /* NOTE(review): meaning of each value is set by code outside this chunk */
- Charset cs;
-} CharsetTag;
-
-
-static Instruction *getpatt (lua_State *L, int idx, int *size);  /* forward declaration; the body is not in this chunk */
-
-
-static void check2test (Instruction *p, int n) {  /* give check 'p' the non-zero offset 'n' */
- assert(ischeck(p) && n != 0);
- p->i.offset = n;  /* 'verify' jumps by a check's offset when it is non-zero and fails when it is 0 */
-}
-
-
-/*
-** Reverse, in place, the array slice p[0] .. p[e] (both ends inclusive).
-*/
-static void invert (Instruction *p, int e) {
-  int lo = 0;
-  int hi = e;
-  while (lo < hi) {  /* swap the outermost pair, then move inwards */
-    Instruction saved = p[lo];
-    p[lo] = p[hi];
-    p[hi] = saved;
-    lo++;
-    hi--;
-  }
-}
-
-
-/*
-** Rotate the array slice p[0] .. p[e] (both ends inclusive) 'n' steps to
-** the 'left', by means of three reversals: first the leading 'n'
-** elements, then the remaining ones, and finally the slice as a whole.
-*/
-static void rotate (Instruction *p, int e, int n) {
-  Instruction *rest = p + n;  /* first element after the leading part */
-  int lasthead = n - 1;  /* last index of the leading part */
-  int lastrest = e - n;  /* last index of the remainder, relative to 'rest' */
-  invert(p, lasthead);
-  invert(rest, lastrest);
-  invert(p, e);
-}
-
-
-#define op_step(p) ((p)->i.code == IAny ? (p)->i.aux : 1)  /* 'aux' for IAny, 1 for any other opcode */
-
-
-/*
-** Scan the run of fixed checks that starts at 'p', stopping early when
-** taking one more check would push the accumulated step past
-** 'MAXOFF - up'. Stores the accumulated step in '*pn' and returns the
-** offset (in Instruction slots) of the first instruction not taken.
-*/
-static int skipchecks (Instruction *p, int up, int *pn) {
-  int pos = 0;
-  int total = 0;
-  while (isfixcheck(p + pos)) {
-    int step = op_step(p + pos);
-    if (total + step > MAXOFF - up)
-      break;  /* taking this check would exceed the limit */
-    total += step;
-    pos += sizei(p + pos);
-  }
-  *pn = total;
-  return pos;
-}
-
-
-/* a movable instruction whose offset is still below MAXOFF */
-#define ismovablecap(op) (ismovable(op) && getoff(op) < MAXOFF)
-
-/*
-** Walk the code up to IEnd looking for a group of movable captures that
-** is immediately followed by fixed checks. Such a group is rotated past
-** the checks, after 'n << 4' has been added to the 'aux' field of each
-** capture in it, where 'n' is the accumulated step of the checks being
-** crossed. Nothing at or before the furthest jump destination seen so
-** far is touched.
-*/
-static void optimizecaptures (Instruction *p) {
-  int barrier = 0;  /* indices below this one must stay in place */
-  int i = 0;
-  while (p[i].i.code != IEnd) {
-    if (isjmp(p + i) && dest(p, i) >= barrier)
-      barrier = dest(p, i) + 1;  /* do not optimize jump targets */
-    else if (i >= barrier && ismovablecap(p + i) && isfixcheck(p + i + 1)) {
-      /* found a border capture|check */
-      int widest = getoff(p + i);  /* largest offset within the group */
-      int start = i;
-      int end, n;
-      /* walk back to the first capture in the group */
-      while (start > barrier && ismovablecap(p + start - 1)) {
-        int off;
-        start--;
-        off = getoff(p + start);
-        if (widest < off)
-          widest = off;
-      }
-      end = skipchecks(p + i + 1, widest, &n) + i;  /* find last check */
-      if (n != 0) {  /* otherwise first check is too big to move across */
-        int k;
-        assert(n <= MAXOFF && start <= i && i < end);
-        for (k = i; k >= start; k--)
-          p[k].i.aux += (n << 4);  /* correct offset of captures to be moved */
-        rotate(p + start, end - start, i - start + 1);  /* move them up */
-        i = end;
-        assert(isfixcheck(p + start) && iscapture(p + i));
-      }
-    }
-    i += sizei(p + i);
-  }
-}
-
-
-/*
-** Follow a chain of IJmp instructions starting at index 'i' and return
-** the index of the first instruction that is not an IJmp.
-*/
-static int target (Instruction *p, int i) {
-  for (;;) {
-    if (p[i].i.code != IJmp)
-      return i;
-    i += p[i].i.offset;
-  }
-}
-
-
-/*
-** Retarget every instruction accepted by 'isjmp' so that it lands
-** directly on the end of any IJmp chain it used to point into.
-*/
-static void optimizejumps (Instruction *p) {
-  int pc = 0;
-  while (p[pc].i.code != IEnd) {
-    if (isjmp(p + pc)) {
-      int landing = target(p, dest(p, pc));
-      p[pc].i.offset = landing - pc;
-    }
-    pc += sizei(p + pc);
-  }
-}
-
-
-/*
-** When the IChoice at 'p' is immediately followed by a fixed check, move
-** the check in front of the choice. The choice records the check's step
-** in its 'aux' field, the check receives the choice's offset (through
-** 'check2test'), and the choice's own offset is then reduced by the size
-** of the check.
-*/
-static void optimizechoice (Instruction *p) {
-  Instruction *choice;
-  int lc;
-  assert(p->i.code == IChoice);
-  if (!isfixcheck(p + 1))
-    return;  /* nothing to do unless a fixed check follows the choice */
-  lc = sizei(p + 1);
-  rotate(p, lc, 1);  /* check now comes first; the choice sits at p + lc */
-  assert(isfixcheck(p) && (p + lc)->i.code == IChoice);
-  choice = p + lc;
-  choice->i.aux = op_step(p);
-  check2test(p, choice->i.offset);
-  choice->i.offset -= lc;
-}
-
-
-/*
-** A 'headfail' pattern is a pattern that can only fail in its first
-** instruction, which must be a check. Returns 1 for such a pattern and
-** 0 otherwise.
-*/
-static int isheadfail (Instruction *p) {
-  Instruction *q;
-  if (!ischeck(p))
-    return 0;
-  q = p + sizei(p);
-  while (q->i.code != IEnd) {
-    if (!isnofail(q))
-      return 0;  /* a later instruction may fail too */
-    q += sizei(q);
-  }
-  return 1;
-}
-
-
-#define checkpattern(L, idx) ((Instruction *)luaL_checkudata(L, idx, PATTERN_T))  /* value at 'idx' checked as PATTERN_T userdata, cast to Instruction * */
-
-
-/*
-** Number of elements in the ktable of a pattern. In Lua 5.2 the default
-** "environment" of a pattern is nil rather than a table, so any
-** non-table value is treated as an empty table (length 0).
-*/
-static int ktablelen (lua_State *L, int idx) {
-  return lua_istable(L, idx) ? (int)lua_objlen(L, idx) : 0;
-}