Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with
or
.
Download ZIP
Browse files

Update to LPeg 0.10

  • Loading branch information...
commit 5019d49ce847a87662983acba1e35c9b252bb936 1 parent 61e4e65
kapec authored
View
1  CMakeLists.txt
@@ -11,6 +11,7 @@ include(dist.cmake)
install_lua_module(lpeg lpeg.c)
install_lua_module(re re.lua)
install_doc(lpeg.html re.html lpeg-128.gif)
+install_data(HISTORY)
install_test(test.lua)
add_lua_test(test.lua)
View
75 HISTORY
@@ -0,0 +1,75 @@
+HISTORY for LPeg 0.10
+
+* Changes from version 0.9 to 0.10
+ -------------------------------
+ + backtrack stack has configurable size
+ + better error messages
+ + Notation for non-terminals in 're' back to A instead of <A>
+ + experimental look-behind pattern
+ + support for external extensions
+ + works with Lua 5.2
+ + consumes less C stack
+
+* Changes from version 0.8 to 0.9
+ -------------------------------
+ + The accumulator capture was replaced by a fold capture;
+ programs that used the old 'lpeg.Ca' will need small changes.
+ + Some support for character classes from old C locales.
+ + A new named-group capture.
+
+* Changes from version 0.7 to 0.8
+ -------------------------------
+ + New "match-time" capture.
+ + New "argument capture" that allows passing arguments into the pattern.
+ + Better documentation for 're'.
+ + Several small improvements for 're'.
+ + The 're' module has an incompatibility with previous versions:
+ now, any use of a non-terminal must be enclosed in angle brackets
+ (like <B>).
+
+* Changes from version 0.6 to 0.7
+ -------------------------------
+ + Several improvements in module 're':
+ - better documentation;
+ - support for most captures (all but accumulator);
+ - limited repetitions p{n,m}.
+ + Small improvements in efficiency.
+ + Several small bugs corrected (special thanks to Hans Hagen
+ and Taco Hoekwater).
+
+* Changes from version 0.5 to 0.6
+ -------------------------------
+ + Support for non-numeric indices in grammars.
+ + Some bug fixes (thanks to the luatex team).
+ + Some new optimizations (thanks to Mike Pall).
+ + A new page layout (thanks to Andre Carregal).
+ + Minimal documentation for module 're'.
+
+* Changes from version 0.4 to 0.5
+ -------------------------------
+ + Several optimizations.
+ + lpeg.P now accepts booleans.
+ + Some new examples.
+ + A proper license.
+ + Several small improvements.
+
+* Changes from version 0.3 to 0.4
+ -------------------------------
+ + Static check for loops in repetitions and grammars.
+ + Removed label option in captures.
+ + The implementation of captures uses less memory.
+
+* Changes from version 0.2 to 0.3
+ -------------------------------
+ + User-defined patterns in Lua.
+ + Several new captures.
+
+* Changes from version 0.1 to 0.2
+ -------------------------------
+ + Several small corrections.
+ + Handles embedded zeros like any other character.
+ + Capture "name" can be any Lua value.
+ + Unlimited number of captures.
+ + Match gets an optional initial position.
+
+(end of HISTORY)
View
2  dist.info
@@ -1,7 +1,7 @@
--- This file is part of LuaDist project
name = "lpeg"
-version = "0.9"
+version = "0.10"
desc = "Parsing Expression Grammars For Lua"
author = "Roberto Ierusalimschy"
View
380 lpeg.c
@@ -1,5 +1,5 @@
/*
-** $Id: lpeg.c,v 1.98 2008/10/11 20:20:43 roberto Exp $
+** $Id: lpeg.c,v 1.112 2010/11/03 17:07:50 roberto Exp $
** LPeg - PEG pattern matching for Lua
** Copyright 2007, Lua.org & PUC-Rio (see 'lpeg.html' for license)
** written by Roberto Ierusalimschy
@@ -15,15 +15,49 @@
#include "lua.h"
#include "lauxlib.h"
+#include "lpeg.h"
-#define VERSION "0.9"
-#define PATTERN_T "pattern"
-/* maximum call/backtrack levels */
-#define MAXBACK 400
+#define VERSION "0.10"
+#define PATTERN_T "lpeg-pattern"
+#define MAXSTACKIDX "lpeg-maxstack"
+
+
+/*
+** compatibility with Lua 5.2
+*/
+#if (LUA_VERSION_NUM == 502)
+
+#undef lua_equal
+#define lua_equal(L,idx1,idx2) lua_compare(L,(idx1),(idx2),LUA_OPEQ)
+
+#undef lua_getfenv
+#define lua_getfenv lua_getuservalue
+#undef lua_setfenv
+#define lua_setfenv lua_setuservalue
+
+#undef lua_objlen
+#define lua_objlen lua_rawlen
+
+#undef luaL_register
+#define luaL_register(L,n,f) \
+ { if ((n) == NULL) luaL_setfuncs(L,f,0); else luaL_newlib(L,f); }
+
+#endif
+
+
+
+/* initial size for call/backtrack stack */
+#define INITBACK 100
+
+/* default maximum size for call/backtrack stack */
+#define MAXBACK INITBACK
+
+/* size for call/backtrack stack for verifier */
+#define MAXBACKVER 200
/* initial size for capture's list */
-#define IMAXCAPTURES 600
+#define INITCAPSIZE 32
/* index, on Lua stack, for subject */
@@ -41,6 +75,9 @@
/* index, on Lua stack, for pattern's fenv */
#define penvidx(ptop) ((ptop) + 3)
+/* index, on Lua stack, for backtracking stack */
+#define stackidx(ptop) ((ptop) + 4)
+
typedef unsigned char byte;
@@ -52,15 +89,10 @@ typedef unsigned char byte;
typedef byte Charset[CHARSETSIZE];
-typedef const char *(*PattFunc) (const void *ud,
- const char *o, /* string start */
- const char *s, /* current position */
- const char *e); /* string end */
-
-
/* Virtual Machine's instructions */
typedef enum Opcode {
IAny, IChar, ISet, ISpan,
+ IBack,
IRet, IEnd,
IChoice, IJmp, ICall, IOpenCall,
ICommit, IPartialCommit, IBackCommit, IFailTwice, IFail, IGiveup,
@@ -70,19 +102,20 @@ typedef enum Opcode {
} Opcode;
-#define ISJMP 1
-#define ISCHECK (ISJMP << 1)
-#define ISNOFAIL (ISCHECK << 1)
-#define ISCAPTURE (ISNOFAIL << 1)
-#define ISMOVABLE (ISCAPTURE << 1)
-#define ISFENVOFF (ISMOVABLE << 1)
-#define HASCHARSET (ISFENVOFF << 1)
-
-static const byte opproperties[] = {
- /* IAny */ ISCHECK,
- /* IChar */ ISCHECK,
- /* ISet */ ISCHECK | HASCHARSET,
- /* ISpan */ ISNOFAIL | HASCHARSET,
+#define ISJMP 0x1
+#define ISCHECK 0x2
+#define ISFIXCHECK 0x4
+#define ISNOFAIL 0x8
+#define ISCAPTURE 0x10
+#define ISMOVABLE 0x20
+#define ISFENVOFF 0x40
+
+static const int opproperties[] = {
+ /* IAny */ ISCHECK | ISFIXCHECK | ISJMP,
+ /* IChar */ ISCHECK | ISFIXCHECK | ISJMP,
+ /* ISet */ ISCHECK | ISFIXCHECK | ISJMP,
+ /* ISpan */ ISNOFAIL,
+ /* IBack */ 0,
/* IRet */ 0,
/* IEnd */ 0,
/* IChoice */ ISJMP,
@@ -95,7 +128,7 @@ static const byte opproperties[] = {
/* IFailTwice */ 0,
/* IFail */ 0,
/* IGiveup */ 0,
- /* IFunc */ 0,
+ /* IFunc */ ISCHECK | ISJMP,
/* IFullCapture */ ISCAPTURE | ISNOFAIL | ISFENVOFF,
/* IEmptyCapture */ ISCAPTURE | ISNOFAIL | ISMOVABLE,
/* IEmptyCaptureIdx */ISCAPTURE | ISNOFAIL | ISMOVABLE | ISFENVOFF,
@@ -112,6 +145,7 @@ typedef union Instruction {
short offset;
} i;
PattFunc f;
+ int iv;
byte buff[1];
} Instruction;
@@ -123,16 +157,21 @@ static const Instruction giveup = {{IGiveup, 0, 0}};
#define dest(p,x) ((x) + ((p)+(x))->i.offset)
#define MAXOFF 0xF
+#define MAXAUX 0xFF
+
+/* maximum size (in elements) for a pattern */
+#define MAXPATTSIZE (SHRT_MAX - 10)
+
#define isprop(op,p) (opproperties[(op)->i.code] & (p))
-#define isjmp(op) isprop(op, ISJMP)
+#define isjmp(op) (isprop(op, ISJMP) && (op)->i.offset != 0)
#define iscapture(op) isprop(op, ISCAPTURE)
#define ischeck(op) (isprop(op, ISCHECK) && (op)->i.offset == 0)
+#define isfixcheck(op) (isprop(op, ISFIXCHECK) && (op)->i.offset == 0)
#define istest(op) (isprop(op, ISCHECK) && (op)->i.offset != 0)
#define isnofail(op) isprop(op, ISNOFAIL)
#define ismovable(op) isprop(op, ISMOVABLE)
#define isfenvoff(op) isprop(op, ISFENVOFF)
-#define hascharset(op) isprop(op, HASCHARSET)
/* kinds of captures */
@@ -152,17 +191,15 @@ typedef struct Capture {
} Capture;
-/* maximum size (in elements) for a pattern */
-#define MAXPATTSIZE (SHRT_MAX - 10)
-
-
/* size (in elements) for an instruction plus extra l bytes */
-#define instsize(l) (((l) - 1)/sizeof(Instruction) + 2)
+#define instsize(l) (((l) + sizeof(Instruction) - 1)/sizeof(Instruction) + 1)
/* size (in elements) for a ISet instruction */
#define CHARSETINSTSIZE instsize(CHARSETSIZE)
+/* size (in elements) for a IFunc instruction */
+#define funcinstsize(p) ((p)->i.aux + 2)
#define loopset(v,b) { int v; for (v = 0; v < CHARSETSIZE; v++) b; }
@@ -174,9 +211,11 @@ typedef struct Capture {
static int sizei (const Instruction *i) {
- if (hascharset(i)) return CHARSETINSTSIZE;
- else if (i->i.code == IFunc) return i->i.offset;
- else return 1;
+ switch((Opcode)i->i.code) {
+ case ISet: case ISpan: return CHARSETINSTSIZE;
+ case IFunc: return funcinstsize(i);
+ default: return 1;
+ }
}
@@ -185,7 +224,7 @@ static const char *val2str (lua_State *L, int idx) {
if (k != NULL)
return lua_pushfstring(L, "rule '%s'", k);
else
- return lua_pushfstring(L, "rule <a %s>", luaL_typename(L, -1));
+ return lua_pushfstring(L, "rule <a %s>", luaL_typename(L, idx));
}
@@ -204,7 +243,6 @@ static int getposition (lua_State *L, int t, int i) {
}
-
/*
** {======================================================
** Printing patterns
@@ -246,7 +284,7 @@ static void printjmp (const Instruction *op, const Instruction *p) {
static void printinst (const Instruction *op, const Instruction *p) {
const char *const names[] = {
- "any", "char", "set", "span",
+ "any", "char", "set", "span", "back",
"ret", "end",
"choice", "jmp", "call", "open_call",
"commit", "partial_commit", "back_commit", "failtwice", "fail", "giveup",
@@ -306,7 +344,7 @@ static void printpatt (Instruction *p) {
Instruction *op = p;
for (;;) {
printinst(op, p);
- if (p->i.code == IEnd) break;
+ if ((Opcode)p->i.code == IEnd) break;
p += sizei(p);
}
}
@@ -325,8 +363,6 @@ static void printcaplist (Capture *cap) {
/* }====================================================== */
-
-
/*
** {======================================================
** Virtual Machine
@@ -341,6 +377,9 @@ typedef struct Stack {
} Stack;
+#define getstackbase(L, ptop) ((Stack *)lua_touserdata(L, stackidx(ptop)))
+
+
static int runtimecap (lua_State *L, Capture *close, Capture *ocap,
const char *o, const char *s, int ptop);
@@ -356,6 +395,26 @@ static Capture *doublecap (lua_State *L, Capture *cap, int captop, int ptop) {
}
+static Stack *doublestack (lua_State *L, Stack **stacklimit, int ptop) {
+ Stack *stack = getstackbase(L, ptop);
+ Stack *newstack;
+ int n = *stacklimit - stack;
+ int max, newn;
+ lua_getfield(L, LUA_REGISTRYINDEX, MAXSTACKIDX);
+ max = lua_tointeger(L, -1);
+ lua_pop(L, 1);
+ if (n >= max)
+ luaL_error(L, "too many pending calls/choices");
+ newn = 2*n; if (newn > max) newn = max;
+ newstack = (Stack *)lua_newuserdata(L, newn * sizeof(Stack));
+ memcpy(newstack, stack, n * sizeof(Stack));
+ lua_replace(L, stackidx(ptop));
+ *stacklimit = newstack + newn;
+ return newstack + n;
+
+}
+
+
static void adddyncaptures (const char *s, Capture *base, int n, int fd) {
int i;
assert(base[0].kind == Cruntime && base[0].siz == 0);
@@ -374,35 +433,36 @@ static void adddyncaptures (const char *s, Capture *base, int n, int fd) {
#define condfailed(p) { int f = p->i.offset; if (f) p+=f; else goto fail; }
-
static const char *match (lua_State *L,
const char *o, const char *s, const char *e,
Instruction *op, Capture *capture, int ptop) {
- Stack stackbase[MAXBACK];
- Stack *stacklimit = stackbase + MAXBACK;
+ Stack stackbase[INITBACK];
+ Stack *stacklimit = stackbase + INITBACK;
Stack *stack = stackbase; /* point to first empty slot in stack */
- int capsize = IMAXCAPTURES;
+ int capsize = INITCAPSIZE;
int captop = 0; /* point to first empty slot in captures */
const Instruction *p = op;
stack->p = &giveup; stack->s = s; stack->caplevel = 0; stack++;
+ lua_pushlightuserdata(L, stackbase);
for (;;) {
#if defined(DEBUG)
- printf("s: |%s| stck: %d c: %d ", s, stack - stackbase, captop);
+ printf("s: |%s| stck: %d c: %d ",
+ s, stack - getstackbase(L, ptop), captop);
printinst(op, p);
#endif
switch ((Opcode)p->i.code) {
case IEnd: {
- assert(stack == stackbase + 1);
+ assert(stack == getstackbase(L, ptop) + 1);
capture[captop].kind = Cclose;
capture[captop].s = NULL;
return s;
}
case IGiveup: {
- assert(stack == stackbase);
+ assert(stack == getstackbase(L, ptop));
return NULL;
}
case IRet: {
- assert(stack > stackbase && (stack - 1)->s == NULL);
+ assert(stack > getstackbase(L, ptop) && (stack - 1)->s == NULL);
p = (--stack)->p;
continue;
}
@@ -424,6 +484,12 @@ static const char *match (lua_State *L,
else condfailed(p);
continue;
}
+ case IBack: {
+ int n = p->i.aux;
+ if (n > s - o) goto fail;
+ s -= n; p++;
+ continue;
+ }
case ISpan: {
for (; s < e; s++) {
int c = (byte)*s;
@@ -433,10 +499,9 @@ static const char *match (lua_State *L,
continue;
}
case IFunc: {
- const char *r = (p+1)->f((p+2)->buff, o, s, e);
- if (r == NULL) goto fail;
- s = r;
- p += p->i.offset;
+ const char *r = (p+1)->f(s, e, o, (p+2)->buff);
+ if (r != NULL) { s = r; p += funcinstsize(p); }
+ else condfailed(p);
continue;
}
case IJmp: {
@@ -444,8 +509,8 @@ static const char *match (lua_State *L,
continue;
}
case IChoice: {
- if (stack >= stacklimit)
- return (luaL_error(L, "too many pending calls/choices"), (char *)0);
+ if (stack == stacklimit)
+ stack = doublestack(L, &stacklimit, ptop);
stack->p = dest(0, p);
stack->s = s - p->i.aux;
stack->caplevel = captop;
@@ -454,8 +519,8 @@ static const char *match (lua_State *L,
continue;
}
case ICall: {
- if (stack >= stacklimit)
- return (luaL_error(L, "too many pending calls/choices"), (char *)0);
+ if (stack == stacklimit)
+ stack = doublestack(L, &stacklimit, ptop);
stack->s = NULL;
stack->p = p + 1; /* save return address */
stack++;
@@ -463,32 +528,33 @@ static const char *match (lua_State *L,
continue;
}
case ICommit: {
- assert(stack > stackbase && (stack - 1)->s != NULL);
+ assert(stack > getstackbase(L, ptop) && (stack - 1)->s != NULL);
stack--;
p += p->i.offset;
continue;
}
case IPartialCommit: {
- assert(stack > stackbase && (stack - 1)->s != NULL);
+ assert(stack > getstackbase(L, ptop) && (stack - 1)->s != NULL);
(stack - 1)->s = s;
(stack - 1)->caplevel = captop;
p += p->i.offset;
continue;
}
case IBackCommit: {
- assert(stack > stackbase && (stack - 1)->s != NULL);
+ assert(stack > getstackbase(L, ptop) && (stack - 1)->s != NULL);
s = (--stack)->s;
+ captop = stack->caplevel;
p += p->i.offset;
continue;
}
case IFailTwice:
- assert(stack > stackbase);
+ assert(stack > getstackbase(L, ptop));
stack--;
/* go through */
case IFail:
fail: { /* pattern failed: try to backtrack */
do { /* remove pending calls */
- assert(stack > stackbase);
+ assert(stack > getstackbase(L, ptop));
s = (--stack)->s;
} while (s == NULL);
captop = stack->caplevel;
@@ -575,10 +641,18 @@ static const char *match (lua_State *L,
*/
+/*
+** Check whether the pattern may go from 'p' to 'e' without consuming any
+** input. Raises an error if it detects a left recursion. 'op' points
+** to the beginning of the pattern. If the pattern belongs to a grammar,
+** 'rule' is the stack index of its corresponding key (only for
+** error messages) and 'postable' is the stack index of a table
+** mapping rule keys to the position of their code in the pattern.
+*/
static int verify (lua_State *L, Instruction *op, const Instruction *p,
Instruction *e, int postable, int rule) {
static const char dummy[] = "";
- Stack back[MAXBACK];
+ Stack back[MAXBACKVER];
int backtop = 0; /* point to first empty slot in back */
while (p != e) {
switch ((Opcode)p->i.code) {
@@ -587,7 +661,7 @@ static int verify (lua_State *L, Instruction *op, const Instruction *p,
continue;
}
case IChoice: {
- if (backtop >= MAXBACK)
+ if (backtop >= MAXBACKVER)
return luaL_error(L, "too many pending calls/choices");
back[backtop].p = dest(0, p);
back[backtop++].s = dummy;
@@ -596,7 +670,7 @@ static int verify (lua_State *L, Instruction *op, const Instruction *p,
}
case ICall: {
assert((p + 1)->i.code != IRet); /* no tail call */
- if (backtop >= MAXBACK)
+ if (backtop >= MAXBACKVER)
return luaL_error(L, "too many pending calls/choices");
back[backtop].s = NULL;
back[backtop++].p = p + 1;
@@ -610,7 +684,7 @@ static int verify (lua_State *L, Instruction *op, const Instruction *p,
if (back[i].s == NULL && back[i].p == p + 1)
return luaL_error(L, "%s is left recursive", val2str(L, rule));
}
- if (backtop >= MAXBACK)
+ if (backtop >= MAXBACKVER)
return luaL_error(L, "too many pending calls/choices");
back[backtop].s = NULL;
back[backtop++].p = p + 1;
@@ -633,13 +707,31 @@ static int verify (lua_State *L, Instruction *op, const Instruction *p,
continue;
}
}
+ case IBack: {
+ if (p->i.aux == 1 && isfixcheck(p + 1)) { /* char test? */
+ p++; /* skip back instruction */
+ p += sizei(p); /* skip char test */
+ }
+ else { /* standard lookbehind code */
+ assert((Opcode)(p - 1)->i.code == IChoice); /* look behind */
+ backtop--;
+ p += (p - 1)->i.offset;
+ assert((Opcode)(p - 1)->i.code == IFail); /* look behind */
+ }
+ continue;
+ }
case IAny:
case IChar:
case ISet: {
- if (p->i.offset == 0) goto fail;
- /* else goto dojmp; go through */
+ const Instruction *next = p + sizei(p);
+ if ((Opcode)next->i.code == IBack)
+ p = next + 1; /* continue after the back instruction */
+ else if (p->i.offset == 0) goto fail;
+ else /* jump */
+ p += p->i.offset;
+ continue;
}
- case IJmp:
+ case IJmp:
dojmp: {
p += p->i.offset;
continue;
@@ -672,9 +764,9 @@ static int verify (lua_State *L, Instruction *op, const Instruction *p,
goto fail; /* be liberal in this case */
}
case IFunc: {
- const char *r = (p+1)->f((p+2)->buff, dummy, dummy, dummy);
- if (r == NULL) goto fail;
- p += p->i.offset;
+ const char *r = (p+1)->f(dummy, dummy, dummy, (p+2)->buff);
+ if (r != NULL) { p += funcinstsize(p); }
+ else condfailed(p);
continue;
}
case IEnd: /* cannot happen (should stop before it) */
@@ -712,8 +804,6 @@ static void checkrule (lua_State *L, Instruction *op, int from, int to,
/* }====================================================== */
-
-
/*
** {======================================================
** Building Patterns
@@ -766,7 +856,7 @@ static void rotate (Instruction *p, int e, int n) {
static int skipchecks (Instruction *p, int up, int *pn) {
int i, n = 0;
- for (i = 0; ischeck(p + i); i += sizei(p + i)) {
+ for (i = 0; isfixcheck(p + i); i += sizei(p + i)) {
int st = op_step(p + i);
if (n + st > MAXOFF - up) break;
n += st;
@@ -784,7 +874,7 @@ static void optimizecaptures (Instruction *p) {
for (i = 0; p[i].i.code != IEnd; i += sizei(p + i)) {
if (isjmp(p + i) && dest(p, i) >= limit)
limit = dest(p, i) + 1; /* do not optimize jump targets */
- else if (i >= limit && ismovablecap(p + i) && ischeck(p + i + 1)) {
+ else if (i >= limit && ismovablecap(p + i) && isfixcheck(p + i + 1)) {
int end, n, j; /* found a border capture|check */
int maxoff = getoff(p + i);
int start = i;
@@ -800,7 +890,7 @@ static void optimizecaptures (Instruction *p) {
p[j].i.aux += (n << 4); /* correct offset of captures to be moved */
rotate(p + start, end - start, i - start + 1); /* move them up */
i = end;
- assert(ischeck(p + start) && iscapture(p + i));
+ assert(isfixcheck(p + start) && iscapture(p + i));
}
}
}
@@ -823,10 +913,10 @@ static void optimizejumps (Instruction *p) {
static void optimizechoice (Instruction *p) {
assert(p->i.code == IChoice);
- if (ischeck(p + 1)) {
+ if (isfixcheck(p + 1)) {
int lc = sizei(p + 1);
rotate(p, lc, 1);
- assert(ischeck(p) && (p + lc)->i.code == IChoice);
+ assert(isfixcheck(p) && (p + lc)->i.code == IChoice);
(p + lc)->i.aux = op_step(p);
check2test(p, (p + lc)->i.offset);
(p + lc)->i.offset -= lc;
@@ -850,16 +940,34 @@ static int isheadfail (Instruction *p) {
#define checkpattern(L, idx) ((Instruction *)luaL_checkudata(L, idx, PATTERN_T))
+/*
+** Return the number of elements in the ktable of a pattern.
+** In Lua 5.2, the default "environment" for patterns is nil, not
+** a table. Treat it as an empty table.
+*/
+static int ktablelen (lua_State *L, int idx) {
+ if (!lua_istable(L, idx)) return 0;
+ else return lua_objlen(L, idx);
+}
+
+
+/*
+** join the elements of the ktable from pattern 'p1' into the ktable of
+** the pattern at the top of the stack ('p'). If 'p1' has no elements,
+** 'p' keeps its original ktable. If 'p' has no elements, it shares
+** 'p1' ktable. Otherwise, this function creates a new ktable for 'p'.
+** Return the offset of original 'p' elements in the new ktable.
+*/
static int jointable (lua_State *L, int p1) {
int n, n1, i;
lua_getfenv(L, p1);
- n1 = lua_objlen(L, -1); /* number of elements in p1's env */
+ n1 = ktablelen(L, -1); /* number of elements in p1's env */
lua_getfenv(L, -2);
if (n1 == 0 || lua_equal(L, -2, -1)) {
lua_pop(L, 2);
return 0; /* no need to change anything */
}
- n = lua_objlen(L, -1); /* number of elements in p's env */
+ n = ktablelen(L, -1); /* number of elements in p's env */
if (n == 0) {
lua_pop(L, 1); /* removes p env */
lua_setfenv(L, -2); /* p now shares p1's env */
@@ -903,6 +1011,7 @@ static int addpatt (lua_State *L, Instruction *p, int p1idx) {
static void setinstaux (Instruction *i, Opcode op, int offset, int aux) {
+ assert(aux <= MAXAUX);
i->i.code = op;
i->i.offset = offset;
i->i.aux = aux;
@@ -913,6 +1022,10 @@ static void setinstaux (Instruction *i, Opcode op, int offset, int aux) {
#define setinstcap(i,op,idx,k,n) setinstaux(i,op,idx,((k) | ((n) << 4)))
+/*
+** create a new ktable for pattern at the stack top, mapping
+** '1' to the value at stack position 'vidx'.
+*/
static int value2fenv (lua_State *L, int vidx) {
lua_createtable(L, 1, 0);
lua_pushvalue(L, vidx);
@@ -959,7 +1072,7 @@ static void fillcharset (Instruction *p, Charset cs) {
*/
static enum charsetanswer tocharset (Instruction *p, CharsetTag *c) {
- if (ischeck(p)) {
+ if (isfixcheck(p)) {
fillcharset(p, c->cs);
if ((p + sizei(p))->i.code == IEnd && op_step(p) == 1)
c->tag = ISCHARSET;
@@ -1068,7 +1181,7 @@ static Instruction *fix_l (lua_State *L, int t) {
continue;
}
if (!testpattern(L, -1))
- luaL_error(L, "invalid field in grammar");
+ luaL_error(L, "%s is not a pattern", val2str(L, -2));
l = pattsize(L, -1) + 1; /* space for pattern + ret */
if (totalsize >= MAXPATTSIZE - l)
luaL_error(L, "grammar too large");
@@ -1284,17 +1397,16 @@ static int unm_l (lua_State *L) {
static int pattand_l (lua_State *L) {
int l1;
- Instruction *p1 = getpatt(L, 1, &l1);
CharsetTag st1;
+ Instruction *p1 = getpatt(L, 1, &l1);
if (isfail(p1) || issucc(p1))
lua_pushvalue(L, 1); /* &fail == fail; &true == true */
else if (tocharset(p1, &st1) == ISCHARSET) {
- Instruction *p = newpatt(L, CHARSETINSTSIZE + 1);
- setinst(p, ISet, CHARSETINSTSIZE + 1);
- loopset(i, p[1].buff[i] = ~st1.cs[i]);
- setinst(p + CHARSETINSTSIZE, IFail, 0);
+ Instruction *p = newpatt(L, l1 + 1);
+ copypatt(p, p1, l1); p += l1;
+ setinstaux(p, IBack, 0, 1);
}
- else {
+ else { /* Choice L1; p1; BackCommit L2; L1: Fail; L2: */
Instruction *p = newpatt(L, 1 + l1 + 2);
setinst(p++, IChoice, 1 + l1 + 1);
p += addpatt(L, p, 1);
@@ -1305,6 +1417,41 @@ static int pattand_l (lua_State *L) {
}
+static int nocalls (const Instruction *p) {
+ for (; (Opcode)p->i.code != IEnd; p += sizei(p))
+ if ((Opcode)p->i.code == IOpenCall) return 0;
+ return 1;
+}
+
+
+static int pattbehind (lua_State *L) {
+ int l1;
+ CharsetTag st1;
+ Instruction *p1 = getpatt(L, 1, &l1);
+ int n = luaL_optint(L, 2, 1);
+ luaL_argcheck(L, n <= MAXAUX, 2, "lookbehind delta too large");
+ if (!nocalls(p1))
+ luaL_error(L, "lookbehind pattern cannot contain non terminals");
+ if (isfail(p1) || issucc(p1))
+ lua_pushvalue(L, 1); /* <fail == fail; <true == true */
+ else if (n == 1 && tocharset(p1, &st1) == ISCHARSET) {
+ Instruction *p = newpatt(L, 1 + l1);
+ setinstaux(p, IBack, 0, 1); p++;
+ copypatt(p, p1, l1);
+ }
+ else { /* Choice L1; Back; p1; BackCommit L2; L1: fail; L2: */
+ Instruction *p = newpatt(L, 2 + l1 + 2);
+ setinst(p++, IChoice, 2 + l1 + 1);
+ setinstaux(p++, IBack, 0, n);
+ p += addpatt(L, p, 1);
+ setinst(p++, IBackCommit, 2);
+ setinst(p, IFail, 0);
+ }
+ return 1;
+}
+
+
+
static int firstpart (Instruction *p, int l) {
if (istest(p)) {
int e = p[0].i.offset - 1;
@@ -1652,44 +1799,26 @@ static int capconst_l (lua_State *L) {
/* }====================================================== */
-
/*
** {======================================================
** User-Defined Patterns
** =======================================================
*/
-static void newpattfunc (lua_State *L, PattFunc f, const void *ud, size_t l) {
+static void l_newpf (lua_State *L, PattFunc f, const void *ud, size_t l) {
int n = instsize(l) + 1;
Instruction *p = newpatt(L, n);
+ if (n > MAXAUX) luaL_error(L, "pattern data too long");
p[0].i.code = IFunc;
- p[0].i.offset = n;
+ p[0].i.aux = n - 2;
+ p[0].i.offset = 0;
p[1].f = f;
memcpy(p[2].buff, ud, l);
}
-
-#include <ctype.h>
-
-static const char *span (const void *ud, const char *o,
- const char *s,
- const char *e) {
- const char *u = (const char *)ud;
- (void)o; (void)e;
- return s + strspn(s, u);
-}
-
-
-static int span_l (lua_State *L) {
- const char *s = luaL_checkstring(L, 1);
- newpattfunc(L, span, s, strlen(s) + 1);
- return 1;
-}
-
/* }====================================================== */
-
/*
** {======================================================
** Captures
@@ -1755,7 +1884,7 @@ static Capture *nextcap (Capture *cap) {
if (n-- == 0) return cap + 1;
}
else if (!isfullcap(cap)) n++;
- }
+ }
}
}
@@ -2145,8 +2274,10 @@ static int locale_l (lua_State *L) {
lua_settop(L, 0);
lua_createtable(L, 0, 12);
}
- else
+ else {
luaL_checktype(L, 1, LUA_TTABLE);
+ lua_settop(L, 1);
+ }
createcat(L, "alnum", isalnum);
createcat(L, "alpha", isalpha);
createcat(L, "cntrl", iscntrl);
@@ -2162,11 +2293,19 @@ static int locale_l (lua_State *L) {
}
+static int setmax (lua_State *L) {
+ luaL_optinteger(L, 1, -1);
+ lua_settop(L, 1);
+ lua_setfield(L, LUA_REGISTRYINDEX, MAXSTACKIDX);
+ return 0;
+}
+
+
static int printpat_l (lua_State *L) {
Instruction *p = getpatt(L, 1, NULL);
int n, i;
lua_getfenv(L, 1);
- n = lua_objlen(L, -1);
+ n = ktablelen(L, -1);
printf("[");
for (i = 1; i <= n; i++) {
printf("%d = ", i);
@@ -2184,7 +2323,7 @@ static int printpat_l (lua_State *L) {
static int matchl (lua_State *L) {
- Capture capture[IMAXCAPTURES];
+ Capture capture[INITCAPSIZE];
const char *r;
size_t l;
Instruction *p = getpatt(L, 1, NULL);
@@ -2210,6 +2349,8 @@ static struct luaL_reg pattreg[] = {
{"match", matchl},
{"print", printpat_l},
{"locale", locale_l},
+ {"setmaxstack", setmax},
+ {"B", pattbehind},
{"C", capture_l},
{"Cf", fold_l},
{"Cc", capconst_l},
@@ -2224,7 +2365,6 @@ static struct luaL_reg pattreg[] = {
{"R", range_l},
{"S", set_l},
{"V", nter_l},
- {"span", span_l},
{"type", type_l},
{"version", version_l},
{NULL, NULL}
@@ -2245,9 +2385,11 @@ static struct luaL_reg metapattreg[] = {
int luaopen_lpeg (lua_State *L);
int luaopen_lpeg (lua_State *L) {
- lua_newtable(L);
- lua_replace(L, LUA_ENVIRONINDEX); /* empty env for new patterns */
+ lua_pushcfunction(L, (lua_CFunction)&l_newpf); /* new-pattern function */
+ lua_setfield(L, LUA_REGISTRYINDEX, KEYNEWPATT); /* register it */
luaL_newmetatable(L, PATTERN_T);
+ lua_pushnumber(L, MAXBACK);
+ lua_setfield(L, LUA_REGISTRYINDEX, MAXSTACKIDX);
luaL_register(L, NULL, metapattreg);
luaL_register(L, "lpeg", pattreg);
lua_pushliteral(L, "__index");
View
38 lpeg.h
@@ -0,0 +1,38 @@
+/*
+** $Id: lpeg.h,v 1.1 2009/12/23 16:15:36 roberto Exp $
+** LPeg - PEG pattern matching for Lua
+** Copyright 2009, Lua.org & PUC-Rio (see 'lpeg.html' for license)
+** written by Roberto Ierusalimschy
+*/
+
+#ifndef lpeg_h
+#define lpeg_h
+
+#include "lua.h"
+
+
+#define KEYNEWPATT "lpeg.newpf"
+
+
+/*
+** type of extension functions that define new "patterns" for LPEG
+** It should return the new current position or NULL if match fails
+*/
+typedef const char *(*PattFunc) (const char *s, /* current position */
+ const char *e, /* string end */
+ const char *o, /* string start */
+ const void *ud); /* user data */
+
+/*
+** function to create new patterns based on 'PattFunc' functions.
+** This function is available at *registry[KEYNEWPATT]. (Notice
+** the extra indirection; the userdata at the registry points to
+** a variable that points to the function. In ANSI C a void* cannot
+** point to a function.)
+*/
+typedef void (*Newpf) (lua_State *L,
+ PattFunc f, /* pattern */
+ const void *ud, /* (user) data to be passed to 'f' */
+ size_t l); /* size of data to be passed to 'f' */
+
+#endif
View
219 lpeg.html
@@ -10,7 +10,7 @@
</head>
<body>
-<!-- $Id: lpeg.html,v 1.54 2008/10/10 19:07:32 roberto Exp $ -->
+<!-- $Id: lpeg.html,v 1.60 2010/11/03 17:42:07 roberto Exp $ -->
<div id="container">
@@ -22,7 +22,7 @@
</div>
<div id="product_name"><big><strong>LPeg</strong></big></div>
<div id="product_description">
- Parsing Expression Grammars For Lua, version 0.9
+ Parsing Expression Grammars For Lua, version 0.10
</div>
</div> <!-- id="product" -->
@@ -87,7 +87,7 @@
On the other hand,
first-class patterns allow much better documentation
(as it is easy to comment the code,
-to use auxiliary variables to break complex definitions, etc.)
+to break complex definitions in smaller parts, etc.)
and are extensible,
as we can define new functions to create and compose patterns.
</p>
@@ -101,8 +101,8 @@
<tbody><tr><td><b>Operator</b></td><td><b>Description</b></td></tr>
<tr><td><a href="#op-p"><code>lpeg.P(string)</code></a></td>
<td>Matches <code>string</code> literally</td></tr>
-<tr><td><a href="#op-p"><code>lpeg.P(number)</code></a></td>
- <td>Matches exactly <code>number</code> characters</td></tr>
+<tr><td><a href="#op-p"><code>lpeg.P(n)</code></a></td>
+ <td>Matches exactly <code>n</code> characters</td></tr>
<tr><td><a href="#op-s"><code>lpeg.S(string)</code></a></td>
<td>Matches any character in <code>string</code> (Set)</td></tr>
<tr><td><a href="#op-r"><code>lpeg.R("<em>xy</em>")</code></a></td>
@@ -122,6 +122,9 @@
<td>Equivalent to <code>("" - patt)</code></td></tr>
<tr><td><a href="#op-len"><code>#patt</code></a></td>
<td>Matches <code>patt</code> but consumes no input</td></tr>
+<tr><td><a href="#op-behind"><code>lpeg.B(patt, n)</code></a></td>
+ <td>Matches <code>patt</code> <code>n</code> characters behind
+ the current position, consuming no input</td></tr>
</tbody></table>
<p>As a very simple example,
@@ -139,8 +142,9 @@
LPeg also offers the <a href="re.html"><code>re</code> module</a>,
which implements patterns following a regular-expression style
(e.g., <code>[09]+</code>).
-(This module is 200 lines of Lua code,
-and of course uses LPeg to parse regular expressions.)
+(This module is 250 lines of Lua code,
+and of course uses LPeg to parse regular expressions and
+translate them to regular LPeg patterns.)
</p>
@@ -153,7 +157,7 @@
It attempts to match the given pattern against the subject string.
If the match succeeds,
returns the index in the subject of the first character after the match,
-or the values of <a href="#captures">captured values</a>
+or the <a href="#captures">captured values</a>
(if the pattern captured any value).
</p>
@@ -189,6 +193,17 @@
Returns a string with the running version of LPeg.
</p>
+<h3><a name="f-setstack"></a><code>lpeg.setmaxstack (max)</code></h3>
+<p>
+Sets the maximum size for the backtrack stack used by LPeg to
+track calls and choices.
+Most well-written patterns need few backtrack levels and
+therefore you seldom need to change this maximum;
+but a few useful patterns may need more space.
+Before changing this maximum you should try to rewrite your
+pattern to avoid the need for extra space.
+</p>
+
<h2><a name="basic">Basic Constructions</a></h2>
@@ -228,8 +243,9 @@
If the argument is a negative number <em>-n</em>,
the result is a pattern that
succeeds only if the input string does not have <em>n</em> characters:
-It is equivalent to the <a href="#op-unm">unary minus operation</a>
-applied over the pattern corresponding to the (non-negative) value <em>n</em>.
+<code>lpeg.P(-n)</code>
+is equivalent to <code>-lpeg.P(n)</code>
+(see the <a href="#op-unm">unary minus operation</a>).
</p></li>
<li><p>
@@ -254,6 +270,33 @@
</ul>
+<h3><a name="op-behind"></a><code>lpeg.B(patt [, n])</code></h3>
+<p>
+Returns a pattern that
+matches only if the input string matches <code>patt</code>
+starting <code>n</code> positions behind the current position.
+(The default value for <code>n</code> is 1.)
+If the current position is less than or equal to <code>n</code>,
+this pattern fails.
+</p>
+
+<p>
+Like the <a href="#op-len">and predicate</a>,
+this pattern never consumes any input,
+independently of success or failure,
+and it never produces any capture.
+</p>
+
+<p>
+The pattern <code>patt</code> cannot contain any open reference
+to grammar rules (see <a href="#grammar">grammars</a>).
+</p>
+
+<p>
+(This is an experimental feature.
+There is a good chance it will change in future versions.)
+</p>
+
<h3><a name="op-r"></a><code>lpeg.R ({range})</code></h3>
<p>
@@ -337,13 +380,14 @@
matches only if the input string matches <code>patt</code>,
but without consuming any input,
independently of success or failure.
-(This pattern is equivalent to
+(This pattern is called an <em>and predicate</em>
+and it is equivalent to
<em>&amp;patt</em> in the original PEG notation.)
</p>
+
<p>
-When it succeeds,
-<code>#patt</code> produces all captures produced by <code>patt</code>.
+This pattern never produces any capture.
</p>.
@@ -403,6 +447,14 @@
</p>
<p>
+When it succeeds,
+this pattern produces all captures from <code>patt1</code>.
+It never produces any capture from <code>patt2</code>
+(as either <code>patt2</code> fails or
+<code>patt1 - patt2</code> fails).
+</p>
+
+<p>
If both <code>patt1</code> and <code>patt2</code> are
character sets,
this operation is equivalent to set difference.
@@ -443,7 +495,7 @@
<p>
Otherwise, when <code>n</code> is negative,
this pattern is equivalent to <em>(patt?)<sup>-n</sup></em>.
-That is, it matches at most <code>-n</code>
+That is, it matches at most <code>|n|</code>
occurrences of <code>patt</code>.
</p>
@@ -520,6 +572,14 @@
B = "b" * lpeg.V"S" + "a" * lpeg.V"B" * lpeg.V"B",
} * -1
</pre>
+<p>
+It is equivalent to the following grammar in standard PEG notation:
+</p>
+<pre class="example">
+ S <- 'a' B / 'b' A / ''
+ A <- 'a' S / 'b' A A
+ B <- 'b' S / 'a' B B
+</pre>
<h2><a name="captures">Captures</a></h2>
@@ -539,7 +599,8 @@
<table border="1">
<tbody><tr><td><b>Operation</b></td><td><b>What it Produces</b></td></tr>
<tr><td><a href="#cap-c"><code>lpeg.C(patt)</code></a></td>
- <td>the match for <code>patt</code></td></tr>
+ <td>the match for <code>patt</code> plus all captures
+ made by <code>patt</code></td></tr>
<tr><td><a href="#cap-arg"><code>lpeg.Carg(n)</code></a></td>
<td>the value of the n<sup>th</sup> extra argument to
<code>lpeg.match</code> (matches the empty string)</td></tr>
@@ -551,7 +612,7 @@
<td>the given values (matches the empty string)</td></tr>
<tr><td><a href="#cap-f"><code>lpeg.Cf(patt, func)</code></a></td>
<td>a <em>folding</em> of the captures from <code>patt</code></td></tr>
-<tr><td><a href="#cap-g"><code>lpeg.Cg(patt, [name])</code></a></td>
+<tr><td><a href="#cap-g"><code>lpeg.Cg(patt [, name])</code></a></td>
<td>the values produced by <code>patt</code>,
optionally tagged with <code>name</code></td></tr>
<tr><td><a href="#cap-p"><code>lpeg.Cp()</code></a></td>
@@ -592,7 +653,7 @@
<p>
Usually,
LPeg evaluates all captures only after (and if) the entire match succeeds.
-At <em>match time</em> it only gathers enough information
+During <em>match time</em>, it only gathers enough information
to produce the capture values later.
As a particularly important consequence,
most captures cannot affect the way a pattern matches a subject.
@@ -654,7 +715,7 @@
<h3><a name="cap-f"></a><code>lpeg.Cf (patt, func)</code></h3>
<p>
-Creates an <em>fold capture</em>.
+Creates a <em>fold capture</em>.
If <code>patt</code> produces a list of captures
<em>C<sub>1</sub> C<sub>2</sub> ... C<sub>n</sub></em>,
this capture will produce the value
@@ -808,7 +869,7 @@
</p>
<p>
-The function gets as arguments the entire subject,
+The given function gets as arguments the entire subject,
the current position (after the match of <code>patt</code>),
plus any capture values produced by <code>patt</code>.
</p>
@@ -821,6 +882,9 @@
and the returned number becomes the new current position.
(Assuming a subject <em>s</em> and current position <em>i</em>,
the returned number must be in the range <em>[i, len(s) + 1]</em>.)
+If the call returns <b>true</b>,
+the match succeeds without consuming any input.
+(So, returning <b>true</b> is equivalent to returning <em>i</em>.)
If the call returns <b>false</b>, <b>nil</b>, or no value,
the match fails.
</p>
@@ -835,9 +899,66 @@
<h2><a name="ex">Some Examples</a></h2>
+<h3>Using a Pattern</h3>
+<p>
+This example shows a very simple but complete program
+that builds and uses a pattern:
+</p>
+<pre class="example">
+local lpeg = require "lpeg"
+
+-- matches a word followed by end-of-string
+p = lpeg.R"az"^1 * -1
+
+print(p:match("hello")) --> 6
+print(lpeg.match(p, "hello")) --> 6
+print(p:match("1 hello")) --> nil
+</pre>
+<p>
+The pattern is simply a sequence of one or more lower-case letters
+followed by the end of string (-1).
+The program calls <code>match</code> both as a method
+and as a function.
+In both successful cases,
+the match returns
+the index of the first character after the match,
+which is the string length plus one.
+</p>
+
+
+<h3>Name-value lists</h3>
+<p>
+This example parses a list of name-value pairs and returns a table
+with those pairs:
+</p>
+<pre class="example">
+lpeg.locale(lpeg) -- adds locale entries into 'lpeg' table
+
+local space = lpeg.space^0
+local name = lpeg.C(lpeg.alpha^1) * space
+local sep = lpeg.S(",;") * space
+local pair = lpeg.Cg(name * "=" * space * name) * sep^-1
+local list = lpeg.Cf(lpeg.Ct("") * pair^0, rawset)
+t = list:match("a=b, c = hi; next = pi") --> { a = "b", c = "hi", next = "pi" }
+</pre>
+<p>
+Each pair has the format <code>name = name</code> followed by
+an optional separator (a comma or a semicolon).
+The <code>pair</code> pattern encloses the pair in a group pattern,
+so that the names become the values of a single capture.
+The <code>list</code> pattern then folds these captures.
+It starts with an empty table,
+created by a table capture matching an empty string;
+then for each capture (a pair of names) it applies <code>rawset</code>
+over the accumulator (the table) and the capture values (the pair of names).
+<code>rawset</code> returns the table itself,
+so the accumulator is always the table.
+</p>
+
<h3>Splitting a string</h3>
<p>
-The following code splits a string using a given pattern
+The following code builds a pattern that
+splits a string using a given pattern
<code>sep</code> as a separator:
</p>
<pre class="example">
@@ -982,36 +1103,6 @@
</p>
-<h3>Name-value lists</h3>
-<p>
-This example parses a list of name-value pairs and returns a table
-with those pairs:
-</p>
-<pre class="example">
-lpeg.locale(lpeg)
-
-local space = lpeg.space^0
-local name = lpeg.C(lpeg.alpha^1) * space
-local sep = lpeg.S(",;") * space
-local pair = lpeg.Cg(name * "=" * space * name) * sep^-1
-local list = lpeg.Cf(lpeg.Ct("") * pair^0, rawset)
-t = list:match("a=b, c = hi; next = pi") --> { a = "b", c = "hi", next = "pi" }
-</pre>
-<p>
-Each pair has the format <code>name = name</code> followed by
-an optional separator (a comma or a semicolon).
-The <code>pair</code> pattern encloses the pair in a group pattern,
-so that the names become the values of a single capture.
-The <code>list</code> pattern then folds these captures.
-It starts with an empty table,
-created by a table capture matching an empty string;
-then for each capture (a pair of names) it applies <code>rawset</code>
-over the accumulator (the table) and the capture values (the pair of names).
-<code>rawset</code> returns the table itself,
-so the accumulator is always the table.
-</p>
-
-
<h3><a name="CSV"></a>Comma-Separated Values (CSV)</h3>
<p>
This example breaks a string into comma-separated values,
@@ -1037,6 +1128,17 @@
ending with a newline or the string end (-1).
</p>
+<p>
+As it is,
+the previous pattern returns each field as a separate result.
+If we add a table capture in the definition of <code>record</code>,
+the pattern will instead return a single table
+containing all fields:
+</p>
+<pre>
+local record = lpeg.Ct(field * (',' * field)^0) * (lpeg.P'\n' + -1)
+</pre>
+
<h3>UTF-8 and Latin 1</h3>
<p>
@@ -1144,15 +1246,16 @@
open = "[" * lpeg.Cg(lpeg.P"="^0, "init") * "[" * lpeg.P"\n"^-1
close = "]" * lpeg.C(lpeg.P"="^0) * "]"
closeeq = lpeg.Cmt(close * lpeg.Cb("init"), function (s, i, a, b) return a == b end)
-string = open * m.C((lpeg.P(1) - closeeq)^0) * close /
- function (o, s) return s end
+string = open * lpeg.C((lpeg.P(1) - closeeq)^0) * close /
+ function (s, o) return s end
</pre>
<p>
The <code>open</code> pattern matches <code>[=*[</code>,
capturing the repetitions of equal signs in a group named <code>init</code>;
it also discharges an optional newline, if present.
-The <code>close</code> pattern matches <code>]=*]</code>.
+The <code>close</code> pattern matches <code>]=*]</code>,
+also capturing the repetitions of equal signs.
The <code>closeeq</code> pattern first matches <code>close</code>;
then it uses a back capture to recover the capture made
by the previous <code>open</code>,
@@ -1162,10 +1265,8 @@
The <code>string</code> pattern starts with an <code>open</code>,
then it goes as far as possible until matching <code>closeeq</code>,
and then matches the final <code>close</code>.
-The final function capture simply consumes
-the captures made by <code>open</code> and <code>close</code>
-and returns only the middle capture,
-which is the string contents.
+The final function capture simply discards
+the capture made by <code>close</code>.
</p>
@@ -1267,7 +1368,7 @@
<h2><a name="download"></a>Download</h2>
<p>LPeg
-<a href="http://www.inf.puc-rio.br/~roberto/lpeg/lpeg-0.9.tar.gz">source code</a>.</p>
+<a href="http://www.inf.puc-rio.br/~roberto/lpeg/lpeg-0.10.tar.gz">source code</a>.</p>
<h2><a name="license">License</a></h2>
@@ -1311,7 +1412,7 @@
<div id="about">
<p><small>
-$Id: lpeg.html,v 1.54 2008/10/10 19:07:32 roberto Exp $
+$Id: lpeg.html,v 1.60 2010/11/03 17:42:07 roberto Exp $
</small></p>
</div> <!-- id="about" -->
View
15 makefile
@@ -1,3 +1,4 @@
+LIBNAME = lpeg
LUADIR = /usr/include/lua5.1/
COPT = -O2 -DNDEBUG
@@ -21,13 +22,21 @@ CWARNS = -Wall -Wextra -pedantic \
CFLAGS = $(CWARNS) $(COPT) -ansi -I$(LUADIR)
-DLLFLAGS = -shared
CC = gcc
+# For Linux
+DLLFLAGS = -shared -fpic
+ENV =
+
+# For Mac OS
+# ENV = MACOSX_DEPLOYMENT_TARGET=10.4
+# DLLFLAGS = -bundle -undefined dynamic_lookup
+
lpeg.so: lpeg.o
- $(CC) $(DLLFLAGS) lpeg.o -o lpeg.so
+ env $(ENV) $(CC) $(DLLFLAGS) lpeg.o -o lpeg.so
-lpeg.o: makefile lpeg.c
+lpeg.o: makefile lpeg.c lpeg.h
test: test.lua re.lua lpeg.so
test.lua
+
View
113 re.html
@@ -10,7 +10,7 @@
</head>
<body>
-<!-- $Id: re.html,v 1.11 2008/10/10 18:14:06 roberto Exp $ -->
+<!-- $Id: re.html,v 1.14 2010/11/03 17:07:19 roberto Exp $ -->
<div id="container">
@@ -46,7 +46,7 @@
<h2><a name="basic"></a>The <code>re</code> Module</h2>
<p>
-The <code>re</code> Module
+The <code>re</code> module
(provided by file <code>re.lua</code> in the distribution)
supports a somewhat conventional regex syntax
for pattern usage within <a href="lpeg.html">LPeg</a>.
@@ -57,7 +57,7 @@
A <code>p</code> represents an arbitrary pattern;
<code>num</code> represents a number (<code>[0-9]+</code>);
<code>name</code> represents an identifier
-(<code>[a-zA-Z][a-zA-Z0-9]*</code>).
+(<code>[a-zA-Z][a-zA-Z0-9_]*</code>).
Constructions are listed in order of decreasing precedence.
<table border="1">
<tbody><tr><td><b>Syntax</b></td><td><b>Description</b></td></tr>
@@ -68,6 +68,7 @@
<tr><td><code>.</code></td> <td>any character</td></tr>
<tr><td><code>%name</code></td>
<td>pattern <code>defs[name]</code> or a pre-defined pattern</td></tr>
+<tr><td><code>name</code></td><td>non terminal</td></tr>
<tr><td><code>&lt;name&gt;</code></td><td>non terminal</td></tr>
<tr><td><code>{}</code></td> <td>position capture</td></tr>
<tr><td><code>{ p }</code></td> <td>simple capture</td></tr>
@@ -166,15 +167,34 @@
<h2><a name="ex">Some Examples</a></h2>
+<h3>A complete simple program</h3>
+<p>
+The next code shows a simple complete Lua program using
+the <code>re</code> module:
+</p>
+<pre class="example">
+local re = require"re"
+
+-- find the position of the first number in a string
+print(re.find("the number 423 is odd", "[0-9]+")) --&gt; 12
+
+-- similar, but also captures (and returns) the number
+print(re.find("the number 423 is odd", "{[0-9]+}")) --&gt; 12 423
+
+-- returns all words in a string
+print(re.match("the number 423 is odd", "({%a+} / .)*"))
+--&gt; the number is odd
+</pre>
+
+
<h3>Balanced parentheses</h3>
<p>
-As a simple example,
-the following call will produce the same pattern produced by the
+The following call will produce the same pattern produced by the
Lua expression in the
<a href="lpeg.html#balanced">balanced parentheses</a> example:
</p>
<pre class="example">
-b = re.compile[[ balanced &lt;- "(" ([^()] / &lt;balanced&gt;)* ")" ]]
+b = re.compile[[ balanced &lt;- "(" ([^()] / balanced)* ")" ]]
</pre>
<h3>String reversal</h3>
@@ -182,7 +202,7 @@
The next example reverses a string:
</p>
<pre class="example">
-rev = re.compile[[ R &lt;- (!.) -&gt; '' / ({.} &lt;R&gt;) -&gt; '%2%1']]
+rev = re.compile[[ R &lt;- (!.) -&gt; '' / ({.} R) -&gt; '%2%1']]
print(rev:match"0123456789") --&gt; 9876543210
</pre>
@@ -192,8 +212,8 @@
</p>
<pre class="example">
record = re.compile[[
- record &lt;- ( &lt;field&gt; (',' &lt;field&gt;)* ) -&gt; {} (%nl / !.)
- field &lt;- &lt;escaped&gt; / &lt;nonescaped&gt;
+ record &lt;- ( field (',' field)* ) -&gt; {} (%nl / !.)
+ field &lt;- escaped / nonescaped
nonescaped &lt;- { [^,"%nl]* }
escaped &lt;- '"' {~ ([^"] / '""' -&gt; '"')* ~} '"'
]]
@@ -201,12 +221,12 @@
<h3>Lua's long strings</h3>
<p>
-The next example mathes Lua long strings:
+The next example matches Lua long strings:
</p>
<pre class="example">
c = re.compile([[
- longstring &lt;- ('[' {:eq: '='* :} '[' &lt;close&gt;) =&gt; void
- close &lt;- ']' =eq ']' / . &lt;close&gt;
+ longstring &lt;- ('[' {:eq: '='* :} '[' close) =&gt; void
+ close &lt;- ']' =eq ']' / . close
]], {void = function () return true end})
print(c:match'[==[]]===]]]]==]===[]') --&gt; 17
@@ -219,8 +239,8 @@
</p>
<pre class="example">
p = re.compile[[
- block &lt;- ({:ident:' '*:} &lt;line&gt;
- ((=ident !' ' &lt;line&gt;) / &(=ident ' ') &lt;block&gt;)*) -&gt; {}
+ block &lt;- ({:ident:' '*:} line
+ ((=ident !' ' line) / &(=ident ' ') block)*) -&gt; {}
line &lt;- {[^%nl]*} %nl
]]
</pre>
@@ -259,14 +279,14 @@
</p>
<pre class="example">
p = re.compile[[
- text &lt;- {~ &lt;item&gt;* ~}
- item &lt;- &lt;macro&gt; / [^()] / '(' &lt;item&gt;* ')'
- arg &lt;- ' '* {~ (!',' &lt;item&gt;)* ~}
- args &lt;- '(' &lt;arg&gt; (',' &lt;arg&gt;)* ')'
+ text &lt;- {~ item* ~}
+ item &lt;- macro / [^()] / '(' item* ')'
+ arg &lt;- ' '* {~ (!',' item)* ~}
+ args &lt;- '(' arg (',' arg)* ')'
-- now we define some macros
- macro &lt;- ('apply' &lt;args&gt;) -&gt; '%1(%2)'
- / ('add' &lt;args&gt;) -&gt; '%1 + %2'
- / ('mul' &lt;args&gt;) -&gt; '%1 * %2'
+ macro &lt;- ('apply' args) -&gt; '%1(%2)'
+ / ('add' args) -&gt; '%1 + %2'
+ / ('mul' args) -&gt; '%1 * %2'
]]
print(p:match"add(mul(a,b), apply(f,x))") --&gt; a * b + f(x)
@@ -290,13 +310,60 @@
with each <code>%</code><em>n</em> replaced by the <em>n</em>-th argument.
</p>
+<h3>Patterns</h3>
+<p>
+This example shows the complete syntax
+of patterns accepted by <code>re</code>.
+</p>
+<pre class="example">
+p = [=[
+
+pattern &lt;- exp !.
+exp &lt;- S (alternative / grammar)
+
+alternative &lt;- seq ('/' S seq)*
+seq &lt;- prefix*
+prefix &lt;- '&amp;' S prefix / '!' S prefix / suffix
+suffix &lt;- primary S (([+*?]
+ / '^' [+-]? num
+ / '-&gt;' S (string / '{}' / name)
+ / '=&gt;' S name) S)*
+
+primary &lt;- '(' exp ')' / string / class / defined
+ / '{:' (name ':')? exp ':}'
+ / '=' name
+ / '{}'
+ / '{~' exp '~}'
+ / '{' exp '}'
+ / '.'
+ / name S !arrow
+ / '&lt;' name '&gt;' -- old-style non terminals
+
+grammar &lt;- definition+
+definition &lt;- name S arrow exp
+
+class &lt;- '[' '^'? item (!']' item)* ']'
+item &lt;- defined / range / .
+range &lt;- . '-' [^]]
+
+S &lt;- (%s / '--' [^%nl]*)* -- spaces and comments
+name &lt;- [A-Za-z][A-Za-z0-9_]*
+arrow &lt;- '&lt;-'
+num &lt;- [0-9]+
+string &lt;- '"' [^"]* '"' / "'" [^']* "'"
+defined &lt;- '%' name
+
+]=]
+
+print(re.match(p, p)) -- a self description must match itself
+</pre>
<h2><a name="license">License</a></h2>
<p>
-Copyright &copy; 2008 Lua.org, PUC-Rio.
+Copyright &copy; 2008-2010 Lua.org, PUC-Rio.
</p>
<p>
Permission is hereby granted, free of charge,
@@ -334,7 +401,7 @@
<div id="about">
<p><small>
-$Id: re.html,v 1.11 2008/10/10 18:14:06 roberto Exp $
+$Id: re.html,v 1.14 2010/11/03 17:07:19 roberto Exp $
</small></p>
</div> <!-- id="about" -->
View
174 re.lua
@@ -1,16 +1,71 @@
--- $Id: re.lua,v 1.32 2008/10/09 20:25:06 roberto Exp $
+-- $Id: re.lua,v 1.38 2010/11/03 17:21:07 roberto Exp $
-local m = require"lpeg"
-local _G = _G
+-- imported functions and modules
local tonumber, type, print, error = tonumber, type, print, error
-local mt = getmetatable(m.P(0))
+local setmetatable = setmetatable
+local m = require"lpeg"
+
+-- 'm' will be used to parse expressions, and 'mm' will be used to
+-- create expressions; that is, 're' runs on 'm', creating patterns
+-- on 'mm'
+local mm = m
+
+-- pattern's metatable
+local mt = getmetatable(mm.P(0))
+
+
+
+-- No more global accesses after this point
+local version = _VERSION
+if version == "Lua 5.2" then _ENV = nil end
-module "re"
local any = m.P(1)
+
-- Pre-defined names
-Predef = { nl = m.P"\n" }
+local Predef = { nl = m.P"\n" }
+
+
+local mem
+local fmem
+local gmem
+
+
+local function updatelocale ()
+ mm.locale(Predef)
+ Predef.a = Predef.alpha
+ Predef.c = Predef.cntrl
+ Predef.d = Predef.digit
+ Predef.g = Predef.graph
+ Predef.l = Predef.lower
+ Predef.p = Predef.punct
+ Predef.s = Predef.space
+ Predef.u = Predef.upper
+ Predef.w = Predef.alnum
+ Predef.x = Predef.xdigit
+ Predef.A = any - Predef.a
+ Predef.C = any - Predef.c
+ Predef.D = any - Predef.d
+ Predef.G = any - Predef.g
+ Predef.L = any - Predef.l
+ Predef.P = any - Predef.p
+ Predef.S = any - Predef.s
+ Predef.U = any - Predef.u
+ Predef.W = any - Predef.w
+ Predef.X = any - Predef.x
+ mem = {} -- restart memoization
+ fmem = {}
+ gmem = {}
+ local mt = {__mode = "v"}
+ setmetatable(mem, mt)
+ setmetatable(fmem, mt)
+ setmetatable(gmem, mt)
+end
+
+
+updatelocale()
+
local I = m.P(function (s,i) print(i, s:sub(1, i-1)); return i end)
@@ -31,7 +86,7 @@ local function patt_error (s, i)
end
local function mult (p, n)
- local np = m.P(true)
+ local np = mm.P(true)
while n >= 1 do
if n%2 >= 1 then np = np * p end
p = p * p
@@ -47,11 +102,13 @@ local function equalcap (s, i, c)
end
-local S = (m.S(" \t\n") + "--" * (any - m.S"\n")^0)^0
+local S = (Predef.space + "--" * (any - Predef.nl)^0)^0
-local name = m.R("AZ", "az") * m.R("AZ", "az", "09")^0
+local name = m.R("AZ", "az") * m.R("AZ", "az", "__", "09")^0
-local exp_follow = m.P"/" + ")" + "}" + ":}" + "~}" + name + -1
+local arrow = S * "<-"
+
+local exp_follow = m.P"/" + ")" + "}" + ":}" + "~}" + (name * arrow) + -1
name = m.C(name)
@@ -65,15 +122,15 @@ local String = "'" * m.C((any - "'")^0) * "'" +
'"' * m.C((any - '"')^0) * '"'
-local Cat = "%" * Identifier / function (c,Defs)
+local defined = "%" * Identifier / function (c,Defs)
local cat = Defs and Defs[c] or Predef[c]
if not cat then error ("name '" .. c .. "' undefined") end
return cat
end
-local Range = m.Cs(any * (m.P"-"/"") * (any - "]")) / m.R
+local Range = m.Cs(any * (m.P"-"/"") * (any - "]")) / mm.R
-local item = Cat + Range + m.C(any)
+local item = defined + Range + m.C(any)
local Class =
"["
@@ -118,39 +175,34 @@ local exp = m.P{ "Exp",
) * S
)^0, function (a,b,f) return f(a,b) end );
Primary = "(" * m.V"Exp" * ")"
- + String / m.P
+ + String / mm.P
+ Class
- + Cat
+ + defined
+ "{:" * (name * ":" + m.Cc(nil)) * m.V"Exp" * ":}" /
- function (n, p) return m.Cg(p, n) end
- + "=" * name / function (n) return m.Cmt(m.Cb(n), equalcap) end
- + m.P"{}" / m.Cp
- + "{~" * m.V"Exp" * "~}" / m.Cs
- + "{" * m.V"Exp" * "}" / m.C
+ function (n, p) return mm.Cg(p, n) end
+ + "=" * name / function (n) return mm.Cmt(mm.Cb(n), equalcap) end
+ + m.P"{}" / mm.Cp
+ + "{~" * m.V"Exp" * "~}" / mm.Cs
+ + "{" * m.V"Exp" * "}" / mm.C
+ m.P"." * m.Cc(any)
- + "<" * name * ">" / m.V;
- Definition = Identifier * S * '<-' * m.V"Exp";
+ + name * -arrow / mm.V
+ + "<" * name * ">" / mm.V;
+ Definition = Identifier * arrow * m.V"Exp";
Grammar = m.Cf(m.V"Definition" / firstdef * m.Cg(m.V"Definition")^0, adddef) /
- m.P
+ mm.P
}
-local pattern = S * exp / m.P * (-any + patt_error)
+local pattern = S * exp / mm.P * (-any + patt_error)
-function compile (p, defs)
- if m.type(p) == "pattern" then return p end -- already compiled
+local function compile (p, defs)
+ if mm.type(p) == "pattern" then return p end -- already compiled
local cp = pattern:match(p, 1, defs)
if not cp then error("incorrect pattern", 3) end
return cp
end
-
-local mem
-local fmem
-local gmem
-local mt = {__mode = "v"}
-
-function match (s, p, i)
+local function match (s, p, i)
local cp = mem[p]
if not cp then
cp = compile(p)
@@ -159,58 +211,38 @@ function match (s, p, i)
return cp:match(s, i or 1)
end
-function find (s, p, i)
+local function find (s, p, i)
local cp = fmem[p]
if not cp then
cp = compile(p)
- cp = m.P{ m.Cp() * cp + 1 * m.V(1) }
+ cp = mm.P{ mm.Cp() * cp + 1 * mm.V(1) }
fmem[p] = cp
end
return cp:match(s, i or 1)
end
-function gsub (s, p, rep)
- gmem[p] = gmem[p] or {}
- local cp = gmem[p][rep]
+local function gsub (s, p, rep)
+ local g = gmem[p] or {} -- ensure gmem[p] is not collected while here
+ gmem[p] = g
+ local cp = g[rep]
if not cp then
cp = compile(p)
- cp = m.Cs((cp / rep + 1)^0)
- gmem[p][rep] = cp
+ cp = mm.Cs((cp / rep + 1)^0)
+ g[rep] = cp
end
return cp:match(s)
end
-function updatelocale ()
- m.locale(Predef)
- Predef.a = Predef.alpha
- Predef.c = Predef.cntrl
- Predef.d = Predef.digit
- Predef.g = Predef.graph
- Predef.l = Predef.lower
- Predef.p = Predef.punct
- Predef.s = Predef.space
- Predef.u = Predef.upper
- Predef.w = Predef.alnum
- Predef.x = Predef.xdigit
- Predef.A = any - Predef.a
- Predef.C = any - Predef.c
- Predef.D = any - Predef.d
- Predef.G = any - Predef.g
- Predef.L = any - Predef.l
- Predef.P = any - Predef.p
- Predef.S = any - Predef.s
- Predef.U = any - Predef.u
- Predef.W = any - Predef.w
- Predef.X = any - Predef.x
- mem = {} -- restart memoization
- fmem = {}
- gmem = {}
- _G.setmetatable(mem, mt)
- _G.setmetatable(fmem, mt)
- _G.setmetatable(gmem, mt)
-end
-
+-- exported names
+local re = {
+ compile = compile,
+ match = match,
+ find = find,
+ gsub = gsub,
+ updatelocale = updatelocale,
+}
-updatelocale()
+if version == "Lua 5.1" then _G.re = re end
+return re
View
181 test.lua
@@ -1,9 +1,21 @@
#!/usr/bin/env lua5.1
--- $Id: test.lua,v 1.70 2008/10/09 20:16:45 roberto Exp $
+-- $Id: test.lua,v 1.81 2010/11/03 17:07:50 roberto Exp $
+
+require"strict" -- just to be pedantic
local m = require"lpeg"
+local debug = require"debug"
+
+
+-- compatibility with Lua 5.2
+local unpack = table.unpack or unpack
+
+
+-- most tests here do not need much stack space
+m.setmaxstack(5)
+
any = m.P(1)
space = m.S" \t\n"^0
@@ -62,6 +74,8 @@ assert(m.match("a" * #m.P(true), "a") == 2)
-- tests for locale
do
assert(m.locale(m) == m)
+ local t = {}
+ assert(m.locale(t, m) == t)
local x = m.locale()
for n,v in pairs(x) do
assert(type(n) == "string")
@@ -301,11 +315,14 @@ local function checkerr (msg, ...)
assert(m.match({ m.P(msg) + 1 * m.V(1) }, select(2, pcall(...))))
end
-checkerr("rule '1' is left recursive", m.match, { m.V(1) * 'a' }, "a")
-checkerr("stack overflow", m.match, m.C('a')^0, string.rep("a", 50000))
+-- checkerr("rule '1' is left recursive", m.match, { m.V(1) * 'a' }, "a")
checkerr("rule '1' outside a grammar", m.match, m.V(1), "")
checkerr("rule 'hiii' outside a grammar", m.match, m.V('hiii'), "")
checkerr("rule 'hiii' is not defined", m.match, { m.V('hiii') }, "")
+checkerr("rule <a table> is not defined", m.match, { m.V{} }, "")
+
+checkerr("rule 'A' is not a pattern", m.P, { A = {} })
+checkerr("rule <a function> is not a pattern", m.P, { [print] = {} })
-- test for non-pattern as arguments to pattern functions
@@ -323,7 +340,6 @@ local function checkerr (msg, ...)
end
checkerr("rule '1' is left recursive", m.match, { m.V(1) * 'a' }, "a")
-checkerr("stack overflow", m.match, m.C('a')^0, string.rep("a", 50000))
checkerr("rule '1' outside a grammar", m.match, m.V(1), "")
checkerr("rule 'hiii' outside a grammar", m.match, m.V('hiii'), "")
checkerr("rule 'hiii' is not defined", m.match, { m.V('hiii') }, "")
@@ -403,6 +419,39 @@ assert(m.match(m.Cs((- -m.P("a") * 1 + m.P(1)/".")^0), "aloal") == "a..a.")
assert(m.match(m.Cs((-((-m.P"a")/"") * 1 + m.P(1)/".")^0), "aloal") == "a..a.")
+-- look-behind predicate
+assert(not m.match(m.B'a', 'a'))
+assert(m.match(1 * m.B'a', 'a') == 2)
+assert(not m.match(m.B(1), 'a'))
+assert(m.match(1 * m.B(1), 'a') == 2)
+assert(m.match(-m.B(1), 'a') == 1)
+
+B = #letter * -m.B(letter) + -letter * m.B(letter)
+x = m.Ct({ (B * m.Cp())^-1 * (1 * m.V(1) + m.P(true)) })
+checkeq(m.match(x, 'ar cal c'), {1,3,4,7,9,10})
+checkeq(m.match(x, ' ar cal '), {2,4,5,8})
+checkeq(m.match(x, ' '), {})
+checkeq(m.match(x, 'aloalo'), {1,7})
+
+assert(m.match(B, "a") == 1)
+assert(m.match(1 * B, "a") == 2)
+assert(not m.B(-letter):match(""))
+assert((-m.B(letter)):match("") == 1)
+
+assert((4 * m.B(letter, 4)):match("aaaaaaaa") == 5)
+assert(not (4 * m.B(letter, 5)):match("aaaaaaaa"))
+assert((4 * -m.B(letter, 5)):match("aaaaaaaa") == 5)
+
+assert((3 * m.B(m.C(1))):match("12345") == 4)
+
+
+-- bug in 0.9
+assert(m.match(('a' * #m.P'b'), "ab") == 2)
+assert(not m.match(('a' * #m.P'b'), "a"))
+
+assert(not m.match(#m.S'567', ""))
+assert(m.match(#m.S'567' * 1, "6") == 2)
+
-- tests for Tail Calls
@@ -432,6 +481,14 @@ assert(p:match(string.rep("011", 10000) .. "$"))
assert(not p:match(string.rep("011", 10001) .. "$"))
+-- this grammar does need backtracking info.
+local lim = 10000
+p = m.P{ '0' * m.V(1) + '0' }
+assert(not pcall(m.match, p, string.rep("0", lim)))
+m.setmaxstack(2*lim)
+assert(not pcall(m.match, p, string.rep("0", lim)))
+m.setmaxstack(2*lim + 2)
+assert(pcall(m.match, p, string.rep("0", lim)))
-- tests for optional start position
assert(m.match("a", "abc", 1))
@@ -449,13 +506,6 @@ assert(not m.match(1, "", 1))
assert(not m.match(1, "", -1))
assert(not m.match(1, "", 0))
-
--- basic tests for external C function
-
-assert(m.match(m.span("abcd"), "abbbacebb") == 7)
-assert(m.match(m.span("abcd"), "0abbbacebb") == 1)
-assert(m.match(m.span("abcd"), "") == 1)
-
print("+")
@@ -697,13 +747,13 @@ badgrammar({[1] = -m.P("a") * m.V(1)}, "rule '1'")
badgrammar({[1] = -1 * m.V(1)}, "rule '1'")
badgrammar({[1] = 1 * m.V(2), [2] = m.V(2)}, "rule '2'")
badgrammar({[1] = m.P(0), [2] = 1 * m.V(1)^0}, "loop in rule '2'")
-badgrammar({ lpeg.V(2), lpeg.V(3)^0, lpeg.P"" }, "rule '2'")
-badgrammar({ lpeg.V(2) * lpeg.V(3)^0, lpeg.V(3)^0, lpeg.P"" }, "rule '1'")
-badgrammar({ #(lpeg.V(1) * 'a') }, "rule '1'")
-badgrammar({ -(lpeg.V(1) * 'a') }, "rule '1'")
+badgrammar({ m.V(2), m.V(3)^0, m.P"" }, "rule '2'")
+badgrammar({ m.V(2) * m.V(3)^0, m.V(3)^0, m.P"" }, "rule '1'")
+badgrammar({ #(m.V(1) * 'a') }, "rule '1'")
+badgrammar({ -(m.V(1) * 'a') }, "rule '1'")
-assert(m.match({'a' * -lpeg.V(1)}, "aaa") == 2)
-assert(m.match({'a' * -lpeg.V(1)}, "aaaa") == nil)
+assert(m.match({'a' * -m.V(1)}, "aaa") == 2)
+assert(m.match({'a' * -m.V(1)}, "aaaa") == nil)
-- simple tests for maximum sizes:
@@ -783,7 +833,6 @@ checkeq(x, {'a', 'g', {}, {{'b'}, 'c'}, {'d', {'e'}}});
x = {(m.Cmt(1, id)^0):match(string.rep('a', 500))}
assert(#x == 500)
-assert(not pcall(m.match, m.Cmt(1, id)^0, string.rep('a', 50000)))
local function id(s, i, x)
if x == 'a' then return i + 1, 1, 3, 7
@@ -833,13 +882,15 @@ assert(not c:match'[[]=]====]=]=]==]===[]')
-- Tests for 're' module
-------------------------------------------------------------------
-require "re"
+local re = require "re"
local match, compile = re.match, re.compile
+
+
assert(match("a", ".") == 2)
assert(match("a", "''") == 1)
-assert(match("", "!.") == 1)
+assert(match("", " ! . ") == 1)
assert(not match("a", " ! . "))
assert(match("abcde", " ( . . ) * ") == 5)
assert(match("abbcde", " [a-c] +") == 5)
@@ -850,42 +901,42 @@ assert(match("abbc--", " [ac-] +") == 2)
assert(match("abbc--", " [-acb] + ") == 7)
assert(not match("abbcde", " [b-z] + "))
assert(match("abb\"de", '"abb"["]"de"') == 7)
-assert(match("abceeef", "'ac'? 'ab'* 'c' {'e'*} / 'abceeef' ") == "eee")
+assert(match("abceeef", "'ac' ? 'ab' * 'c' { 'e' * } / 'abceeef' ") == "eee")
assert(match("abceeef", "'ac'? 'ab'* 'c' { 'f'+ } / 'abceeef' ") == 8)
-local t = {match("abceefe", "((&'e' {})? .)*")}
+local t = {match("abceefe", "( ( & 'e' {} ) ? . ) * ")}
checkeq(t, {4, 5, 7})
local t = {match("abceefe", "((&&'e' {})? .)*")}
checkeq(t, {4, 5, 7})
local t = {match("abceefe", "( ( ! ! 'e' {} ) ? . ) *")}
checkeq(t, {4, 5, 7})
-local t = {match("abceefe", "((&!&!'e' {})? .)*")}
+local t = {match("abceefe", "(( & ! & ! 'e' {})? .)*")}
checkeq(t, {4, 5, 7})
assert(match("cccx" , "'ab'? ('ccc' / ('cde' / 'cd'*)? / 'ccc') 'x'+") == 5)
assert(match("cdx" , "'ab'? ('ccc' / ('cde' / 'cd'*)? / 'ccc') 'x'+")