From ab20ae646002c81c1dafd1dff54b43a2f252f78b Mon Sep 17 00:00:00 2001
From: Stefan O'Rear
Date: Mon, 30 Aug 2010 02:36:52 -0700
Subject: [PATCH] First version of the new regex execution engine

---
 lib/Cursor.cs | 123 ++++++++++++++++++++++++++++++++++++++++++++------
 lib/Kernel.cs |   4 +-
 2 files changed, 113 insertions(+), 14 deletions(-)

Review notes (commentary only; this section is not part of the commit
message and is skipped by tools that apply the patch).

What the patch does:
  * RxFrame keeps its backtracking data in PSN<X>, a singly linked stack
    node holding a value `obj` and a `next` link.  `bt` is the stack of
    State snapshots; the top node is the state currently being worked on.
  * PushMark(ip) stores `ip` in the top state and then pushes a copy of
    that state, so the node underneath becomes the resume point.
    Backtrack pops the top node and resumes at the `ip` recorded in the
    node below it, or returns `th.caller` when nothing is left.
  * Exact / ExactOne compare input from `orig` starting at `bt.obj.pos`,
    advancing `pos` as they go, and call Backtrack on a mismatch or when
    the remaining input is too short.
  * OpenQuant / IncQuant / GetQuant / CloseQuant maintain a stack of
    repetition counters in `bt.obj.reps`.
  * TestC is a hand-written state machine that drives the above against
    the string "aaaaab".  Frame gains an `rx` field and Kernel.UnboxDO
    becomes public so that RxFrame can call it.

Changes relative to the text as received:
  * The text had been collapsed onto three physical lines; line breaks
    and indentation are reconstructed here.  The hunk line counts were
    re-checked against the reconstruction (Cursor.cs: 110 added,
    13 removed, 10 context) and are unchanged.
  * Generic type arguments had been stripped: PSN was declared with a
    field of type `X` and instantiated with State, int and class values.
    They are restored as PSN<X>, PSN<State> and PSN<int>.
  * NOTE(review): some restored details cannot be recovered from this
    patch alone and are assumptions to confirm against the repository:
    the element type of State.captures (PSN<Cursor> is a guess, nothing
    in the patch assigns it), the element type of State.klasses
    (DynMetaObject, taken from the removed `cklass` field), the type
    arguments of `Dictionary<string, Variable>` in the Kernel.cs context
    line, the 4-space indentation, the position of blank context lines,
    and the sender address missing from the "From:" header.  Context
    lines must match the target file exactly for the patch to apply.
  * IncQuant no longer increments the counter node in place.  PushMark
    copies State by value, but the copy still points at the same `reps`
    node, so an in-place increment also changed the count seen by every
    saved mark and the count was not restored on backtrack.  IncQuant
    now replaces the top node with a new one holding count + 1.
  * Because one added line differs from the original commit, the blob
    ids on the Cursor.cs "index" line and the commit id in the first
    header line no longer describe this content exactly.

diff --git a/lib/Cursor.cs b/lib/Cursor.cs
index 399425ef..172f7f33 100644
--- a/lib/Cursor.cs
+++ b/lib/Cursor.cs
@@ -31,23 +31,120 @@ public class Matched {
 // we keep the cursor in exploded form to avoid creating lots and lots of
 // cursor objects
 public sealed class RxFrame {
+    public sealed class PSN<X> {
+        public X obj;
+        public readonly PSN<X> next;
+        public PSN(X obj, PSN<X> next) { this.obj = obj; this.next = next; }
+    }
+
+    public struct State {
+        public PSN<int> reps;
+        public PSN<Cursor> captures;
+        public PSN<DynMetaObject> klasses;
+
+        public int pos;
+        public int ip;
+    }
+
+    public PSN<State> bt;
+
     // our backing string, in a cheap to index form
     public char[] orig;
     // cache of orig.Length
     public int end;
-    // the current match position; restored on backtrack
-    public int pos;
-    // uninterpreted value which is restored on backtrack
-    public int xtra;
-    // cursor class to use if cursor needs to be deexploded
-    public DynMetaObject cklass;
-    // used for efficiently restoring classes, since :lang is always scoped
-    public DynMetaObject[] saved_cklasses;
-    // stack of backtrack states, each 3 ints long
-    // each record looks like: ip pos xtra
-    public int[] bstack;
-    // auxilliary vector for bstack entries that need to refer to objects
-    // not automatically managed
+
+    public RxFrame(DynObject csr) {
+        Cursor c = (Cursor) Kernel.UnboxDO(csr);
+        orig = c.backing.ToCharArray();
+        end = orig.Length;
+        bt = new PSN<State>(default(State), null);
+        bt.obj.klasses = new PSN<DynMetaObject>(csr.klass, null);
+        bt.obj.pos = c.pos;
+    }
+
+    public Frame Backtrack(Frame th) {
+        bt = bt.next;
+        if (bt == null) {
+            return th.caller;
+        } else {
+            th.ip = bt.obj.ip;
+            return th;
+        }
+    }
+
+    public void PushMark(int ip) {
+        bt.obj.ip = ip;
+        bt = new PSN<State>(bt.obj, bt);
+    }
+
+    public Frame Exact(Frame th, string st) {
+        if (bt.obj.pos + st.Length > end)
+            return Backtrack(th);
+        foreach (char ch in st)
+            if (orig[bt.obj.pos++] != ch)
+                return Backtrack(th);
+        return th;
+    }
+
+    public Frame ExactOne(Frame th, char ch) {
+        if (bt.obj.pos == end || orig[bt.obj.pos++] != ch)
+            return Backtrack(th);
+        return th;
+    }
+
+    public void OpenQuant() {
+        bt.obj.reps = new PSN<int>(0, bt.obj.reps);
+    }
+
+    public int CloseQuant() {
+        int x = bt.obj.reps.obj;
+        bt.obj.reps = bt.obj.reps.next;
+        return x;
+    }
+
+    public void IncQuant() {
+        bt.obj.reps = new PSN<int>(bt.obj.reps.obj + 1, bt.obj.reps.next); // new node: saved marks keep the old count
+    }
+
+    public int GetQuant() {
+        return bt.obj.reps.obj;
+    }
+
+    private RxFrame(string st) {
+        orig = st.ToCharArray();
+        end = orig.Length;
+        bt = new PSN<State>(default(State), null);
+        bt.obj.klasses = new PSN<DynMetaObject>(null, null);
+        bt.obj.pos = 0;
+    }
+
+    private static SubInfo TestSI = new SubInfo(TestC);
+    private static Frame TestC(Frame th) {
+        if (Kernel.TraceCont) System.Console.WriteLine("At {0}", th.ip);
+        switch (th.ip) {
+            case 0:
+                th.rx = new RxFrame("aaaaab");
+                th.rx.OpenQuant();
+                goto case 1;
+            case 1:
+                th.rx.PushMark(3);
+                th.ip = 2;
+                return th.rx.ExactOne(th, 'a');
+            case 2:
+                th.rx.IncQuant();
+                goto case 1;
+            case 3:
+                th.rx.CloseQuant();
+                th.ip = 4;
+                return th.rx.Exact(th, "ab");
+            case 4:
+                System.Console.WriteLine("Match!");
+                return null;
+            default:
+                System.Console.WriteLine("Bad IP");
+                return null;
+        }
+    }
 }
 
 public class Cursor {
diff --git a/lib/Kernel.cs b/lib/Kernel.cs
index d0a2ab97..db348e6d 100644
--- a/lib/Kernel.cs
+++ b/lib/Kernel.cs
@@ -197,6 +197,8 @@ public class Frame: IP6 {
     public object lex3;
     public object[] lexn;
 
+    public RxFrame rx;
+
     public Variable[] pos;
     public Dictionary<string, Variable> named;
 
@@ -470,7 +472,7 @@ public class CLRImportObject : IP6 {
 
 public class Kernel {
     public static DynBlockDelegate MainlineContinuation;
-    private static object UnboxDO(DynObject o) {
+    public static object UnboxDO(DynObject o) {
         return o.slots["value"];
     }
 