From b73a97e845233cc8c0721c049e0e9b420cfb5719 Mon Sep 17 00:00:00 2001 From: Nick Gammon Date: Sat, 10 Jul 2010 08:46:08 +1000 Subject: [PATCH] Removed limit of 1000 wildcards in a regular expression --- regexp.cpp | 18 +++++++++++------- scripting/lua_scripting.cpp | 14 ++------------ 2 files changed, 13 insertions(+), 19 deletions(-) diff --git a/regexp.cpp b/regexp.cpp index 78e412d4..766015f2 100644 --- a/regexp.cpp +++ b/regexp.cpp @@ -42,18 +42,23 @@ pcre_extra * extra; re->m_extra = extra; re->m_iExecutionError = 0; // no error now + // inspired by a suggestion by Twisol (to remove a hard-coded limit on the number of wildcards) + int capturecount = 0; + // how many captures did we get? + pcre_fullinfo(program, NULL, PCRE_INFO_CAPTURECOUNT, &capturecount); + // allocate memory for them + re->m_vOffsets.resize ((capturecount + 1) * 3); // add 1 for the whole expression + return re; } -#define MAX_PCRE_WILDCARDS 1000 - int regexec(register t_regexp *prog, register const char *string, const int start_offset) { int options = App.m_bRegexpMatchEmpty ? 0 : PCRE_NOTEMPTY; // don't match on an empty string int count; -static int offsets [MAX_PCRE_WILDCARDS * 3]; // hopefully we won't recurse and crash ;) + LARGE_INTEGER start, finish; @@ -67,7 +72,7 @@ LARGE_INTEGER start, pcre_callout = NULL; count = pcre_exec(prog->m_program, prog->m_extra, string, strlen (string), - start_offset, options, offsets, NUMITEMS (offsets)); + start_offset, options, &prog->m_vOffsets [0], prog->m_vOffsets.size ()); if (App.m_iCounterFrequency) { @@ -83,6 +88,8 @@ LARGE_INTEGER start, { pcre_free (prog->m_program); prog->m_program = NULL; + pcre_free (prog->m_extra); + prog->m_extra = NULL; prog->m_iExecutionError = count; // remember reason } @@ -99,9 +106,6 @@ LARGE_INTEGER start, prog->m_sTarget = string; // for extracting wildcards prog->m_iCount = count; // ditto - prog->m_vOffsets.clear (); - // only need first 2/3 of offsets - copy (offsets, &offsets [count * 2], back_inserter (prog->m_vOffsets)); return true; // match } diff --git a/scripting/lua_scripting.cpp b/scripting/lua_scripting.cpp index fbbe6deb..d47febfe 100644 --- a/scripting/lua_scripting.cpp +++ b/scripting/lua_scripting.cpp @@ -468,15 +468,10 @@ bool CScriptEngine::ExecuteLua (DISPID & dispid, // dispatch ID, will be set to paramCount++; // we have one more parameter to the call pcre_fullinfo(regexp->m_program, regexp->m_extra, PCRE_INFO_CAPTURECOUNT, &ncapt); - int iTot = regexp->m_vOffsets.size (); // how many did we actually get? - for (i = 0; i <= ncapt; i++) { int j = i * 2; - if (j < iTot) // save if available, otherwise push a false value lua_pushlstring(L, regexp->m_sTarget.c_str () + regexp->m_vOffsets[j], regexp->m_vOffsets[j + 1] - regexp->m_vOffsets[j]); - else - lua_pushboolean (L, 0); lua_rawseti (L, -2, i); } // now add item 0 - the whole matching line @@ -508,7 +503,7 @@ bool CScriptEngine::ExecuteLua (DISPID & dispid, // dispatch ID, will be set to if (found_strings.find (sName) != found_strings.end ()) { // do not replace if this one is out of range - if (n < 0 || n > ncapt || j >= iTot) + if (n < 0 || n > ncapt) continue; } // end of duplicate else @@ -517,12 +512,7 @@ bool CScriptEngine::ExecuteLua (DISPID & dispid, // dispatch ID, will be set to lua_pushstring (L, (LPCTSTR) name); if (n >= 0 && n <= ncapt) - { - if (j < iTot) // save if available, otherwise push a false value - lua_pushlstring(L, regexp->m_sTarget.c_str () + regexp->m_vOffsets[j], regexp->m_vOffsets[j + 1] - regexp->m_vOffsets[j]); - else - lua_pushboolean(L, 0); - } + lua_pushlstring(L, regexp->m_sTarget.c_str () + regexp->m_vOffsets[j], regexp->m_vOffsets[j + 1] - regexp->m_vOffsets[j]); else lua_pushnil (L); /* n out of range */ lua_settable (L, -3);