Skip to content

Commit

Permalink
Fixes to changes to regular expression handling
Browse files Browse the repository at this point in the history
  • Loading branch information
nickgammon committed Jul 10, 2010
1 parent b73a97e commit f7ab822
Show file tree
Hide file tree
Showing 2 changed files with 21 additions and 28 deletions.
41 changes: 19 additions & 22 deletions regexp.cpp
Expand Up @@ -42,12 +42,6 @@ pcre_extra * extra;
re->m_extra = extra;
re->m_iExecutionError = 0; // no error now

// inspired by a suggestion by Twisol (to remove a hard-coded limit on the number of wildcards)
int capturecount = 0;
// how many captures did we get?
pcre_fullinfo(program, NULL, PCRE_INFO_CAPTURECOUNT, &capturecount);
// allocate memory for them
re->m_vOffsets.resize ((capturecount + 1) * 3); // add 1 for the whole expression

return re;
}
Expand All @@ -59,20 +53,27 @@ int regexec(register t_regexp *prog,
int options = App.m_bRegexpMatchEmpty ? 0 : PCRE_NOTEMPTY; // don't match on an empty string
int count;


LARGE_INTEGER start,
finish;

// exit if no regexp program to process (possibly because of previous error)
if (prog->m_program == NULL)
return false;

// inspired by a suggestion by Twisol (to remove a hard-coded limit on the number of wildcards)
int capturecount = 0;
// how many captures did we get?
pcre_fullinfo(prog->m_program, NULL, PCRE_INFO_CAPTURECOUNT, &capturecount);
// allocate enough memory
vector<int> offsets ((capturecount + 1) * 3); // we always get offset 0 - the whole match

LARGE_INTEGER start,
finish;


if (App.m_iCounterFrequency)
QueryPerformanceCounter (&start);

pcre_callout = NULL;
count = pcre_exec(prog->m_program, prog->m_extra, string, strlen (string),
start_offset, options, &prog->m_vOffsets [0], prog->m_vOffsets.size ());
start_offset, options, &offsets [0], offsets.size ());

if (App.m_iCounterFrequency)
{
Expand All @@ -81,32 +82,28 @@ LARGE_INTEGER start,
}

if (count == PCRE_ERROR_NOMATCH)
return false; // no match
return false; // no match - don't save matching string etc.

// free program as an indicator that we can't keep trying to do this one
// otherwise free program as an indicator that we can't keep trying to do this one
if (count <= 0)

This comment has been minimized.

Copy link
@Twisol

Twisol Jul 10, 2010

Contributor

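This can be changed to count < 0 if you want: now that the offsets vector is sized from the capture count, pcre_exec should no longer return 0 (which only happens when the offsets vector is too small).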

This comment has been minimized.

Copy link
@nickgammon

nickgammon Jul 10, 2010

Author Owner

Previously a return value of 0 was reported as an error, so zero is more "error-like" than not; I prefer to keep it this way.

This comment has been minimized.

Copy link
@Twisol

Twisol Jul 10, 2010

Contributor

Ahh, thanks for the explanation.

{
pcre_free (prog->m_program);
prog->m_program = NULL;
pcre_free (prog->m_extra);
prog->m_extra = NULL;
prog->m_iExecutionError = count; // remember reason
}

if (count == 0)
ThrowErrorException (Translate ("Too many substrings in regular expression"));

if (count < 0)
ThrowErrorException (TFormat ("Error executing regular expression: %s",
Convert_PCRE_Runtime_Error (count)));
Convert_PCRE_Runtime_Error (count)));
}


// if, and only if, we match we will save the matching string, the count
// if, and only if, we match, we will save the matching string, the count
// and offsets, so we can extract the wildcards later on

prog->m_sTarget = string; // for extracting wildcards
prog->m_iCount = count; // ditto

prog->m_vOffsets.resize (0); // clear for copy, but leave allocated memory
copy (offsets.begin (), offsets.end (), back_inserter (prog->m_vOffsets));
return true; // match
}

Expand Down
8 changes: 2 additions & 6 deletions scripting/lua_scripting.cpp
Expand Up @@ -470,13 +470,9 @@ bool CScriptEngine::ExecuteLua (DISPID & dispid, // dispatch ID, will be set to

for (i = 0; i <= ncapt; i++)
{
int j = i * 2;
lua_pushlstring(L, regexp->m_sTarget.c_str () + regexp->m_vOffsets[j], regexp->m_vOffsets[j + 1] - regexp->m_vOffsets[j]);
lua_pushstring(L, regexp->GetWildcard (i).c_str ());

This comment has been minimized.

Copy link
@Twisol

Twisol Jul 10, 2010

Contributor

I'm actually somewhat inclined to keep the pushlstring versions of the changes, just to handle any NUL bytes that might sneak in. My code is:

      string wildcard = regexp->GetWildcard(i);
      lua_pushlstring(L, wildcard.c_str(), wildcard.length());
lua_rawseti (L, -2, i);
}
// now add item 0 - the whole matching line
// lua_pushstring (trigger_item->wildcards [0].c_str ())
// lua_rawseti(L, -2, 0);

/* now do named subpatterns */
pcre_fullinfo(regexp->m_program, regexp->m_extra, PCRE_INFO_NAMECOUNT, &namecount);
Expand Down Expand Up @@ -512,7 +508,7 @@ bool CScriptEngine::ExecuteLua (DISPID & dispid, // dispatch ID, will be set to

lua_pushstring (L, (LPCTSTR) name);
if (n >= 0 && n <= ncapt)
lua_pushlstring(L, regexp->m_sTarget.c_str () + regexp->m_vOffsets[j], regexp->m_vOffsets[j + 1] - regexp->m_vOffsets[j]);
lua_pushstring(L, regexp->GetWildcard (n).c_str ());
else
lua_pushnil (L); /* n out of range */
lua_settable (L, -3);
Expand Down

0 comments on commit f7ab822

Please sign in to comment.