Skip to content

Commit

Permalink
Merge pull request #52840 from hanfei1991/hanfei/analyze-named-group
Browse files Browse the repository at this point in the history
make regexp analyzer recognize named capturing groups
  • Loading branch information
hanfei1991 committed Aug 1, 2023
2 parents c681405 + 7aecec8 commit d900486
Show file tree
Hide file tree
Showing 4 changed files with 30 additions and 0 deletions.
24 changes: 24 additions & 0 deletions src/Common/OptimizedRegularExpression.cpp
Expand Up @@ -45,6 +45,25 @@ size_t shortest_literal_length(const Literals & literals)
return shortest;
}

/// Skips the name part of a named capturing group written as
/// (?P<name>...), (?<name>...) or (?'name'...).
///
/// `pos` points at the beginning of the group, `pos + offset` at the opening delimiter
/// of the name ('<' or '\''), and `end` is the end of the inspected range.
///
/// Returns a pointer to the closing delimiter ('>' or '\'') if it is reached after
/// scanning only word characters [0-9A-Za-z_]. In every other case (a non-word character
/// is met first, e.g. '=' or '!' in (?<=...) / (?<!...), or the closing delimiter is
/// missing) returns `pos` unchanged.
const char * skipNameCapturingGroup(const char * pos, size_t offset, const char * end)
{
    /// Nothing to inspect: never read at or past `end`.
    if (pos + offset >= end)
        return pos;

    /// A name opened with '<' is closed with '>'; otherwise it is closed with a single quote.
    const char special = *(pos + offset) == '<' ? '>' : '\'';
    ++offset;

    while (pos + offset < end)
    {
        const char cur = *(pos + offset);
        if (cur == special)
            return pos + offset;

        /// Group names consist of word characters. The underscore has to be accepted as well,
        /// otherwise a name like "my_num" is not skipped and gets analyzed as literal text.
        const bool is_word_char = ('0' <= cur && cur <= '9')
            || ('a' <= cur && cur <= 'z')
            || ('A' <= cur && cur <= 'Z')
            || cur == '_';
        if (!is_word_char)
            return pos;

        ++offset;
    }

    /// The closing delimiter was not found before `end`.
    return pos;
}

const char * analyzeImpl(
std::string_view regexp,
const char * pos,
Expand Down Expand Up @@ -247,10 +266,15 @@ const char * analyzeImpl(
break;
}
}
/// (?:regex) means non-capturing parentheses group
if (pos + 2 < end && pos[1] == '?' && pos[2] == ':')
{
pos += 2;
}
if (pos + 3 < end && pos[1] == '?' && (pos[2] == '<' || pos[2] == '\'' || (pos[2] == 'P' && pos[3] == '<')))
{
pos = skipNameCapturingGroup(pos, pos[2] == 'P' ? 3: 2, end);
}
Literal group_required_substr;
bool group_is_trival = true;
Literals group_alters;
Expand Down
4 changes: 4 additions & 0 deletions src/Common/tests/gtest_optimize_re.cpp
Expand Up @@ -47,4 +47,8 @@ TEST(OptimizeRE, analyze)
test_f("abc|(:?xx|yy|zz|x?)def", "", {"abc", "def"});
test_f("abc|(:?xx|yy|zz|x?){1,2}def", "", {"abc", "def"});
test_f(R"(\\A(?:(?:[-0-9_a-z]+(?:\\.[-0-9_a-z]+)*)/k8s1)\\z)", "/k8s1");
test_f("[a-zA-Z]+(?P<num>\\d+)", "");
test_f("[a-zA-Z]+(?<num>\\d+)", "");
test_f("[a-zA-Z]+(?'num'\\d+)", "");
test_f("[a-zA-Z]+(?x<num>\\d+)", "x<num>");
}
@@ -1 +1,2 @@
1
1
1 change: 1 addition & 0 deletions tests/queries/0_stateless/02751_match_constant_needle.sql
@@ -1 +1,2 @@
-- Constant needle built from nested non-capturing groups (?:...).
select match('default/k8s1', '\\A(?:(?:[-0-9_a-z]+(?:\\.[-0-9_a-z]+)*)/k8s1)\\z');
-- Constant needle containing a named capturing group (?P<num>...).
select match('abc123', '[a-zA-Z]+(?P<num>\\d+)');

0 comments on commit d900486

Please sign in to comment.