Skip to content

Commit

Permalink
Merge pull request #539 from miloyip/issue538_regexzeromin
Browse files Browse the repository at this point in the history
Support {0,} and {0,m} quantifiers in Regex
  • Loading branch information
miloyip committed Feb 12, 2016
2 parents 3c2c162 + 60116cf commit 3cb5733
Show file tree
Hide file tree
Showing 2 changed files with 67 additions and 6 deletions.
28 changes: 23 additions & 5 deletions include/rapidjson/internal/regex.h
Original file line number Diff line number Diff line change
Expand Up @@ -256,13 +256,13 @@ class GenericRegex {
case '{':
{
unsigned n, m;
if (!ParseUnsigned(ds, &n) || n == 0)
if (!ParseUnsigned(ds, &n))
return;

if (ds.Peek() == ',') {
ds.Take();
if (ds.Peek() == '}')
m = 0;
m = kInfinityQuantifier;
else if (!ParseUnsigned(ds, &m) || m < n)
return;
}
Expand Down Expand Up @@ -424,15 +424,29 @@ class GenericRegex {
}

bool EvalQuantifier(Stack<Allocator>& operandStack, unsigned n, unsigned m) {
RAPIDJSON_ASSERT(n > 0);
RAPIDJSON_ASSERT(m == 0 || n <= m); // m == 0 means infinity
RAPIDJSON_ASSERT(n <= m);
if (operandStack.GetSize() < sizeof(Frag))
return false;

if (n == 0) {
if (m == 0) // a{0} not support
return false;
else if (m == kInfinityQuantifier)
Eval(operandStack, kZeroOrMore); // a{0,} -> a*
else {
Eval(operandStack, kZeroOrOne); // a{0,5} -> a?
for (unsigned i = 0; i < m - 1; i++)
CloneTopOperand(operandStack); // a{0,5} -> a? a? a? a? a?
for (unsigned i = 0; i < m - 1; i++)
Eval(operandStack, kConcatenation); // a{0,5} -> a?a?a?a?a?
}
return true;
}

for (unsigned i = 0; i < n - 1; i++) // a{3} -> a a a
CloneTopOperand(operandStack);

if (m == 0)
if (m == kInfinityQuantifier)
Eval(operandStack, kOneOrMore); // a{3,} -> a a a+
else if (m > n) {
CloneTopOperand(operandStack); // a{3,5} -> a a a a
Expand Down Expand Up @@ -469,6 +483,8 @@ class GenericRegex {
template <typename InputStream>
bool ParseUnsigned(DecodedStream<InputStream>& ds, unsigned* u) {
unsigned r = 0;
if (ds.Peek() < '0' || ds.Peek() > '9')
return false;
while (ds.Peek() >= '0' && ds.Peek() <= '9') {
if (r >= 429496729 && ds.Peek() > '5') // 2^32 - 1 = 4294967295
return false; // overflow
Expand Down Expand Up @@ -658,6 +674,8 @@ class GenericRegex {
SizeType stateCount_;
SizeType rangeCount_;

// Sentinel upper bound meaning "no maximum" for a {n,} quantifier. The parser
// stores it in m when '}' directly follows the comma, and EvalQuantifier maps
// it to kZeroOrMore (n == 0) or kOneOrMore (n > 0). Being the largest unsigned
// value (all bits set), it also satisfies the n <= m precondition for any n.
// NOTE(review): an explicitly written maximum that parses to this same value
// would be indistinguishable from "unbounded" -- confirm that is acceptable.
static const unsigned kInfinityQuantifier = ~0u;

// For SearchWithAnchoring()
uint32_t* stateSet_; // allocated by states_.GetAllocator()
mutable Stack<Allocator> state0_;
Expand Down
45 changes: 44 additions & 1 deletion test/unittest/regextest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -325,6 +325,43 @@ TEST(Regex, QuantifierMinMax3) {
EXPECT_FALSE(re.Match("abbbbbbd"));
}

// Issue538: a quantifier with a zero minimum and a finite maximum, {0,3},
// must compile. Here the group (b|c) may occur zero to three times between
// the leading 'a' and the trailing 'd'.
TEST(Regex, QuantifierMinMax4) {
Regex re("a(b|c){0,3}d");
// Abort the test early if the pattern does not compile; the match checks
// below would be meaningless on an invalid regex.
ASSERT_TRUE(re.IsValid());
// Zero repetitions of the group.
EXPECT_TRUE(re.Match("ad"));
// One repetition.
EXPECT_TRUE(re.Match("abd"));
EXPECT_TRUE(re.Match("acd"));
// Two repetitions, including a mix of both alternatives.
EXPECT_TRUE(re.Match("abbd"));
EXPECT_TRUE(re.Match("accd"));
EXPECT_TRUE(re.Match("abcd"));
// Three repetitions: the upper bound is inclusive.
EXPECT_TRUE(re.Match("abbbd"));
EXPECT_TRUE(re.Match("acccd"));
// Four repetitions exceed the maximum.
EXPECT_FALSE(re.Match("abbbbd"));
// 'd' is not one of the group's alternatives, so a second 'd' cannot match.
EXPECT_FALSE(re.Match("add"));
// Four repetitions again, with uniform and mixed alternatives.
EXPECT_FALSE(re.Match("accccd"));
EXPECT_FALSE(re.Match("abcbcd"));
}

// Issue538: a quantifier with a zero minimum and no maximum, {0,}, must
// compile. Here the group (b|c) may occur any number of times, including
// zero, between the leading 'a' and the trailing 'd'.
TEST(Regex, QuantifierMinMax5) {
Regex re("a(b|c){0,}d");
// Abort the test early if the pattern does not compile.
ASSERT_TRUE(re.IsValid());
// Zero repetitions of the group.
EXPECT_TRUE(re.Match("ad"));
// One to three repetitions.
EXPECT_TRUE(re.Match("abd"));
EXPECT_TRUE(re.Match("acd"));
EXPECT_TRUE(re.Match("abbd"));
EXPECT_TRUE(re.Match("accd"));
EXPECT_TRUE(re.Match("abcd"));
EXPECT_TRUE(re.Match("abbbd"));
EXPECT_TRUE(re.Match("acccd"));
// Four repetitions: rejected by {0,3} in QuantifierMinMax4 but accepted
// here because there is no upper bound.
EXPECT_TRUE(re.Match("abbbbd"));
EXPECT_TRUE(re.Match("accccd"));
EXPECT_TRUE(re.Match("abcbcd"));
// Characters outside the group's alternatives ('d', 'a') must still fail.
EXPECT_FALSE(re.Match("add"));
EXPECT_FALSE(re.Match("aad"));
}

#define EURO "\xE2\x82\xAC" // "\xE2\x82\xAC" is UTF-8 sequence of Euro sign U+20AC

TEST(Regex, Unicode) {
Expand Down Expand Up @@ -501,6 +538,7 @@ TEST(Regex, Invalid) {
EXPECT_FALSE(re.IsValid());\
}

TEST_INVALID("");
TEST_INVALID("a|");
TEST_INVALID("()");
TEST_INVALID(")");
Expand All @@ -517,7 +555,7 @@ TEST(Regex, Invalid) {
TEST_INVALID("a{0}");
TEST_INVALID("a{-1}");
TEST_INVALID("a{}");
TEST_INVALID("a{0,}");
// TEST_INVALID("a{0,}"); // Support now
TEST_INVALID("a{,0}");
TEST_INVALID("a{1,0}");
TEST_INVALID("a{-1,0}");
Expand All @@ -530,4 +568,9 @@ TEST(Regex, Invalid) {
#undef TEST_INVALID
}

// Regression test for issue 538: a pattern that uses a {0,2} quantifier on a
// group must be accepted. Only validity is checked; no input is matched.
TEST(Regex, Issue538) {
// NOTE(review): after C++ escaping, the literal below contains two backslash
// characters before the '.', not one. If the intent was an escaped dot, the
// literal would be spelled "\\." -- confirm against the pattern in the issue.
Regex re("^[0-9]+(\\\\.[0-9]+){0,2}");
EXPECT_TRUE(re.IsValid());
}

#undef EURO

0 comments on commit 3cb5733

Please sign in to comment.