diff --git a/include/rapidjson/internal/regex.h b/include/rapidjson/internal/regex.h index 422a5240b..8530cd771 100644 --- a/include/rapidjson/internal/regex.h +++ b/include/rapidjson/internal/regex.h @@ -43,12 +43,40 @@ RAPIDJSON_DIAG_OFF(4512) // assignment operator could not be generated RAPIDJSON_NAMESPACE_BEGIN namespace internal { +/////////////////////////////////////////////////////////////////////////////// +// DecodedStream + +template +class DecodedStream { +public: + DecodedStream(SourceStream& ss) : ss_(ss), codepoint_() { Decode(); } + unsigned Peek() { return codepoint_; } + unsigned Take() { + unsigned c = codepoint_; + if (c) // No further decoding when '\0' + Decode(); + return c; + } + +private: + void Decode() { + if (!Encoding::Decode(ss_, &codepoint_)) + codepoint_ = 0; + } + + SourceStream& ss_; + unsigned codepoint_; +}; + /////////////////////////////////////////////////////////////////////////////// // GenericRegex static const SizeType kRegexInvalidState = ~SizeType(0); //!< Represents an invalid index in GenericRegex::State::out, out1 static const SizeType kRegexInvalidRange = ~SizeType(0); +template +class GenericRegexSearch; + //! Regular expression engine with subset of ECMAscript grammar. /*! Supported regular expression syntax: @@ -84,45 +112,25 @@ static const SizeType kRegexInvalidRange = ~SizeType(0); template class GenericRegex { public: + typedef Encoding EncodingType; typedef typename Encoding::Ch Ch; + template friend class GenericRegexSearch; GenericRegex(const Ch* source, Allocator* allocator = 0) : states_(allocator, 256), ranges_(allocator, 256), root_(kRegexInvalidState), stateCount_(), rangeCount_(), - stateSet_(), state0_(allocator, 0), state1_(allocator, 0), anchorBegin_(), anchorEnd_() + anchorBegin_(), anchorEnd_() { GenericStringStream ss(source); - DecodedStream > ds(ss); + DecodedStream, Encoding> ds(ss); Parse(ds); } - ~GenericRegex() { - Allocator::Free(stateSet_); - } + ~GenericRegex() {} bool IsValid() const { return root_ != kRegexInvalidState; } - template - bool Match(InputStream& is) const { - return SearchWithAnchoring(is, true, true); - } - - bool Match(const Ch* s) const { - GenericStringStream is(s); - return Match(is); - } - - template - bool Search(InputStream& is) const { - return SearchWithAnchoring(is, anchorBegin_, anchorEnd_); - } - - bool Search(const Ch* s) const { - GenericStringStream is(s); - return Search(is); - } - private: enum Operator { kZeroOrOne, @@ -157,28 +165,6 @@ class GenericRegex { SizeType minIndex; }; - template - class DecodedStream { - public: - DecodedStream(SourceStream& ss) : ss_(ss), codepoint_() { Decode(); } - unsigned Peek() { return codepoint_; } - unsigned Take() { - unsigned c = codepoint_; - if (c) // No further decoding when '\0' - Decode(); - return c; - } - - private: - void Decode() { - if (!Encoding::Decode(ss_, &codepoint_)) - codepoint_ = 0; - } - - SourceStream& ss_; - unsigned codepoint_; - }; - State& GetState(SizeType index) { RAPIDJSON_ASSERT(index < stateCount_); return states_.template Bottom()[index]; @@ -200,7 +186,7 @@ class GenericRegex { } template - void Parse(DecodedStream& ds) { + void Parse(DecodedStream& ds) { Allocator allocator; Stack operandStack(&allocator, 256); // Frag Stack operatorStack(&allocator, 256); // Operator @@ -327,14 +313,6 @@ class GenericRegex { printf("\n"); #endif } - - // Preallocate buffer for SearchWithAnchoring() - RAPIDJSON_ASSERT(stateSet_ == 0); - if (stateCount_ > 0) { - stateSet_ = static_cast(states_.GetAllocator().Malloc(GetStateSetSize())); - state0_.template Reserve(stateCount_); - state1_.template Reserve(stateCount_); - } } SizeType NewState(SizeType out, SizeType out1, unsigned codepoint) { @@ -483,7 +461,7 @@ class GenericRegex { } template - bool ParseUnsigned(DecodedStream& ds, unsigned* u) { + bool ParseUnsigned(DecodedStream& ds, unsigned* u) { unsigned r = 0; if (ds.Peek() < '0' || ds.Peek() > '9') return false; @@ -497,7 +475,7 @@ class GenericRegex { } template - bool ParseRange(DecodedStream& ds, SizeType* range) { + bool ParseRange(DecodedStream& ds, SizeType* range) { bool isBegin = true; bool negate = false; int step = 0; @@ -575,7 +553,7 @@ class GenericRegex { } template - bool CharacterEscape(DecodedStream& ds, unsigned* escapedCodepoint) { + bool CharacterEscape(DecodedStream& ds, unsigned* escapedCodepoint) { unsigned codepoint; switch (codepoint = ds.Take()) { case '^': @@ -603,34 +581,93 @@ class GenericRegex { } } + Stack states_; + Stack ranges_; + SizeType root_; + SizeType stateCount_; + SizeType rangeCount_; + + static const unsigned kInfinityQuantifier = ~0u; + + // For SearchWithAnchoring() + bool anchorBegin_; + bool anchorEnd_; +}; + +template +class GenericRegexSearch { +public: + typedef typename RegexType::EncodingType Encoding; + typedef typename Encoding::Ch Ch; + + GenericRegexSearch(const RegexType& regex, Allocator* allocator = 0) : + regex_(regex), allocator_(allocator), ownAllocator_(0), + state0_(allocator, 0), state1_(allocator, 0), stateSet_() + { + RAPIDJSON_ASSERT(regex_.IsValid()); + if (!allocator_) + ownAllocator_ = allocator_ = RAPIDJSON_NEW(Allocator()); + stateSet_ = static_cast(allocator_->Malloc(GetStateSetSize())); + state0_.template Reserve(regex_.stateCount_); + state1_.template Reserve(regex_.stateCount_); + } + + ~GenericRegexSearch() { + Allocator::Free(stateSet_); + RAPIDJSON_DELETE(ownAllocator_); + } + template - bool SearchWithAnchoring(InputStream& is, bool anchorBegin, bool anchorEnd) const { - RAPIDJSON_ASSERT(IsValid()); - DecodedStream ds(is); + bool Match(InputStream& is) { + return SearchWithAnchoring(is, true, true); + } + + bool Match(const Ch* s) { + GenericStringStream is(s); + return Match(is); + } + + template + bool Search(InputStream& is) { + return SearchWithAnchoring(is, regex_.anchorBegin_, regex_.anchorEnd_); + } + + bool Search(const Ch* s) { + GenericStringStream is(s); + return Search(is); + } + +private: + typedef typename RegexType::State State; + typedef typename RegexType::Range Range; + + template + bool SearchWithAnchoring(InputStream& is, bool anchorBegin, bool anchorEnd) { + DecodedStream ds(is); state0_.Clear(); Stack *current = &state0_, *next = &state1_; const size_t stateSetSize = GetStateSetSize(); std::memset(stateSet_, 0, stateSetSize); - bool matched = AddState(*current, root_); + bool matched = AddState(*current, regex_.root_); unsigned codepoint; while (!current->Empty() && (codepoint = ds.Take()) != 0) { std::memset(stateSet_, 0, stateSetSize); next->Clear(); matched = false; for (const SizeType* s = current->template Bottom(); s != current->template End(); ++s) { - const State& sr = GetState(*s); + const State& sr = regex_.GetState(*s); if (sr.codepoint == codepoint || - sr.codepoint == kAnyCharacterClass || - (sr.codepoint == kRangeCharacterClass && MatchRange(sr.rangeStart, codepoint))) + sr.codepoint == RegexType::kAnyCharacterClass || + (sr.codepoint == RegexType::kRangeCharacterClass && MatchRange(sr.rangeStart, codepoint))) { matched = AddState(*next, sr.out) || matched; if (!anchorEnd && matched) return true; } if (!anchorBegin) - AddState(*next, root_); + AddState(*next, regex_.root_); } internal::Swap(current, next); } @@ -639,14 +676,14 @@ class GenericRegex { } size_t GetStateSetSize() const { - return (stateCount_ + 31) / 32 * 4; + return (regex_.stateCount_ + 31) / 32 * 4; } // Return whether the added states is a match state - bool AddState(Stack& l, SizeType index) const { + bool AddState(Stack& l, SizeType index) { RAPIDJSON_ASSERT(index != kRegexInvalidState); - const State& s = GetState(index); + const State& s = regex_.GetState(index); if (s.out1 != kRegexInvalidState) { // Split bool matched = AddState(l, s.out); return AddState(l, s.out1) || matched; @@ -659,33 +696,26 @@ class GenericRegex { } bool MatchRange(SizeType rangeIndex, unsigned codepoint) const { - bool yes = (GetRange(rangeIndex).start & kRangeNegationFlag) == 0; + bool yes = (regex_.GetRange(rangeIndex).start & RegexType::kRangeNegationFlag) == 0; while (rangeIndex != kRegexInvalidRange) { - const Range& r = GetRange(rangeIndex); - if (codepoint >= (r.start & ~kRangeNegationFlag) && codepoint <= r.end) + const Range& r = regex_.GetRange(rangeIndex); + if (codepoint >= (r.start & ~RegexType::kRangeNegationFlag) && codepoint <= r.end) return yes; rangeIndex = r.next; } return !yes; } - Stack states_; - Stack ranges_; - SizeType root_; - SizeType stateCount_; - SizeType rangeCount_; - - static const unsigned kInfinityQuantifier = ~0u; - - // For SearchWithAnchoring() - uint32_t* stateSet_; // allocated by states_.GetAllocator() - mutable Stack state0_; - mutable Stack state1_; - bool anchorBegin_; - bool anchorEnd_; + const RegexType& regex_; + Allocator* allocator_; + Allocator* ownAllocator_; + Stack state0_; + Stack state1_; + uint32_t* stateSet_; }; typedef GenericRegex > Regex; +typedef GenericRegexSearch RegexSearch; } // namespace internal RAPIDJSON_NAMESPACE_END diff --git a/include/rapidjson/schema.h b/include/rapidjson/schema.h index 8497d3031..288b93d0f 100644 --- a/include/rapidjson/schema.h +++ b/include/rapidjson/schema.h @@ -1011,7 +1011,8 @@ class Schema { } static bool IsPatternMatch(const RegexType* pattern, const Ch *str, SizeType) { - return pattern->Search(str); + GenericRegexSearch rs(*pattern); + return rs.Search(str); } #elif RAPIDJSON_SCHEMA_USE_STDREGEX template diff --git a/test/unittest/regextest.cpp b/test/unittest/regextest.cpp index 4fb5b222e..cdd363018 100644 --- a/test/unittest/regextest.cpp +++ b/test/unittest/regextest.cpp @@ -20,523 +20,569 @@ using namespace rapidjson::internal; TEST(Regex, Single) { Regex re("a"); ASSERT_TRUE(re.IsValid()); - EXPECT_TRUE(re.Match("a")); - EXPECT_FALSE(re.Match("")); - EXPECT_FALSE(re.Match("b")); + RegexSearch rs(re); + EXPECT_TRUE(rs.Match("a")); + EXPECT_FALSE(rs.Match("")); + EXPECT_FALSE(rs.Match("b")); } TEST(Regex, Concatenation) { Regex re("abc"); ASSERT_TRUE(re.IsValid()); - EXPECT_TRUE(re.Match("abc")); - EXPECT_FALSE(re.Match("")); - EXPECT_FALSE(re.Match("a")); - EXPECT_FALSE(re.Match("b")); - EXPECT_FALSE(re.Match("ab")); - EXPECT_FALSE(re.Match("abcd")); + RegexSearch rs(re); + EXPECT_TRUE(rs.Match("abc")); + EXPECT_FALSE(rs.Match("")); + EXPECT_FALSE(rs.Match("a")); + EXPECT_FALSE(rs.Match("b")); + EXPECT_FALSE(rs.Match("ab")); + EXPECT_FALSE(rs.Match("abcd")); } TEST(Regex, Alternation1) { Regex re("abab|abbb"); ASSERT_TRUE(re.IsValid()); - EXPECT_TRUE(re.Match("abab")); - EXPECT_TRUE(re.Match("abbb")); - EXPECT_FALSE(re.Match("")); - EXPECT_FALSE(re.Match("ab")); - EXPECT_FALSE(re.Match("ababa")); - EXPECT_FALSE(re.Match("abb")); - EXPECT_FALSE(re.Match("abbbb")); + RegexSearch rs(re); + EXPECT_TRUE(rs.Match("abab")); + EXPECT_TRUE(rs.Match("abbb")); + EXPECT_FALSE(rs.Match("")); + EXPECT_FALSE(rs.Match("ab")); + EXPECT_FALSE(rs.Match("ababa")); + EXPECT_FALSE(rs.Match("abb")); + EXPECT_FALSE(rs.Match("abbbb")); } TEST(Regex, Alternation2) { Regex re("a|b|c"); ASSERT_TRUE(re.IsValid()); - EXPECT_TRUE(re.Match("a")); - EXPECT_TRUE(re.Match("b")); - EXPECT_TRUE(re.Match("c")); - EXPECT_FALSE(re.Match("")); - EXPECT_FALSE(re.Match("aa")); - EXPECT_FALSE(re.Match("ab")); + RegexSearch rs(re); + EXPECT_TRUE(rs.Match("a")); + EXPECT_TRUE(rs.Match("b")); + EXPECT_TRUE(rs.Match("c")); + EXPECT_FALSE(rs.Match("")); + EXPECT_FALSE(rs.Match("aa")); + EXPECT_FALSE(rs.Match("ab")); } TEST(Regex, Parenthesis1) { Regex re("(ab)c"); ASSERT_TRUE(re.IsValid()); - EXPECT_TRUE(re.Match("abc")); - EXPECT_FALSE(re.Match("")); - EXPECT_FALSE(re.Match("a")); - EXPECT_FALSE(re.Match("b")); - EXPECT_FALSE(re.Match("ab")); - EXPECT_FALSE(re.Match("abcd")); + RegexSearch rs(re); + EXPECT_TRUE(rs.Match("abc")); + EXPECT_FALSE(rs.Match("")); + EXPECT_FALSE(rs.Match("a")); + EXPECT_FALSE(rs.Match("b")); + EXPECT_FALSE(rs.Match("ab")); + EXPECT_FALSE(rs.Match("abcd")); } TEST(Regex, Parenthesis2) { Regex re("a(bc)"); ASSERT_TRUE(re.IsValid()); - EXPECT_TRUE(re.Match("abc")); - EXPECT_FALSE(re.Match("")); - EXPECT_FALSE(re.Match("a")); - EXPECT_FALSE(re.Match("b")); - EXPECT_FALSE(re.Match("ab")); - EXPECT_FALSE(re.Match("abcd")); + RegexSearch rs(re); + EXPECT_TRUE(rs.Match("abc")); + EXPECT_FALSE(rs.Match("")); + EXPECT_FALSE(rs.Match("a")); + EXPECT_FALSE(rs.Match("b")); + EXPECT_FALSE(rs.Match("ab")); + EXPECT_FALSE(rs.Match("abcd")); } TEST(Regex, Parenthesis3) { Regex re("(a|b)(c|d)"); ASSERT_TRUE(re.IsValid()); - EXPECT_TRUE(re.Match("ac")); - EXPECT_TRUE(re.Match("ad")); - EXPECT_TRUE(re.Match("bc")); - EXPECT_TRUE(re.Match("bd")); - EXPECT_FALSE(re.Match("")); - EXPECT_FALSE(re.Match("ab")); - EXPECT_FALSE(re.Match("cd")); + RegexSearch rs(re); + EXPECT_TRUE(rs.Match("ac")); + EXPECT_TRUE(rs.Match("ad")); + EXPECT_TRUE(rs.Match("bc")); + EXPECT_TRUE(rs.Match("bd")); + EXPECT_FALSE(rs.Match("")); + EXPECT_FALSE(rs.Match("ab")); + EXPECT_FALSE(rs.Match("cd")); } TEST(Regex, ZeroOrOne1) { Regex re("a?"); ASSERT_TRUE(re.IsValid()); - EXPECT_TRUE(re.Match("")); - EXPECT_TRUE(re.Match("a")); - EXPECT_FALSE(re.Match("aa")); + RegexSearch rs(re); + EXPECT_TRUE(rs.Match("")); + EXPECT_TRUE(rs.Match("a")); + EXPECT_FALSE(rs.Match("aa")); } TEST(Regex, ZeroOrOne2) { Regex re("a?b"); ASSERT_TRUE(re.IsValid()); - EXPECT_TRUE(re.Match("b")); - EXPECT_TRUE(re.Match("ab")); - EXPECT_FALSE(re.Match("a")); - EXPECT_FALSE(re.Match("aa")); - EXPECT_FALSE(re.Match("bb")); - EXPECT_FALSE(re.Match("ba")); + RegexSearch rs(re); + EXPECT_TRUE(rs.Match("b")); + EXPECT_TRUE(rs.Match("ab")); + EXPECT_FALSE(rs.Match("a")); + EXPECT_FALSE(rs.Match("aa")); + EXPECT_FALSE(rs.Match("bb")); + EXPECT_FALSE(rs.Match("ba")); } TEST(Regex, ZeroOrOne3) { Regex re("ab?"); ASSERT_TRUE(re.IsValid()); - EXPECT_TRUE(re.Match("a")); - EXPECT_TRUE(re.Match("ab")); - EXPECT_FALSE(re.Match("b")); - EXPECT_FALSE(re.Match("aa")); - EXPECT_FALSE(re.Match("bb")); - EXPECT_FALSE(re.Match("ba")); + RegexSearch rs(re); + EXPECT_TRUE(rs.Match("a")); + EXPECT_TRUE(rs.Match("ab")); + EXPECT_FALSE(rs.Match("b")); + EXPECT_FALSE(rs.Match("aa")); + EXPECT_FALSE(rs.Match("bb")); + EXPECT_FALSE(rs.Match("ba")); } TEST(Regex, ZeroOrOne4) { Regex re("a?b?"); ASSERT_TRUE(re.IsValid()); - EXPECT_TRUE(re.Match("")); - EXPECT_TRUE(re.Match("a")); - EXPECT_TRUE(re.Match("b")); - EXPECT_TRUE(re.Match("ab")); - EXPECT_FALSE(re.Match("aa")); - EXPECT_FALSE(re.Match("bb")); - EXPECT_FALSE(re.Match("ba")); - EXPECT_FALSE(re.Match("abc")); + RegexSearch rs(re); + EXPECT_TRUE(rs.Match("")); + EXPECT_TRUE(rs.Match("a")); + EXPECT_TRUE(rs.Match("b")); + EXPECT_TRUE(rs.Match("ab")); + EXPECT_FALSE(rs.Match("aa")); + EXPECT_FALSE(rs.Match("bb")); + EXPECT_FALSE(rs.Match("ba")); + EXPECT_FALSE(rs.Match("abc")); } TEST(Regex, ZeroOrOne5) { Regex re("a(ab)?b"); ASSERT_TRUE(re.IsValid()); - EXPECT_TRUE(re.Match("ab")); - EXPECT_TRUE(re.Match("aabb")); - EXPECT_FALSE(re.Match("aab")); - EXPECT_FALSE(re.Match("abb")); + RegexSearch rs(re); + EXPECT_TRUE(rs.Match("ab")); + EXPECT_TRUE(rs.Match("aabb")); + EXPECT_FALSE(rs.Match("aab")); + EXPECT_FALSE(rs.Match("abb")); } TEST(Regex, ZeroOrMore1) { Regex re("a*"); ASSERT_TRUE(re.IsValid()); - EXPECT_TRUE(re.Match("")); - EXPECT_TRUE(re.Match("a")); - EXPECT_TRUE(re.Match("aa")); - EXPECT_FALSE(re.Match("b")); - EXPECT_FALSE(re.Match("ab")); + RegexSearch rs(re); + EXPECT_TRUE(rs.Match("")); + EXPECT_TRUE(rs.Match("a")); + EXPECT_TRUE(rs.Match("aa")); + EXPECT_FALSE(rs.Match("b")); + EXPECT_FALSE(rs.Match("ab")); } TEST(Regex, ZeroOrMore2) { Regex re("a*b"); ASSERT_TRUE(re.IsValid()); - EXPECT_TRUE(re.Match("b")); - EXPECT_TRUE(re.Match("ab")); - EXPECT_TRUE(re.Match("aab")); - EXPECT_FALSE(re.Match("")); - EXPECT_FALSE(re.Match("bb")); + RegexSearch rs(re); + EXPECT_TRUE(rs.Match("b")); + EXPECT_TRUE(rs.Match("ab")); + EXPECT_TRUE(rs.Match("aab")); + EXPECT_FALSE(rs.Match("")); + EXPECT_FALSE(rs.Match("bb")); } TEST(Regex, ZeroOrMore3) { Regex re("a*b*"); ASSERT_TRUE(re.IsValid()); - EXPECT_TRUE(re.Match("")); - EXPECT_TRUE(re.Match("a")); - EXPECT_TRUE(re.Match("aa")); - EXPECT_TRUE(re.Match("b")); - EXPECT_TRUE(re.Match("bb")); - EXPECT_TRUE(re.Match("ab")); - EXPECT_TRUE(re.Match("aabb")); - EXPECT_FALSE(re.Match("ba")); + RegexSearch rs(re); + EXPECT_TRUE(rs.Match("")); + EXPECT_TRUE(rs.Match("a")); + EXPECT_TRUE(rs.Match("aa")); + EXPECT_TRUE(rs.Match("b")); + EXPECT_TRUE(rs.Match("bb")); + EXPECT_TRUE(rs.Match("ab")); + EXPECT_TRUE(rs.Match("aabb")); + EXPECT_FALSE(rs.Match("ba")); } TEST(Regex, ZeroOrMore4) { Regex re("a(ab)*b"); ASSERT_TRUE(re.IsValid()); - EXPECT_TRUE(re.Match("ab")); - EXPECT_TRUE(re.Match("aabb")); - EXPECT_TRUE(re.Match("aababb")); - EXPECT_FALSE(re.Match("")); - EXPECT_FALSE(re.Match("aa")); + RegexSearch rs(re); + EXPECT_TRUE(rs.Match("ab")); + EXPECT_TRUE(rs.Match("aabb")); + EXPECT_TRUE(rs.Match("aababb")); + EXPECT_FALSE(rs.Match("")); + EXPECT_FALSE(rs.Match("aa")); } TEST(Regex, OneOrMore1) { Regex re("a+"); ASSERT_TRUE(re.IsValid()); - EXPECT_TRUE(re.Match("a")); - EXPECT_TRUE(re.Match("aa")); - EXPECT_FALSE(re.Match("")); - EXPECT_FALSE(re.Match("b")); - EXPECT_FALSE(re.Match("ab")); + RegexSearch rs(re); + EXPECT_TRUE(rs.Match("a")); + EXPECT_TRUE(rs.Match("aa")); + EXPECT_FALSE(rs.Match("")); + EXPECT_FALSE(rs.Match("b")); + EXPECT_FALSE(rs.Match("ab")); } TEST(Regex, OneOrMore2) { Regex re("a+b"); ASSERT_TRUE(re.IsValid()); - EXPECT_TRUE(re.Match("ab")); - EXPECT_TRUE(re.Match("aab")); - EXPECT_FALSE(re.Match("")); - EXPECT_FALSE(re.Match("b")); + RegexSearch rs(re); + EXPECT_TRUE(rs.Match("ab")); + EXPECT_TRUE(rs.Match("aab")); + EXPECT_FALSE(rs.Match("")); + EXPECT_FALSE(rs.Match("b")); } TEST(Regex, OneOrMore3) { Regex re("a+b+"); ASSERT_TRUE(re.IsValid()); - EXPECT_TRUE(re.Match("ab")); - EXPECT_TRUE(re.Match("aab")); - EXPECT_TRUE(re.Match("abb")); - EXPECT_TRUE(re.Match("aabb")); - EXPECT_FALSE(re.Match("")); - EXPECT_FALSE(re.Match("b")); - EXPECT_FALSE(re.Match("ba")); + RegexSearch rs(re); + EXPECT_TRUE(rs.Match("ab")); + EXPECT_TRUE(rs.Match("aab")); + EXPECT_TRUE(rs.Match("abb")); + EXPECT_TRUE(rs.Match("aabb")); + EXPECT_FALSE(rs.Match("")); + EXPECT_FALSE(rs.Match("b")); + EXPECT_FALSE(rs.Match("ba")); } TEST(Regex, OneOrMore4) { Regex re("a(ab)+b"); ASSERT_TRUE(re.IsValid()); - EXPECT_TRUE(re.Match("aabb")); - EXPECT_TRUE(re.Match("aababb")); - EXPECT_FALSE(re.Match("")); - EXPECT_FALSE(re.Match("ab")); + RegexSearch rs(re); + EXPECT_TRUE(rs.Match("aabb")); + EXPECT_TRUE(rs.Match("aababb")); + EXPECT_FALSE(rs.Match("")); + EXPECT_FALSE(rs.Match("ab")); } TEST(Regex, QuantifierExact1) { Regex re("ab{3}c"); ASSERT_TRUE(re.IsValid()); - EXPECT_TRUE(re.Match("abbbc")); - EXPECT_FALSE(re.Match("ac")); - EXPECT_FALSE(re.Match("abc")); - EXPECT_FALSE(re.Match("abbc")); - EXPECT_FALSE(re.Match("abbbbc")); + RegexSearch rs(re); + EXPECT_TRUE(rs.Match("abbbc")); + EXPECT_FALSE(rs.Match("ac")); + EXPECT_FALSE(rs.Match("abc")); + EXPECT_FALSE(rs.Match("abbc")); + EXPECT_FALSE(rs.Match("abbbbc")); } TEST(Regex, QuantifierExact2) { Regex re("a(bc){3}d"); ASSERT_TRUE(re.IsValid()); - EXPECT_TRUE(re.Match("abcbcbcd")); - EXPECT_FALSE(re.Match("ad")); - EXPECT_FALSE(re.Match("abcd")); - EXPECT_FALSE(re.Match("abcbcd")); - EXPECT_FALSE(re.Match("abcbcbcbcd")); + RegexSearch rs(re); + EXPECT_TRUE(rs.Match("abcbcbcd")); + EXPECT_FALSE(rs.Match("ad")); + EXPECT_FALSE(rs.Match("abcd")); + EXPECT_FALSE(rs.Match("abcbcd")); + EXPECT_FALSE(rs.Match("abcbcbcbcd")); } TEST(Regex, QuantifierExact3) { Regex re("a(b|c){3}d"); ASSERT_TRUE(re.IsValid()); - EXPECT_TRUE(re.Match("abbbd")); - EXPECT_TRUE(re.Match("acccd")); - EXPECT_TRUE(re.Match("abcbd")); - EXPECT_FALSE(re.Match("ad")); - EXPECT_FALSE(re.Match("abbd")); - EXPECT_FALSE(re.Match("accccd")); - EXPECT_FALSE(re.Match("abbbbd")); + RegexSearch rs(re); + EXPECT_TRUE(rs.Match("abbbd")); + EXPECT_TRUE(rs.Match("acccd")); + EXPECT_TRUE(rs.Match("abcbd")); + EXPECT_FALSE(rs.Match("ad")); + EXPECT_FALSE(rs.Match("abbd")); + EXPECT_FALSE(rs.Match("accccd")); + EXPECT_FALSE(rs.Match("abbbbd")); } TEST(Regex, QuantifierMin1) { Regex re("ab{3,}c"); ASSERT_TRUE(re.IsValid()); - EXPECT_TRUE(re.Match("abbbc")); - EXPECT_TRUE(re.Match("abbbbc")); - EXPECT_TRUE(re.Match("abbbbbc")); - EXPECT_FALSE(re.Match("ac")); - EXPECT_FALSE(re.Match("abc")); - EXPECT_FALSE(re.Match("abbc")); + RegexSearch rs(re); + EXPECT_TRUE(rs.Match("abbbc")); + EXPECT_TRUE(rs.Match("abbbbc")); + EXPECT_TRUE(rs.Match("abbbbbc")); + EXPECT_FALSE(rs.Match("ac")); + EXPECT_FALSE(rs.Match("abc")); + EXPECT_FALSE(rs.Match("abbc")); } TEST(Regex, QuantifierMin2) { Regex re("a(bc){3,}d"); ASSERT_TRUE(re.IsValid()); - EXPECT_TRUE(re.Match("abcbcbcd")); - EXPECT_TRUE(re.Match("abcbcbcbcd")); - EXPECT_FALSE(re.Match("ad")); - EXPECT_FALSE(re.Match("abcd")); - EXPECT_FALSE(re.Match("abcbcd")); + RegexSearch rs(re); + EXPECT_TRUE(rs.Match("abcbcbcd")); + EXPECT_TRUE(rs.Match("abcbcbcbcd")); + EXPECT_FALSE(rs.Match("ad")); + EXPECT_FALSE(rs.Match("abcd")); + EXPECT_FALSE(rs.Match("abcbcd")); } TEST(Regex, QuantifierMin3) { Regex re("a(b|c){3,}d"); ASSERT_TRUE(re.IsValid()); - EXPECT_TRUE(re.Match("abbbd")); - EXPECT_TRUE(re.Match("acccd")); - EXPECT_TRUE(re.Match("abcbd")); - EXPECT_TRUE(re.Match("accccd")); - EXPECT_TRUE(re.Match("abbbbd")); - EXPECT_FALSE(re.Match("ad")); - EXPECT_FALSE(re.Match("abbd")); + RegexSearch rs(re); + EXPECT_TRUE(rs.Match("abbbd")); + EXPECT_TRUE(rs.Match("acccd")); + EXPECT_TRUE(rs.Match("abcbd")); + EXPECT_TRUE(rs.Match("accccd")); + EXPECT_TRUE(rs.Match("abbbbd")); + EXPECT_FALSE(rs.Match("ad")); + EXPECT_FALSE(rs.Match("abbd")); } TEST(Regex, QuantifierMinMax1) { Regex re("ab{3,5}c"); ASSERT_TRUE(re.IsValid()); - EXPECT_TRUE(re.Match("abbbc")); - EXPECT_TRUE(re.Match("abbbbc")); - EXPECT_TRUE(re.Match("abbbbbc")); - EXPECT_FALSE(re.Match("ac")); - EXPECT_FALSE(re.Match("abc")); - EXPECT_FALSE(re.Match("abbc")); - EXPECT_FALSE(re.Match("abbbbbbc")); + RegexSearch rs(re); + EXPECT_TRUE(rs.Match("abbbc")); + EXPECT_TRUE(rs.Match("abbbbc")); + EXPECT_TRUE(rs.Match("abbbbbc")); + EXPECT_FALSE(rs.Match("ac")); + EXPECT_FALSE(rs.Match("abc")); + EXPECT_FALSE(rs.Match("abbc")); + EXPECT_FALSE(rs.Match("abbbbbbc")); } TEST(Regex, QuantifierMinMax2) { Regex re("a(bc){3,5}d"); ASSERT_TRUE(re.IsValid()); - EXPECT_TRUE(re.Match("abcbcbcd")); - EXPECT_TRUE(re.Match("abcbcbcbcd")); - EXPECT_TRUE(re.Match("abcbcbcbcbcd")); - EXPECT_FALSE(re.Match("ad")); - EXPECT_FALSE(re.Match("abcd")); - EXPECT_FALSE(re.Match("abcbcd")); - EXPECT_FALSE(re.Match("abcbcbcbcbcbcd")); + RegexSearch rs(re); + EXPECT_TRUE(rs.Match("abcbcbcd")); + EXPECT_TRUE(rs.Match("abcbcbcbcd")); + EXPECT_TRUE(rs.Match("abcbcbcbcbcd")); + EXPECT_FALSE(rs.Match("ad")); + EXPECT_FALSE(rs.Match("abcd")); + EXPECT_FALSE(rs.Match("abcbcd")); + EXPECT_FALSE(rs.Match("abcbcbcbcbcbcd")); } TEST(Regex, QuantifierMinMax3) { Regex re("a(b|c){3,5}d"); ASSERT_TRUE(re.IsValid()); - EXPECT_TRUE(re.Match("abbbd")); - EXPECT_TRUE(re.Match("acccd")); - EXPECT_TRUE(re.Match("abcbd")); - EXPECT_TRUE(re.Match("accccd")); - EXPECT_TRUE(re.Match("abbbbd")); - EXPECT_TRUE(re.Match("acccccd")); - EXPECT_TRUE(re.Match("abbbbbd")); - EXPECT_FALSE(re.Match("ad")); - EXPECT_FALSE(re.Match("abbd")); - EXPECT_FALSE(re.Match("accccccd")); - EXPECT_FALSE(re.Match("abbbbbbd")); + RegexSearch rs(re); + EXPECT_TRUE(rs.Match("abbbd")); + EXPECT_TRUE(rs.Match("acccd")); + EXPECT_TRUE(rs.Match("abcbd")); + EXPECT_TRUE(rs.Match("accccd")); + EXPECT_TRUE(rs.Match("abbbbd")); + EXPECT_TRUE(rs.Match("acccccd")); + EXPECT_TRUE(rs.Match("abbbbbd")); + EXPECT_FALSE(rs.Match("ad")); + EXPECT_FALSE(rs.Match("abbd")); + EXPECT_FALSE(rs.Match("accccccd")); + EXPECT_FALSE(rs.Match("abbbbbbd")); } // Issue538 TEST(Regex, QuantifierMinMax4) { Regex re("a(b|c){0,3}d"); ASSERT_TRUE(re.IsValid()); - EXPECT_TRUE(re.Match("ad")); - EXPECT_TRUE(re.Match("abd")); - EXPECT_TRUE(re.Match("acd")); - EXPECT_TRUE(re.Match("abbd")); - EXPECT_TRUE(re.Match("accd")); - EXPECT_TRUE(re.Match("abcd")); - EXPECT_TRUE(re.Match("abbbd")); - EXPECT_TRUE(re.Match("acccd")); - EXPECT_FALSE(re.Match("abbbbd")); - EXPECT_FALSE(re.Match("add")); - EXPECT_FALSE(re.Match("accccd")); - EXPECT_FALSE(re.Match("abcbcd")); + RegexSearch rs(re); + EXPECT_TRUE(rs.Match("ad")); + EXPECT_TRUE(rs.Match("abd")); + EXPECT_TRUE(rs.Match("acd")); + EXPECT_TRUE(rs.Match("abbd")); + EXPECT_TRUE(rs.Match("accd")); + EXPECT_TRUE(rs.Match("abcd")); + EXPECT_TRUE(rs.Match("abbbd")); + EXPECT_TRUE(rs.Match("acccd")); + EXPECT_FALSE(rs.Match("abbbbd")); + EXPECT_FALSE(rs.Match("add")); + EXPECT_FALSE(rs.Match("accccd")); + EXPECT_FALSE(rs.Match("abcbcd")); } // Issue538 TEST(Regex, QuantifierMinMax5) { Regex re("a(b|c){0,}d"); ASSERT_TRUE(re.IsValid()); - EXPECT_TRUE(re.Match("ad")); - EXPECT_TRUE(re.Match("abd")); - EXPECT_TRUE(re.Match("acd")); - EXPECT_TRUE(re.Match("abbd")); - EXPECT_TRUE(re.Match("accd")); - EXPECT_TRUE(re.Match("abcd")); - EXPECT_TRUE(re.Match("abbbd")); - EXPECT_TRUE(re.Match("acccd")); - EXPECT_TRUE(re.Match("abbbbd")); - EXPECT_TRUE(re.Match("accccd")); - EXPECT_TRUE(re.Match("abcbcd")); - EXPECT_FALSE(re.Match("add")); - EXPECT_FALSE(re.Match("aad")); + RegexSearch rs(re); + EXPECT_TRUE(rs.Match("ad")); + EXPECT_TRUE(rs.Match("abd")); + EXPECT_TRUE(rs.Match("acd")); + EXPECT_TRUE(rs.Match("abbd")); + EXPECT_TRUE(rs.Match("accd")); + EXPECT_TRUE(rs.Match("abcd")); + EXPECT_TRUE(rs.Match("abbbd")); + EXPECT_TRUE(rs.Match("acccd")); + EXPECT_TRUE(rs.Match("abbbbd")); + EXPECT_TRUE(rs.Match("accccd")); + EXPECT_TRUE(rs.Match("abcbcd")); + EXPECT_FALSE(rs.Match("add")); + EXPECT_FALSE(rs.Match("aad")); } -#define EURO "\xE2\x82\xAC" // "\xE2\x82\xAC" is UTF-8 sequence of Euro sign U+20AC +#define EURO "\xE2\x82\xAC" // "\xE2\x82\xAC" is UTF-8 rsquence of Euro sign U+20AC TEST(Regex, Unicode) { Regex re("a" EURO "+b"); ASSERT_TRUE(re.IsValid()); - EXPECT_TRUE(re.Match("a" EURO "b")); - EXPECT_TRUE(re.Match("a" EURO EURO "b")); - EXPECT_FALSE(re.Match("a?b")); - EXPECT_FALSE(re.Match("a" EURO "\xAC" "b")); // unaware of UTF-8 will match + RegexSearch rs(re); + EXPECT_TRUE(rs.Match("a" EURO "b")); + EXPECT_TRUE(rs.Match("a" EURO EURO "b")); + EXPECT_FALSE(rs.Match("a?b")); + EXPECT_FALSE(rs.Match("a" EURO "\xAC" "b")); // unaware of UTF-8 will match } TEST(Regex, AnyCharacter) { Regex re("."); ASSERT_TRUE(re.IsValid()); - EXPECT_TRUE(re.Match("a")); - EXPECT_TRUE(re.Match("b")); - EXPECT_TRUE(re.Match(EURO)); - EXPECT_FALSE(re.Match("")); - EXPECT_FALSE(re.Match("aa")); + RegexSearch rs(re); + EXPECT_TRUE(rs.Match("a")); + EXPECT_TRUE(rs.Match("b")); + EXPECT_TRUE(rs.Match(EURO)); + EXPECT_FALSE(rs.Match("")); + EXPECT_FALSE(rs.Match("aa")); } TEST(Regex, CharacterRange1) { Regex re("[abc]"); ASSERT_TRUE(re.IsValid()); - EXPECT_TRUE(re.Match("a")); - EXPECT_TRUE(re.Match("b")); - EXPECT_TRUE(re.Match("c")); - EXPECT_FALSE(re.Match("")); - EXPECT_FALSE(re.Match("`")); - EXPECT_FALSE(re.Match("d")); - EXPECT_FALSE(re.Match("aa")); + RegexSearch rs(re); + EXPECT_TRUE(rs.Match("a")); + EXPECT_TRUE(rs.Match("b")); + EXPECT_TRUE(rs.Match("c")); + EXPECT_FALSE(rs.Match("")); + EXPECT_FALSE(rs.Match("`")); + EXPECT_FALSE(rs.Match("d")); + EXPECT_FALSE(rs.Match("aa")); } TEST(Regex, CharacterRange2) { Regex re("[^abc]"); ASSERT_TRUE(re.IsValid()); - EXPECT_TRUE(re.Match("`")); - EXPECT_TRUE(re.Match("d")); - EXPECT_FALSE(re.Match("a")); - EXPECT_FALSE(re.Match("b")); - EXPECT_FALSE(re.Match("c")); - EXPECT_FALSE(re.Match("")); - EXPECT_FALSE(re.Match("aa")); + RegexSearch rs(re); + EXPECT_TRUE(rs.Match("`")); + EXPECT_TRUE(rs.Match("d")); + EXPECT_FALSE(rs.Match("a")); + EXPECT_FALSE(rs.Match("b")); + EXPECT_FALSE(rs.Match("c")); + EXPECT_FALSE(rs.Match("")); + EXPECT_FALSE(rs.Match("aa")); } TEST(Regex, CharacterRange3) { Regex re("[a-c]"); ASSERT_TRUE(re.IsValid()); - EXPECT_TRUE(re.Match("a")); - EXPECT_TRUE(re.Match("b")); - EXPECT_TRUE(re.Match("c")); - EXPECT_FALSE(re.Match("")); - EXPECT_FALSE(re.Match("`")); - EXPECT_FALSE(re.Match("d")); - EXPECT_FALSE(re.Match("aa")); + RegexSearch rs(re); + EXPECT_TRUE(rs.Match("a")); + EXPECT_TRUE(rs.Match("b")); + EXPECT_TRUE(rs.Match("c")); + EXPECT_FALSE(rs.Match("")); + EXPECT_FALSE(rs.Match("`")); + EXPECT_FALSE(rs.Match("d")); + EXPECT_FALSE(rs.Match("aa")); } TEST(Regex, CharacterRange4) { Regex re("[^a-c]"); ASSERT_TRUE(re.IsValid()); - EXPECT_TRUE(re.Match("`")); - EXPECT_TRUE(re.Match("d")); - EXPECT_FALSE(re.Match("a")); - EXPECT_FALSE(re.Match("b")); - EXPECT_FALSE(re.Match("c")); - EXPECT_FALSE(re.Match("")); - EXPECT_FALSE(re.Match("aa")); + RegexSearch rs(re); + EXPECT_TRUE(rs.Match("`")); + EXPECT_TRUE(rs.Match("d")); + EXPECT_FALSE(rs.Match("a")); + EXPECT_FALSE(rs.Match("b")); + EXPECT_FALSE(rs.Match("c")); + EXPECT_FALSE(rs.Match("")); + EXPECT_FALSE(rs.Match("aa")); } TEST(Regex, CharacterRange5) { Regex re("[-]"); ASSERT_TRUE(re.IsValid()); - EXPECT_TRUE(re.Match("-")); - EXPECT_FALSE(re.Match("")); - EXPECT_FALSE(re.Match("a")); + RegexSearch rs(re); + EXPECT_TRUE(rs.Match("-")); + EXPECT_FALSE(rs.Match("")); + EXPECT_FALSE(rs.Match("a")); } TEST(Regex, CharacterRange6) { Regex re("[a-]"); ASSERT_TRUE(re.IsValid()); - EXPECT_TRUE(re.Match("a")); - EXPECT_TRUE(re.Match("-")); - EXPECT_FALSE(re.Match("")); - EXPECT_FALSE(re.Match("`")); - EXPECT_FALSE(re.Match("b")); + RegexSearch rs(re); + EXPECT_TRUE(rs.Match("a")); + EXPECT_TRUE(rs.Match("-")); + EXPECT_FALSE(rs.Match("")); + EXPECT_FALSE(rs.Match("`")); + EXPECT_FALSE(rs.Match("b")); } TEST(Regex, CharacterRange7) { Regex re("[-a]"); ASSERT_TRUE(re.IsValid()); - EXPECT_TRUE(re.Match("a")); - EXPECT_TRUE(re.Match("-")); - EXPECT_FALSE(re.Match("")); - EXPECT_FALSE(re.Match("`")); - EXPECT_FALSE(re.Match("b")); + RegexSearch rs(re); + EXPECT_TRUE(rs.Match("a")); + EXPECT_TRUE(rs.Match("-")); + EXPECT_FALSE(rs.Match("")); + EXPECT_FALSE(rs.Match("`")); + EXPECT_FALSE(rs.Match("b")); } TEST(Regex, CharacterRange8) { Regex re("[a-zA-Z0-9]*"); ASSERT_TRUE(re.IsValid()); - EXPECT_TRUE(re.Match("Milo")); - EXPECT_TRUE(re.Match("MT19937")); - EXPECT_TRUE(re.Match("43")); - EXPECT_FALSE(re.Match("a_b")); - EXPECT_FALSE(re.Match("!")); + RegexSearch rs(re); + EXPECT_TRUE(rs.Match("Milo")); + EXPECT_TRUE(rs.Match("MT19937")); + EXPECT_TRUE(rs.Match("43")); + EXPECT_FALSE(rs.Match("a_b")); + EXPECT_FALSE(rs.Match("!")); } TEST(Regex, Search) { Regex re("abc"); ASSERT_TRUE(re.IsValid()); - EXPECT_TRUE(re.Search("abc")); - EXPECT_TRUE(re.Search("_abc")); - EXPECT_TRUE(re.Search("abc_")); - EXPECT_TRUE(re.Search("_abc_")); - EXPECT_TRUE(re.Search("__abc__")); - EXPECT_TRUE(re.Search("abcabc")); - EXPECT_FALSE(re.Search("a")); - EXPECT_FALSE(re.Search("ab")); - EXPECT_FALSE(re.Search("bc")); - EXPECT_FALSE(re.Search("cba")); + RegexSearch rs(re); + EXPECT_TRUE(rs.Search("abc")); + EXPECT_TRUE(rs.Search("_abc")); + EXPECT_TRUE(rs.Search("abc_")); + EXPECT_TRUE(rs.Search("_abc_")); + EXPECT_TRUE(rs.Search("__abc__")); + EXPECT_TRUE(rs.Search("abcabc")); + EXPECT_FALSE(rs.Search("a")); + EXPECT_FALSE(rs.Search("ab")); + EXPECT_FALSE(rs.Search("bc")); + EXPECT_FALSE(rs.Search("cba")); } TEST(Regex, Search_BeginAnchor) { Regex re("^abc"); ASSERT_TRUE(re.IsValid()); - EXPECT_TRUE(re.Search("abc")); - EXPECT_TRUE(re.Search("abc_")); - EXPECT_TRUE(re.Search("abcabc")); - EXPECT_FALSE(re.Search("_abc")); - EXPECT_FALSE(re.Search("_abc_")); - EXPECT_FALSE(re.Search("a")); - EXPECT_FALSE(re.Search("ab")); - EXPECT_FALSE(re.Search("bc")); - EXPECT_FALSE(re.Search("cba")); + RegexSearch rs(re); + EXPECT_TRUE(rs.Search("abc")); + EXPECT_TRUE(rs.Search("abc_")); + EXPECT_TRUE(rs.Search("abcabc")); + EXPECT_FALSE(rs.Search("_abc")); + EXPECT_FALSE(rs.Search("_abc_")); + EXPECT_FALSE(rs.Search("a")); + EXPECT_FALSE(rs.Search("ab")); + EXPECT_FALSE(rs.Search("bc")); + EXPECT_FALSE(rs.Search("cba")); } TEST(Regex, Search_EndAnchor) { Regex re("abc$"); ASSERT_TRUE(re.IsValid()); - EXPECT_TRUE(re.Search("abc")); - EXPECT_TRUE(re.Search("_abc")); - EXPECT_TRUE(re.Search("abcabc")); - EXPECT_FALSE(re.Search("abc_")); - EXPECT_FALSE(re.Search("_abc_")); - EXPECT_FALSE(re.Search("a")); - EXPECT_FALSE(re.Search("ab")); - EXPECT_FALSE(re.Search("bc")); - EXPECT_FALSE(re.Search("cba")); + RegexSearch rs(re); + EXPECT_TRUE(rs.Search("abc")); + EXPECT_TRUE(rs.Search("_abc")); + EXPECT_TRUE(rs.Search("abcabc")); + EXPECT_FALSE(rs.Search("abc_")); + EXPECT_FALSE(rs.Search("_abc_")); + EXPECT_FALSE(rs.Search("a")); + EXPECT_FALSE(rs.Search("ab")); + EXPECT_FALSE(rs.Search("bc")); + EXPECT_FALSE(rs.Search("cba")); } TEST(Regex, Search_BothAnchor) { Regex re("^abc$"); ASSERT_TRUE(re.IsValid()); - EXPECT_TRUE(re.Search("abc")); - EXPECT_FALSE(re.Search("")); - EXPECT_FALSE(re.Search("a")); - EXPECT_FALSE(re.Search("b")); - EXPECT_FALSE(re.Search("ab")); - EXPECT_FALSE(re.Search("abcd")); + RegexSearch rs(re); + EXPECT_TRUE(rs.Search("abc")); + EXPECT_FALSE(rs.Search("")); + EXPECT_FALSE(rs.Search("a")); + EXPECT_FALSE(rs.Search("b")); + EXPECT_FALSE(rs.Search("ab")); + EXPECT_FALSE(rs.Search("abcd")); } TEST(Regex, Escape) { const char* s = "\\^\\$\\|\\(\\)\\?\\*\\+\\.\\[\\]\\{\\}\\\\\\f\\n\\r\\t\\v[\\b][\\[][\\]]"; Regex re(s); ASSERT_TRUE(re.IsValid()); - EXPECT_TRUE(re.Match("^$|()?*+.[]{}\\\x0C\n\r\t\x0B\b[]")); - EXPECT_FALSE(re.Match(s)); // Not escaping + RegexSearch rs(re); + EXPECT_TRUE(rs.Match("^$|()?*+.[]{}\\\x0C\n\r\t\x0B\b[]")); + EXPECT_FALSE(rs.Match(s)); // Not escaping } TEST(Regex, Invalid) {