Skip to content

Commit cdec23a

Browse files
alimpfard authored and awesomekling committed
LibRegex: Treat \<ORD_CHAR> as unescaped in POSIX BRE/ERE
This is undefined according to the spec, but glibc ignores the backslash and some applications seem to prefer this behaviour (e.g. sed).
1 parent ce186dc commit cdec23a

File tree

1 file changed

+25
-2
lines changed

1 file changed

+25
-2
lines changed

Userland/Libraries/LibRegex/RegexParser.cpp

Lines changed: 25 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -205,7 +205,7 @@ ALWAYS_INLINE bool Parser::match_ordinary_characters()
205205
// NOTE: This method must not be called during bracket and repetition parsing!
206206
// FIXME: Add assertion for that?
207207
auto type = m_parser_state.current_token.type();
208-
return (type == TokenType::Char
208+
return ((type == TokenType::Char && m_parser_state.current_token.value() != "\\"sv) // NOTE: Backslash will only be matched as 'char' if it does not form a valid escape.
209209
|| type == TokenType::Comma
210210
|| type == TokenType::Slash
211211
|| type == TokenType::EqualSign
@@ -529,8 +529,23 @@ bool PosixBasicParser::parse_one_char_or_collation_element(ByteCode& bytecode, s
529529
back(2);
530530
}
531531

532+
if (match(TokenType::Char)) {
533+
auto ch = consume().value()[0];
534+
if (ch == '\\') {
535+
if (m_parser_state.regex_options.has_flag_set(AllFlags::Extra))
536+
return set_error(Error::InvalidPattern);
537+
538+
// This was \<ORD_CHAR>, the spec does not define any behaviour for this but glibc regex ignores it - and so do we.
539+
return true;
540+
}
541+
542+
bytecode.insert_bytecode_compare_values({ { CharacterCompareType::Char, (ByteCodeValueType)ch } });
543+
match_length_minimum += 1;
544+
return true;
545+
}
546+
532547
// None of these are special in BRE.
533-
if (match(TokenType::Char) || match(TokenType::Questionmark) || match(TokenType::RightParen) || match(TokenType::HyphenMinus)
548+
if (match(TokenType::Questionmark) || match(TokenType::RightParen) || match(TokenType::HyphenMinus)
534549
|| match(TokenType::Circumflex) || match(TokenType::RightCurly) || match(TokenType::Comma) || match(TokenType::Colon)
535550
|| match(TokenType::Dollar) || match(TokenType::EqualSign) || match(TokenType::LeftCurly) || match(TokenType::LeftParen)
536551
|| match(TokenType::Pipe) || match(TokenType::Slash) || match(TokenType::RightBracket) || match(TokenType::RightParen)) {
@@ -721,6 +736,14 @@ ALWAYS_INLINE bool PosixExtendedParser::parse_sub_expression(ByteCode& stack, si
721736
break;
722737
}
723738

739+
if (m_parser_state.current_token.value() == "\\"sv) {
740+
if (m_parser_state.regex_options.has_flag_set(AllFlags::Extra))
741+
return set_error(Error::InvalidPattern);
742+
743+
consume();
744+
continue;
745+
}
746+
724747
if (match_repetition_symbol())
725748
return set_error(Error::InvalidRepetitionMarker);
726749

0 commit comments

Comments
 (0)