Skip to content

Commit c33d17d

Browse files
committed
LibWeb: Fix tokenization of attributes with URL query strings in them
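<a href="/foo&amp=bar"> was being tokenized into <a href="/foo&=bar">. The spec mentions this case but I had overlooked it. The bug happened because we interpreted the "&amp" as a named character reference, even though inside an attribute value a named reference that lacks the terminating ';' and is immediately followed by '=' or an alphanumeric character must be left as literal text.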
1 parent 6400122 commit c33d17d

File tree

1 file changed

+8
-0
lines changed

1 file changed

+8
-0
lines changed

Libraries/LibWeb/Parser/HTMLTokenizer.cpp

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1458,6 +1458,14 @@ Optional<HTMLToken> HTMLTokenizer::next_token()
14581458
for (auto ch : match.value().entity)
14591459
m_temporary_buffer.append(ch);
14601460

1461+
if (consumed_as_part_of_an_attribute() && match.value().codepoints.last() != ';') {
1462+
auto next = peek_codepoint(0);
1463+
if (next.has_value() && (next.value() == '=' || isalnum(next.value()))) {
1464+
FLUSH_CODEPOINTS_CONSUMED_AS_A_CHARACTER_REFERENCE;
1465+
SWITCH_TO_RETURN_STATE;
1466+
}
1467+
}
1468+
14611469
if (consumed_as_part_of_an_attribute() && match.value().entity.ends_with(';')) {
14621470
auto next_codepoint = peek_codepoint(0);
14631471
if (next_codepoint.has_value() && next_codepoint.value() == '=') {

0 commit comments

Comments
 (0)