From e466f381d09692f484f8ff022273e2ac8cea0b16 Mon Sep 17 00:00:00 2001 From: Waylan Limberg Date: Wed, 3 Jan 2024 13:24:33 -0500 Subject: [PATCH] Fix handling of bogus comments. As with most implementations, we now pass through bogus comments (as defined by the HTML Spec) unaltered except that they are HTML escaped. This deviates from the reference implementation which completely ignores them. As the reference implementation seems to not have even contemplated their existence, it is not being used as a reference in this instance. Fixes #1425. --- docs/changelog.md | 1 + markdown/htmlparser.py | 9 +++++++++ tests/test_syntax/blocks/test_html_blocks.py | 16 ++++++++-------- 3 files changed, 18 insertions(+), 8 deletions(-) diff --git a/docs/changelog.md b/docs/changelog.md index 84f0bfaa..53104f60 100644 --- a/docs/changelog.md +++ b/docs/changelog.md @@ -20,6 +20,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 * Fix edge-case crash in `InlineProcessor` with `AtomicString` (#1406). * Fix edge-case crash in `codehilite` with an empty `code` tag (#1405). * Improve and expand type annotations in the code base (#1401). +* Fix handling of bogus comments (#1425). ## [3.5.1] -- 2023-10-31 diff --git a/markdown/htmlparser.py b/markdown/htmlparser.py index 5155ef69..33b918d5 100644 --- a/markdown/htmlparser.py +++ b/markdown/htmlparser.py @@ -277,6 +277,15 @@ def parse_html_declaration(self, i: int) -> int: self.handle_data(' int: + # Override the default behavior so that bogus comments get passed + # through unaltered by setting `report` to `0` (see #1425). + pos = super().parse_bogus_comment(i, report) + if pos == -1: # pragma: no cover + return -1 + self.handle_empty_tag(self.rawdata[i:pos], is_block=False) + return pos + # The rest has been copied from base class in standard lib to address #1036. # As `__startag_text` is private, all references to it must be in this subclass. 
# The last few lines of `parse_starttag` are reversed so that `handle_starttag` diff --git a/tests/test_syntax/blocks/test_html_blocks.py b/tests/test_syntax/blocks/test_html_blocks.py index 22b94983..85b0c48d 100644 --- a/tests/test_syntax/blocks/test_html_blocks.py +++ b/tests/test_syntax/blocks/test_html_blocks.py @@ -782,16 +782,16 @@ def test_raw_comment_trailing_whitespace(self): '' ) - # Note: this is a change in behavior for Python-Markdown, which does *not* match the reference - # implementation. However, it does match the HTML5 spec. Declarations must start with either - # `', - '' + '<!invalid>', + '<p>&lt;!invalid&gt;</p>' + ) + + def test_bogus_comment_endtag(self): + self.assertMarkdownRenders( + '</#invalid>', + '<p>&lt;/#invalid&gt;</p>' ) def test_raw_multiline_comment(self):