More tests passing

Python-Markdown · Mar 14, 2019 · 3b05be9 · 3b05be9
1 parent 36949bf
commit 3b05be9
Show file tree

Hide file tree

Showing 3 changed files with 43 additions and 43 deletions.
diff --git a/markdown/blockprocessors.py b/markdown/blockprocessors.py
@@ -278,11 +278,12 @@ def run(self, parent, blocks):
 
 class RawHtmlProcessor(BlockProcessor):
 
-    TAG_RE = re.compile(r'(^|\n)[ ]{0,3}\<(?P<tag>[^<> ]+)[^<>]*>')
+    TAG_RE = re.compile(r'(^|\n)[ ]{0,3}<([?!].*?|(?P<tag>[^<> ]+)[^<>]*)>', re.S | re.U)
 
     def test(self, parent, block):
         m = self.TAG_RE.search(block)
-        return m and self.parser.md.is_block_level(m.group('tag'))
+        # A match with no 'tag' group is a comment, declaration, or processing instruction.
+        return m and (self.parser.md.is_block_level(m.group('tag')) or not m.group('tag'))
 
     def run(self, parent, blocks):
         parser = HTMLExtractor(md=self.parser.md)
@@ -292,7 +293,6 @@ def run(self, parent, blocks):
                 break
         parser.close()
         # Insert Markdown back into blocks with raw HTML extracted.
-        print parser.cleandoc
         parts = ''.join(parser.cleandoc).split('\n\n')
         parts.reverse()
         for block in parts:

diff --git a/markdown/htmlparser.py b/markdown/htmlparser.py
@@ -98,12 +98,27 @@ def handle_data(self, data):
         else:
             self.cleandoc.append(data)
 
-    def handle_comment(self, data):
-        text = '<!--{}-->'.format(data)
+    def handle_empty_tag(self, data):
+        """ Handle empty tags (`<data>`). """
         line, col = self.getpos()
         if self.inraw:
             # Append this to the existing raw block
-            self._cache.append(text)
-        else:
+            self._cache.append(data)
+        elif col < 4:
             # Handle this as a standalone raw block
-            self.cleandoc.append(self.md.htmlStash.store(text))
+            self.cleandoc.append(self.md.htmlStash.store(data))
+        else:
+            # Starts at column 4 or later, so presumably part of an indented code block.
+            self.cleandoc.append(data)
+
+    def handle_comment(self, data):
+        self.handle_empty_tag('<!--{}-->'.format(data))
+
+    def handle_decl(self, data):
+        self.handle_empty_tag('<!{}>'.format(data))
+
+    def handle_pi(self, data):
+        self.handle_empty_tag('<?{}>'.format(data))
+
+    def handle_unknown_decl(self, data):
+        self.handle_empty_tag('<![{}]>'.format(data))
diff --git a/tests/test_syntax/blocks/test_html_blocks.py b/tests/test_syntax/blocks/test_html_blocks.py
@@ -483,22 +483,12 @@ def test_raw_comment_one_line(self):
             '<!-- *foo* -->'
         )
 
-    # TODO: Decide behavior here. Python-Markdown current outputs:
-    #
-    #   <!-- *foo* -->
-    #   <p><em>bar</em></p>
-    #
-    # But the reference implementation outputs:
-    #
-    #   <p><!-- *foo* --><em>bar</em></p>
-    #
-    # As the raw HTML is not alone on the line, the reference implementation
-    # considers it inline rather than block level. The behavior defined in
-    # the test below is from the CommonMark spec, which we don't follow.
+    # Note: this is a change in behavior for Python-Markdown but matches the reference implementation.
+    # Previous output was `<!-- *foo* -->\n<p><em>bar</em></p>`. Browsers render both the same.
     def test_raw_comment_one_line_followed_by_text(self):
         self.assertMarkdownRenders(
             '<!-- *foo* -->*bar*',
-            '<!-- *foo* -->*bar*'
+            '<p><!-- *foo* --><em>bar</em></p>'
         )
 
     def test_raw_multiline_comment(self):
@@ -581,6 +571,17 @@ def test_raw_comment_nested(self):
             )
         )
 
+    def test_comment_in_code_block(self):
+        self.assertMarkdownRenders(
+            '    <!-- *foo* -->',
+            self.dedent(
+                """
+                <pre><code>&lt;!-- *foo* --&gt;
+                </code></pre>
+                """
+            )
+        )
+
     def test_raw_processing_instruction_one_line(self):
         self.assertMarkdownRenders(
             "<?php echo '>';' ?>",
@@ -662,20 +663,12 @@ def test_raw_declaration_one_line(self):
             '<!DOCTYPE html>'
         )
 
-    # TODO: Decide correct behavior. This matches current behavior and Commonmark.
-    # The reference implementation considers this inline not block level:
-    #
-    #   <p><!DOCTYPE html><em>bar</em></p>
-    #
-    # But most implementations do this instead:
-    #
-    #   <p>&lt;!DOCTYPE html&gt;<em>bar</em></p>
-    #
-    # Either makes sense, but the later seems more correct to me.
+    # Note: this is a change in behavior for Python-Markdown but matches the reference implementation.
+    # Previous output was `<!DOCTYPE html>*bar*`.
     def test_raw_declaration_one_line_followed_by_text(self):
         self.assertMarkdownRenders(
             '<!DOCTYPE html>*bar*',
-            '<!DOCTYPE html>*bar*'
+            '<p><!DOCTYPE html><em>bar</em></p>'
         )
 
     def test_raw_multiline_declaration(self):
@@ -702,20 +695,12 @@ def test_raw_cdata_one_line(self):
             '<![CDATA[ document.write(">"); ]]>'
         )
 
-    # TODO: Decide correct behavior. This matches current behavior and Commonmark.
-    # The reference implementation considers this inline not block level:
-    #
-    #   <p><![CDATA[ document.write(">"); ]]><em>bar</em></p>
-    #
-    # But most implementations do this instead:
-    #
-    #   <p>&lt;[CDATA[ document.write(“&gt;”); ]]&gt;<em>bar</em></p>
-    #
-    # Either makes sense, but the later seems more correct to me.
+    # Note: this is a change in behavior for Python-Markdown but matches the reference implementation.
+    # Previous output was `<![CDATA[ document.write(">"); ]]>*bar*`.
     def test_raw_cdata_one_line_followed_by_text(self):
         self.assertMarkdownRenders(
             '<![CDATA[ document.write(">"); ]]>*bar*',
-            '<![CDATA[ document.write(">"); ]]>*bar*'
+            '<p><![CDATA[ document.write(">"); ]]><em>bar</em></p>'
         )
 
     def test_raw_multiline_cdata(self):