Skip to content

Commit

Permalink
More tests passing
Browse files Browse the repository at this point in the history
  • Loading branch information
waylan committed Mar 14, 2019
1 parent 36949bf commit 3b05be9
Show file tree
Hide file tree
Showing 3 changed files with 43 additions and 43 deletions.
6 changes: 3 additions & 3 deletions markdown/blockprocessors.py
Original file line number Diff line number Diff line change
Expand Up @@ -278,11 +278,12 @@ def run(self, parent, blocks):

class RawHtmlProcessor(BlockProcessor):

TAG_RE = re.compile(r'(^|\n)[ ]{0,3}\<(?P<tag>[^<> ]+)[^<>]*>')
TAG_RE = re.compile(r'(^|\n)[ ]{0,3}<([?!].*?|(?P<tag>[^<> ]+)[^<>]*)>', re.S | re.U)

def test(self, parent, block):
m = self.TAG_RE.search(block)
return m and self.parser.md.is_block_level(m.group('tag'))
# If m but no 'tag', then we have a comment, declaration, or processing instruction.
return m and (self.parser.md.is_block_level(m.group('tag')) or not m.group('tag'))

def run(self, parent, blocks):
parser = HTMLExtractor(md=self.parser.md)
Expand All @@ -292,7 +293,6 @@ def run(self, parent, blocks):
break
parser.close()
# Insert Markdown back into blocks with raw HTML extracted.
print parser.cleandoc
parts = ''.join(parser.cleandoc).split('\n\n')
parts.reverse()
for block in parts:
Expand Down
25 changes: 20 additions & 5 deletions markdown/htmlparser.py
Original file line number Diff line number Diff line change
Expand Up @@ -98,12 +98,27 @@ def handle_data(self, data):
else:
self.cleandoc.append(data)

def handle_comment(self, data):
text = '<!--{}-->'.format(data)
def handle_empty_tag(self, data):
""" Handle empty tags (`<data>`). """
line, col = self.getpos()
if self.inraw:
# Append this to the existing raw block
self._cache.append(text)
else:
self._cache.append(data)
elif col < 4:
# Handle this as a standalone raw block
self.cleandoc.append(self.md.htmlStash.store(text))
self.cleandoc.append(self.md.htmlStash.store(data))
else:
# Presumably part of a code block.
self.cleandoc.append(data)

def handle_comment(self, data):
self.handle_empty_tag('<!--{}-->'.format(data))

def handle_decl(self, data):
self.handle_empty_tag('<!{}>'.format(data))

def handle_pi(self, data):
self.handle_empty_tag('<?{}>'.format(data))

def handle_unknown_decl(self, data):
self.handle_empty_tag('<![{}]>'.format(data))
55 changes: 20 additions & 35 deletions tests/test_syntax/blocks/test_html_blocks.py
Original file line number Diff line number Diff line change
Expand Up @@ -483,22 +483,12 @@ def test_raw_comment_one_line(self):
'<!-- *foo* -->'
)

# TODO: Decide behavior here. Python-Markdown currently outputs:
#
# <!-- *foo* -->
# <p><em>bar</em></p>
#
# But the reference implementation outputs:
#
# <p><!-- *foo* --><em>bar</em></p>
#
# As the raw HTML is not alone on the line, the reference implementation
# considers it inline rather than block level. The behavior defined in
# the test below is from the CommonMark spec, which we don't follow.
# Note: this is a change in behavior for Python-Markdown but matches the reference implementation.
# Previous output was `<!-- *foo* -->\n<p><em>bar</em></p>`. Browsers render both the same.
def test_raw_comment_one_line_followed_by_text(self):
self.assertMarkdownRenders(
'<!-- *foo* -->*bar*',
'<!-- *foo* -->*bar*'
'<p><!-- *foo* --><em>bar</em></p>'
)

def test_raw_multiline_comment(self):
Expand Down Expand Up @@ -581,6 +571,17 @@ def test_raw_comment_nested(self):
)
)

def test_comment_in_code_block(self):
self.assertMarkdownRenders(
' <!-- *foo* -->',
self.dedent(
"""
<pre><code>&lt;!-- *foo* --&gt;
</code></pre>
"""
)
)

def test_raw_processing_instruction_one_line(self):
self.assertMarkdownRenders(
"<?php echo '>';' ?>",
Expand Down Expand Up @@ -662,20 +663,12 @@ def test_raw_declaration_one_line(self):
'<!DOCTYPE html>'
)

# TODO: Decide correct behavior. This matches current behavior and Commonmark.
# The reference implementation considers this inline not block level:
#
# <p><!DOCTYPE html><em>bar</em></p>
#
# But most implementations do this instead:
#
# <p>&lt;!DOCTYPE html&gt;<em>bar</em></p>
#
# Either makes sense, but the latter seems more correct to me.
# Note: this is a change in behavior for Python-Markdown but matches the reference implementation.
# Previous output was `<!DOCTYPE html>*bar*`.
def test_raw_declaration_one_line_followed_by_text(self):
self.assertMarkdownRenders(
'<!DOCTYPE html>*bar*',
'<!DOCTYPE html>*bar*'
'<p><!DOCTYPE html><em>bar</em></p>'
)

def test_raw_multiline_declaration(self):
Expand All @@ -702,20 +695,12 @@ def test_raw_cdata_one_line(self):
'<![CDATA[ document.write(">"); ]]>'
)

# TODO: Decide correct behavior. This matches current behavior and Commonmark.
# The reference implementation considers this inline not block level:
#
# <p><![CDATA[ document.write(">"); ]]><em>bar</em></p>
#
# But most implementations do this instead:
#
# <p>&lt;![CDATA[ document.write("&gt;"); ]]&gt;<em>bar</em></p>
#
# Either makes sense, but the latter seems more correct to me.
# Note: this is a change in behavior for Python-Markdown but matches the reference implementation.
# Previous output was `<![CDATA[ document.write(">"); ]]>*bar*`.
def test_raw_cdata_one_line_followed_by_text(self):
self.assertMarkdownRenders(
'<![CDATA[ document.write(">"); ]]>*bar*',
'<![CDATA[ document.write(">"); ]]>*bar*'
'<p><![CDATA[ document.write(">"); ]]><em>bar</em></p>'
)

def test_raw_multiline_cdata(self):
Expand Down

0 comments on commit 3b05be9

Please sign in to comment.