Skip to content

Commit

Permalink
TOC fix for AtomicString handling (#934)
Browse files Browse the repository at this point in the history
Fixes #931.
  • Loading branch information
facelessuser authored Apr 6, 2020
1 parent 7c595e2 commit ada40c6
Show file tree
Hide file tree
Showing 3 changed files with 42 additions and 3 deletions.
6 changes: 5 additions & 1 deletion docs/change_log/index.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,11 @@ title: Change Log
Python-Markdown Change Log
=========================

Feb 12, 2020: Released version 3.2.1 (a bug-fix release).
Under development: version 3.2.2 (a bug-fix release).

* Fixed issue where double-escaped entities could end up in the TOC.

Feb 12, 2020: Released version 3.2.1 (a bug-fix release).

* The `name` property in `toc_tokens` from the TOC extension now
escapes HTML special characters (`<`, `>`, and `&`).
Expand Down
17 changes: 15 additions & 2 deletions markdown/extensions/toc.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,9 +15,10 @@

from . import Extension
from ..treeprocessors import Treeprocessor
from ..util import code_escape, parseBoolValue, AMP_SUBSTITUTE, HTML_PLACEHOLDER_RE
from ..util import code_escape, parseBoolValue, AMP_SUBSTITUTE, HTML_PLACEHOLDER_RE, AtomicString
from ..postprocessors import UnescapePostprocessor
import re
import html
import unicodedata
import xml.etree.ElementTree as etree

Expand All @@ -44,6 +45,18 @@ def unique(id, ids):
return id


def get_name(el):
    """Return the title text of ``el`` as a single stripped string.

    Every text fragment yielded by ``el.itertext()`` is collected in
    document order. Fragments that are ``AtomicString`` instances are run
    through ``html.unescape`` first; all other fragments are used as-is.
    The joined result has leading and trailing whitespace removed.
    """
    fragments = (
        html.unescape(chunk) if isinstance(chunk, AtomicString) else chunk
        for chunk in el.itertext()
    )
    return ''.join(fragments).strip()


def stashedHTML2text(text, md, strip_entities=True):
""" Extract raw HTML from stash, reduce to plain text and swap with placeholder. """
def _html_sub(m):
Expand Down Expand Up @@ -253,7 +266,7 @@ def run(self, doc):
self.set_level(el)
if int(el.tag[-1]) < self.toc_top or int(el.tag[-1]) > self.toc_bottom:
continue
text = ''.join(el.itertext()).strip()
text = get_name(el)

# Do not override pre-existing ids
if "id" not in el.attrib:
Expand Down
22 changes: 22 additions & 0 deletions tests/test_syntax/extensions/test_toc.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,28 @@ class TestTOC(TestCase):

# TODO: Move the rest of the TOC tests here.

def test_escaped_code(self):
# A heading whose text is a code span containing `<test>` must appear in
# the generated TOC link escaped exactly once (`&lt;test&gt;`), the same
# way it appears inside the rendered <code> element of the heading.
self.assertMarkdownRenders(
# Markdown input: a [TOC] marker followed by the heading under test.
self.dedent(
'''
[TOC]
# `<test>`
'''
),
# Expected HTML: the TOC block, then the heading with id "test".
self.dedent(
'''
<div class="toc">
<ul>
<li><a href="#test">&lt;test&gt;</a></li>
</ul>
</div>
<h1 id="test"><code>&lt;test&gt;</code></h1>
'''
),
extensions=['toc']
)

def test_escaped_char_in_id(self):
self.assertMarkdownRenders(
r'# escaped\_character',
Expand Down

0 comments on commit ada40c6

Please sign in to comment.