TOC fix for AtomicString handling (#934)

Fixes #931.
Python-Markdown · Apr 6, 2020 · ada40c6
1 parent 7c595e2
commit ada40c6
Show file tree

Hide file tree

Showing 3 changed files with 42 additions and 3 deletions.
diff --git a/docs/change_log/index.md b/docs/change_log/index.md
@@ -3,7 +3,11 @@ title: Change Log
 Python-Markdown Change Log
 =========================
 
-Feb 12, 2020: Released version 3.2.1 (a bug-fix release). 
+Under development: version 3.2.2 (a bug-fix release).
+
+* Fixed an issue where double-escaped entities could end up in the TOC.
+
+Feb 12, 2020: Released version 3.2.1 (a bug-fix release).
 
 * The `name` property in `toc_tokens` from the TOC extension now
   escapes HTML special characters (`<`, `>`, and `&`).

diff --git a/markdown/extensions/toc.py b/markdown/extensions/toc.py
@@ -15,9 +15,10 @@
 
 from . import Extension
 from ..treeprocessors import Treeprocessor
-from ..util import code_escape, parseBoolValue, AMP_SUBSTITUTE, HTML_PLACEHOLDER_RE
+from ..util import code_escape, parseBoolValue, AMP_SUBSTITUTE, HTML_PLACEHOLDER_RE, AtomicString
 from ..postprocessors import UnescapePostprocessor
 import re
+import html
 import unicodedata
 import xml.etree.ElementTree as etree
 
@@ -44,6 +45,18 @@ def unique(id, ids):
     return id
 
 
+def get_name(el):
+    """Return the stripped text of `el`, HTML-unescaping any AtomicString segments."""
+
+    text = []
+    for c in el.itertext():
+        if isinstance(c, AtomicString):
+            text.append(html.unescape(c))
+        else:
+            text.append(c)
+    return ''.join(text).strip()
+
+
 def stashedHTML2text(text, md, strip_entities=True):
     """ Extract raw HTML from stash, reduce to plain text and swap with placeholder. """
     def _html_sub(m):
@@ -253,7 +266,7 @@ def run(self, doc):
                 self.set_level(el)
                 if int(el.tag[-1]) < self.toc_top or int(el.tag[-1]) > self.toc_bottom:
                     continue
-                text = ''.join(el.itertext()).strip()
+                text = get_name(el)
 
                 # Do not override pre-existing ids
                 if "id" not in el.attrib:

diff --git a/tests/test_syntax/extensions/test_toc.py b/tests/test_syntax/extensions/test_toc.py
@@ -27,6 +27,28 @@ class TestTOC(TestCase):
 
     # TODO: Move the rest of the TOC tests here.
 
+    def test_escaped_code(self):
+        self.assertMarkdownRenders(
+            self.dedent(
+                '''
+                [TOC]
+
+                # `<test>`
+                '''
+            ),
+            self.dedent(
+                '''
+                <div class="toc">
+                <ul>
+                <li><a href="#test">&lt;test&gt;</a></li>
+                </ul>
+                </div>
+                <h1 id="test"><code>&lt;test&gt;</code></h1>
+                '''
+            ),
+            extensions=['toc']
+        )
+
     def test_escaped_char_in_id(self):
         self.assertMarkdownRenders(
             r'# escaped\_character',