Skip to content

Commit

Permalink
Refactor abbr escaping
Browse files Browse the repository at this point in the history
An alternate fix to #1444. This does not exclude the use of carets or square
brackets in abbreviations. It still excludes backslashes, however. I played
with backslashes and it just doesn't make sense to support them: they are
excluded because they have special meaning in Markdown itself, not because
of their use in regular expressions.
  • Loading branch information
waylan committed Mar 8, 2024
1 parent e4ab4a6 commit 9edba85
Show file tree
Hide file tree
Showing 4 changed files with 46 additions and 33 deletions.
3 changes: 2 additions & 1 deletion docs/changelog.md
Expand Up @@ -34,7 +34,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
* Include `scripts/*.py` in the generated source tarballs (#1430).
* Ensure lines after heading in loose list are properly detabbed (#1443).
* Give smarty tree processor higher priority than toc (#1440).
* Explicitly omit carrot (`^`) and backslash (`\`) from abbreviations (#1444).
* Permit carets (`^`) and square brackets (`]`) but explicitly exclude
backslashes (`\`) from abbreviations (#1444).

## [3.5.2] -- 2024-01-10

Expand Down
10 changes: 3 additions & 7 deletions docs/extensions/abbreviations.md
Expand Up @@ -36,13 +36,9 @@ will be rendered as:
is maintained by the <abbr title="World Wide Web Consortium">W3C</abbr>.</p>
```

The following three characters are not permitted in an abbreviation. Any
abbreviation definitions which include one will not be recognized as an
abbreviation definition.

1. carrot (`^`)
2. backslash (`\`)
3. left square bracket (`]`)
The backslash (`\`) is not permitted in an abbreviation. Any abbreviation
definition which includes one or more backslashes between the square brackets
will not be recognized as an abbreviation definition.

Usage
-----
Expand Down
14 changes: 3 additions & 11 deletions markdown/extensions/abbr.py
Expand Up @@ -41,7 +41,7 @@ def extendMarkdown(self, md):
class AbbrPreprocessor(BlockProcessor):
""" Abbreviation Preprocessor - parse text for abbr references. """

RE = re.compile(r'^[*]\[(?P<abbr>[^\]\^\\]*)\][ ]?:[ ]*\n?[ ]*(?P<title>.*)$', re.MULTILINE)
RE = re.compile(r'^[*]\[(?P<abbr>[^\\]*?)\][ ]?:[ ]*\n?[ ]*(?P<title>.*)$', re.MULTILINE)

def test(self, parent: etree.Element, block: str) -> bool:
return True
Expand Down Expand Up @@ -72,16 +72,8 @@ def run(self, parent: etree.Element, blocks: list[str]) -> bool:
return False

def _generate_pattern(self, text: str) -> str:
"""
Given a string, returns a regex pattern to match that string.
'HTML' -> r'(?P<abbr>\b[H][T][M][L]\b)'
Note: we force each char as a literal match via a character set (in brackets)
as we don't know what they will be beforehand.
"""
return f"(?P<abbr>\\b{ ''.join(f'[{ c }]' for c in text) }\\b)"
""" Given a string, returns a regex pattern to match that string. """
return f"(?P<abbr>\\b{ re.escape(text) }\\b)"


class AbbrInlineProcessor(InlineProcessor):
Expand Down
52 changes: 38 additions & 14 deletions tests/test_syntax/extensions/test_abbr.py
Expand Up @@ -24,6 +24,7 @@


class TestAbbr(TestCase):
maxDiff = None

default_kwargs = {'extensions': ['abbr']}

Expand Down Expand Up @@ -260,28 +261,19 @@ def test_abbr_single_quoted(self):
)
)

def test_abbr_ignore_special_chars(self):
def test_abbr_ignore_backslash(self):
self.assertMarkdownRenders(
self.dedent(
r"""
[^] [\\] [\]] []]
\\foo
*[^]: Not an abbreviation
*[\\]: Not an abbreviation
*[\]]: Not an abbreviation
*[]]: Not an abbreviation
*[\\foo]: Not an abbreviation
"""
),
self.dedent(
r"""
<p>[^] [\] []] []]</p>
<p>*[^]: Not an abbreviation</p>
<p>*[\]: Not an abbreviation</p>
<p>*[]]: Not an abbreviation</p>
<p>*[]]: Not an abbreviation</p>
<p>\foo</p>
<p>*[\foo]: Not an abbreviation</p>
"""
)
)
Expand All @@ -301,3 +293,35 @@ def test_abbr_hyphen(self):
"""
)
)

def test_abbr_carrot(self):
self.assertMarkdownRenders(
self.dedent(
"""
ABBR^abbr
*[ABBR^abbr]: Abbreviation
"""
),
self.dedent(
"""
<p><abbr title="Abbreviation">ABBR^abbr</abbr></p>
"""
)
)

def test_abbr_bracket(self):
self.assertMarkdownRenders(
self.dedent(
"""
ABBR]abbr
*[ABBR]abbr]: Abbreviation
"""
),
self.dedent(
"""
<p><abbr title="Abbreviation">ABBR]abbr</abbr></p>
"""
)
)

0 comments on commit 9edba85

Please sign in to comment.