Skip to content

Commit

Permalink
Optimize several regexes from quadratic time to linear time
Browse files Browse the repository at this point in the history
Part of the discussion in #798.

Signed-off-by: Anders Kaseorg <andersk@mit.edu>
  • Loading branch information
andersk authored and waylan committed Mar 7, 2019
1 parent 4b11593 commit cb47805
Show file tree
Hide file tree
Showing 2 changed files with 6 additions and 6 deletions.
10 changes: 5 additions & 5 deletions markdown/inlinepatterns.py
Original file line number Diff line number Diff line change
Expand Up @@ -147,10 +147,10 @@ def build_inlinepatterns(md, **kwargs):
NOT_STRONG_RE = r'((^|\s)(\*|_)(\s|$))'

# <http://www.123.com>
-AUTOLINK_RE = r'<((?:[Ff]|[Hh][Tt])[Tt][Pp][Ss]?://[^>]*)>'
+AUTOLINK_RE = r'<((?:[Ff]|[Hh][Tt])[Tt][Pp][Ss]?://[^<>]*)>'

# <me@example.com>
-AUTOMAIL_RE = r'<([^> \!]*@[^> ]*)>'
+AUTOMAIL_RE = r'<([^<> !]*@[^@<> ]*)>'

# <...>
HTML_RE = r'(\<([a-zA-Z/][^\>]*?|\!--.*?--)\>)'
Expand Down Expand Up @@ -433,7 +433,7 @@ def get_stash(m):

class LinkInlineProcessor(InlineProcessor):
""" Return a link element from the given match. """
-RE_LINK = re.compile(r'''\(\s*(?:(<.*?>)\s*(?:(['"])(.*?)\2\s*)?\))?''', re.DOTALL | re.UNICODE)
+RE_LINK = re.compile(r'''\(\s*(?:(<[^<>]*>)\s*(?:('[^']*'|"[^"]*")\s*)?\))?''', re.DOTALL | re.UNICODE)
RE_TITLE_CLEAN = re.compile(r'\s')

def handleMatch(self, m, data):
Expand Down Expand Up @@ -467,8 +467,8 @@ def getLink(self, data, index):
if m and m.group(1):
# Matches [Text](<link> "title")
href = m.group(1)[1:-1].strip()
-if m.group(3):
-title = m.group(3)
+if m.group(2):
+title = m.group(2)[1:-1]
index = m.end(0)
handled = True
elif m:
Expand Down
2 changes: 1 addition & 1 deletion tests/misc/html.html
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ <h1>Block level html</h1>
</div>

<p>And of course <script>blah</script>.</p>
-<p><a href="script&gt;stuff&lt;/script">this <script>link</a></p>
+<p><a href="&lt;script&gt;stuff&lt;/script&gt;">this <script>link</a></p>
<p>Some funky <x\]> inline stuff with markdown escaping syntax.</p>
<p><img scr="foo.png" title="Only one inline element on a line." /></p>
<p>And now a line with only an opening bracket:</p>
Expand Down

0 comments on commit cb47805

Please sign in to comment.