Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
Browse files

Moar speed: fast path for :;{}()[] tokens

  • Loading branch information...
commit 5d33b50d8d8148ccf9dfa4ab20a76b6df0e3337d 1 parent cffc0ea
@SimonSapin authored
Showing with 53 additions and 45 deletions.
  1. +21 −15 tinycss/speedups.pyx
  2. +14 −17 tinycss/token_data.py
  3. +18 −13 tinycss/tokenizer.py
View
36 tinycss/speedups.pyx
@@ -89,22 +89,28 @@ def tokenize_flat(css_source, int ignore_comments=1):
tokens = []
while pos < source_len:
- for type_ in xrange(n_tokens):
- type_name, regexp = compiled_tokens[type_]
- match = regexp(css_source, pos)
- if match:
- # First match is the longest. See comments on TOKENS above.
- css_value = match.group()
- break
+ char = css_source[pos]
+ if char in ':;{}()[]':
+ type_ = -1 # not parsed further anyway
+ type_name = char
+ css_value = char
else:
- # No match.
- # "Any other character not matched by the above rules,
- # and neither a single nor a double quote."
- # ... but quotes at the start of a token are always matched
- # by STRING or BAD_STRING. So DELIM is any single character.
- type_ = DELIM
- type_name = 'DELIM'
- css_value = css_source[pos]
+ for type_ in xrange(n_tokens):
+ type_name, regexp = compiled_tokens[type_]
+ match = regexp(css_source, pos)
+ if match:
+ # First match is the longest. See comments on TOKENS above.
+ css_value = match.group()
+ break
+ else:
+ # No match.
+ # "Any other character not matched by the above rules,
+ # and neither a single nor a double quote."
+ # ... but quotes at the start of a token are always matched
+ # by STRING or BAD_STRING. So DELIM is any single character.
+ type_ = DELIM
+ type_name = 'DELIM'
+ css_value = char
length = len(css_value)
next_pos = pos + length
View
31 tinycss/token_data.py
@@ -118,29 +118,26 @@
def _init():
"""Import-time initialization."""
COMPILED_MACROS.clear()
- # Formatter is broken on PyPy: https://bugs.pypy.org/issue1081
-# expand_macros = functools.partial(
-# Formatter().vformat, args=(), kwargs=COMPILED_MACROS)
-
for line in MACROS.splitlines():
if line.strip():
name, value = line.split('\t')
COMPILED_MACROS[name.strip()] = '(?:%s)' \
% value.format(**COMPILED_MACROS)
- del COMPILED_TOKEN_REGEXPS[:]
- for line in TOKENS.splitlines():
- if line.strip():
- name, value = line.split('\t')
- COMPILED_TOKEN_REGEXPS.append((
- name.strip(),
- re.compile(
- value.format(**COMPILED_MACROS),
- # Case-insensitive when matching eg. uRL(foo)
- # but preserve the case in extracted groups
- re.I
- ).match
- ))
+ COMPILED_TOKEN_REGEXPS[:] = (
+ (
+ name.strip(),
+ re.compile(
+ value.format(**COMPILED_MACROS),
+ # Case-insensitive when matching e.g. uRL(foo)
+ # but preserve the case in extracted groups
+ re.I
+ ).match
+ )
+ for line in TOKENS.splitlines()
+ if line.strip()
+ for name, value in [line.split('\t')]
+ )
COMPILED_TOKEN_INDEXES.clear()
for i, (name, regexp) in enumerate(COMPILED_TOKEN_REGEXPS):
View
31 tinycss/tokenizer.py
@@ -48,20 +48,25 @@ def tokenize_flat(css_source, ignore_comments=True,
source_len = len(css_source)
tokens = []
while pos < source_len:
- for type_, regexp in compiled_tokens:
- match = regexp(css_source, pos)
- if match:
- # First match is the longest. See comments on TOKENS above.
- css_value = match.group()
- break
+ char = css_source[pos]
+ if char in ':;{}()[]':
+ type_ = char
+ css_value = char
else:
- # No match.
- # "Any other character not matched by the above rules,
- # and neither a single nor a double quote."
- # ... but quotes at the start of a token are always matched
- # by STRING or BAD_STRING. So DELIM is any single character.
- type_ = 'DELIM'
- css_value = css_source[pos]
+ for type_, regexp in compiled_tokens:
+ match = regexp(css_source, pos)
+ if match:
+ # First match is the longest. See comments on TOKENS above.
+ css_value = match.group()
+ break
+ else:
+ # No match.
+ # "Any other character not matched by the above rules,
+ # and neither a single nor a double quote."
+ # ... but quotes at the start of a token are always matched
+ # by STRING or BAD_STRING. So DELIM is any single character.
+ type_ = 'DELIM'
+ css_value = char
length = len(css_value)
next_pos = pos + length
Please sign in to comment.
Something went wrong with that request. Please try again.