Make type1font.py work better on Python 3.x
Communicate token types as objects instead of strings from the
tokenizer to the parser. Use proper Unicode strings for string-like
data in the font properties. Fix the handling of delimiters.

Resolves matplotlib#3049.
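
As a side note (not part of this commit), here is a minimal standalone sketch of the sentinel-object token-type pattern described above, with illustrative names only: each token kind is a unique object() compared by identity, so the parser cannot accidentally match a misspelled string or hit the bytes-versus-str mismatch that breaks string comparisons on Python 3.

# Hypothetical sketch, not matplotlib code: token kinds as unique sentinels.
_WHITESPACE = object()
_NAME = object()

def tokens(text):
    """Yield (kind, value) pairs for a trivially tokenized input."""
    for ch in text:
        if ch.isspace():
            yield (_WHITESPACE, ch)
        else:
            yield (_NAME, ch)

# Identity checks behave the same on Python 2 and 3, whereas comparing
# b'name' == 'name' is True on Python 2 but silently False on Python 3.
values = [v for kind, v in tokens('a b') if kind is not _WHITESPACE]
assert values == ['a', 'b']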
jkseppan committed Jul 9, 2014
1 parent 894f3df commit 1bb09ff
Showing 1 changed file with 36 additions and 25 deletions.
lib/matplotlib/type1font.py (61 changes: 36 additions & 25 deletions)
@@ -141,30 +141,37 @@ def _split(self, data):
 
         return data[:len1], binary, data[idx:]
 
-    _whitespace = re.compile(br'[\0\t\r\014\n ]+')
-    _token = re.compile(br'/{0,2}[^]\0\t\r\v\n ()<>{}/%[]+')
-    _comment = re.compile(br'%[^\r\n\v]*')
-    _instring = re.compile(br'[()\\]')
+    _whitespace_re = re.compile(br'[\0\t\r\014\n ]+')
+    _token_re = re.compile(br'/{0,2}[^]\0\t\r\v\n ()<>{}/%[]+')
+    _comment_re = re.compile(br'%[^\r\n\v]*')
+    _instring_re = re.compile(br'[()\\]')
 
+    # token types
+    _whitespace = object()
+    _name = object()
+    _string = object()
+    _delimiter = object()
+    _number = object()
+
     @classmethod
     def _tokens(cls, text):
         """
         A PostScript tokenizer. Yield (token, value) pairs such as
-        ('whitespace', ' ') or ('name', '/Foobar').
+        (cls._whitespace, ' ') or (cls._name, '/Foobar').
         """
         pos = 0
         while pos < len(text):
-            match = (cls._comment.match(text[pos:]) or
-                     cls._whitespace.match(text[pos:]))
+            match = (cls._comment_re.match(text[pos:]) or
+                     cls._whitespace_re.match(text[pos:]))
             if match:
-                yield ('whitespace', match.group())
+                yield (cls._whitespace, match.group())
                 pos += match.end()
             elif text[pos] == '(':
                 start = pos
                 pos += 1
                 depth = 1
                 while depth:
-                    match = cls._instring.search(text[pos:])
+                    match = cls._instring_re.search(text[pos:])
                     if match is None:
                         return
                     pos += match.end()
@@ -174,25 +181,25 @@ def _tokens(cls, text):
                         depth -= 1
                     else: # a backslash - skip the next character
                         pos += 1
-                yield ('string', text[start:pos])
+                yield (cls._string, text[start:pos])
             elif text[pos:pos + 2] in ('<<', '>>'):
-                yield ('delimiter', text[pos:pos + 2])
+                yield (cls._delimiter, text[pos:pos + 2])
                 pos += 2
             elif text[pos] == '<':
                 start = pos
                 pos += text[pos:].index('>')
-                yield ('string', text[start:pos])
+                yield (cls._string, text[start:pos])
             else:
-                match = cls._token.match(text[pos:])
+                match = cls._token_re.match(text[pos:])
                 if match:
                     try:
                         float(match.group())
-                        yield ('number', match.group())
+                        yield (cls._number, match.group())
                     except ValueError:
-                        yield ('name', match.group())
+                        yield (cls._name, match.group())
                     pos += match.end()
                 else:
-                    yield ('delimiter', text[pos])
+                    yield (cls._delimiter, text[pos:pos + 1])
                     pos += 1
 
     def _parse(self):
@@ -205,26 +212,30 @@ def _parse(self):
         prop = {'weight': 'Regular', 'ItalicAngle': 0.0, 'isFixedPitch': False,
                 'UnderlinePosition': -100, 'UnderlineThickness': 50}
         tokenizer = self._tokens(self.parts[0])
-        filtered = filter(lambda x: x[0] != 'whitespace', tokenizer)
+        filtered = filter(lambda x: x[0] != self._whitespace, tokenizer)
+        # The spec calls this an ASCII format; in Python 2.x we could
+        # just treat the strings and names as opaque bytes but let's
+        # turn them into proper Unicode, and be lenient in case of high bytes.
+        convert = lambda x: x.decode('ascii', errors='replace')
         for token, value in filtered:
-            if token == b'name' and value.startswith(b'/'):
-                key = value[1:]
+            if token is self._name and value.startswith(b'/'):
+                key = convert(value[1:])
                 token, value = next(filtered)
-                if token == b'name':
+                if token is self._name:
                     if value in (b'true', b'false'):
                         value = value == b'true'
                     else:
-                        value = value.lstrip(b'/')
-                elif token == b'string':
-                    value = value.lstrip(b'(').rstrip(b')')
-                elif token == b'number':
+                        value = convert(value.lstrip(b'/'))
+                elif token is self._string:
+                    value = convert(value.lstrip(b'(').rstrip(b')'))
+                elif token is self._number:
                     if b'.' in value:
                         value = float(value)
                     else:
                         value = int(value)
                 else: # more complicated value such as an array
                     value = None
-                if key != b'FontInfo' and value is not None:
+                if key != 'FontInfo' and value is not None:
                     prop[key] = value
 
         # Fill in the various *Name properties
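
For reference (again not part of the diff), a short example of the lenient decoding strategy the new _parse code uses via its convert helper: undecodable high bytes become the Unicode replacement character instead of raising UnicodeDecodeError, so one bad byte in a font property cannot abort parsing.

def convert(x):
    # Same strategy as the convert lambda added in _parse above.
    return x.decode('ascii', errors='replace')

assert convert(b'Regular') == 'Regular'
assert convert(b'Od\xe9n') == 'Od\ufffdn'   # the 0xe9 byte becomes U+FFFD, no exception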