Skip to content

Commit 6374332

Browse files
committed
Merge pull request matplotlib#3203 from jkseppan/issue-3049
Make type1font.py work better on Python 3.x
2 parents 3b714b8 + 1bb09ff commit 6374332

File tree

1 file changed

+36
-25
lines changed

1 file changed

+36
-25
lines changed

Diff for: lib/matplotlib/type1font.py

+36-25
Original file line numberDiff line numberDiff line change
@@ -141,30 +141,37 @@ def _split(self, data):
141141

142142
return data[:len1], binary, data[idx:]
143143

144-
_whitespace = re.compile(br'[\0\t\r\014\n ]+')
145-
_token = re.compile(br'/{0,2}[^]\0\t\r\v\n ()<>{}/%[]+')
146-
_comment = re.compile(br'%[^\r\n\v]*')
147-
_instring = re.compile(br'[()\\]')
144+
_whitespace_re = re.compile(br'[\0\t\r\014\n ]+')
145+
_token_re = re.compile(br'/{0,2}[^]\0\t\r\v\n ()<>{}/%[]+')
146+
_comment_re = re.compile(br'%[^\r\n\v]*')
147+
_instring_re = re.compile(br'[()\\]')
148+
149+
# token types
150+
_whitespace = object()
151+
_name = object()
152+
_string = object()
153+
_delimiter = object()
154+
_number = object()
148155

149156
@classmethod
150157
def _tokens(cls, text):
151158
"""
152159
A PostScript tokenizer. Yield (token, value) pairs such as
153-
('whitespace', ' ') or ('name', '/Foobar').
160+
(cls._whitespace, ' ') or (cls._name, '/Foobar').
154161
"""
155162
pos = 0
156163
while pos < len(text):
157-
match = (cls._comment.match(text[pos:]) or
158-
cls._whitespace.match(text[pos:]))
164+
match = (cls._comment_re.match(text[pos:]) or
165+
cls._whitespace_re.match(text[pos:]))
159166
if match:
160-
yield ('whitespace', match.group())
167+
yield (cls._whitespace, match.group())
161168
pos += match.end()
162169
elif text[pos] == '(':
163170
start = pos
164171
pos += 1
165172
depth = 1
166173
while depth:
167-
match = cls._instring.search(text[pos:])
174+
match = cls._instring_re.search(text[pos:])
168175
if match is None:
169176
return
170177
pos += match.end()
@@ -174,25 +181,25 @@ def _tokens(cls, text):
174181
depth -= 1
175182
else: # a backslash - skip the next character
176183
pos += 1
177-
yield ('string', text[start:pos])
184+
yield (cls._string, text[start:pos])
178185
elif text[pos:pos + 2] in ('<<', '>>'):
179-
yield ('delimiter', text[pos:pos + 2])
186+
yield (cls._delimiter, text[pos:pos + 2])
180187
pos += 2
181188
elif text[pos] == '<':
182189
start = pos
183190
pos += text[pos:].index('>')
184-
yield ('string', text[start:pos])
191+
yield (cls._string, text[start:pos])
185192
else:
186-
match = cls._token.match(text[pos:])
193+
match = cls._token_re.match(text[pos:])
187194
if match:
188195
try:
189196
float(match.group())
190-
yield ('number', match.group())
197+
yield (cls._number, match.group())
191198
except ValueError:
192-
yield ('name', match.group())
199+
yield (cls._name, match.group())
193200
pos += match.end()
194201
else:
195-
yield ('delimiter', text[pos])
202+
yield (cls._delimiter, text[pos:pos + 1])
196203
pos += 1
197204

198205
def _parse(self):
@@ -205,26 +212,30 @@ def _parse(self):
205212
prop = {'weight': 'Regular', 'ItalicAngle': 0.0, 'isFixedPitch': False,
206213
'UnderlinePosition': -100, 'UnderlineThickness': 50}
207214
tokenizer = self._tokens(self.parts[0])
208-
filtered = filter(lambda x: x[0] != 'whitespace', tokenizer)
215+
filtered = filter(lambda x: x[0] != self._whitespace, tokenizer)
216+
# The spec calls this an ASCII format; in Python 2.x we could
217+
# just treat the strings and names as opaque bytes but let's
218+
# turn them into proper Unicode, and be lenient in case of high bytes.
219+
convert = lambda x: x.decode('ascii', errors='replace')
209220
for token, value in filtered:
210-
if token == b'name' and value.startswith(b'/'):
211-
key = value[1:]
221+
if token is self._name and value.startswith(b'/'):
222+
key = convert(value[1:])
212223
token, value = next(filtered)
213-
if token == b'name':
224+
if token is self._name:
214225
if value in (b'true', b'false'):
215226
value = value == b'true'
216227
else:
217-
value = value.lstrip(b'/')
218-
elif token == b'string':
219-
value = value.lstrip(b'(').rstrip(b')')
220-
elif token == b'number':
228+
value = convert(value.lstrip(b'/'))
229+
elif token is self._string:
230+
value = convert(value.lstrip(b'(').rstrip(b')'))
231+
elif token is self._number:
221232
if b'.' in value:
222233
value = float(value)
223234
else:
224235
value = int(value)
225236
else: # more complicated value such as an array
226237
value = None
227-
if key != b'FontInfo' and value is not None:
238+
if key != 'FontInfo' and value is not None:
228239
prop[key] = value
229240

230241
# Fill in the various *Name properties

0 commit comments

Comments (0)