forked from mseld2/ProgrammerCalculator
-
Notifications
You must be signed in to change notification settings - Fork 0
/
tokenizer.py
64 lines (56 loc) · 2.22 KB
/
tokenizer.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
from token_type import TokenType
import collections
import re
from number import Number
# Lightweight immutable record produced by the tokenizer:
#   type  - the token's kind
#   value - the token's text
Token = collections.namedtuple('Token', ('type', 'value'))
class Tokenizer(object):
    # The docstring is a raw string on purpose: it quotes regex patterns that
    # contain ``\b``, which a normal string literal would turn into a
    # backspace character.
    r'''
    Tokenizer for programmer calculator expressions.

    See: https://docs.python.org/3/library/re.html (Writing a Tokenizer)

    Expected format for numbers
        Decimal: \b([0-9]+)
            Examples: 120, 290, 01
        Binary: \b(b[0-1]+)
            Examples: b10101 b001
        Hex: \b(x[a-fA-F0-9]+)
            Examples: xae99 x19ed x00FF
        Octal: \b(o[0-7]+)
            Examples: o1764 o001
    '''

    def __init__(self, expr):
        '''
        Store the expression and the ordered token definitions.

        expr: the expression text that tokenize() will scan.
        '''
        self.expr = expr
        # (group name, regex) pairs. They are joined into one alternation in
        # this order, and the first alternative that matches at a position
        # wins. ERROR must stay last: it matches any single character that
        # none of the earlier alternatives accepted.
        self._tokenDefinitions = [
            (TokenType.BINARY.name, r'\b(b[0-1]+)'),
            (TokenType.HEX.name, r'\b(x[a-fA-F0-9]+)'),
            (TokenType.OCTAL.name, r'\b(o[0-7]+)'),
            (TokenType.DECIMAL.name, r'\b([0-9]+)'),
            (TokenType.LPAREN.name, r'\('),
            (TokenType.RPAREN.name, r'\)'),
            (TokenType.INVERT.name, r'~'),
            (TokenType.MULTIPLY.name, r'\*'),
            (TokenType.DIVIDE.name, r'\/\/'),
            (TokenType.REMAINDER.name, r'%'),
            (TokenType.ADD.name, r'\+'),
            (TokenType.MINUS.name, r'-'),
            (TokenType.LSHIFT.name, r'\<\<'),
            (TokenType.RSHIFT.name, r'\>\>'),
            (TokenType.AND.name, r'\&'),
            (TokenType.XOR.name, r'\^'),
            (TokenType.OR.name, r'\|'),
            (TokenType.SKIP.name, r'[ \t]+'),
            (TokenType.ERROR.name, r'.'),
        ]

    def tokenize(self):
        '''
        Lazily yield a Token(type, value) for each lexeme in self.expr.

        Runs of spaces and tabs are skipped. Binary, hex and octal literals
        are yielded with a leading '0' added to the matched text (for example
        'b101' becomes '0b101'); every other token carries its matched text
        unchanged.

        Raises SyntaxError at the first character that no token definition
        accepts. Because this is a generator, the error surfaces while the
        result is being iterated, not when tokenize() is called.

        NOTE: '.' does not match a newline and finditer skips text that no
        alternative matches, so newline characters in the expression are
        dropped silently instead of raising SyntaxError.
        '''
        # One named group per token kind; match.lastgroup then identifies
        # which definition produced the match.
        token_regex = '|'.join(
            f'(?P<{name}>{pattern})' for name, pattern in self._tokenDefinitions
        )
        prefixed_kinds = (TokenType.HEX, TokenType.OCTAL, TokenType.BINARY)

        for match in re.finditer(token_regex, self.expr):
            token_type = TokenType[match.lastgroup]
            value = match.group()

            if token_type == TokenType.SKIP:
                continue
            if token_type == TokenType.ERROR:
                position = match.start()
                raise SyntaxError(f'Unexpected value {value} at position {position}')
            if token_type in prefixed_kinds:
                value = f'0{value}'

            yield Token(token_type, value)