/
symbol.py
117 lines (93 loc) · 3.1 KB
/
symbol.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
"""
@author wilkeraziz
"""
from weakref import WeakValueDictionary
import re
# Matches an annotated nonterminal such as "[X,0-3]": group 1 is the label,
# groups 2 and 3 are the two integer positions.
# A raw string is required: "\[" and "\]" are not valid string escapes.
NT_RE = re.compile(r'\[(.+),([0-9]+)-([0-9]+)\]')
def is_terminal(symbol):
    """Return True unless `symbol` is formatted as a nonterminal, i.e. [X].

    An empty symbol carries no brackets, so it is reported as a terminal
    instead of raising IndexError.
    """
    # len() rather than truthiness so that non-sequences (e.g. None) still fail loudly
    if len(symbol) == 0:
        return True
    return not (symbol[0] == '[' and symbol[-1] == ']')
def is_nonterminal(symbol):
    """Return True when `symbol` is formatted as a nonterminal, i.e. [X].

    An empty symbol carries no brackets, so it is reported as not being a
    nonterminal instead of raising IndexError.
    """
    # len() rather than truthiness so that non-sequences (e.g. None) still fail loudly
    if len(symbol) == 0:
        return False
    return symbol[0] == '[' and symbol[-1] == ']'
def make_nonterminal(symbol):
    """Wrap `symbol` in square brackets, producing the [X] nonterminal notation."""
    bracketed = '[{label}]'.format(label=symbol)
    return bracketed
def make_terminal(symbol):
    """Return `symbol` itself: terminals are represented without any markup."""
    terminal = symbol
    return terminal
def make_symbol(base_symbol, sfrom, sto):
    """Return `base_symbol`, annotated as '[label,sfrom-sto]' when applicable.

    The symbol is returned untouched when both `sfrom` and `sto` are None,
    or when it is a terminal. Otherwise the surrounding brackets are
    stripped from the nonterminal and the two positions are appended to
    its label.
    """
    unannotated = sfrom is None and sto is None
    # short-circuit: the terminal check is only consulted for annotated symbols
    if unannotated or is_terminal(base_symbol):
        return base_symbol
    label = base_symbol[1:-1]
    return '[%s,%s-%s]' % (label, sfrom, sto)
def parse_annotated_nonterminal(nt):
    """Split an annotated nonterminal into (bracketed label, int, int).

    When `nt` does not match NT_RE, it is returned unchanged together with
    two None placeholders.
    """
    match = NT_RE.match(nt)
    if match is not None:
        label, start, end = match.groups()
        return '[{0}]'.format(label), int(start), int(end)
    return nt, None, None
def _make_symbol(base_symbol, sfrom, sto, structured=True):
    """Annotate a nonterminal with two positions; terminals pass through.

    With `structured` true the result is the tuple (label, sfrom, sto);
    otherwise it is the string '[label,sfrom-sto]'. In both cases the label
    is `base_symbol` without its surrounding brackets.
    """
    if is_terminal(base_symbol):
        return base_symbol
    label = base_symbol[1:-1]
    if structured:
        return (label, sfrom, sto)
    return '[%s,%s-%s]' % (label, sfrom, sto)
class Terminal(object):
    """
    Implements a terminal symbol. References to terminal symbols are managed by the Terminal class:
    constructing a Terminal for a surface that already has a live instance returns that instance.
    We use WeakValueDictionary for builtin reference counting.

    >>> t1 = Terminal(1)
    >>> t2 = Terminal(1)
    >>> t1 == t2
    True
    >>> t1 is t2
    True
    >>> t3 = Terminal(2)
    >>> t1 != t3
    True
    >>> id(t1) == id(t2) != id(t3)
    True
    >>> hash(t1) == hash(t2) != hash(t3)
    True
    >>> Terminal(10)
    Terminal(10)
    >>> Terminal('x')
    Terminal('x')
    """

    # surface -> Terminal instance; one mapping shared by the class and its subclasses
    _vocabulary = WeakValueDictionary()

    def __new__(cls, surface):
        """Return the instance registered for `surface`, creating it on first use.

        The surface has to be hashable, since it is used as a dictionary key.
        """
        obj = Terminal._vocabulary.get(surface, None)
        # Test for None explicitly: a subclass may define __bool__/__len__ and be
        # falsy, in which case a truthiness test would re-create (and replace)
        # an instance that is already registered.
        if obj is None:
            obj = object.__new__(cls)
            Terminal._vocabulary[surface] = obj
            obj._surface = surface
        return obj

    def __repr__(self):
        """Return 'Terminal(<repr of the surface>)'."""
        return '%s(%s)' % (Terminal.__name__, repr(self._surface))

    def __str__(self):
        """Return the surface converted with str()."""
        return str(self._surface)
class Nonterminal(object):
    """
    Implements a nonterminal symbol. References to nonterminal symbols are managed by the Nonterminal class:
    constructing a Nonterminal for a label that already has a live instance returns that instance.
    We use WeakValueDictionary for builtin reference counting.

    >>> n1 = Nonterminal('S')
    >>> n2 = Nonterminal('S')
    >>> n3 = Nonterminal('X')
    >>> n1 == n2 != n3
    True
    >>> n1 is n2 is not n3
    True
    >>> Nonterminal(('NP', 1, 2))  # a noun phrase spanning from 1 to 2
    Nonterminal(('NP', 1, 2))
    """

    # label -> Nonterminal instance; one mapping shared by the class and its subclasses
    _categories = WeakValueDictionary()

    def __new__(cls, label):
        """Return the instance registered for `label`, creating it on first use.

        The label has to be hashable, since it is used as a dictionary key.
        """
        obj = Nonterminal._categories.get(label, None)
        # Test for None explicitly: a subclass may define __bool__/__len__ and be
        # falsy, in which case a truthiness test would re-create (and replace)
        # an instance that is already registered.
        if obj is None:
            obj = object.__new__(cls)
            Nonterminal._categories[label] = obj
            obj._label = label
        return obj

    def __repr__(self):
        """Return 'Nonterminal(<repr of the label>)'."""
        return '%s(%s)' % (Nonterminal.__name__, repr(self._label))

    def __str__(self):
        """Return the label converted with str()."""
        return str(self._label)