-
Notifications
You must be signed in to change notification settings - Fork 18
/
trie.py
60 lines (45 loc) · 1.96 KB
/
trie.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
from collections import defaultdict
class Leaf: # pylint: disable=too-few-public-methods,missing-class-docstring
def __init__(self):
self.payloads = []
self.children = defaultdict(Leaf)
class Trie:
"""
`Trie <https://en.wikipedia.org/wiki/Trie>`_ is a data structure for effective prefix search. It
is used in Spylls to store prefixes and suffixes. For example, if we have suffixes "s", "ions",
"ications", they are stored (reversed) this way:
.. code-block:: text
root
+-s ... metadata for suffix "s"
+-noi ... metadata for suffix "ions"
+-taci ... metadata for suffix "ications"
So, for the word "complications", we can receive all its possible suffixes (all three) in one
pass through trie.
**Important:** Profiling shows that search through Trie of suffixes/prefixes is the center of
Spylls performance, that's why it is very primitive and fast implementation instead of some
library like `pygtrie <https://github.com/google/pygtrie>`_. Probably, by choosing fast (C)
implementation of trie, the whole spylls can be make much faster.
"""
def __init__(self, data=None):
self.root = Leaf()
if data:
for key, val in data.items():
self.set(key, val)
def put(self, path, payload):
cur = self.root
for p in path:
cur = cur.children[p]
cur.payloads.append(payload)
def set(self, path, payloads):
cur = self.root
for p in path:
cur = cur.children[p]
cur.payloads = payloads
def lookup(self, path):
for _, leaf in self.traverse(self.root, path):
yield from leaf.payloads
def traverse(self, cur, path, traversed=[]):
yield (traversed, cur)
if not path or path[0] not in cur.children:
return
yield from self.traverse(cur.children[path[0]], path[1:], [*traversed, path[0]])