In [1]:
from typing import Hashable


class Tree:
    """Base class that renders a trie as an indented text outline.

    Subclasses must set ``self.root`` to a node object exposing:

    * ``symb`` - the string printed for the node,
    * ``next`` - a dict mapping edge keys to child nodes.
    """

    def __repr__(self) -> str:
        """Return one line per node in pre-order, indented by depth.

        Children are listed in the insertion order of ``next``. The traversal
        uses an explicit stack instead of recursion so that very deep tries
        (depth close to or above the interpreter recursion limit) can still
        be printed, and the lines are joined once at the end instead of being
        concatenated repeatedly.
        """
        lines = []
        stack = [(self.root, 0)]

        while stack:
            node, level = stack.pop()
            marker = "" if level == 0 else "┕━━━━ "
            lines.append("\t" * level + marker + node.symb + "\n")

            # Push children reversed so the first-inserted child is popped
            # (and therefore printed) first, matching pre-order output.
            for child in reversed(list(node.next.values())):
                stack.append((child, level + 1))

        return "".join(lines)


# Trie - the variant in which successive suffixes are added by searching for the head starting
# from the root of the tree (1 pt)
class Node:
    """Single trie node: a printable symbol plus its outgoing edges."""

    def __init__(self, symb):
        # Symbol shown for this node when the trie is printed.
        self.symb: str = symb
        # Outgoing edges: edge key -> child node. Starts empty.
        self.next: dict[Hashable, Node] = dict()


class Trie(Tree):
    """Suffix trie built by inserting every suffix of ``string`` from the root."""

    def __init__(self, string):
        self.root = Node("")
        self.__build_trie(string)

    def __build_trie(self, string):
        """Insert ``string[i:]`` for every start index ``i``, one symbol at a time."""
        for start in range(len(string)):
            current = self.root
            for symbol in string[start:]:
                successor = current.next.get(symbol)
                if successor is None:
                    # No edge for this symbol yet: create the child and hook it in.
                    successor = Node(symb=str(symbol))
                    current.next[symbol] = successor
                current = successor


# Trie - the variant in which successive suffixes are added by appending the next letter of the
# text (1 pt)
class LinkNode(Node):
    """Trie node that additionally carries a ``link`` pointer to another node."""

    def __init__(self, symb, link=None):
        super().__init__(symb)
        # Without an explicit target the node links to itself.
        if link is None:
            link = self
        self.link: LinkNode = link


class OnlineTrie(Tree):
    """Suffix trie built online, extending the structure one text symbol at a time.

    Every node is a ``LinkNode``; the ``link`` pointers are followed from the
    deepest node towards the root while a new symbol is appended.
    """

    def __init__(self, string):
        self.root = LinkNode("")
        self.__build_trie(string)

    def __build_trie(self, string):
        """Build the trie for ``string``; an empty ``string`` leaves only the root."""
        root = self.root

        # Empty input: nothing to insert (indexing string[0] would raise IndexError).
        if len(string) == 0:
            return

        # The first symbol creates the first (and currently deepest) node.
        deepest = LinkNode(symb=string[0], link=root)
        root.next[string[0]] = deepest

        for c in string[1:]:
            curr = deepest
            prev = None

            # Walk along the link chain starting at the deepest node and give
            # every visited node that lacks an edge `c` a fresh child.
            while c not in curr.next:
                new_node = LinkNode(symb=c)
                curr.next[c] = new_node

                # Chain the freshly created nodes together via `link`.
                if prev is not None:
                    prev.link = new_node

                prev = new_node
                curr = curr.link

            # `deepest` never has children at this point in the visible code,
            # so the loop above ran at least once and `prev` is set.
            if curr is root and curr.next[c] is prev:
                # The last created node hangs directly under the root.
                prev.link = root
            else:
                # Stopped at a node that already had edge `c`: link to that child.
                prev.link = curr.next[c]

            deepest = deepest.next[c]


In [41]:
# Demo: display the Trie built for "ababcd" with the terminator "$" appended.
Trie("ababcd"+"$")


	┕━━━━ a
		┕━━━━ b
			┕━━━━ a
				┕━━━━ b
					┕━━━━ c
						┕━━━━ d
							┕━━━━ $
			┕━━━━ c
				┕━━━━ d
					┕━━━━ $
	┕━━━━ b
		┕━━━━ a
			┕━━━━ b
				┕━━━━ c
					┕━━━━ d
						┕━━━━ $
		┕━━━━ c
			┕━━━━ d
				┕━━━━ $
	┕━━━━ c
		┕━━━━ d
			┕━━━━ $
	┕━━━━ d
		┕━━━━ $
	┕━━━━ $

In [39]:
# Demo: display the OnlineTrie built for "ababcd" with the terminator "$" appended.
OnlineTrie("ababcd"+"$")


	┕━━━━ a
		┕━━━━ b
			┕━━━━ a
				┕━━━━ b
					┕━━━━ c
						┕━━━━ d
							┕━━━━ $
			┕━━━━ c
				┕━━━━ d
					┕━━━━ $
	┕━━━━ b
		┕━━━━ a
			┕━━━━ b
				┕━━━━ c
					┕━━━━ d
						┕━━━━ $
		┕━━━━ c
			┕━━━━ d
				┕━━━━ $
	┕━━━━ c
		┕━━━━ d
			┕━━━━ $
	┕━━━━ d
		┕━━━━ $
	┕━━━━ $

In [5]:
from dataclasses import dataclass


class CompressedNode:
    """Node of a compressed trie; its label is the index range ``[l, r)``."""

    def __init__(self, l, r, parent=None):
        # Half-open range of indices describing this node's label.
        self.l: int = l
        self.r: int = r

        # Structural pointers: parent node, optional link, and children by key.
        self.parent: CompressedNode = parent
        self.link: CompressedNode = None
        self.next: dict[str, CompressedNode] = dict()

    def switch(self, ch):
        """Return the child stored under ``ch``, or ``None`` when there is none."""
        return self.next.get(ch)

    def len(self):
        """Return the label length, ``r - l``."""
        length = self.r - self.l
        return length


@dataclass
class State:
    """A position in the compressed trie: a node plus an offset into its label."""

    # Node whose label the position lies on; the trie builder uses None here
    # to signal that no matching position exists.
    node: CompressedNode
    # Offset into the node's label; compared against node.len() by the builder
    # (pos == node.len() means the end of the label has been reached).
    pos: int


class CompressedTrie:
    """Compressed suffix trie of ``string``.

    Each node's label is stored as the half-open index range ``[l, r)`` into
    ``string`` instead of a copied substring. The tree is built online, one
    text position at a time, using ``link`` pointers between nodes
    (NOTE(review): this appears to follow Ukkonen's construction - confirm).
    """

    def __init__(self, string):
        self.root = CompressedNode(0, 0)
        self.string = string
        self.__build_trie(string)

    def __build_trie(self, string):
        """Extend the tree with every position of ``string`` in order."""
        n: int = len(string)
        root: CompressedNode = self.root
        # Current position in the tree; carried over between text positions.
        state: State = State(root, 0)

        def goto(st: State, l: int, r: int) -> State:
            """Walk down from ``st`` along ``string[l:r]``.

            Returns the reached position, or a State whose ``node`` is None
            when the walk cannot continue. Note: ``st.pos`` may be updated in
            place on the State object that was passed in.
            """
            while l < r:
                if st.pos == st.node.len():
                    # At the end of the current label: move to the child for string[l].
                    st = State(st.node.switch(string[l]), 0)
                    if st.node is None:
                        return st
                else:
                    # Inside a label: the next label character must match.
                    if string[st.node.l + st.pos] != string[l]:
                        return State(None, -1)
                    # The remaining text ends inside this label.
                    if r - l < st.node.len() - st.pos:
                        return State(st.node, st.pos + r - l)

                    # Skip the rest of this label in one step.
                    l += st.node.len() - st.pos
                    st.pos = st.node.len()

            return st

        def split(st: State) -> CompressedNode:
            """Return a node ending exactly at ``st``, splitting a label if needed."""
            if st.pos == st.node.len():
                return st.node
            if st.pos == 0:
                return st.node.parent

            v = st.node
            # New inner node takes the first `st.pos` characters of v's label ...
            new_node = CompressedNode(v.l, v.l + st.pos, v.parent)
            v.parent.next[string[v.l]] = new_node
            # ... and v keeps the remainder as a child of the new node.
            new_node.next[string[v.l + st.pos]] = st.node
            st.node.parent = new_node
            st.node.l += st.pos

            return new_node

        def get_link(v: CompressedNode) -> CompressedNode:
            """Return ``v.link``, computing and caching it on first use."""
            if v.link is not None:
                return v.link
            if v.parent is None:
                # Only the root has no parent; it links to itself implicitly.
                return root

            to = get_link(v.parent)
            # Re-read v's label from the parent's link target; when the parent
            # is the root the first label character is dropped.
            v.link = split(
                goto(State(to, to.len()), v.l + (1 if v.parent is root else 0), v.r)
            )

            return v.link

        for pos in range(n):
            while True:
                new_state: State = goto(state, pos, pos + 1)
                if new_state.node is not None:
                    # string[pos] is already present from the current position.
                    state = new_state
                    break

                # Otherwise branch off here with a new leaf labelled string[pos:n].
                mid = split(state)
                leaf = CompressedNode(pos, n, mid)
                mid.next[string[pos]] = leaf

                # Continue from the link target of the branching node.
                state.node = get_link(mid)
                state.pos = state.node.len()
                if mid is root:
                    break

    def __repr__(self) -> str:
        """Return one line per node (its label) in pre-order, indented by depth.

        Uses an explicit stack rather than recursion so that deep trees
        (e.g. for highly repetitive input) do not hit the recursion limit.
        """
        lines = []
        stack = [(self.root, 0)]

        while stack:
            node, level = stack.pop()
            marker = "" if level == 0 else "┕━━━━ "
            lines.append("\t" * level + marker + self.string[node.l : node.r] + "\n")

            # Reverse so children are printed in insertion order.
            for child in reversed(list(node.next.values())):
                stack.append((child, level + 1))

        return "".join(lines)


In [34]:
# Demo: display the CompressedTrie built for "ababcd" with the terminator "$" appended.
CompressedTrie("ababcd"+"$")


	┕━━━━ ab
		┕━━━━ abcd$
		┕━━━━ cd$
	┕━━━━ b
		┕━━━━ abcd$
		┕━━━━ cd$
	┕━━━━ cd$
	┕━━━━ d$
	┕━━━━ $

In [38]:
from time import perf_counter

# Read the whole benchmark text into memory.
# NOTE(review): no explicit encoding is passed to open(), so the platform
# default is used - confirm the file's encoding.
with open("1997_714_head.txt", "r") as file:
    string = file.read()


# Time only the CompressedTrie construction (the file read is excluded).
s = perf_counter()
t = CompressedTrie(string)
e = perf_counter()
# Prints the input length, the elapsed seconds rounded to 5 decimals, and "s".
print(len(string), round(e - s, 5), "s")


2538 0.00994 s
