# String Algorithms and Tries
Objectives:
- KMP and Rabin-Karp pattern search
- Trie for prefix search + autocomplete
- Timing comparisons on varied patterns

In [None]:
# Starter imports
import random
from pathlib import Path

# TODO: add KMP/Rabin-Karp and trie implementations with timing

In [None]:
def kmp_prefix(pattern: str):
    """Build the KMP prefix (failure) table for ``pattern``.

    Entry ``i`` of the result is the length of the longest proper prefix of
    ``pattern[:i + 1]`` that is also a suffix of it.

    Args:
        pattern: The pattern to preprocess.

    Returns:
        A list of ints with one entry per character of ``pattern``
        (an empty list for an empty pattern).
    """
    table = [0] * len(pattern)
    border = 0  # length of the border carried over from the previous position
    for idx in range(1, len(pattern)):
        current = pattern[idx]
        # Shrink the candidate border until it can be extended by `current`
        # or there is nothing left to shrink.
        while border and pattern[border] != current:
            border = table[border - 1]
        if pattern[border] == current:
            border += 1
        table[idx] = border
    return table

def kmp_search(text: str, pattern: str):
    """Find every occurrence of ``pattern`` in ``text`` using KMP.

    Args:
        text: The string to scan.
        pattern: The string to look for.

    Returns:
        A list of start indices of all matches in increasing order,
        overlapping matches included. An empty pattern yields ``[]``.
    """
    if not pattern:
        return []

    fallback = kmp_prefix(pattern)
    pat_len = len(pattern)
    found = []
    matched = 0  # number of pattern characters currently matched
    for pos, symbol in enumerate(text):
        # On a mismatch, fall back to the longest border that may still match.
        while matched and pattern[matched] != symbol:
            matched = fallback[matched - 1]
        if pattern[matched] != symbol:
            continue
        matched += 1
        if matched == pat_len:
            found.append(pos - pat_len + 1)
            # Keep the longest border so overlapping matches are still found.
            matched = fallback[matched - 1]
    return found

def rabin_karp(text: str, pattern: str, base=256, mod=101):
    """Find every occurrence of ``pattern`` in ``text`` with a rolling hash.

    A window of ``len(pattern)`` characters slides over ``text``; its
    polynomial hash is updated incrementally and compared with the pattern's
    hash. Every hash hit is confirmed by a direct string comparison, so hash
    collisions never produce false matches.

    Args:
        text: The string to scan.
        pattern: The string to look for.
        base: Radix of the polynomial hash.
        mod: Modulus applied to the hash values.

    Returns:
        A list of start indices of all matches in increasing order. Returns
        ``[]`` when the pattern is empty or longer than the text.
    """
    text_len, pat_len = len(text), len(pattern)
    if pat_len == 0 or pat_len > text_len:
        return []

    # Weight of the window's leading character: base**(pat_len - 1) mod `mod`.
    lead_weight = pow(base, pat_len - 1, mod)

    pat_hash = 0
    window_hash = 0
    for pat_ch, text_ch in zip(pattern, text):
        pat_hash = (pat_hash * base + ord(pat_ch)) % mod
        window_hash = (window_hash * base + ord(text_ch)) % mod

    hits = []
    last_start = text_len - pat_len
    start = 0
    while True:
        if window_hash == pat_hash and text[start:start + pat_len] == pattern:
            hits.append(start)
        if start == last_start:
            break
        # Roll the window: drop text[start], shift, append text[start + pat_len].
        without_lead = window_hash - ord(text[start]) * lead_weight
        window_hash = (without_lead * base + ord(text[start + pat_len])) % mod
        start += 1
    return hits

# Sanity check: run both searchers on the same input. The cell displays a
# tuple holding the KMP result first and the Rabin-Karp result second; the
# pattern occurs once in this text, starting at index 8.
tuple(search("ababcabcababd", "ababd") for search in (kmp_search, rabin_karp))

In [None]:
class TrieNode:
    """A single node of the trie."""

    def __init__(self):
        # Maps the next character to the child node reached through it.
        self.children = {}
        # True when the path from the root to this node spells an inserted word.
        self.end = False


class Trie:
    """Prefix tree over strings with insertion and prefix autocomplete."""

    def __init__(self):
        self.root = TrieNode()

    def insert(self, word):
        """Add ``word`` to the trie, creating missing nodes along its path.

        Inserting the empty string marks the root itself as a word end.
        """
        node = self.root
        for ch in word:
            child = node.children.get(ch)
            if child is None:
                # Only allocate a node when the edge does not exist yet.
                child = node.children[ch] = TrieNode()
            node = child
        node.end = True

    def _collect(self, node, prefix, out):
        """Append to ``out`` every stored word in the subtree under ``node``.

        ``prefix`` is the string spelled by the path from the root to
        ``node``. Words are appended in pre-order, visiting children in the
        order their edges were first inserted.

        The walk uses an explicit stack rather than recursion so that very
        long words cannot exceed the interpreter's recursion limit.
        """
        stack = [(node, prefix)]
        while stack:
            current, spelled = stack.pop()
            if current.end:
                out.append(spelled)
            # Push children in reverse so the first-inserted child is popped
            # (and therefore fully explored) first.
            for ch, child in reversed(list(current.children.items())):
                stack.append((child, spelled + ch))

    def autocomplete(self, prefix):
        """Return all inserted words that start with ``prefix``.

        Args:
            prefix: The leading characters to match; ``""`` matches every
                stored word.

        Returns:
            A list of matching words (``prefix`` itself included if it was
            inserted), or ``[]`` when no stored word has that prefix.
        """
        node = self.root
        for ch in prefix:
            node = node.children.get(ch)
            if node is None:
                return []
        out = []
        self._collect(node, prefix, out)
        return out

# Demo: build a small trie from six sample words.
trie = Trie()
for word in ["car", "card", "care", "cart", "dog", "dot"]:
    trie.insert(word)
# Last expression of the cell: the inserted words that start with "car".
trie.autocomplete("car")

In [None]:
import timeit, random, string

def random_text(n=10000):
    """Return a string of ``n`` random lowercase ASCII letters.

    Each character is drawn independently with ``random.choice``, so the
    result is reproducible after ``random.seed(...)``.
    """
    alphabet = string.ascii_lowercase
    letters = [random.choice(alphabet) for _ in range(n)]
    return "".join(letters)

# Benchmark input: 20,000 random letters, with a 20-character pattern sliced
# out of the text itself so at least one match (at index 500) exists.
text = random_text(20000)
pattern = text[500:520]

# Zero-argument callables so timeit can invoke each search directly.
bench = dict(
    kmp=lambda: kmp_search(text, pattern),
    rabin_karp=lambda: rabin_karp(text, pattern),
)
# Cell output: total seconds taken by 3 runs of each algorithm.
{name: timeit.timeit(search, number=3) for name, search in bench.items()}

Notes:
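- KMP guarantees O(n + m) worst-case search. Rabin-Karp runs in O(n + m) on average but can degrade to O(n·m) when hash collisions are frequent; its rolling hash pays off when searching for many patterns at once (hash every pattern, then scan the text a single time).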
- Tries trade space for fast prefix queries; use when you need autocomplete or prefix stats.