In [None]:
# ========================================
# NLP Assignment-2 : Prefix & Suffix Tries
# ========================================

# ---------- Trie Node ----------
class TrieNode:
    """One node of a character trie.

    ``__slots__`` is declared because a trie built from a word list creates
    one node per distinct character path, so dropping the per-instance
    ``__dict__`` noticeably reduces memory use.
    """

    __slots__ = ("children", "is_end", "freq")

    def __init__(self):
        # Maps a single character to the child node reached through it.
        self.children = {}
        # True once some inserted word ends exactly at this node.
        self.is_end = False
        # Number of insertions whose character path went through this node
        # (repeated insertions of the same word are counted each time).
        self.freq = 0


# ---------- Prefix Trie ----------
class PrefixTrie:
    """Trie keyed on characters read left-to-right.

    Words that share a beginning share a path from the root, so the deepest
    point where that path forks is used as the stem/suffix boundary.
    """

    def __init__(self):
        self.root = TrieNode()

    def insert(self, word: str):
        """Add ``word`` to the trie, one node per character, left-to-right.

        Every node on the word's path has its ``freq`` incremented and the
        final node is flagged as a word end.
        """
        node = self.root
        for ch in word:
            child = node.children.get(ch)
            if child is None:
                child = node.children[ch] = TrieNode()
            child.freq += 1
            node = child
        node.is_end = True

    def find_stem_suffix(self, word: str):
        """Split ``word`` at the deepest fork found along its path.

        A node counts as a fork when more than one character continues from
        it, or when an inserted word ends there while a longer word continues
        past it (the end of a word is itself an alternative continuation).
        Without the second condition, ``cat``/``cats`` would yield an empty
        stem because no node on that path has two children.

        Returns:
            A ``(stem, suffix)`` tuple with ``stem + suffix == word``. If no
            fork is found, ``stem`` is empty and ``suffix`` is the whole word.
        """
        node = self.root
        split_at = 0

        for depth, ch in enumerate(word, start=1):
            node = node.children.get(ch)
            if node is None:
                # The rest of the word was never inserted; stop walking.
                break
            if len(node.children) > 1 or (node.is_end and node.children):
                split_at = depth

        return word[:split_at], word[split_at:]


# ---------- Suffix Trie ----------
class SuffixTrie:
    """Trie keyed on characters read right-to-left.

    Words that share an ending share a path from the root, so the deepest
    point where that path forks is used as the stem/suffix boundary.
    """

    def __init__(self):
        self.root = TrieNode()

    def insert(self, word: str):
        """Add ``word`` to the trie, one node per character, right-to-left.

        Every node on the reversed word's path has its ``freq`` incremented
        and the final node is flagged as a word end.
        """
        node = self.root
        for ch in reversed(word):
            child = node.children.get(ch)
            if child is None:
                child = node.children[ch] = TrieNode()
            child.freq += 1
            node = child
        node.is_end = True

    def find_stem_suffix(self, word: str):
        """Split ``word`` at the deepest fork found walking from its end.

        A node counts as a fork when more than one character continues from
        it, or when an inserted word ends there while a longer word continues
        past it (the end of a word is itself an alternative continuation).
        Without the second condition, ``ship``/``friendship`` alone would
        yield no split because no node on that path has two children.

        Returns:
            A ``(stem, suffix)`` tuple with ``stem + suffix == word``. If no
            fork is found, ``stem`` is the whole word and ``suffix`` is empty.
        """
        node = self.root
        suffix_len = 0

        # Iterate the reversed view directly; no intermediate list is needed.
        for depth, ch in enumerate(reversed(word), start=1):
            node = node.children.get(ch)
            if node is None:
                # The rest of the word was never inserted; stop walking.
                break
            if len(node.children) > 1 or (node.is_end and node.children):
                suffix_len = depth

        # Convert the length measured from the end into an index from the start.
        cut = len(word) - suffix_len
        return word[:cut], word[cut:]


# ---------- Main Execution ----------
def main(path="brown_nouns.txt", limit=30):
    """Build both tries from a word list and print their stem+suffix splits.

    Args:
        path: Text file with one word per line. Blank lines are skipped and
            words are lower-cased.
        limit: Number of leading words to print splits for. The original
            loop was annotated "show first 30 words" but iterated the whole
            list; the default now matches that comment. Pass ``None`` to
            print every word.
    """
    # Load words dataset (strip each line once, drop empties).
    with open(path, "r", encoding="utf-8") as f:
        words = [w.lower() for w in map(str.strip, f) if w]

    # Build both tries from the full word list in a single pass.
    prefix_trie = PrefixTrie()
    suffix_trie = SuffixTrie()
    for w in words:
        prefix_trie.insert(w)
        suffix_trie.insert(w)

    # Compare results
    print("=== Word Stemming Analysis ===\n")
    for w in words[:limit]:  # slicing with None keeps the whole list
        stem_p, suf_p = prefix_trie.find_stem_suffix(w)
        stem_s, suf_s = suffix_trie.find_stem_suffix(w)

        print(f"{w} (Prefix Trie) → {stem_p}+{suf_p}")
        print(f"{w} (Suffix Trie) → {stem_s}+{suf_s}\n")


# Run the analysis only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()


[1;30;43mStreaming output truncated to the last 5000 lines.[0m
couple (Suffix Trie) → cou+ple

modifiers (Prefix Trie) → modifi+ers
modifiers (Suffix Trie) → mod+ifiers

headlinese (Prefix Trie) → headli+nese
headlinese (Suffix Trie) → headli+nese

journalism (Prefix Trie) → journalis+m
journalism (Suffix Trie) → jou+rnalism

variety (Prefix Trie) → variet+y
variety (Suffix Trie) → va+riety

ambiguities (Prefix Trie) → ambiguit+ies
ambiguities (Suffix Trie) → ambig+uities

matter (Prefix Trie) → matt+er
matter (Suffix Trie) → m+atter

newspaper (Prefix Trie) → newspaper+
newspaper (Suffix Trie) → news+paper

romance (Prefix Trie) → romance+
romance (Suffix Trie) → ro+mance

couple (Prefix Trie) → couple+
couple (Suffix Trie) → cou+ple

friendship (Prefix Trie) → friend+ship
friendship (Suffix Trie) → frien+dship

schooldays (Prefix Trie) → school+days
schooldays (Suffix Trie) → school+days

item (Prefix Trie) → item+
item (Suffix Trie) → i+tem

letters (Prefix Trie) → letter+s
letter

Prefix Trie → insert uses characters left-to-right.

Suffix Trie → insert uses characters right-to-left (reversed word).


## Final Report / Analysis

Prefix Trie:

- Captures common roots/prefixes.

- Works well when the stem is at the start of the word (e.g., English inflected forms such as go → goes, going).

Suffix Trie:

- Captures common endings/suffixes.

- Works better for English since most morphology is suffix-based (-s, -es, -ing, -ed).

- Example: cats, dogs, bikes → suffix "s" is easily detected.

Conclusion:

- Both tries can split words into stem + suffix.

- However, the Suffix Trie performs better at stemming because English mainly modifies words at the end (plural, tense, derivation).

- The Prefix Trie may mistakenly split too early in some cases (e.g., players → play+ers instead of player+s).

- Therefore, for English stemming → Suffix Trie is more effective.