In [None]:
! pip install automathon

Collecting automathon
  Downloading automathon-0.0.15-py3-none-any.whl.metadata (6.8 kB)
Collecting graphviz==0.16 (from automathon)
  Downloading graphviz-0.16-py2.py3-none-any.whl.metadata (7.1 kB)
Downloading automathon-0.0.15-py3-none-any.whl (13 kB)
Downloading graphviz-0.16-py2.py3-none-any.whl (19 kB)
Installing collected packages: graphviz, automathon
  Attempting uninstall: graphviz
    Found existing installation: graphviz 0.21
    Uninstalling graphviz-0.21:
      Successfully uninstalled graphviz-0.21
Successfully installed automathon-0.0.15 graphviz-0.16


In [None]:
# Install automathon first: pip install automathon

from automathon import DFA

# DFA definition
# Language: non-empty strings built only from the lowercase letters a-z.
# States: q0 (start, not accepting) and q1 (accepting).
# Transitions: any lowercase letter moves q0 -> q1 and keeps q1 -> q1.
# The transition table has no explicit dead state; symbols outside a-z are
# filtered out by check_word() before the word is handed to the DFA.

# Start and final states
initial_state = 'q0'
final_states = {'q1'}
states = {initial_state} | final_states

# Alphabet: the 26 lowercase ASCII letters
input_symbols = {chr(code) for code in range(ord('a'), ord('z') + 1)}

# Every state maps every letter to q1 (each state gets its own dict)
transitions = {
    state: dict.fromkeys(input_symbols, 'q1')
    for state in ('q0', 'q1')
}

# Create DFA
dfa = DFA(states, input_symbols, transitions, initial_state, final_states)

# Function to check a word
def check_word(word):
    """Classify ``word`` as "Accepted" or "Not Accepted".

    A word is accepted only when it is non-empty, every character belongs to
    ``input_symbols`` and the module-level ``dfa`` accepts it. The alphabet
    check runs first so the DFA is never fed a symbol it has no transition for.
    """
    only_known_symbols = bool(word) and all(ch in input_symbols for ch in word)
    if only_known_symbols and dfa.accept(word):
        return "Accepted"
    return "Not Accepted"

# Test cases: lowercase-only words first, then words containing a digit,
# an uppercase letter, an underscore or a leading space.
words = [
    "cat", "dog", "a", "zebra",
    "dog1", "1dog",
    "DogHouse", "Dog_house", " cats",
]
for w in words:
    verdict = check_word(w)
    print(f"{w!r} -> {verdict}")

# Visualization: render the DFA diagram to disk.
# NOTE(review): the name already carries ".png"; confirm the file name that
# automathon/graphviz actually writes, since graphviz may append its own suffix.
dfa.view("dfa_diagram.png")

'cat' -> Accepted
'dog' -> Accepted
'a' -> Accepted
'zebra' -> Accepted
'dog1' -> Not Accepted
'1dog' -> Not Accepted
'DogHouse' -> Not Accepted
'Dog_house' -> Not Accepted
' cats' -> Not Accepted


In [6]:
class MorphologicalFST:
    """Rule-based analyzer for English noun number (singular / plural).

    Each word is mapped to a string of the form ``"<word> = <root>+N+PL"`` or
    ``"<word> = <word>+N+SG"``; unusable input yields ``"<word>: Invalid Word"``.
    Plural detection uses, in order: a table of irregular plurals, the
    E-insertion rule (``fox -> foxes``), the Y-replacement rule
    (``try -> tries``) and the plain S-addition rule (``bag -> bags``).

    The rules are purely orthographic, so some words are analysed incorrectly
    (for example "horses" is split as "hors" + "es").
    """

    # Stem endings after which the plural is formed with "es".
    _SIBILANT_ENDINGS = ('s', 'x', 'z', 'ch', 'sh')

    # Singular words that end in "s"; they must not be split into stem + "s".
    # Kept as a frozenset so membership tests are O(1) and the collection is
    # built once instead of on every call.
    _NATURALLY_S_ENDING = frozenset({
        'lens', 'bus', 'gas', 'glass', 'class', 'mass', 'pass', 'bass',
        'grass', 'dress', 'stress', 'press', 'chess', 'mess', 'less',
        'business', 'princess', 'process', 'success', 'access', 'address'
    })

    def __init__(self):
        # State names of the conceptual transducer. They are informational
        # only: none of the methods below read this set.
        self.states = {
            'START',
            'ROOT',
            'S_END',
            'Z_END',
            'X_END',
            'CH_END',
            'SH_END',
            'Y_END',
            'ACCEPT',
            'REJECT'
        }

        # Irregular plural -> singular root.
        self.irregular_patterns = {
            'children': 'child',
            'feet': 'foot',
            'teeth': 'tooth',
            'men': 'man',
            'women': 'woman',
            'mice': 'mouse',
            'geese': 'goose',
            'people': 'person'
        }

        # Words ending in "es" that the regular rules must leave alone;
        # analyze_word() reports them as singular.
        self.non_plural_es_words = {
            'analyses', 'bases', 'crises', 'diagnoses', 'hypotheses',
            'oases', 'parentheses', 'syntheses', 'theses'
        }

    def analyze_word(self, word):
        """Return the morphological analysis of a single word.

        Args:
            word: the surface form. Surrounding whitespace is ignored and the
                word is lower-cased before analysis.

        Returns:
            ``"<word> = <root>+N+PL"`` for a recognised plural,
            ``"<word> = <word>+N+SG"`` otherwise, or
            ``"<word>: Invalid Word"`` (echoing the input unchanged) when the
            input is empty or contains anything other than letters.
        """
        if not word:
            return f"{word}: Invalid Word"

        # Strip *before* validating: whitespace is not alphabetic, so checking
        # isalpha() on the raw input would reject padded words and the strip
        # would never have any effect.
        stripped = word.strip()
        if not stripped.isalpha():
            return f"{word}: Invalid Word"

        word = stripped.lower()

        # Irregular plurals take priority over the regular suffix rules.
        if word in self.irregular_patterns:
            root = self.irregular_patterns[word]
            return f"{word} = {root}+N+PL"

        analysis = self.analyze_plural_morphology(word)
        if analysis:
            return analysis
        return f"{word} = {word}+N+SG"

    def analyze_plural_morphology(self, word):
        """Apply the regular plural rules to a lower-case alphabetic word.

        Args:
            word: normalised (lower-case, letters only) surface form.

        Returns:
            ``"<word> = <root>+N+PL"`` when a rule matches, otherwise ``None``
            (the caller then treats the word as singular).
        """
        # Rule 1 - E insertion: stem ending in s/x/z/ch/sh + "es".
        if word.endswith('es') and len(word) > 2:
            if word in self.non_plural_es_words:
                return None

            stem = word[:-2]
            if stem.endswith(self._SIBILANT_ENDINGS):
                return f"{word} = {stem}+N+PL"

        # Rule 2 - Y replacement: "...ies" -> "...y" (words in "eies" excluded).
        if word.endswith('ies') and len(word) > 3 and not word.endswith('eies'):
            return f"{word} = {word[:-3]}y+N+PL"

        # Rule 3 - S addition. Words ending in "es" that reach this point have
        # a non-sibilant stem, so the "e" belongs to the root ("names" ->
        # "name"); skipping them here would misreport them as singular.
        if word.endswith('s') and len(word) > 1:
            stem = word[:-1]
            if not self.is_naturally_s_ending(stem, word):
                return f"{word} = {stem}+N+PL"

        return None

    def is_naturally_s_ending(self, root, word):
        """Tell whether ``word`` should not be split into ``root`` + "s".

        Args:
            root: ``word`` without its final "s".
            word: the full lower-case word.

        Returns:
            ``True`` if ``word`` is a known singular ending in "s" or if
            ``root`` is shorter than two characters; ``False`` otherwise.
        """
        return word in self._NATURALLY_S_ENDING or len(root) < 2

    def process_corpus(self, filename):
        """Analyse every whitespace-separated token of a UTF-8 text file.

        Non-alphabetic characters are removed from each token before analysis;
        tokens that become empty are skipped.

        Args:
            filename: path of the corpus file.

        Returns:
            A list with one analysis string per token, in file order. If the
            file does not exist an error is printed and ``[]`` is returned.
        """
        analyses = []
        try:
            with open(filename, 'r', encoding='utf-8') as file:
                for line in file:
                    for token in line.strip().split():
                        clean_word = ''.join(c for c in token if c.isalpha())
                        if clean_word:
                            analyses.append(self.analyze_word(clean_word))

        except FileNotFoundError:
            print(f"Error: File '{filename}' not found.")
            return []

        return analyses

def main():
    """Print analyses for a fixed sample list, then analyse the noun corpus.

    The sample words are grouped by the rule they exercise and printed one
    analysis per line. Afterwards ``brown_nouns.txt`` is processed and every
    resulting analysis is written, one per line, to ``output.txt``.
    """
    analyzer = MorphologicalFST()

    sample_groups = (
        # Rule 1: E insertion (stem + es)
        ('foxes', 'watches', 'boxes', 'glasses', 'wishes'),
        # Rule 2: Y replacement (y -> ies)
        ('tries', 'flies', 'babies', 'cities'),
        # Rule 3: S addition (stem + s)
        ('bags', 'cats', 'dogs', 'books'),
        # Singular words
        ('investigation', 'primary', 'election', 'evidence', 'jury', 'manner'),
        # Edge cases: a plain singular and two singulars that end in "s"
        ('fox', 'lens', 'bus'),
        # Malformed plural; the analyzer does not reject it
        ('foxs',),
    )

    for group in sample_groups:
        for sample in group:
            print(analyzer.analyze_word(sample))

    corpus_analyses = analyzer.process_corpus("brown_nouns.txt")

    with open("output.txt", "w", encoding='utf-8') as out:
        out.writelines(entry + "\n" for entry in corpus_analyses)

# Run the demo when this code is executed directly (as opposed to imported).
if __name__ == "__main__":
    main()

foxes = fox+N+PL
watches = watch+N+PL
boxes = box+N+PL
glasses = glass+N+PL
wishes = wish+N+PL
tries = try+N+PL
flies = fly+N+PL
babies = baby+N+PL
cities = city+N+PL
bags = bag+N+PL
cats = cat+N+PL
dogs = dog+N+PL
books = book+N+PL
investigation = investigation+N+SG
primary = primary+N+SG
election = election+N+SG
evidence = evidence+N+SG
jury = jury+N+SG
manner = manner+N+SG
fox = fox+N+SG
lens = lens+N+SG
bus = bus+N+SG
foxs = fox+N+PL


In [2]:
import string

# Load all nouns from the brown_nouns.txt file: every non-blank line becomes
# one entry, with surrounding whitespace removed. The encoding is given
# explicitly so the result does not depend on the platform's default encoding.
# NOTE(review): the absolute /content/ path looks Colab-specific - confirm
# before running this cell elsewhere.
with open("/content/brown_nouns.txt", encoding="utf-8") as f:
    nouns = [line.strip() for line in f if line.strip()]

# Utility function: check if letter is vowel
def is_vowel(ch):
    """Return True if ``ch`` is exactly one lowercase vowel (a, e, i, o, u).

    The length check matters: ``in`` on a string is a substring test, so
    without it the empty string and runs such as "ae" would count as vowels.
    Uppercase letters are not treated as vowels.
    """
    return len(ch) == 1 and ch in "aeiou"

# FST simulation
def analyze_word(word):
    """Analyse a noun with the three regular English plural spelling rules.

    Args:
        word: the surface form; it must consist of lowercase letters only.

    Returns:
        ``"<word>+N+SG"`` when the word does not end in "s" (or is the single
        letter "s"), ``"<root>+N+PL"`` when a plural rule applies, and
        ``"Invalid Word"`` when the word contains other characters or is
        spelled with the wrong plural suffix (e.g. "foxs", "trys").
    """
    sibilant_endings = ("s", "x", "z", "ch", "sh")

    # Reject if contains characters outside lowercase letters
    if not word.isalpha() or not word.islower():
        return "Invalid Word"

    # Singular: no plural suffix. A lone "s" has no root left to pluralise,
    # so it is treated as singular instead of yielding an empty root.
    if not word.endswith("s") or len(word) == 1:
        return f"{word}+N+SG"

    if word.endswith("ies"):
        # Rule: Y replacement: consonant + y -> ies
        if len(word) >= 4 and not is_vowel(word[-4]):
            root = word[:-3] + "y"
            return f"{root}+N+PL"
        return "Invalid Word"

    if word.endswith("es"):
        # Rule: E insertion: after s, z, x, ch, sh
        root = word[:-2]
        if root.endswith(sibilant_endings):
            return f"{root}+N+PL"
        # Otherwise the "e" belongs to the root ("names" = "name" + "s"):
        # fall through to the simple S rule instead of rejecting the word.

    # Rule: Simple S addition
    root = word[:-1]
    # Reject if it should have used 'es' or 'ies'
    if root.endswith(sibilant_endings):
        return "Invalid Word"
    if len(root) >= 2 and root.endswith("y") and not is_vowel(root[-2]):
        return "Invalid Word"
    return f"{root}+N+PL"

# Test with given examples
examples = ["cat", "dog", "a", "zebra", "foxes", "fox", "foxs",
            "tries", "try", "trys", "toies", "bags"]

for w in examples:
    print(f"{w} -> {analyze_word(w)}")

# Process all nouns from the corpus. Keying by word means a noun that occurs
# several times in the corpus is analysed and stored once.
results = {w: analyze_word(w) for w in nouns}

# Save output for inspection. The encoding is explicit so the file does not
# depend on the platform's default encoding.
with open("noun_analysis.txt", "w", encoding="utf-8") as f:
    for w, analysis in results.items():
        f.write(f"{w} = {analysis}\n")

print("\nAnalysis complete. Results saved to noun_analysis.txt")


cat -> cat+N+SG
dog -> dog+N+SG
a -> a+N+SG
zebra -> zebra+N+SG
foxes -> fox+N+PL
fox -> fox+N+SG
foxs -> Invalid Word
tries -> try+N+PL
try -> try+N+SG
trys -> Invalid Word
toies -> Invalid Word
bags -> bag+N+PL

Analysis complete. Results saved to noun_analysis.txt


In [None]:
from graphviz import Digraph

# Create Graphviz Digraph (left-to-right layout, PNG output)
fst = Digraph("PluralFST", format="png")
fst.attr(rankdir="LR", size="8")

# States
states = ["q0", "q_s", "q_es", "q_ies", "q_root_SG", "q_root_PL", "q_invalid"]
accept_states = ["q_root_SG", "q_root_PL"]

# A node attribute statement sets the default look of the nodes declared after
# it, so the style is chosen immediately before each node is added.
# NOTE(review): q_invalid is drawn as a doublecircle although it is not listed
# in accept_states - confirm this is the intended notation.
for s in states:
    if s in accept_states:
        node_shape, node_fill = "doublecircle", "lightgreen"
    elif s == "q_invalid":
        node_shape, node_fill = "doublecircle", "red"
    else:
        node_shape, node_fill = "circle", "lightblue"
    fst.attr("node", shape=node_shape, style="filled", fillcolor=node_fill)
    fst.node(s)

# Transitions as (source, target, label), added in the order listed
fst_edges = [
    # Start transitions
    ("q0", "q_s", "s"),
    ("q0", "q_root_SG", "a..z except s / emit root+N+SG"),
    # q_s transitions
    ("q_s", "q_es", "e"),
    ("q_s", "q_root_PL", "letter not requiring es or ies / emit root+N+PL"),
    ("q_s", "q_invalid", "x,z,s,ch,sh or y preceded by consonant"),
    # q_es transitions
    ("q_es", "q_ies", "i"),
    ("q_es", "q_root_PL", "x,z,s,ch,sh / emit root+N+PL"),
    ("q_es", "q_invalid", "otherwise"),
    # q_ies transitions
    ("q_ies", "q_root_PL", "consonant / root ends with y"),
    ("q_ies", "q_invalid", "vowel before y"),
    # Root processing loops
    ("q_root_SG", "q_root_SG", "a..z / accumulate root"),
    ("q_root_PL", "q_root_PL", "a..z / accumulate root"),
    # Invalid loops
    ("q_invalid", "q_invalid", "a..z"),
]
for edge_src, edge_dst, edge_label in fst_edges:
    fst.edge(edge_src, edge_dst, label=edge_label)

# Render next to the notebook (relative path) and remove the intermediate
# DOT source afterwards
output_path = "plural_fst"
fst.render(output_path, cleanup=True)
print(f"FST diagram saved to {output_path}.png")


FST diagram saved to plural_fst.png
