In [10]:
import re
import random
from graphviz import Digraph

# Read noun list and keep only alphabetic words
with open("brown_nouns.txt") as f:
    nouns = sorted(
        set(
            w.strip().lower()
            for w in f
            if w.strip() and re.match(r'^[a-zA-Z]+$', w.strip())
        )
    )

# Irregular plural mapping
irregular_plurals = {
    "men": "man",
    "women": "woman",
    "children": "child",
    "teeth": "tooth",
    "feet": "foot",
    "mice": "mouse",
    "geese": "goose",
    "oxen": "ox",
    "lice": "louse",
    "data": "datum",
    "indices": "index",
    "appendices": "appendix",
    "criteria": "criterion",
    "phenomena": "phenomenon",
    "buses": "bus",
    "bus": "bus"
}

# Regular plural → singular
def singularize(word):
    if word.endswith("ies") and len(word) > 3:
        return word[:-3] + "y"
    elif word.endswith("es") and len(word) > 2:
        return word[:-2]
    elif word.endswith("s") and len(word) > 1:
        return word[:-1]
    return word


In [11]:
results = []

for word in nouns:
    if word in irregular_plurals:  # irregular plural form
        root = irregular_plurals[word]
        results.append(f"{word} -> {root}+N+PL")
    elif word in irregular_plurals.values():  # irregular singular form
        plural = [pl for pl, sg in irregular_plurals.items() if sg == word][0]
        results.append(f"{word} -> {word}+N+SG")
        results.append(f"{plural} -> {word}+N+PL")
    else:
        root = singularize(word)
        if root != word:  # regular plural
            results.append(f"{word} -> {root}+N+PL")
        else:  # singular form
            results.append(f"{word} -> {word}+N+SG")
            # Generate plural using rules
            if word.endswith("y") and len(word) > 1 and word[-2] not in "aeiou":
                plural = word[:-1] + "ies"
            elif word.endswith(("s", "x", "z", "ch", "sh")):
                plural = word + "es"
            else:
                plural = word + "s"
            results.append(f"{plural} -> {word}+N+PL")

# Save results
with open("output2(2).txt", "w") as f:
    f.write("\n".join(results))

print(f" output.txt saved with {len(results)} entries")


 output.txt saved with 28262 entries


In [12]:
sample_words = random.sample(nouns, 10)

transitions = {}
accept_states = {}
edge_colors = {}
state_counter = 1

for word in sample_words:
    current_state = 0
    for i, ch in enumerate(word):
        is_last = (i == len(word) - 1)
        label = ch
        color = "black"

        if is_last:
            root = singularize(word)
            if root != word:
                if word.endswith("ies"):
                    label = f"{ch} / replace ies→y, +N+PL"
                    color = "blue"
                elif word.endswith("es"):
                    label = f"{ch} / remove es, +N+PL"
                    color = "red"
                elif word.endswith("s"):
                    label = f"{ch} / remove s, +N+PL"
                    color = "green"
            else:
                label = f"{ch} / +N+SG"

        key = (current_state, label)
        if key not in transitions:
            transitions[key] = state_counter
            edge_colors[key] = color
            current_state = state_counter
            state_counter += 1
        else:
            current_state = transitions[key]
    accept_states[current_state] = True


In [13]:
dot = Digraph(name="lab2(2)dfa", format='png')
dot.attr(rankdir='LR', size='12,6')

# State names q0, q1...
state_names = {num: f"q{num}" for num in set([0] + list(transitions.values()))}

# Draw states
for num, name in state_names.items():
    if num in accept_states:
        dot.node(name, shape='doublecircle')
    else:
        dot.node(name, shape='circle')

# Start arrow
dot.node('', shape='plaintext', label='')
dot.edge('', 'q0')

# Draw edges
for (state, label), next_state in transitions.items():
    dot.edge(state_names[state], state_names[next_state], label=label, color=edge_colors.get((state, label), "black"))

dot.render("lab(2)dfa", cleanup=True)
print(" DFA saved as lab2(2)dfa.png")


 DFA saved as lab2(2)dfa.png
