<a href="https://colab.research.google.com/github/MuhammadShavaiz/AI_learning/blob/main/NLP_Morphological_Analysis_and_Finite_State_Automata.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

#  **Morphological Analysis and Finite State Automata**
**This notebook explores the transformation of English nouns and verbs into their plural and past tense forms using a finite state automaton (FSA). It applies regular and irregular morphological rules while categorizing the transformed words. Additionally, the notebook incorporates spaCy for detailed derivational morphology analysis, identifying word roots, suffixes, and parts of speech. The semantic role labeling further enriches the understanding of linguistic structure and meaning, highlighting the dynamic nature of word forms and their functions in sentences.**

## Noun Pluralization and Verb Tense Transformation

This code transforms English nouns into plurals and verbs into past tense, addressing regular and irregular cases. It processes lists based on word endings and predefined irregular forms, categorizing the results for clarity.








In [None]:
# Define the regular noun and verb rules
regular_rules = {
    'nouns': {
        's': lambda word: word + 's',  # Default rule for regular plurals
        'es': lambda word: word + 'es',  # Rule for nouns ending in s, x, z, sh, or ch
        'ies': lambda word: word[:-1] + 'ies',  # Rule for nouns ending in consonant + y
        'o_es': lambda word: word + 'es'
    },
    'verbs': {
        'ed': lambda word: word + 'ed',  # Default rule for regular past tense
        'ied': lambda word: word[:-1] + 'ied',  # Rule for verbs ending in consonant + y
        'd': lambda word: word + 'd'  # Rule for verbs ending in e
    }
}

# Define irregular plural and past tense forms
irregular_noun_forms = {
    'child': 'children',
    'foot': 'feet',
    'mouse': 'mice',
    'man': 'men',
    'woman':'women',
    'lead':'leaves',
    'goose':'geese',
    'tooth':'teeth',
    'cactus':'cacti',
    'leaf':'leaves'
}

irregular_verb_forms = {
    'go': 'went',
    'be': 'was',
    'leap': 'leapt',
    'have': 'had',
    'run':'ran'
}
# Nouns that take 'es' when pluralized and exceptions
nouns_ending_with_o = ['tomato', 'potato', 'hero', 'cargo', 'mango', 'volcano', 'buffalo', 'memento', 'tornado']

# Function to categorize nouns and verbs into regular/irregular forms
def categorize_words(nouns, verbs):
    regular_plurals = []
    irregular_plurals = []
    regular_past_tense = []
    irregular_past_tense = []

    # Process nouns
    for noun in nouns:
        if noun in irregular_noun_forms:
            irregular_plurals.append(irregular_noun_forms[noun])
        elif noun.endswith(('s', 'x', 'z', 'sh', 'ch')):
            irregular_plurals.append(regular_rules['nouns']['es'](noun))
        elif noun.endswith('y') and noun[-2] not in 'aeiou':
            regular_plurals.append(regular_rules['nouns']['ies'](noun))
        elif noun.endswith('o'):
            # Rule for nouns ending with 'o'
            if noun in nouns_ending_with_o:
                regular_plurals.append(regular_rules['nouns']['es'](noun))  # Take 'es' for specific nouns
            else:
                regular_plurals.append(regular_rules['nouns']['s'](noun))  # Otherwise just add 's'
        else:
            regular_plurals.append(regular_rules['nouns']['s'](noun))

    # Process verbs
    for verb in verbs:
        if verb in irregular_verb_forms:
            irregular_past_tense.append(irregular_verb_forms[verb])
        elif verb.endswith('y') and verb[-2] not in 'aeiou':
            regular_past_tense.append(regular_rules['verbs']['ied'](verb))
        elif verb.endswith('e'):
            regular_past_tense.append(regular_rules['verbs']['d'](verb))
        else:
            regular_past_tense.append(regular_rules['verbs']['ed'](verb))

    return {
        'Regular Plurals': regular_plurals,
        'Irregular Plurals': irregular_plurals,
        'Regular Past Tense': regular_past_tense,
        'Irregular Past Tense': irregular_past_tense
    }

# Sample input: a larger list of nouns and verbs
nouns = [
    'cat', 'dog', 'book', 'box', 'child', 'man', 'woman', 'car',
    'bus', 'city', 'baby', 'berry', 'lady', 'puppy', 'kitten',
    'fish', 'sheep', 'mouse', 'foot', 'leaf', 'goose', 'tooth',
    'cactus', 'lily', 'photo', 'church', 'dish',
    'class', 'family', 'party', 'key', 'toy', 'story', 'piano',
    'tomato', 'potato', 'memo', 'photo', 'cactus', 'zero'
]

verbs = [
    'walk', 'run', 'jump', 'like', 'talk', 'play', 'cry', 'try',
    'study', 'work', 'visit', 'enjoy', 'ask', 'watch', 'listen',
    'hurry', 'decide', 'create', 'paint', 'smile', 'call',
    'help', 'love', 'fix', 'climb','have', 'leap', 'dance', 'arrive',
    'cry', 'wash', 'carry', 'fill', 'start', 'finish',
    'answer', 'believe', 'change', 'look', 'decide',
    'need', 'want', 'open', 'close', 'bake', 'jump', 'paint',
    'discover','go','be'
]

# Get the categorized results
output = categorize_words(nouns, verbs)

# Print the results
for category, words in output.items():
    print(f"{category}: {words}")


Regular Plurals: ['cats', 'dogs', 'books', 'cars', 'cities', 'babies', 'berries', 'ladies', 'puppies', 'kittens', 'sheeps', 'lilies', 'photos', 'families', 'parties', 'keys', 'toys', 'stories', 'pianos', 'tomatoes', 'potatoes', 'memos', 'photos', 'zeros']
Irregular Plurals: ['boxes', 'children', 'men', 'women', 'buses', 'fishes', 'mice', 'feet', 'leaves', 'geese', 'teeth', 'cacti', 'churches', 'dishes', 'classes', 'cacti']
Regular Past Tense: ['walked', 'jumped', 'liked', 'talked', 'played', 'cried', 'tried', 'studied', 'worked', 'visited', 'enjoyed', 'asked', 'watched', 'listened', 'hurried', 'decided', 'created', 'painted', 'smiled', 'called', 'helped', 'loved', 'fixed', 'climbed', 'danced', 'arrived', 'cried', 'washed', 'carried', 'filled', 'started', 'finished', 'answered', 'believed', 'changed', 'looked', 'decided', 'needed', 'wanted', 'opened', 'closed', 'baked', 'jumped', 'painted', 'discovered']
Irregular Past Tense: ['ran', 'had', 'leapt', 'went', 'was']


##  Word Transformation Using Finite State Automaton (FSA)
The `WordTransformerFSA` class transforms English nouns into plural forms and verbs into past tense using a finite state automaton (FSA). It applies regular rules and handles irregular forms with predefined mappings. The class determines the appropriate transformation based on the word type. Test cases showcase the functionality for both nouns and verbs.

In [None]:
class WordTransformerFSA:
    def __init__(self):
        # Define the regular noun and verb rules
        self.regular_rules = {
            'nouns': {
                's': lambda word: word + 's',  # Default rule for regular plurals
                'es': lambda word: word + 'es',  # Rule for nouns ending in s, x, z, sh, or ch
                'ies': lambda word: word[:-1] + 'ies',  # Rule for nouns ending in consonant + y
                'o_es': lambda word: word + 'es'  # For specific nouns ending with 'o'
            },
            'verbs': {
                'ed': lambda word: word + 'ed',  # Default rule for regular past tense
                'ied': lambda word: word[:-1] + 'ied',  # Rule for verbs ending in consonant + y
                'd': lambda word: word + 'd'  # Rule for verbs ending in e
            }
        }

        # Define irregular plural and past tense forms
        self.irregular_noun_forms = {
            'child': 'children',
            'foot': 'feet',
            'mouse': 'mice',
            'man': 'men',
            'woman': 'women',
            'leaf': 'leaves',
            'goose': 'geese',
            'tooth': 'teeth',
            'cactus': 'cacti'
        }

        self.irregular_verb_forms = {
            'go': 'went',
            'be': 'was',
            'leap': 'leapt',
            'have': 'had',
            'run': 'ran'
        }

        # Nouns that take 'es' when pluralized and exceptions
        self.nouns_ending_with_o = ['tomato', 'potato', 'hero', 'cargo', 'mango', 'volcano', 'buffalo', 'memento', 'tornado']

        # Define the states for the FSA
        self.states = ['singular', 'plural', 'infinitive', 'past', 'irregular_plural', 'irregular_past']
        self.final_states = ['plural', 'past', 'irregular_plural', 'irregular_past']

    def transition(self, word, word_type):
        """
        Simulates the FSA transitions based on the word type and its morphological rules.
        """
        # Start at initial state
        current_state = 'singular' if word_type == 'noun' else 'infinitive'

        # Handle nouns
        if word_type == 'noun':
            if word in self.irregular_noun_forms:
                current_state = 'irregular_plural'
            else:
                current_state = 'plural'

        # Handle verbs
        elif word_type == 'verb':
            if word in self.irregular_verb_forms:
                current_state = 'irregular_past'
            else:
                current_state = 'past'

        return current_state

    def transform(self, word, word_type):
        """
        Transforms the word based on its type (noun or verb) and returns the plural or past form.
        """
        # Determine the transformation state
        current_state = self.transition(word, word_type)

        # Apply noun transformation rules
        if word_type == 'noun':
            if current_state == 'irregular_plural':
                return self.irregular_noun_forms[word]
            elif current_state == 'plural':
                if word.endswith(('s', 'x', 'z', 'sh', 'ch')):
                    return self.regular_rules['nouns']['es'](word)
                elif word.endswith('y') and word[-2] not in 'aeiou':
                    return self.regular_rules['nouns']['ies'](word)
                elif word.endswith('o'):
                    if word in self.nouns_ending_with_o:
                        return self.regular_rules['nouns']['es'](word)
                    else:
                        return self.regular_rules['nouns']['s'](word)
                else:
                    return self.regular_rules['nouns']['s'](word)

        # Apply verb transformation rules
        elif word_type == 'verb':
            if current_state == 'irregular_past':
                return self.irregular_verb_forms[word]
            elif current_state == 'past':
                if word.endswith('y') and word[-2] not in 'aeiou':
                    return self.regular_rules['verbs']['ied'](word)
                elif word.endswith('e'):
                    return self.regular_rules['verbs']['d'](word)
                else:
                    return self.regular_rules['verbs']['ed'](word)
        # Under above conditions this will not get triggered, change the rules for adding 's' for nouns and 'ed' for verbs to return unrecognized words
        return "Word not recognized or cannot be transformed."

# Instantiate the FSA
fsa = WordTransformerFSA()

# Test cases
nouns = [
    'cat', 'dog', 'book', 'box', 'child', 'man', 'woman', 'car',
    'bus', 'city', 'baby', 'berry', 'lady', 'puppy', 'kitten',
    'fish', 'sheep', 'mouse', 'foot', 'leaf', 'goose', 'tooth',
    'cactus', 'lily', 'photo', 'church', 'dish',
    'class', 'family', 'party', 'key', 'toy', 'story', 'piano',
    'tomato', 'potato', 'memo', 'photo', 'cactus', 'zero'
]

verbs = [
    'walk', 'run', 'jump', 'like', 'talk', 'play', 'cry', 'try',
    'study', 'work', 'visit', 'enjoy', 'ask', 'watch', 'listen',
    'hurry', 'decide', 'create', 'paint', 'smile', 'call',
    'help', 'love', 'fix', 'climb','have', 'leap', 'dance', 'arrive',
    'cry', 'wash', 'carry', 'fill', 'start', 'finish',
    'answer', 'believe', 'change', 'look', 'decide',
    'need', 'want', 'open', 'close', 'bake', 'jump', 'paint',
    'discover','go','be'
]

# Test noun transformations
print("Noun Transformations:")
for noun in nouns:
    print(f"{noun} -> {fsa.transform(noun, 'noun')}")

# Test verb transformations
print("\nVerb Transformations:")
for verb in verbs:
    print(f"{verb} -> {fsa.transform(verb, 'verb')}")


Noun Transformations:
cat -> cats
dog -> dogs
book -> books
box -> boxes
child -> children
man -> men
woman -> women
car -> cars
bus -> buses
city -> cities
baby -> babies
berry -> berries
lady -> ladies
puppy -> puppies
kitten -> kittens
fish -> fishes
sheep -> sheeps
mouse -> mice
foot -> feet
leaf -> leaves
goose -> geese
tooth -> teeth
cactus -> cacti
lily -> lilies
photo -> photos
church -> churches
dish -> dishes
class -> classes
family -> families
party -> parties
key -> keys
toy -> toys
story -> stories
piano -> pianos
tomato -> tomatoes
potato -> potatoes
memo -> memos
photo -> photos
cactus -> cacti
zero -> zeros

Verb Transformations:
walk -> walked
run -> ran
jump -> jumped
like -> liked
talk -> talked
play -> played
cry -> cried
try -> tried
study -> studied
work -> worked
visit -> visited
enjoy -> enjoyed
ask -> asked
watch -> watched
listen -> listened
hurry -> hurried
decide -> decided
create -> created
paint -> painted
smile -> smiled
call -> called
help -> helped
love

## Morphology and Semantic Role Analysis
This script leverages spaCy to perform detailed analysis on sentences, focusing on derivational morphology by identifying word roots, suffixes, and their parts of speech. Additionally, it assigns semantic roles (such as action, object, and subject) to each token, providing insights into the linguistic structure and meaning of the sentence. This dual analysis aids in understanding how words change form and function within a sentence.

In [None]:
!pip install spacy
!python -m spacy download en_core_web_md

Collecting en-core-web-md==3.7.1
  Downloading https://github.com/explosion/spacy-models/releases/download/en_core_web_md-3.7.1/en_core_web_md-3.7.1-py3-none-any.whl (42.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m42.8/42.8 MB[0m [31m15.1 MB/s[0m eta [36m0:00:00[0m
[38;5;2m✔ Download and installation successful[0m
You can now load the package via spacy.load('en_core_web_md')
[38;5;3m⚠ Restart to reload dependencies[0m
If you are in a Jupyter or Colab notebook, you may need to restart Python in
order to load all the package's dependencies. You can do this by selecting the
'Restart kernel' or 'Restart runtime' option.


In [None]:
import spacy

# Load the English language model
nlp = spacy.load("en_core_web_md")

def derivational_morphology(token):
    # Using spaCy's lemma for the root
    root = token.lemma_
    suffix = ''
    morph_pos = token.pos_  # Get the POS tag from the token

    # Check for suffixes
    suffixes = {
        # Adjectives
        'able': 'adjective', 'ful': 'adjective', 'ic': 'adjective', 'less': 'adjective', 'ous': 'adjective',
        # Nouns
        'ance': 'noun', 'hood': 'noun', 'ity': 'noun', 'ment': 'noun', 'ness': 'noun', 'ship': 'noun', 'tion': 'noun','s':'noun',
        # Verbs
        'ate': 'verb', 'en': 'verb', 'ify': 'verb', 'ize': 'verb', 'ed': 'verb', 'd': 'verb', 'ied': 'verb','ing':'verb',
        # Adverbs
        'ly': 'adverb',
    }

    # Check if the token has a suffix
    for s in suffixes.keys():
        if token.text.endswith(s):
            suffix = s
            # Update the root based on the suffix
            root = token.text[:-len(s)]
            morph_pos = suffixes[suffix]  # Set the POS based on the identified suffix
            break

    return root, suffix, morph_pos

def semantic_role_labeling(token):
    if token.pos_ == "VERB":
        return 'action'
    elif token.pos_ in ["NOUN", "PROPN"]:
        return 'object'
    elif token.pos_ == "ADJ":
        return 'adjective'
    elif token.pos_ == "ADV":
        return 'adverb'
    elif token.pos_ in ["PRON", "DET"]:  # pronouns and determiners
        return 'subject'
    else:
        return 'unknown'

def analyze_sentence(sentence):
    doc = nlp(sentence)

    print("Derivational Morphology Analysis:")
    for token in doc:
        root, suffix, morph_pos = derivational_morphology(token)
        print(f"{token.text} -> Root: {root}, Suffix: {suffix}, Part of Speech: {morph_pos}")

    print("\nSemantic Role Labeling:")
    for token in doc:
        role = semantic_role_labeling(token)
        print(f"{token.text} -> Role: {role}")

# Sample Input
sentence = "The enthusiastic professor swiftly instructs the pupils. She loves educating."
analyze_sentence(sentence)


Derivational Morphology Analysis:
The -> Root: the, Suffix: , Part of Speech: DET
enthusiastic -> Root: enthusiast, Suffix: ic, Part of Speech: adjective
professor -> Root: professor, Suffix: , Part of Speech: NOUN
swiftly -> Root: swift, Suffix: ly, Part of Speech: adverb
instructs -> Root: instruct, Suffix: s, Part of Speech: noun
the -> Root: the, Suffix: , Part of Speech: DET
pupils -> Root: pupil, Suffix: s, Part of Speech: noun
. -> Root: ., Suffix: , Part of Speech: PUNCT
She -> Root: she, Suffix: , Part of Speech: PRON
loves -> Root: love, Suffix: s, Part of Speech: noun
educating -> Root: educat, Suffix: ing, Part of Speech: verb
. -> Root: ., Suffix: , Part of Speech: PUNCT

Semantic Role Labeling:
The -> Role: subject
enthusiastic -> Role: adjective
professor -> Role: object
swiftly -> Role: adverb
instructs -> Role: action
the -> Role: subject
pupils -> Role: object
. -> Role: unknown
She -> Role: subject
loves -> Role: action
educating -> Role: action
. -> Role: unknown
