<a href="https://colab.research.google.com/github/ChintalaDeepu/IRS/blob/main/morphology.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import nltk
from nltk.stem import PorterStemmer, WordNetLemmatizer
from nltk.tokenize import word_tokenize
from nltk.corpus import wordnet, stopwords
import re

nltk.download('punkt')
nltk.download('wordnet')
nltk.download('stopwords')

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.
[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.


True

In [2]:
def stem_word(word):
    stemmer = PorterStemmer()
    return stemmer.stem(word)

In [3]:
def lemmatize_word(word):
    lemmatizer = WordNetLemmatizer()
    return lemmatizer.lemmatize(word)

In [5]:
def extract_prefix_suffix(word):
    prefixes = ["un", "re", "pre", "mis", "dis", "in", "im", "non", "over", "under"]
    suffixes = ["ing", "ed", "er", "ly", "s", "es", "ment", "ness", "ful", "able", "less"]

    found_prefix = next((p for p in prefixes if word.startswith(p)), None)
    found_suffix = next((s for s in suffixes if word.endswith(s)), None)

    root = word
    if found_prefix:
        root = word[len(found_prefix):]
    if found_suffix:
        root = root[:-len(found_suffix)]

    return found_prefix, root, found_suffix

In [6]:
def morphological_analysis(word):
    stem = stem_word(word)
    lemma = lemmatize_word(word)
    prefix, root, suffix = extract_prefix_suffix(word)

    return {
        "Original Word": word,
        "Stemmed Root": stem,
        "Lemmatized Root": lemma,
        "Prefix": prefix,
        "Extracted Root": root,
        "Suffix": suffix
    }

In [7]:
words = ["unhappiness", "replaying", "mistreated", "jumping", "kindness", "running"]
for word in words:
    print(morphological_analysis(word))

{'Original Word': 'unhappiness', 'Stemmed Root': 'unhappi', 'Lemmatized Root': 'unhappiness', 'Prefix': 'un', 'Extracted Root': 'happines', 'Suffix': 's'}
{'Original Word': 'replaying', 'Stemmed Root': 'replay', 'Lemmatized Root': 'replaying', 'Prefix': 're', 'Extracted Root': 'play', 'Suffix': 'ing'}
{'Original Word': 'mistreated', 'Stemmed Root': 'mistreat', 'Lemmatized Root': 'mistreated', 'Prefix': 'mis', 'Extracted Root': 'treat', 'Suffix': 'ed'}
{'Original Word': 'jumping', 'Stemmed Root': 'jump', 'Lemmatized Root': 'jumping', 'Prefix': None, 'Extracted Root': 'jump', 'Suffix': 'ing'}
{'Original Word': 'kindness', 'Stemmed Root': 'kind', 'Lemmatized Root': 'kindness', 'Prefix': None, 'Extracted Root': 'kindnes', 'Suffix': 's'}
{'Original Word': 'running', 'Stemmed Root': 'run', 'Lemmatized Root': 'running', 'Prefix': None, 'Extracted Root': 'runn', 'Suffix': 'ing'}
