<a href="https://colab.research.google.com/github/Gousepasha789/Information-Retrieval-System/blob/main/Perform_Morphological_Analysis.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [4]:
import nltk
from nltk.stem import PorterStemmer, WordNetLemmatizer
from nltk.tokenize import word_tokenize
from nltk.corpus import wordnet
import re

# Download necessary NLTK resources before initializing the lemmatizer
nltk.download('punkt')
nltk.download('wordnet')

def get_prefix_suffix(word):
    # Simple regex-based approach for finding common prefixes and suffixes
    prefixes = ["un", "re", "in", "im", "dis", "non", "pre", "mis", "over", "under"]
    suffixes = ["ing", "ed", "ly", "er", "or", "ion", "able", "ible", "ment", "ness", "ful", "less"]

    prefix = next((pre for pre in prefixes if word.startswith(pre)), "")
    suffix = next((suf for suf in suffixes if word.endswith(suf)), "")

    root = word
    if prefix:
        root = root[len(prefix):]
    if suffix:
        root = root[:-len(suffix)]

    return prefix, root, suffix

# Initialize NLTK tools
stemmer = PorterStemmer()
lemmatizer = WordNetLemmatizer()

# Sample words to analyze
words = ["running", "unhappiness", "predetermined", "disconnection", "overloading"]

print("Word Morphological Analysis")
print("="*40)
for word in words:
    prefix, root, suffix = get_prefix_suffix(word)
    stemmed = stemmer.stem(word)
    lemmatized = lemmatizer.lemmatize(word)

    print(f"Word: {word}")
    print(f"  Prefix: {prefix}")
    print(f"  Root: {root}")
    print(f"  Suffix: {suffix}")
    print(f"  Stemmed Form: {stemmed}")
    print(f"  Lemmatized Form: {lemmatized}")
    print("-"*40)

Word Morphological Analysis
Word: running
  Prefix: 
  Root: runn
  Suffix: ing
  Stemmed Form: run
  Lemmatized Form: running
----------------------------------------
Word: unhappiness
  Prefix: un
  Root: happi
  Suffix: ness
  Stemmed Form: unhappi
  Lemmatized Form: unhappiness
----------------------------------------
Word: predetermined
  Prefix: pre
  Root: determin
  Suffix: ed
  Stemmed Form: predetermin
  Lemmatized Form: predetermined
----------------------------------------
Word: disconnection
  Prefix: dis
  Root: connect
  Suffix: ion
  Stemmed Form: disconnect
  Lemmatized Form: disconnection
----------------------------------------
Word: overloading
  Prefix: over
  Root: load
  Suffix: ing
  Stemmed Form: overload
  Lemmatized Form: overloading
----------------------------------------


[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
