# Morphological Parser to accept/reject given string.

In [1]:
import nltk
from nltk.tokenize import word_tokenize
from nltk.stem import WordNetLemmatizer
from nltk.corpus import words
from nltk.metrics.distance import edit_distance

In [18]:
class MorphologicalParser:
    def __init__(self):
        self.accepted_suffixes = ["ing", "ed", "s", "ies"]
        self.accepted_prefixes = ["un", "re"]
        self.lexicon = set(words.words())
        self.lemmatizer = WordNetLemmatizer()
   
    def _is_spelled_correctly(self, word):
            #Check if the word is spelled correctly
            return word.lower() in self.lexicon

    def _apply_capitalization(self, lemmatized_word, original_word):
            # Apply capitalization based on the original word
            if original_word.islower():
                return lemmatized_word.lower()
            elif original_word.isupper():
                return lemmatized_word.upper()
            elif original_word.istitle():
                return lemmatized_word.capitalize()
            else:
                return lemmatized_word          
    
    def parse_words(self, word):
        prefix = ""
        suffix = ""
        tokens = word_tokenize(word)

        # Check if the word has an accepted prefix
        for p in self.accepted_prefixes:
            if tokens[0].startswith(p):
                prefix = p
                tokens[0] = tokens[0][len(p):]
                break
                
        # Check if the word has an accepted suffix
        for s in self.accepted_suffixes:
            if tokens[-1].endswith(s):
                suffix = s
                tokens[-1] = tokens[-1][:-len(s)]
                break

        # Lemmatize the remaining words
        lemmatized_word = self.lemmatizer.lemmatize(tokens[-1])

        # Check if the lemmatized word is in the lexicon
        if lemmatized_word.lower() in self.lexicon:
            # Apply spelling rules
            if self._is_spelled_correctly(lemmatized_word):
                # Apply Orthographic rules
                lemmatizef_word = self._apply_capitalization(lemmatized_word, word)

                # Check if the remaining word is acceptable
                if len(lemmatized_word) > 1:
                    return f"Accepted: ({prefix}-{lemmatized_word}-{suffix})"
        return "Rejected"

             
            

In [19]:
if __name__ == "__main__":
    nltk.download("punkt")
    nltk.download("wordnet")
    nltk.download("words")
    parser = MorphologicalParser()

    # Test some examples
    words_to_test = ["running", "unhappy", "restarted", "JUMPED", "sit", "ed", "unseen", "ladys"]
    for word in words_to_test:
        result = parser.parse_words(word)
        print(f"{word}:{result}")

[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\Dell\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package wordnet to
[nltk_data]     C:\Users\Dell\AppData\Roaming\nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package words to
[nltk_data]     C:\Users\Dell\AppData\Roaming\nltk_data...
[nltk_data]   Package words is already up-to-date!


running:Rejected
unhappy:Accepted: (un-happy-)
restarted:Accepted: (re-start-ed)
JUMPED:Rejected
sit:Accepted: (-sit-)
ed:Rejected
unseen:Accepted: (un-seen-)
ladys:Accepted: (-lady-s)
