In [1]:
import pandas as pd
import nltk
from nltk.stem import WordNetLemmatizer
from nltk.corpus import wordnet
from nltk.tokenize import word_tokenize
from nltk.tag import pos_tag

In [2]:
def get_wordnet_pos(treebank_tag):
    if treebank_tag.startswith('J'):
        return wordnet.ADJ
    elif treebank_tag.startswith('V'):
        return wordnet.VERB
    elif treebank_tag.startswith('N'):
        return wordnet.NOUN
    elif treebank_tag.startswith('R'):
        return wordnet.ADV
    else:
        return wordnet.NOUN  # Default to Noun

def analyze_word(word):
    lemmatizer = WordNetLemmatizer()
    tokenized_word = word_tokenize(word)
    pos_tags = pos_tag(tokenized_word)
    analyzed_word = {
        'Word': word,
        'RootWord': lemmatizer.lemmatize(word, get_wordnet_pos(pos_tags[0][1])),
        'Singular': lemmatizer.lemmatize(word, wordnet.NOUN),
        'Plural': lemmatizer.lemmatize(word, wordnet.NOUN) + 's',
        'Tense': lemmatizer.lemmatize(word, wordnet.VERB) + 'ed',
        'POS': pos_tags[0][1]
    }
    return analyzed_word

input_text = "the cats danced gracefully under the starry night"

words = word_tokenize(input_text)
analysis_results = []

In [3]:
for word in words:
    analysis_results.append(analyze_word(word))

In [4]:
df = pd.DataFrame(analysis_results)
df

Unnamed: 0,Word,RootWord,Singular,Plural,Tense,POS
0,the,the,the,thes,theed,DT
1,cats,cat,cat,cats,cated,NNS
2,danced,dance,danced,danceds,danceed,VBN
3,gracefully,gracefully,gracefully,gracefullys,gracefullyed,RB
4,under,under,under,unders,undered,IN
5,the,the,the,thes,theed,DT
6,starry,starry,starry,starrys,starryed,NN
7,night,night,night,nights,nighted,NN


In [5]:
def get_wordnet_pos(treebank_tag):
    if treebank_tag.startswith('J'):
        return wordnet.ADJ
    elif treebank_tag.startswith('V'):
        return wordnet.VERB
    elif treebank_tag.startswith('N'):
        return wordnet.NOUN
    elif treebank_tag.startswith('R'):
        return wordnet.ADV
    else:
        return wordnet.NOUN

def analyze_word(word):
    lemmatizer = WordNetLemmatizer()
    tokenized_word = word_tokenize(word)
    pos_tags = pos_tag(tokenized_word)

    if pos_tags[0][1].startswith('J'):
        comparative = lemmatizer.lemmatize(word, wordnet.ADJ) + 'er'
        superlative = lemmatizer.lemmatize(word, wordnet.ADJ) + 'est'
    elif pos_tags[0][1].startswith('R'):
        comparative = lemmatizer.lemmatize(word, wordnet.ADV) + 'er'
        superlative = lemmatizer.lemmatize(word, wordnet.ADV) + 'est'
    else:
        comparative = None
        superlative = None
        
    analyzed_word = {
        'Word': word,
        'RootWord': lemmatizer.lemmatize(word, get_wordnet_pos(pos_tags[0][1])),
        'Singular': lemmatizer.lemmatize(word, wordnet.NOUN),
        'Plural': lemmatizer.lemmatize(word, wordnet.NOUN) + 's',
        'Tense': lemmatizer.lemmatize(word, wordnet.VERB) + 'ed',
        'Comparative': comparative,
        'Superlative': superlative,
        'POS': pos_tags[0][1]
    }
    return analyzed_word

input_text = "the stars twinkled brightly in the dark sky"

words = word_tokenize(input_text)
analysis_results = []

In [6]:
for word in words:
    analysis_results.append(analyze_word(word))

In [7]:
df = pd.DataFrame(analysis_results)
df

Unnamed: 0,Word,RootWord,Singular,Plural,Tense,Comparative,Superlative,POS
0,the,the,the,thes,theed,,,DT
1,stars,star,star,stars,stared,,,NNS
2,twinkled,twinkle,twinkled,twinkleds,twinkleed,,,VBN
3,brightly,brightly,brightly,brightlys,brightlyed,brightlyer,brightlyest,RB
4,in,in,in,ins,ined,,,IN
5,the,the,the,thes,theed,,,DT
6,dark,dark,dark,darks,darked,,,NN
7,sky,sky,sky,skys,skyed,,,NN
