In [14]:
from nltk.stem import WordNetLemmatizer
from nltk.tokenize import word_tokenize
from nltk.corpus import wordnet
from nltk import pos_tag

In [2]:
# Sample passage (three paragraphs, opening with a date token) that serves as
# the input text for the tokenization and lemmatization cells that follow.
example_text = """16/09/2004 Set in Varanasi, Gopal, Aarti and Raghav have been best friends since school. As teenagers, Gopal has fallen in love with Aarti, often pushing to be more than friends. Aarti consistently rebuffs, expressing that she values their friendship and isn't ready for a relationship with anybody.

Gopal and Raghav are both studying to get into engineering colleges, but Gopal gets a low rank in the All India Engineering Entrance Exams, while Raghav is among the toppers. To help improve his rank, Gopal moves to Kota alone to join reputed coaching classes to resit the exam the following year. Raghav goes on to pass the IIT entrance exam as well, and starts attending Banaras Hindu University.

During Gopal's absence in Kota, Aarti and Raghav develop feelings for each other, and Aarti tells Gopal about her relationship when they chat online. Gopal is heartbroken and lashes out at her, causing them to lose touch briefly and Gopal to fall behind in his course. When they rekindle their friendship, Gopal begins to study hard again, but gets a low rank once more after sitting the AIEEE exam for the second time. Gopal returns home to Varanasi, where his father dies shortly after learning of his low rank. Meanwhile, Raghav and Aarti's relationship is flourishing, and Raghav has found his passion in activism and being on his university's newspaper committee.
"""

In [5]:
# Split the sample passage into tokens; punctuation marks come out as their
# own tokens (e.g. "," and "." in the printed listing further down).
# NOTE(review): word_tokenize presumably needs NLTK tokenizer data downloaded
# beforehand; no download step is visible in this notebook, so confirm.
words = word_tokenize(example_text)

In [6]:
# Single WordNetLemmatizer instance shared by the lemmatization cells below
# (both the plain list comprehension and lemmatize_with_all_pos use it).
Lemmatizer = WordNetLemmatizer()

In [9]:
# Lemmatize every token without passing a part of speech, so the lemmatizer's
# default POS (noun, per the NLTK docs) is applied to all of them. That is why
# verbs come out oddly in the listing below, e.g. "has" -> "ha".
Lemmatized_list = list(map(Lemmatizer.lemmatize, words))

In [13]:
# Print a header followed by one 1-based numbered row per token, pairing each
# original token with its default-POS lemma.
print("original - lemmatized word")
for position, (token, lemma) in enumerate(zip(words, Lemmatized_list), start=1):
    print(f"{position}: Original: {token} -> Lemmatized: {lemma}")

original - lemmatized word
1: Original: 16/09/2004 -> Lemmatized: 16/09/2004
2: Original: Set -> Lemmatized: Set
3: Original: in -> Lemmatized: in
4: Original: Varanasi -> Lemmatized: Varanasi
5: Original: , -> Lemmatized: ,
6: Original: Gopal -> Lemmatized: Gopal
7: Original: , -> Lemmatized: ,
8: Original: Aarti -> Lemmatized: Aarti
9: Original: and -> Lemmatized: and
10: Original: Raghav -> Lemmatized: Raghav
11: Original: have -> Lemmatized: have
12: Original: been -> Lemmatized: been
13: Original: best -> Lemmatized: best
14: Original: friends -> Lemmatized: friend
15: Original: since -> Lemmatized: since
16: Original: school -> Lemmatized: school
17: Original: . -> Lemmatized: .
18: Original: As -> Lemmatized: As
19: Original: teenagers -> Lemmatized: teenager
20: Original: , -> Lemmatized: ,
21: Original: Gopal -> Lemmatized: Gopal
22: Original: has -> Lemmatized: ha
23: Original: fallen -> Lemmatized: fallen
24: Original: in -> Lemmatized: in
25: Original: love -> Lemmatized: l

In [29]:
def lemmatize_with_all_pos(word, pos_tags=None, lemmatizer=None):
    """Lemmatize ``word`` once per part of speech and collect the results.

    Args:
        word: Token to lemmatize.
        pos_tags: Optional iterable of POS constants to try. When omitted,
            noun, verb, adjective and adverb are used, in that order.
        lemmatizer: Optional object exposing ``lemmatize(word, pos=...)``.
            When omitted, the notebook-level ``Lemmatizer`` instance is used,
            so existing one-argument calls behave exactly as before.

    Returns:
        dict mapping each POS constant to the lemma produced for it, keyed in
        the order the tags were supplied.
    """
    if pos_tags is None:
        pos_tags = (wordnet.NOUN, wordnet.VERB, wordnet.ADJ, wordnet.ADV)
    # Resolve the global lazily so the function can be called with an explicit
    # lemmatizer even if the cell that creates ``Lemmatizer`` has not been run.
    active = Lemmatizer if lemmatizer is None else lemmatizer
    return {pos: active.lemmatize(word, pos=pos) for pos in pos_tags}

In [27]:
# Display labels for the WordNet part-of-speech constants, looked up when the
# per-POS lemmatizations are printed.
pos_names = dict(
    zip(
        (wordnet.NOUN, wordnet.VERB, wordnet.ADJ, wordnet.ADV),
        ('NOUN', 'VERB', 'ADJECTIVE', 'ADVERB'),
    )
)

In [30]:
# Pair every token with the dict of its lemmatizations under each POS.
lemmatized_pos_list = [(token, lemmatize_with_all_pos(token)) for token in words]

In [31]:
# Report, for each token (numbered from 1), the lemma obtained under every
# part of speech, with a blank line separating consecutive tokens.
print("Original word and its lemmatizations with different POS tags:\n")
for position, (token, by_pos) in enumerate(lemmatized_pos_list, start=1):
    print(f"{position}: Original: {token}")
    for pos_key in by_pos:
        print(f"  Lemmatized as {pos_names[pos_key]}: {by_pos[pos_key]}")
    print()

Original word and its lemmatizations with different POS tags:

1: Original: 16/09/2004
  Lemmatized as NOUN: 16/09/2004
  Lemmatized as VERB: 16/09/2004
  Lemmatized as ADJECTIVE: 16/09/2004
  Lemmatized as ADVERB: 16/09/2004

2: Original: Set
  Lemmatized as NOUN: Set
  Lemmatized as VERB: Set
  Lemmatized as ADJECTIVE: Set
  Lemmatized as ADVERB: Set

3: Original: in
  Lemmatized as NOUN: in
  Lemmatized as VERB: in
  Lemmatized as ADJECTIVE: in
  Lemmatized as ADVERB: in

4: Original: Varanasi
  Lemmatized as NOUN: Varanasi
  Lemmatized as VERB: Varanasi
  Lemmatized as ADJECTIVE: Varanasi
  Lemmatized as ADVERB: Varanasi

5: Original: ,
  Lemmatized as NOUN: ,
  Lemmatized as VERB: ,
  Lemmatized as ADJECTIVE: ,
  Lemmatized as ADVERB: ,

6: Original: Gopal
  Lemmatized as NOUN: Gopal
  Lemmatized as VERB: Gopal
  Lemmatized as ADJECTIVE: Gopal
  Lemmatized as ADVERB: Gopal

7: Original: ,
  Lemmatized as NOUN: ,
  Lemmatized as VERB: ,
  Lemmatized as ADJECTIVE: ,
  Lemmatized as 