In [8]:
import nltk
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from nltk.stem import PorterStemmer, WordNetLemmatizer
from nltk import ne_chunk

In [9]:
nltk.download('punkt')
nltk.download('wordnet')
nltk.download('stopwords')
nltk.download('averaged_perceptron_tagger')
nltk.download('maxent_ne_chunker')
nltk.download('words')

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.
[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /root/nltk_data...
[nltk_data]   Unzipping taggers/averaged_perceptron_tagger.zip.
[nltk_data] Downloading package maxent_ne_chunker to
[nltk_data]     /root/nltk_data...
[nltk_data]   Unzipping chunkers/maxent_ne_chunker.zip.
[nltk_data] Downloading package words to /root/nltk_data...
[nltk_data]   Unzipping corpora/words.zip.


True

In [10]:
text = """
Once upon a time, in a faraway kingdom, there lived a beautiful princess named Cinderella.
She had two stepsisters who were jealous of her beauty. Despite the hardships, Cinderella
remained kind-hearted and hopeful. One day, a grand ball was announced in the palace, and
Cinderella dreamt of attending it. With the help of her fairy godmother, she got her wish
and went to the ball. There, she met the charming prince, and they danced the night away.
But as the clock struck midnight, Cinderella had to flee, leaving behind her glass slipper.
The prince searched the kingdom to find the owner of the glass slipper. Eventually, he found
Cinderella, and they lived happily ever after.
"""

In [11]:
tokens = word_tokenize(text)

In [12]:

stemmer = PorterStemmer()
stemmed_tokens = [stemmer.stem(token) for token in tokens]

In [13]:
lemmatizer = WordNetLemmatizer()
lemmatized_tokens = [lemmatizer.lemmatize(token) for token in tokens]

In [14]:
stop_words = set(stopwords.words('english'))
filtered_tokens = [token for token in tokens if token.lower() not in stop_words]

In [15]:
pos_tags = nltk.pos_tag(tokens)

In [16]:
ner_tags = ne_chunk(pos_tags)

In [17]:
print("Tokens:", tokens)

Tokens: ['Once', 'upon', 'a', 'time', ',', 'in', 'a', 'faraway', 'kingdom', ',', 'there', 'lived', 'a', 'beautiful', 'princess', 'named', 'Cinderella', '.', 'She', 'had', 'two', 'stepsisters', 'who', 'were', 'jealous', 'of', 'her', 'beauty', '.', 'Despite', 'the', 'hardships', ',', 'Cinderella', 'remained', 'kind-hearted', 'and', 'hopeful', '.', 'One', 'day', ',', 'a', 'grand', 'ball', 'was', 'announced', 'in', 'the', 'palace', ',', 'and', 'Cinderella', 'dreamt', 'of', 'attending', 'it', '.', 'With', 'the', 'help', 'of', 'her', 'fairy', 'godmother', ',', 'she', 'got', 'her', 'wish', 'and', 'went', 'to', 'the', 'ball', '.', 'There', ',', 'she', 'met', 'the', 'charming', 'prince', ',', 'and', 'they', 'danced', 'the', 'night', 'away', '.', 'But', 'as', 'the', 'clock', 'struck', 'midnight', ',', 'Cinderella', 'had', 'to', 'flee', ',', 'leaving', 'behind', 'her', 'glass', 'slipper', '.', 'The', 'prince', 'searched', 'the', 'kingdom', 'to', 'find', 'the', 'owner', 'of', 'the', 'glass', 'slip

In [18]:

print("Stemmed Tokens:", stemmed_tokens)

Stemmed Tokens: ['onc', 'upon', 'a', 'time', ',', 'in', 'a', 'faraway', 'kingdom', ',', 'there', 'live', 'a', 'beauti', 'princess', 'name', 'cinderella', '.', 'she', 'had', 'two', 'stepsist', 'who', 'were', 'jealou', 'of', 'her', 'beauti', '.', 'despit', 'the', 'hardship', ',', 'cinderella', 'remain', 'kind-heart', 'and', 'hope', '.', 'one', 'day', ',', 'a', 'grand', 'ball', 'wa', 'announc', 'in', 'the', 'palac', ',', 'and', 'cinderella', 'dreamt', 'of', 'attend', 'it', '.', 'with', 'the', 'help', 'of', 'her', 'fairi', 'godmoth', ',', 'she', 'got', 'her', 'wish', 'and', 'went', 'to', 'the', 'ball', '.', 'there', ',', 'she', 'met', 'the', 'charm', 'princ', ',', 'and', 'they', 'danc', 'the', 'night', 'away', '.', 'but', 'as', 'the', 'clock', 'struck', 'midnight', ',', 'cinderella', 'had', 'to', 'flee', ',', 'leav', 'behind', 'her', 'glass', 'slipper', '.', 'the', 'princ', 'search', 'the', 'kingdom', 'to', 'find', 'the', 'owner', 'of', 'the', 'glass', 'slipper', '.', 'eventu', ',', 'he', 

In [19]:
print("Lemmatized Tokens:", lemmatized_tokens)

Lemmatized Tokens: ['Once', 'upon', 'a', 'time', ',', 'in', 'a', 'faraway', 'kingdom', ',', 'there', 'lived', 'a', 'beautiful', 'princess', 'named', 'Cinderella', '.', 'She', 'had', 'two', 'stepsister', 'who', 'were', 'jealous', 'of', 'her', 'beauty', '.', 'Despite', 'the', 'hardship', ',', 'Cinderella', 'remained', 'kind-hearted', 'and', 'hopeful', '.', 'One', 'day', ',', 'a', 'grand', 'ball', 'wa', 'announced', 'in', 'the', 'palace', ',', 'and', 'Cinderella', 'dreamt', 'of', 'attending', 'it', '.', 'With', 'the', 'help', 'of', 'her', 'fairy', 'godmother', ',', 'she', 'got', 'her', 'wish', 'and', 'went', 'to', 'the', 'ball', '.', 'There', ',', 'she', 'met', 'the', 'charming', 'prince', ',', 'and', 'they', 'danced', 'the', 'night', 'away', '.', 'But', 'a', 'the', 'clock', 'struck', 'midnight', ',', 'Cinderella', 'had', 'to', 'flee', ',', 'leaving', 'behind', 'her', 'glass', 'slipper', '.', 'The', 'prince', 'searched', 'the', 'kingdom', 'to', 'find', 'the', 'owner', 'of', 'the', 'glass'

In [20]:
print("Filtered Tokens :", filtered_tokens)

Filtered Tokens : ['upon', 'time', ',', 'faraway', 'kingdom', ',', 'lived', 'beautiful', 'princess', 'named', 'Cinderella', '.', 'two', 'stepsisters', 'jealous', 'beauty', '.', 'Despite', 'hardships', ',', 'Cinderella', 'remained', 'kind-hearted', 'hopeful', '.', 'One', 'day', ',', 'grand', 'ball', 'announced', 'palace', ',', 'Cinderella', 'dreamt', 'attending', '.', 'help', 'fairy', 'godmother', ',', 'got', 'wish', 'went', 'ball', '.', ',', 'met', 'charming', 'prince', ',', 'danced', 'night', 'away', '.', 'clock', 'struck', 'midnight', ',', 'Cinderella', 'flee', ',', 'leaving', 'behind', 'glass', 'slipper', '.', 'prince', 'searched', 'kingdom', 'find', 'owner', 'glass', 'slipper', '.', 'Eventually', ',', 'found', 'Cinderella', ',', 'lived', 'happily', 'ever', '.']


In [21]:
print("Part-of-Speech Tags:", pos_tags)

Part-of-Speech Tags: [('Once', 'RB'), ('upon', 'IN'), ('a', 'DT'), ('time', 'NN'), (',', ','), ('in', 'IN'), ('a', 'DT'), ('faraway', 'NN'), ('kingdom', 'NN'), (',', ','), ('there', 'EX'), ('lived', 'VBD'), ('a', 'DT'), ('beautiful', 'JJ'), ('princess', 'NN'), ('named', 'VBN'), ('Cinderella', 'NNP'), ('.', '.'), ('She', 'PRP'), ('had', 'VBD'), ('two', 'CD'), ('stepsisters', 'NNS'), ('who', 'WP'), ('were', 'VBD'), ('jealous', 'JJ'), ('of', 'IN'), ('her', 'PRP$'), ('beauty', 'NN'), ('.', '.'), ('Despite', 'IN'), ('the', 'DT'), ('hardships', 'NNS'), (',', ','), ('Cinderella', 'NNP'), ('remained', 'VBD'), ('kind-hearted', 'JJ'), ('and', 'CC'), ('hopeful', 'JJ'), ('.', '.'), ('One', 'CD'), ('day', 'NN'), (',', ','), ('a', 'DT'), ('grand', 'JJ'), ('ball', 'NN'), ('was', 'VBD'), ('announced', 'VBN'), ('in', 'IN'), ('the', 'DT'), ('palace', 'NN'), (',', ','), ('and', 'CC'), ('Cinderella', 'NNP'), ('dreamt', 'NN'), ('of', 'IN'), ('attending', 'VBG'), ('it', 'PRP'), ('.', '.'), ('With', 'IN'), (

In [22]:

ner_tags = ne_chunk(pos_tags)
print("Named Entities:")
for chunk in ner_tags:
    if hasattr(chunk, 'label'):
        print(chunk.label(), ' '.join(c[0] for c in chunk))

Named Entities:
PERSON Cinderella
PERSON Cinderella
ORGANIZATION Cinderella
PERSON Cinderella
PERSON Cinderella
