## **Lab 12 - Tasks**


Activity 1: Lemmatization using NLTK


In [5]:
import nltk
from nltk.stem import WordNetLemmatizer
from nltk.corpus import wordnet

# Download required NLTK data
nltk.download('wordnet')
nltk.download('omw-1.4')
nltk.download('punkt')

# Initialize the lemmatizer
lemmatizer = WordNetLemmatizer()

# Lemmatize a sentence
sentence = "The children are playing with the toys in the gardens."
tokens = nltk.word_tokenize(sentence)
lemmatized_tokens = [lemmatizer.lemmatize(token) for token in tokens]

print("Original Sentence:", sentence)
print("Lemmatized Tokens:", lemmatized_tokens)

Original Sentence: The children are playing with the toys in the gardens.
Lemmatized Tokens: ['The', 'child', 'are', 'playing', 'with', 'the', 'toy', 'in', 'the', 'garden', '.']


[nltk_data] Downloading package wordnet to
[nltk_data]     C:\Users\nabee\AppData\Roaming\nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package omw-1.4 to
[nltk_data]     C:\Users\nabee\AppData\Roaming\nltk_data...
[nltk_data]   Package omw-1.4 is already up-to-date!
[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\nabee\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!


Activity 2: Stemming using NLTK


In [6]:
import nltk
from nltk.stem import PorterStemmer

# Initialize the stemmer
stemmer = PorterStemmer()

# Stem a sentence
sentence = "The children are playing with the toys in the gardens."
tokens = nltk.word_tokenize(sentence)
stemmed_tokens = [stemmer.stem(token) for token in tokens]

print("Original Sentence:", sentence)
print("Stemmed Tokens:", stemmed_tokens)

Original Sentence: The children are playing with the toys in the gardens.
Stemmed Tokens: ['the', 'children', 'are', 'play', 'with', 'the', 'toy', 'in', 'the', 'garden', '.']


Activity 3: POS Tagging using NLTK


In [7]:
import nltk

# Download required NLTK data
nltk.download('averaged_perceptron_tagger')

# POS Tagging
sentence = "The children are playing with the toys in the gardens."
tokens = nltk.word_tokenize(sentence)
pos_tags = nltk.pos_tag(tokens)

print("Original Sentence:", sentence)
print("POS Tags:", pos_tags)

[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     C:\Users\nabee\AppData\Roaming\nltk_data...
[nltk_data]   Package averaged_perceptron_tagger is already up-to-
[nltk_data]       date!


Original Sentence: The children are playing with the toys in the gardens.
POS Tags: [('The', 'DT'), ('children', 'NNS'), ('are', 'VBP'), ('playing', 'VBG'), ('with', 'IN'), ('the', 'DT'), ('toys', 'NNS'), ('in', 'IN'), ('the', 'DT'), ('gardens', 'NNS'), ('.', '.')]


Activity 4: Removing Stop Words using NLTK


In [8]:
import nltk
from nltk.corpus import stopwords

# Download required NLTK data
nltk.download('stopwords')

# Remove stop words from a sentence
stop_words = set(stopwords.words('english'))
sentence = "The children are playing with the toys in the gardens."
tokens = nltk.word_tokenize(sentence)
filtered_tokens = [token for token in tokens if token.lower()
                   not in stop_words]

print("Original Sentence:", sentence)
print("Filtered Tokens:", filtered_tokens)

Original Sentence: The children are playing with the toys in the gardens.
Filtered Tokens: ['children', 'playing', 'toys', 'gardens', '.']


[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\nabee\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


Activity 5: Named Entity Recognition using NLTK


In [9]:
import nltk

# Download required NLTK data
nltk.download('maxent_ne_chunker')
nltk.download('words')

# Named Entity Recognition
sentence = "Barack Obama was born in Hawaii."
tokens = nltk.word_tokenize(sentence)
pos_tags = nltk.pos_tag(tokens)
named_entities = nltk.ne_chunk(pos_tags)

print("Original Sentence:", sentence)
print("Named Entities:", named_entities)

[nltk_data] Downloading package maxent_ne_chunker to
[nltk_data]     C:\Users\nabee\AppData\Roaming\nltk_data...
[nltk_data]   Package maxent_ne_chunker is already up-to-date!
[nltk_data] Downloading package words to
[nltk_data]     C:\Users\nabee\AppData\Roaming\nltk_data...
[nltk_data]   Package words is already up-to-date!


Original Sentence: Barack Obama was born in Hawaii.
Named Entities: (S
  (PERSON Barack/NNP)
  (PERSON Obama/NNP)
  was/VBD
  born/VBN
  in/IN
  (GPE Hawaii/NNP)
  ./.)
