# Natural Language Processing 

In [1]:
import pandas as pd
import nltk
from nltk.tokenize import word_tokenize, sent_tokenize
from nltk.corpus import stopwords
from nltk.stem import PorterStemmer, WordNetLemmatizer
from nltk.sentiment.vader import SentimentIntensityAnalyzer

## Text Tokenization 

In [2]:
nltk.download('punkt')

# Read the TSV file
file_path = 'Restaurant_Reviews.tsv'
data = pd.read_csv(file_path, delimiter='\t', quoting=3)

# Tokenize a single review into words
first_review = data['Review'][0]
tokens_words = word_tokenize(first_review.lower())  # Tokenization and convert to lowercase
tokens_sentences = sent_tokenize(first_review)

print("Tokenized Words:")
print(tokens_words)
print("\nTokenized Sentences:")
print(tokens_sentences)

Tokenized Words:
['wow', '...', 'loved', 'this', 'place', '.']

Tokenized Sentences:
['Wow...', 'Loved this place.']


[nltk_data] Downloading package punkt to
[nltk_data]     /Users/joelmendonsa/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


## Stopword Removal 

In [3]:
nltk.download('stopwords')

# Stopwords removal
stop_words = set(stopwords.words('english'))
filtered_tokens = [token for token in tokens_words if token not in stop_words]

print("\nText after Stopwords Removal:")
print(filtered_tokens)


Text after Stopwords Removal:
['wow', '...', 'loved', 'place', '.']


[nltk_data] Downloading package stopwords to
[nltk_data]     /Users/joelmendonsa/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


## Stemming 

In [4]:
# Stemming
ps = PorterStemmer()
stemmed_words = [ps.stem(token) for token in filtered_tokens]

print("\nStemmed Words:")
print(stemmed_words)


Stemmed Words:
['wow', '...', 'love', 'place', '.']


## Lemmatization 

In [5]:
nltk.download('wordnet')

# Lemmatization
lemmatizer = WordNetLemmatizer()
lemmatized_words = [lemmatizer.lemmatize(token) for token in filtered_tokens]

print("\nLemmatized Words:")
print(lemmatized_words)

[nltk_data] Downloading package wordnet to
[nltk_data]     /Users/joelmendonsa/nltk_data...



Lemmatized Words:
['wow', '...', 'loved', 'place', '.']


## Sentiment Analysis 

In [6]:
nltk.download('vader_lexicon')

# Sentiment Analysis
sia = SentimentIntensityAnalyzer()
sentiment_scores = sia.polarity_scores(first_review)

print("\nSentiment Analysis:")
print(sentiment_scores)

[nltk_data] Downloading package vader_lexicon to
[nltk_data]     /Users/joelmendonsa/nltk_data...



Sentiment Analysis:
{'neg': 0.0, 'neu': 0.435, 'pos': 0.565, 'compound': 0.5994}


## Combined Code 

In [7]:
nltk.download('punkt')
nltk.download('stopwords')
nltk.download('wordnet')
nltk.download('vader_lexicon')

# Read the TSV file
file_path = 'Restaurant_Reviews.tsv'
data = pd.read_csv(file_path, delimiter='\t', quoting=3)

# Tokenization
first_review = data['Review'][0]
tokens_words = word_tokenize(first_review.lower())
tokens_sentences = sent_tokenize(first_review)

# Stopwords removal
stop_words = set(stopwords.words('english'))
filtered_tokens = [token for token in tokens_words if token not in stop_words]

# Stemming
ps = PorterStemmer()
stemmed_words = [ps.stem(token) for token in filtered_tokens]

# Lemmatization
lemmatizer = WordNetLemmatizer()
lemmatized_words = [lemmatizer.lemmatize(token) for token in filtered_tokens]

# Sentiment Analysis
sia = SentimentIntensityAnalyzer()
sentiment_scores = sia.polarity_scores(first_review)

print("Tokenized Words:")
print(tokens_words)
print("\nTokenized Sentences:")
print(tokens_sentences)
print("\nText after Stopwords Removal:")
print(filtered_tokens)
print("\nStemmed Words:")
print(stemmed_words)
print("\nLemmatized Words:")
print(lemmatized_words)
print("\nSentiment Analysis:")
print(sentiment_scores)

Tokenized Words:
['wow', '...', 'loved', 'this', 'place', '.']

Tokenized Sentences:
['Wow...', 'Loved this place.']

Text after Stopwords Removal:
['wow', '...', 'loved', 'place', '.']

Stemmed Words:
['wow', '...', 'love', 'place', '.']

Lemmatized Words:
['wow', '...', 'loved', 'place', '.']

Sentiment Analysis:
{'neg': 0.0, 'neu': 0.435, 'pos': 0.565, 'compound': 0.5994}


[nltk_data] Downloading package punkt to
[nltk_data]     /Users/joelmendonsa/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to
[nltk_data]     /Users/joelmendonsa/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package wordnet to
[nltk_data]     /Users/joelmendonsa/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package vader_lexicon to
[nltk_data]     /Users/joelmendonsa/nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!
