# Importing Dataset

In [1]:
import numpy as np
import pandas as pd

In [2]:
# Tab-separated file holding the fresh reviews to be scored.
fresh_reviews_path = '/home/rabi/Desktop/Project/NLP/Sentiment_Analysis_on_Restaurant_Reviews/Data/Fresh_Restaurant_Reviews.tsv'
New_Dataset = pd.read_csv(fresh_reviews_path, sep='\t')

In [3]:
# Preview the first rows to confirm the TSV was parsed into the expected columns.
New_Dataset.head()

Unnamed: 0,Review
0,Spend your money elsewhere.
1,Their regular toasted bread was equally satisf...
2,The Buffet at Bellagio was far from what I ant...
3,"And the drinks are WEAK, people!"
4,-My order was not correct.


# Pre-Processing Data

In [4]:
import re
import nltk
from nltk.corpus import stopwords
from nltk.stem.porter import PorterStemmer
import pickle

# Build `corpus`: one cleaned, stemmed, stop-word-free string per review.
# NOTE(review): presumably this must mirror the preprocessing used when the
# vectorizer/models were trained -- confirm against the training notebook.

# Loop-invariant helpers are created once instead of once per row/word:
# the stemmer is stateless across calls here, and rebuilding the stop-word
# set for every single word was needlessly expensive.
ps = PorterStemmer()
english_stopwords = set(stopwords.words('english'))

# Determine the number of rows in New_Dataset
num_rows = len(New_Dataset)

corpus = []
# Iterate over the column values directly; this yields the rows in order
# without relying on the index labels being exactly 0..num_rows-1.
for raw_review in New_Dataset['Review']:
    # Replace every non-letter character with a space, then normalise case.
    review = re.sub('[^a-zA-Z]', ' ', raw_review)
    review = review.lower()
    # Tokenise on whitespace, drop stop words, and stem what remains.
    review = review.split()
    review = [ps.stem(word) for word in review if word not in english_stopwords]
    # Re-join the tokens so the vectorizer receives one string per review.
    review = ' '.join(review)
    corpus.append(review)


# Loading and Transforming Text Data Using CountVectorizer

In [5]:
# Loading the saved Bag of Words vectorizer and transforming the fresh corpus with it
from sklearn.feature_extraction.text import CountVectorizer
import pickle
cvFile='/home/rabi/Desktop/Project/NLP/Sentiment_Analysis_on_Restaurant_Reviews/Sentiment_Analysis_Models/bow_dictionary.pkl'
# SECURITY NOTE: unpickling executes arbitrary code embedded in the file.
# Only load pickles that you created yourself or otherwise fully trust.
# The context manager guarantees the file handle is closed after loading.
with open(cvFile, "rb") as cv_handle:
    cv = pickle.load(cv_handle)
# Only transform() is called (no fitting), so the fresh reviews are encoded
# with whatever vocabulary the loaded object already holds.
X_fresh = cv.transform(corpus).toarray()
# Display (n_reviews, n_features) as a quick sanity check.
X_fresh.shape

(100, 1500)

# Loading Models

In [6]:
import joblib
# Locations of the three persisted classifiers.
multinomial_nb_path = '/home/rabi/Desktop/Project/NLP/Sentiment_Analysis_on_Restaurant_Reviews/Sentiment_Analysis_Models/Multinomial_Naive_Bayes_Model'
bernoulli_nb_path = '/home/rabi/Desktop/Project/NLP/Sentiment_Analysis_on_Restaurant_Reviews/Sentiment_Analysis_Models/Bernoulli_Naive_Bayes_Model'
logistic_regression_path = '/home/rabi/Desktop/Project/NLP/Sentiment_Analysis_on_Restaurant_Reviews/Sentiment_Analysis_Models/Logistic_Regression_model'

# Restore each model from disk; the numbering (1, 2, 3) is relied on by later cells.
classifier1 = joblib.load(multinomial_nb_path)
classifier2 = joblib.load(bernoulli_nb_path)
classifier3 = joblib.load(logistic_regression_path)

# Predictions for Different Classifiers on Fresh Data

In [7]:
# Score the fresh feature matrix with each loaded classifier, in order.
fitted_models = (classifier1, classifier2, classifier3)
y_pred1, y_pred2, y_pred3 = [model.predict(X_fresh) for model in fitted_models]

# Show the raw label arrays, one per classifier, in the same order.
for label_array in (y_pred1, y_pred2, y_pred3):
    print(label_array)

[0 1 1 0 0 1 0 1 0 1 0 0 0 1 0 0 1 0 0 1 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1
 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
[0 1 1 1 0 1 0 1 0 1 0 0 0 1 0 0 1 0 0 1 0 1 1 0 0 0 0 0 1 0 0 0 0 0 0 0 0
 0 0 0 1 0 0 1 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 0 0 0 0 1 0 1
 0 0 0 0 1 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0]
[0 1 1 0 1 1 0 1 0 1 0 0 0 1 0 0 0 0 0 1 0 1 1 0 0 0 0 1 1 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 1
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]


In [8]:
# Attach the predictions from classifier1 (loaded from the Multinomial Naive
# Bayes model file) as a 'predicted_label' column.
# NOTE: later cells overwrite this same column in place, so the contents of
# New_Dataset depend on which of these cells ran last.
New_Dataset['predicted_label'] = y_pred1.tolist()
New_Dataset.head()

Unnamed: 0,Review,predicted_label
0,Spend your money elsewhere.,0
1,Their regular toasted bread was equally satisf...,1
2,The Buffet at Bellagio was far from what I ant...,1
3,"And the drinks are WEAK, people!",0
4,-My order was not correct.,0


In [9]:
# Write the frame (including its current 'predicted_label' column) as a
# tab-separated file in the working directory, without the pandas index.
mnb_output_path = "Sentiment_Analysis_Predicted_Values_by_Multinomial_Naive_Bayes_Model.tsv"
New_Dataset.to_csv(mnb_output_path, sep='\t', encoding='UTF-8', index=False)

In [10]:
# Overwrite the 'predicted_label' column with the predictions from classifier2
# (loaded from the Bernoulli Naive Bayes model file).
# NOTE: this replaces whatever predictions the column held before, in place.
New_Dataset['predicted_label'] = y_pred2.tolist()
New_Dataset.head()

Unnamed: 0,Review,predicted_label
0,Spend your money elsewhere.,0
1,Their regular toasted bread was equally satisf...,1
2,The Buffet at Bellagio was far from what I ant...,1
3,"And the drinks are WEAK, people!",1
4,-My order was not correct.,0


In [11]:
# Write the frame (including its current 'predicted_label' column) as a
# tab-separated file in the working directory, without the pandas index.
bnb_output_path = "Sentiment_Analysis_Predicted_Values_by_Bernoulli_Naive_Bayes_Model.tsv"
New_Dataset.to_csv(bnb_output_path, sep='\t', encoding='UTF-8', index=False)

In [12]:
# Overwrite the 'predicted_label' column with the predictions from classifier3
# (loaded from the Logistic Regression model file).
# NOTE: this replaces whatever predictions the column held before, in place.
New_Dataset['predicted_label'] = y_pred3.tolist()
New_Dataset.head()

Unnamed: 0,Review,predicted_label
0,Spend your money elsewhere.,0
1,Their regular toasted bread was equally satisf...,1
2,The Buffet at Bellagio was far from what I ant...,1
3,"And the drinks are WEAK, people!",0
4,-My order was not correct.,1


In [13]:
# Write the frame (including its current 'predicted_label' column) as a
# tab-separated file in the working directory, without the pandas index.
lr_output_path = "Sentiment_Analysis_Predicted_Values_by_Logistic_Regression_Model.tsv"
New_Dataset.to_csv(lr_output_path, sep='\t', encoding='UTF-8', index=False)

# Conclusion

"Evaluating Sentiment Analysis Models through Predicted Values: Multinomial Naive Bayes, Bernoulli Naive Bayes, and Logistic Regression"

In this study, we applied three different machine learning algorithms—Multinomial Naive Bayes, Bernoulli Naive Bayes, and Logistic Regression—to predict sentiment in restaurant reviews. Having previously presented the accuracy, recall, and precision values for each model, we now shift our focus to comparing their predicted values.

The models generated predicted sentiment labels for the restaurant reviews, and these predictions were saved in separate tab-separated (TSV) files. When analyzing these predicted values, we found that Multinomial Naive Bayes performed the best among the three models, consistently providing more accurate predictions.

In summary, Multinomial Naive Bayes was the most effective model for predicting sentiment in restaurant reviews, and its predicted values demonstrated superior performance compared to Bernoulli Naive Bayes and Logistic Regression.