# Importing Dataset

In [1]:
import numpy as np
import pandas as pd

In [2]:
# Read the tab-separated restaurant reviews into a DataFrame.
# NOTE(review): the location below is an absolute path on one machine; the
# notebook will not run elsewhere until it is pointed at a local copy.
Dataset = pd.read_csv(
    '/home/rabi/Desktop/Project/NLP/Sentiment_Analysis_on_Restaurant_Reviews/Data/Restaurant_Reviews.tsv',
    sep='\t',
)

In [3]:
# Preview the first five rows to check the file parsed into the expected columns.
Dataset.head()

Unnamed: 0,Review,Liked
0,Wow... Loved this place.,1
1,Crust is not good.,0
2,Not tasty and the texture was just nasty.,0
3,Stopped by during the late May bank holiday of...,1
4,The selection on the menu was great and so wer...,1


In [4]:
# (rows, columns) of the loaded frame.
Dataset.shape

(1000, 2)

# Pre-Processing Data

In [5]:
import re
import nltk
from nltk.corpus import stopwords
from nltk.stem.porter import PorterStemmer
import pickle
# Build `corpus`: one cleaned, stemmed, space-joined string per review.

# The stop-word list is not bundled with NLTK; on a fresh environment
# stopwords.words() raises LookupError until the corpus has been downloaded.
try:
    stopwords.words('english')
except LookupError:
    nltk.download('stopwords')

# Loop invariants: build the stop-word set and the stemmer once instead of
# once per word / once per review.
english_stopwords = set(stopwords.words('english'))
ps = PorterStemmer()
# NOTE(review): the NLTK English stop-word list is believed to contain
# negations such as "not"; dropping them may hurt sentiment accuracy -- verify.

corpus = []
# Iterate over every review actually present rather than a hard-coded 1000 rows,
# so the cell keeps working if the dataset grows or shrinks.
for text in Dataset['Review']:
    # Replace everything except ASCII letters with a space, then normalise case.
    review = re.sub('[^a-zA-Z]', ' ', text)
    review = review.lower()
    review = review.split()
    # Drop stop words and reduce the remaining tokens to their Porter stems.
    review = [ps.stem(word) for word in review if word not in english_stopwords]
    review = ' '.join(review)
    corpus.append(review)

# Vectorization

In [6]:
# Bag-of-Words features: keep at most 1500 vocabulary terms and turn every
# cleaned review into a dense row of term counts.
from sklearn.feature_extraction.text import CountVectorizer

cv = CountVectorizer(max_features=1500)
bow_sparse = cv.fit_transform(corpus)
X = bow_sparse.toarray()

# Labels are taken from the second column of the frame (position 1).
y = Dataset.iloc[:, 1].to_numpy()

In [7]:
# Save the fitted CountVectorizer (the BoW dictionary) so the same vocabulary
# can be reused at prediction time.
bow_path = 'bow_dictionary.pkl'
# Use a context manager so the file handle is flushed and closed even if
# pickling fails; the bare open() previously left the handle dangling.
with open(bow_path, "wb") as bow_file:
    pickle.dump(cv, bow_file)

# Training & Classification

In [8]:
# Hold out 30% of the samples as the test set; the fixed random_state makes the
# split repeatable between runs.
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=40,
)

# Multinomial Naive Bayes

In [9]:
# Multinomial Naive Bayes: train, persist, then evaluate on the held-out split.
import joblib
from sklearn.metrics import (
    accuracy_score,
    confusion_matrix,
    precision_score,
    recall_score,
)
from sklearn.naive_bayes import MultinomialNB

# Train on the training split.
classifier = MultinomialNB(alpha=0.1)
classifier.fit(X_train, y_train)

# Persist the fitted Multinomial Naive Bayes model for later prediction.
joblib.dump(classifier, 'Multinomial_Naive_Bayes_Model')

# Predict labels for the test split.
y_pred = classifier.predict(X_test)

# Confusion matrix of true vs. predicted labels.
cm = confusion_matrix(y_test, y_pred)
print("Confusion Matrix:\n", cm)

# Accuracy, precision and recall on the test split.
score1 = accuracy_score(y_test, y_pred)
score2 = precision_score(y_test, y_pred)
score3 = recall_score(y_test, y_pred)

print('*'*20)
print("The Accuracy of the model is ",round(score1*100,2),"%")
print("Precision is ",round(score2,2))
print("Recall is ",round(score3,2))

Confusion Matrix:
 [[133  40]
 [ 23 104]]
********************
The Accuracy of the model is  79.0 %
Precision is  0.72
Recall is  0.82


# Bernoulli Naive Bayes

In [10]:
# Bernoulli Naive Bayes: train, persist, then evaluate on the held-out split.
import joblib
from sklearn.metrics import (
    accuracy_score,
    confusion_matrix,
    precision_score,
    recall_score,
)
from sklearn.naive_bayes import BernoulliNB

# Train on the training split.
classifier = BernoulliNB(alpha=0.8)
classifier.fit(X_train, y_train)

# Persist the fitted Bernoulli Naive Bayes model for later prediction.
joblib.dump(classifier, 'Bernoulli_Naive_Bayes_Model')

# Predict labels for the test split.
y_pred = classifier.predict(X_test)

# Confusion matrix of true vs. predicted labels.
cm = confusion_matrix(y_test, y_pred)
print ("Confusion Matrix:\n",cm)

# Accuracy, precision and recall on the test split.
score1 = accuracy_score(y_test, y_pred)
score2 = precision_score(y_test, y_pred)
score3 = recall_score(y_test, y_pred)

print('*'*20)
print("The Accuracy of the model is ",round(score1*100,2),"%")
print("Precision is ",round(score2,2))
print("Recall is ",round(score3,2))

Confusion Matrix:
 [[116  57]
 [ 19 108]]
********************
The Accuracy of the model is  74.67 %
Precision is  0.65
Recall is  0.85


# Logistic Regression

In [11]:
# Logistic Regression: train, persist, then evaluate on the held-out split.
import joblib
from sklearn import linear_model
from sklearn.metrics import (
    accuracy_score,
    confusion_matrix,
    precision_score,
    recall_score,
)

# Train on the training split.
classifier = linear_model.LogisticRegression(C=1.5)
classifier.fit(X_train, y_train)

# Persist the fitted Logistic Regression model for later prediction.
joblib.dump(classifier, 'Logistic_Regression_model')

# Predict labels for the test split.
y_pred = classifier.predict(X_test)

# Confusion matrix of true vs. predicted labels.
cm = confusion_matrix(y_test, y_pred)
print ("Confusion Matrix:\n",cm)

# Accuracy, precision and recall on the test split.
score1 = accuracy_score(y_test, y_pred)
score2 = precision_score(y_test, y_pred)
score3 = recall_score(y_test, y_pred)

print('*'*20)
print("The Accuracy of the model is ",round(score1*100,2),"%")
print("Precision is ",round(score2,2))
print("Recall is ",round(score3,2))

Confusion Matrix:
 [[131  42]
 [ 24 103]]
********************
The Accuracy of the model is  78.0 %
Precision is  0.71
Recall is  0.81


# Conclusion

This study explores the application of machine learning techniques to categorize sentiment in restaurant reviews. Three algorithms (Multinomial Naive Bayes, Bernoulli Naive Bayes, and Logistic Regression) were used for this purpose.

The evaluation criteria employed encompass accuracy, precision, and recall.

Under the implementation of Multinomial Naive Bayes:

- The prediction accuracy stands at 79.0%.
- The precision of predictions is 0.72.
- The recall of predictions is 0.82.

In the case of Bernoulli Naive Bayes:

- The prediction accuracy is 74.67%.
- The precision of predictions is 0.65.
- The recall of predictions is 0.85.

Additionally, when employing Logistic Regression:

- The prediction accuracy reaches 78.0%.
- The precision of predictions is 0.71.
- The recall of predictions is 0.81.

Based on the above outcomes, Multinomial Naive Bayes slightly outperforms Bernoulli Naive Bayes and Logistic Regression on accuracy (79.0%) and precision (0.72), although Bernoulli Naive Bayes achieves the highest recall (0.85). This means the Multinomial Naive Bayes model correctly predicted the sentiment of 79.0% of the 300 held-out test reviews; performance on other review data may differ.