#Sentiment Analysis of Restaurant Reviews

In [41]:
import pandas as pd
import numpy as np

In [42]:
#importing Datset
df = pd.read_csv("/content/Restaurant_Reviews.tsv",delimiter = '\t')

####Data Analysis

In [43]:
df.shape

(1000, 2)

In [44]:
df.columns

Index(['Review', 'Liked'], dtype='object')

In [45]:
df.head()

Unnamed: 0,Review,Liked
0,Wow... Loved this place.,1
1,Crust is not good.,0
2,Not tasty and the texture was just nasty.,0
3,Stopped by during the late May bank holiday of...,1
4,The selection on the menu was great and so wer...,1


In [46]:
df.info

<bound method DataFrame.info of                                                 Review  Liked
0                             Wow... Loved this place.      1
1                                   Crust is not good.      0
2            Not tasty and the texture was just nasty.      0
3    Stopped by during the late May bank holiday of...      1
4    The selection on the menu was great and so wer...      1
..                                                 ...    ...
995  I think food should have flavor and texture an...      0
996                           Appetite instantly gone.      0
997  Overall I was not impressed and would not go b...      0
998  The whole experience was underwhelming, and I ...      0
999  Then, as if I hadn't wasted enough of my life ...      0

[1000 rows x 2 columns]>

###Data Processing

In [47]:
import nltk
import re
nltk.download("stopwords")
from nltk.corpus import stopwords
from nltk.stem.porter import PorterStemmer

[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


In [48]:
#DATA PREPROCESSING
corpus = []
for i in range(0,1000):
  #removing any character other than letters
  review = re.sub(pattern = "[^a-zA-Z]",repl = ' ', string = df['Review'][i])
  #converting to lower case
  review = review.lower()
  review_words = review.split()
  #removig stop words
  review_words = [word for word in review_words if not word in set(stopwords.words('english'))]
  #stemming the words
  ps = PorterStemmer()
  review = [ps.stem(word) for word in review_words]
  review = ' '.join(review)
  corpus.append(review)

In [49]:
corpus[0:10]

['wow love place',
 'crust good',
 'tasti textur nasti',
 'stop late may bank holiday rick steve recommend love',
 'select menu great price',
 'get angri want damn pho',
 'honeslti tast fresh',
 'potato like rubber could tell made ahead time kept warmer',
 'fri great',
 'great touch']

###Obtaining Vectors for Each Sentence

In [50]:
from sklearn.feature_extraction.text import CountVectorizer
cv = CountVectorizer(max_features = 1600)
x = cv.fit_transform(corpus).toarray()
y = df.iloc[:,1].values

####Spliting the dataset

In [51]:
from sklearn.model_selection import train_test_split
x_train,x_test,y_train,y_test  = train_test_split(x,y,test_size = 0.20,random_state = 0)


In [52]:
x_train.shape,x_test.shape,y_train.shape,y_test.shape

((800, 1565), (200, 1565), (800,), (200,))

###Model Training



*   I am using GradientBoostClassifier because it is producing higher accuracy



In [53]:
from sklearn.ensemble import GradientBoostingClassifier
gbc = GradientBoostingClassifier(learning_rate=0.1,max_features = 11,subsample = 1,n_estimators = 200)
gbc = gbc.fit(x_train,y_train)



*   Predicting the test dataset




In [54]:
yhat = gbc.predict(x_test)



*   Obtaining Accuracy






In [55]:
from sklearn.metrics import accuracy_score
from sklearn.metrics import precision_score


score1 = accuracy_score(y_test,yhat)
score2 = precision_score(y_test,yhat)
print("Accuracy Score is : {}%".format(score1*100))
print("precision score is: {}%".format(score2*100))

Accuracy Score is : 75.0%
precision score is: 84.4155844155844%


##HyperParameter tuning of the GradientBoostClassifier


*   I am tuning only 'max_parameters' parameters in the GradientBoostClassifier




In [69]:
best_accuracy = 0
max_features = 0
for i in range(10,25,1):
  temp_classifier =  GradientBoostingClassifier(learning_rate=0.1,max_features = i,subsample = 1,n_estimators = 200)
  temp_classifier.fit(x_train,y_train)
  temp_y_predict = temp_classifier.predict(x_test)
  score = accuracy_score(y_test,temp_y_predict)
  print("Accuracy Score for max_features: {} is  {}%".format(i,score*100))
  if score > best_accuracy:
    best_accuracy = score
    max_features = i
print("******************************************")
print("the best accuracy is {}% with max_feature value: {}".format(best_accuracy*100,max_features))


Accuracy Score for max_features: 10 is  79.5%
Accuracy Score for max_features: 11 is  76.0%
Accuracy Score for max_features: 12 is  77.0%
Accuracy Score for max_features: 13 is  76.0%
Accuracy Score for max_features: 14 is  75.0%
Accuracy Score for max_features: 15 is  74.5%
Accuracy Score for max_features: 16 is  75.0%
Accuracy Score for max_features: 17 is  76.0%
Accuracy Score for max_features: 18 is  77.5%
Accuracy Score for max_features: 19 is  77.5%
Accuracy Score for max_features: 20 is  77.5%
Accuracy Score for max_features: 21 is  77.5%
Accuracy Score for max_features: 22 is  77.5%
Accuracy Score for max_features: 23 is  76.0%
Accuracy Score for max_features: 24 is  76.0%
******************************************
the best accuracy is 79.5% with max_feature value: 10


In [57]:
final_classifier =  GradientBoostingClassifier(learning_rate=0.1,max_features = 10,subsample = 1,n_estimators = 200)
final_classifier.fit(x_train,y_train)

##Predicting

In [58]:
def get_sentiment(test_review):
   test_review = re.sub(pattern = "[^a-zA-Z]",repl = ' ', string = test_review)
   test_review = test_review.lower()
   test_review_words = test_review.split()
   test_review_words = [word for word in test_review_words if not word in set(stopwords.words('english'))]
   ps = PorterStemmer()
   final_review = [ps.stem(word) for word in test_review_words]
   final_review = ' '.join(final_review)

   temp = cv.transform([final_review]).toarray()
   return final_classifier.predict(temp)

###Predicting using sample reviews

In [59]:
sample_review = "The food is very bad and worst"
if get_sentiment(sample_review):
  print("This is a Positive Review")
else:
  print("This is a Negative review")

This is a Negative review


In [60]:
sample_review = "The food was really amazing and delicious"
if get_sentiment(sample_review):
  print("This is a Positive Review")
else:
  print("This is a Negative review")

This is a Positive Review


In [61]:
sample_review = "The Service quality is very poor in this restaurent"
if get_sentiment(sample_review):
  print("This is a Positive Review")
else:
  print("This is a Negative review")

This is a Negative review


In [64]:
sample_review = "This food was not good but bad"
if get_sentiment(sample_review):
  print("This is a Positive Review")
else:
  print("This is a Negative review")

This is a Negative review
