## Importing Dependencies and Loading the Data

In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

# Load the tab-separated reviews. quoting=3 is csv.QUOTE_NONE, so quote
# characters inside a review are kept as ordinary text.
dataset = pd.read_csv('Restaurant_Reviews.tsv', sep='\t', quoting=3)
dataset.head(5)

Unnamed: 0,Review,Liked
0,Wow... Loved this place.,1
1,Crust is not good.,0
2,Not tasty and the texture was just nasty.,0
3,Stopped by during the late May bank holiday of...,1
4,The selection on the menu was great and so wer...,1


In [2]:
# Number of reviews (rows) in the dataset.
dataset.shape[0]

1000

In [3]:
# Count missing values per column.
dataset.isna().sum()

Review    0
Liked     0
dtype: int64

In [4]:
# Cleaning the texts
import re
import nltk
nltk.download('stopwords')
from nltk.stem.porter import PorterStemmer
from nltk.corpus import stopwords

# Build the stemmer and the stop-word set once. Rebuilding the set for every
# single word (as the loop previously did) is loop-invariant work.
ps = PorterStemmer()
english_stopwords = set(stopwords.words('english'))
# NOTE(review): NLTK's English stop-word list appears to include negations
# such as "not", so they are dropped here -- confirm this is intended for a
# sentiment task.

corpus = []
# Iterate over the column itself so the loop follows the data instead of a
# hard-coded row count of 1000.
for raw_review in dataset['Review']:
    review = re.sub('[^a-zA-Z]', ' ', raw_review)  # replace non-letters with spaces
    review = review.lower().split()
    review = [ps.stem(word) for word in review if word not in english_stopwords]
    review = ' '.join(review)
    corpus.append(review)

[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\Admin\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


In [5]:
# Spot-check the cleaning step on the last review that was processed.
corpus[-1]

'wast enough life pour salt wound draw time took bring check'

In [6]:
# Bag-of-words features: one column per token, capped at the 1500 most
# frequent tokens in the cleaned corpus.
from sklearn.feature_extraction.text import CountVectorizer

cv = CountVectorizer(max_features=1500)
bag_of_words = cv.fit_transform(corpus)
X = bag_of_words.toarray()   # dense array of token counts
y = dataset['Liked'].values  # target labels from the "Liked" column


In [7]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the reviews for evaluation; random_state fixes the split.
split = train_test_split(X, y, test_size=0.20, random_state=0)
X_train, X_test, y_train, y_test = split

## SVM

In [8]:
from sklearn.svm import SVC

# Fit a support-vector classifier with default settings on the training split.
classifier = SVC().fit(X_train, y_train)
classifier



SVC()

In [9]:
# Predict labels for the held-out test split with the fitted SVC.
y_pred = classifier.predict(X_test)

In [10]:
from sklearn import metrics

# Per-class precision, recall and F1 for the SVC predictions.
report = metrics.classification_report(y_true=y_test, y_pred=y_pred)
print(report)

              precision    recall  f1-score   support

           0       0.66      0.93      0.77        97
           1       0.89      0.55      0.68       103

    accuracy                           0.73       200
   macro avg       0.78      0.74      0.73       200
weighted avg       0.78      0.73      0.73       200



In [11]:
# Confusion matrix for the SVC: rows are true labels, columns are predictions.
from sklearn.metrics import confusion_matrix

cm = confusion_matrix(y_true=y_test, y_pred=y_pred)
print(cm)

[[90  7]
 [46 57]]


In [12]:
# Overall test-set accuracy of the SVC.
accuracy = metrics.accuracy_score(y_true=y_test, y_pred=y_pred)
print(accuracy)

0.735


## Naive Bayes: GaussianNB

In [13]:
from sklearn.naive_bayes import GaussianNB

# Fit Gaussian Naive Bayes on the dense bag-of-words training matrix.
classifier = GaussianNB().fit(X_train, y_train)
classifier


GaussianNB()

In [14]:
# Predict labels for the held-out test split with the fitted GaussianNB.
y_pred = classifier.predict(X_test)

In [15]:
from sklearn import metrics

# Per-class precision, recall and F1 for the GaussianNB predictions.
report = metrics.classification_report(y_true=y_test, y_pred=y_pred)
print(report)

              precision    recall  f1-score   support

           0       0.82      0.57      0.67        97
           1       0.68      0.88      0.77       103

    accuracy                           0.73       200
   macro avg       0.75      0.73      0.72       200
weighted avg       0.75      0.73      0.72       200



In [16]:
# Confusion matrix for GaussianNB: rows are true labels, columns are predictions.
from sklearn.metrics import confusion_matrix

cm = confusion_matrix(y_true=y_test, y_pred=y_pred)
print(cm)

[[55 42]
 [12 91]]


In [17]:
# Overall test-set accuracy of GaussianNB.
accuracy = metrics.accuracy_score(y_true=y_test, y_pred=y_pred)
print(accuracy)

0.73


## KNeighborsClassifier

In [18]:
from sklearn.neighbors import KNeighborsClassifier

# Fit a k-nearest-neighbours classifier that votes over the 5 closest reviews.
classifier = KNeighborsClassifier(n_neighbors=5).fit(X_train, y_train)
classifier

KNeighborsClassifier()

In [19]:
# Predict labels for the held-out test split with the fitted KNeighborsClassifier.
y_pred = classifier.predict(X_test)

In [20]:
from sklearn import metrics

# Per-class precision, recall and F1 for the KNeighborsClassifier predictions.
report = metrics.classification_report(y_true=y_test, y_pred=y_pred)
print(report)

              precision    recall  f1-score   support

           0       0.56      0.72      0.63        97
           1       0.64      0.46      0.53       103

    accuracy                           0.58       200
   macro avg       0.60      0.59      0.58       200
weighted avg       0.60      0.58      0.58       200



In [21]:
# Confusion matrix for KNeighborsClassifier: rows are true labels, columns are predictions.
from sklearn.metrics import confusion_matrix

cm = confusion_matrix(y_true=y_test, y_pred=y_pred)
print(cm)

[[70 27]
 [56 47]]


In [22]:
# Overall test-set accuracy of KNeighborsClassifier.
accuracy = metrics.accuracy_score(y_true=y_test, y_pred=y_pred)
print(accuracy)

0.585


## Naive Bayes: MultinomialNB

In [23]:
from sklearn.naive_bayes import MultinomialNB

# Fit Multinomial Naive Bayes on the token-count training matrix.
classifier = MultinomialNB().fit(X_train, y_train)
classifier

MultinomialNB()

In [24]:
# Predict labels for the held-out test split with the fitted MultinomialNB.
y_pred = classifier.predict(X_test)


In [25]:
from sklearn import metrics

# Per-class precision, recall and F1 for the MultinomialNB predictions.
report = metrics.classification_report(y_true=y_test, y_pred=y_pred)
print(report)

              precision    recall  f1-score   support

           0       0.77      0.74      0.75        97
           1       0.76      0.79      0.78       103

    accuracy                           0.77       200
   macro avg       0.77      0.76      0.76       200
weighted avg       0.77      0.77      0.76       200



In [26]:
# Confusion matrix for MultinomialNB: rows are true labels, columns are predictions.
from sklearn.metrics import confusion_matrix

cm = confusion_matrix(y_true=y_test, y_pred=y_pred)
print(cm)

[[72 25]
 [22 81]]


In [27]:
# Overall test-set accuracy of MultinomialNB.
accuracy = metrics.accuracy_score(y_true=y_test, y_pred=y_pred)
print(accuracy)

0.765


## LogisticRegression

In [28]:

from sklearn.linear_model import LogisticRegression

# Fit logistic regression with default settings on the training split.
classifier = LogisticRegression().fit(X_train, y_train)
classifier

LogisticRegression()

In [29]:
# Predict labels for the held-out test split with the fitted LogisticRegression.
y_pred = classifier.predict(X_test)


In [30]:
from sklearn import metrics

# Per-class precision, recall and F1 for the LogisticRegression predictions.
report = metrics.classification_report(y_true=y_test, y_pred=y_pred)
print(report)

              precision    recall  f1-score   support

           0       0.67      0.78      0.72        97
           1       0.76      0.64      0.69       103

    accuracy                           0.71       200
   macro avg       0.72      0.71      0.71       200
weighted avg       0.72      0.71      0.71       200



In [31]:
# Confusion matrix for LogisticRegression: rows are true labels, columns are predictions.
from sklearn.metrics import confusion_matrix

cm = confusion_matrix(y_true=y_test, y_pred=y_pred)
print(cm)

[[76 21]
 [37 66]]


In [32]:
# Overall test-set accuracy of LogisticRegression.
accuracy = metrics.accuracy_score(y_true=y_test, y_pred=y_pred)
print(accuracy)

0.71


# Final Results

Test-set accuracy (200 held-out reviews) for each model:

- Logistic Regression = 71%
- SVC = 73.5%
- GaussianNB = 73%
- MultinomialNB = 76.5% (best)
- KNeighborsClassifier = 58.5%

# Thank You!