# Importing the libraries

In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

In [4]:
dataset = pd.read_csv('Restaurant_Reviews.tsv', delimiter = '\t')

In [5]:
dataset.head()

Unnamed: 0,Review,Liked
0,Wow... Loved this place.,1
1,Crust is not good.,0
2,Not tasty and the texture was just nasty.,0
3,Stopped by during the late May bank holiday of...,1
4,The selection on the menu was great and so wer...,1


# Data cleaning and preprocessing

In [6]:
import nltk
import re
from nltk.corpus import stopwords
from nltk.stem.porter import PorterStemmer
from nltk.stem import WordNetLemmatizer

In [7]:
stemming=PorterStemmer()
lemma=WordNetLemmatizer()

In [8]:
final=[]
for i in range(len(dataset)):
    review=re.sub('[^a-zA-Z]',' ',dataset.Review[i])
    review=review.split()
    long_island=[stemming.stem(each) for each in review if each not in stopwords.words('english')]
    margarita=' '.join(long_island)
    final.append(margarita)


## Creating the Bag of Words 

In [12]:
print(final[:5])

['wow love place', 'crust good', 'not tasti textur nasti', 'stop late may bank holiday rick steve recommend love', 'the select menu great price']


In [13]:
from sklearn.feature_extraction.text import CountVectorizer
ccv=CountVectorizer(max_features=1500) # selecting the number of columns/features,coz some words may be present only 
                                       ## once or twice

In [14]:
X=ccv.fit_transform(final)

In [29]:
X.toarray()

array([[0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       ...,
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0]], dtype=int64)

In [30]:
Y=dataset[['Liked']]

In [31]:
Y.head()

Unnamed: 0,Liked
0,1
1,0
2,0
3,1
4,1


### Train Test Split

In [32]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size = 0.20, random_state = 0)

## Training the Naive Bayes model 

In [33]:
from sklearn.naive_bayes import MultinomialNB
mnb=MultinomialNB()
import warnings
warnings.filterwarnings("ignore")
mnb.fit(X_train, y_train)

MultinomialNB(alpha=1.0, class_prior=None, fit_prior=True)

In [34]:
mnb.score(X_test,y_test)

0.745

In [36]:
y_pred=mnb.predict(X_test)

In [37]:
from sklearn.metrics import confusion_matrix
confusionmatrix=confusion_matrix(y_test,y_pred)
confusionmatrix

array([[68, 29],
       [22, 81]], dtype=int64)