# Importing libraries

In [1]:
import numpy as np
import pandas as pd

# Importing dataset

In [2]:
# Load the historic review dump: tab-separated, with quoting=3 passed through
# to the parser exactly as before.
dataset_path = './drive/MyDrive/Restaurant_Review_System/a1_RestaurantReviews_HistoricDump.tsv'
dataset = pd.read_csv(dataset_path, sep='\t', quoting=3)

In [3]:
# Sanity-check the dimensions of the loaded frame.
dataset.shape

(900, 2)

In [4]:
# Preview the first few rows of the loaded frame.
dataset.head()

Unnamed: 0,Review,Liked
0,Wow... Loved this place.,1
1,Crust is not good.,0
2,Not tasty and the texture was just nasty.,0
3,Stopped by during the late May bank holiday of...,1
4,The selection on the menu was great and so wer...,1


# Dataset cleaning

In [5]:
import re

import nltk
from nltk.corpus import stopwords
from nltk.stem.porter import PorterStemmer

# Fetch the NLTK stopword corpus that is read just below.
nltk.download('stopwords')

# Stemmer instance reused by the cleaning step.
ps = PorterStemmer()

# English stopword list with 'not' taken out, so the later stopword filter
# does not strip it from the reviews.
all_stopwords = stopwords.words('english')
all_stopwords.remove('not')

[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.


In [6]:
# Build the cleaned corpus: one normalised, stemmed string per review.
# Iterating the column directly (instead of range(0, 900)) keeps this cell
# correct when the dataset has a different number of rows.

# Built once up front; the set only needs constructing a single time.
stopword_set = set(all_stopwords)

corpus = []
for raw_review in dataset['Review']:
    # Replace every non-letter with a space, lower-case, split on whitespace.
    tokens = re.sub('[^a-zA-Z]', ' ', raw_review).lower().split()
    # Drop stopwords, stem what remains, and re-join into a single string.
    stemmed = [ps.stem(token) for token in tokens if token not in stopword_set]
    corpus.append(' '.join(stemmed))

In [None]:
# Show only the first few cleaned reviews; echoing the whole list would flood
# the notebook output.
corpus[:5]

# Data transformation

In [8]:
from sklearn.feature_extraction.text import CountVectorizer

# Upper bound on the vocabulary size kept by the bag-of-words vectorizer.
max_bow_features = 1420
cv = CountVectorizer(max_features=max_bow_features)

In [9]:
# Fit the vectorizer on the cleaned corpus and densify the resulting matrix.
bow_matrix = cv.fit_transform(corpus)
X = bow_matrix.toarray()

# The target is taken from the last column of the dataset.
label_column = dataset.iloc[:, -1]
y = label_column.values

In [11]:
# Save the fitted BoW vectorizer so it can be reused at prediction time.
import pickle

bow_path = './drive/MyDrive/Restaurant_Review_System/c1_BoW_Sentiment_Model.pkl'
# The context manager guarantees the file is flushed and closed, even if
# pickling raises; a bare open() would leave the handle dangling.
with open(bow_path, "wb") as bow_file:
    pickle.dump(cv, bow_file)

# Dividing dataset into training and testing

In [12]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the samples for testing; random_state is fixed at 0.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.20,
    random_state=0,
)

# Model fitting (Naive Bayes)

In [13]:
from sklearn.naive_bayes import GaussianNB

# Gaussian Naive Bayes fitted on the training split. The fit call stays the
# last expression so the notebook still displays the estimator.
classifier = GaussianNB()
classifier.fit(X=X_train, y=y_train)

GaussianNB(priors=None, var_smoothing=1e-09)

In [15]:
# Persist the fitted NB classifier so it can be reloaded at prediction time.
import joblib

classifier_path = './drive/MyDrive/Restaurant_Review_System/c2_Classifier_Sentiment_Model'
joblib.dump(classifier, classifier_path)

['./drive/MyDrive/Restaurant_Review_System/c2_Classifier_Sentiment_Model']

# Model performance

In [16]:
from sklearn.metrics import confusion_matrix, accuracy_score

# Predict labels for the held-out split.
y_pred = classifier.predict(X_test)

# Confusion matrix of the true labels against the predictions.
cm = confusion_matrix(y_true=y_test, y_pred=y_pred)
print(cm)

# Accuracy is left as the final expression so the notebook displays it.
accuracy_score(y_true=y_test, y_pred=y_pred)

[[67 11]
 [38 64]]


0.7277777777777777