In [1]:
# Importing libraries
import numpy as np
import pandas as pd

In [2]:
# Mount Google Drive inside the Colab runtime at /content/drive so the
# data and model files referenced by the following cells can be read.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [3]:
# Importing the dataset
# The file is tab-separated. quoting = 3 is csv.QUOTE_NONE: quote characters
# inside a review are kept as literal text rather than parsed as field quotes.
# NOTE(review): the path is relative, so it presumably relies on the working
# directory being /content (where Drive is mounted) — confirm. It also spells
# the folder 'My Drive' while the later cells use 'MyDrive'.
dataset = pd.read_csv('./drive/My Drive/a2_RestaurantReviews_FreshDump.tsv', delimiter = '\t', quoting = 3)
# Preview the first rows of the loaded reviews
dataset.head()

Unnamed: 0,Review
0,Spend your money elsewhere.
1,Their regular toasted bread was equally satisf...
2,The Buffet at Bellagio was far from what I ant...
3,"And the drinks are WEAK, people!"
4,-My order was not correct.


In [4]:
# Importing python's regular expression and natural language toolkit
import re
import nltk

# downloading stopwords file (needed before stopwords.words() can be read)
nltk.download('stopwords')

from nltk.corpus import stopwords
from nltk.stem.porter import PorterStemmer
# Single Porter stemmer instance, reused for every word in the cleaning loop
ps = PorterStemmer()

# English stopword list with 'not' taken out, so negations survive cleaning
# (e.g. the cleaned review 'order not correct' keeps its 'not').
# list.remove drops only the first match and raises ValueError if 'not' is absent.
all_stopwords = stopwords.words('english')
all_stopwords.remove('not')

[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.


In [6]:
# Build `corpus`: one cleaned, stemmed, space-joined string per review.
# Every row of dataset['Review'] is processed, so `corpus` always has exactly
# one entry per row of `dataset` (the later 'predicted_label' column
# assignment needs the lengths to match).
corpus = []

# Build the stopword lookup set once, outside the loop, so each membership
# test is O(1) without re-creating the set for every single word.
stopword_set = set(all_stopwords)

for raw_review in dataset['Review']:
  # replace every character that is not an ASCII letter (digits, punctuation, ...) with a space
  letters_only = re.sub('[^a-zA-Z]', ' ', raw_review)
  # lower-case the text and split it into words on whitespace
  words = letters_only.lower().split()
  # drop stopwords ('not' is kept: it was removed from all_stopwords) and
  # reduce each remaining word to its Porter stem (stemming only, no lemmatizing)
  stems = [ps.stem(word) for word in words if word not in stopword_set]
  # re-join the stems into the single string the vectorizer expects
  corpus.append(' '.join(stems))

In [7]:
# Inspect the cleaned reviews; the bare expression renders the whole list
corpus

['spend money elsewher',
 'regular toast bread equal satisfi occasion pat butter mmmm',
 'buffet bellagio far anticip',
 'drink weak peopl',
 'order not correct',
 'also feel like chip bought not made hous',
 'disappoint dinner went elsewher dessert',
 'chip sal amaz',
 'return',
 'new fav vega buffet spot',
 'serious cannot believ owner mani unexperienc employe run around like chicken head cut',
 'sad',
 'felt insult disrespect could talk judg anoth human like',
 'call steakhous properli cook steak understand',
 'not impress concept food',
 'thing crazi guacamol like pur ed',
 'realli noth postino hope experi better',
 'got food poison buffet',
 'brought fresh batch fri think yay someth warm',
 'hilari yummi christma eve dinner rememb biggest fail entir trip us',
 'needless say go back anytim soon',
 'place disgust',
 'everi time eat see care teamwork profession degre',
 'ri style calamari joke',
 'howev much garlic fondu bare edibl',
 'could bare stomach meal complain busi lunch',
 '

In [8]:
# Loading BoW dictionary we saved before
# `cv` is the pickled bag-of-words vectorizer (presumably a fitted
# CountVectorizer); it is used below to encode the fresh reviews with the
# same vocabulary that was saved.
from sklearn.feature_extraction.text import CountVectorizer
import pickle
cvFile='./drive/MyDrive/c1_BoW_Sentiment_Model.pkl'
# NOTE(security): pickle.load can execute arbitrary code while loading;
# only load a file that you created yourself or otherwise trust.
# The `with` block guarantees the file handle is closed after loading.
with open(cvFile, "rb") as cv_handle:
  cv = pickle.load(cv_handle)


In [9]:
# Encode the cleaned reviews with the loaded vectorizer (transform only, no
# re-fitting) and convert the result to a dense array via .toarray()
X_fresh = cv.transform(corpus).toarray()
# Shape check: (number of reviews, number of features)
X_fresh.shape

(100, 1420)

In [11]:
# Importing NB Classifier we saved for later use in sentiment prediction
# NOTE(review): joblib.load unpickles the file, so only load model files
# that come from a trusted source.
import joblib
classifier = joblib.load('./drive/MyDrive/c2_Classifier_Sentiment_Model')

In [12]:
# Sentiment prediction for fresh reviews
# Produces one predicted label per row of X_fresh; the printed output holds
# only 0s and 1s (presumably 0 = negative, 1 = positive — confirm against
# the notebook that trained the classifier).
Y_pred = classifier.predict(X_fresh)
print(Y_pred)

[0 1 1 0 0 1 0 0 0 1 1 0 0 1 0 1 0 0 0 1 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 1 0 0 0 0 0 1 0 1 0 0 1 0 0 1 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 1 0 0 1 1 0
 0 1 0 0 0 0 1 0 0 1 0 1 0 0 0 0 0 0 0 1 0 0 0 0 0 0]


In [13]:
# Attach the predictions to the reviews as a new 'predicted_label' column.
# NOTE: this mutates `dataset` in place, and pandas requires exactly one
# prediction per row of `dataset` — a length mismatch raises ValueError.
dataset['predicted_label'] = Y_pred.tolist()
# Preview the labelled rows
dataset.head()

Unnamed: 0,Review,predicted_label
0,Spend your money elsewhere.,0
1,Their regular toasted bread was equally satisf...,1
2,The Buffet at Bellagio was far from what I ant...,1
3,"And the drinks are WEAK, people!",0
4,-My order was not correct.,0


In [14]:
# Write the reviews with their predicted labels to Drive as a tab-separated,
# UTF-8 encoded file. index=False leaves the DataFrame index out; the
# 'Unnamed: 0' column read from the input is a regular column and is still written.
dataset.to_csv("./drive/MyDrive/c3_Predicted_Sentiments_Fresh_Dump.tsv", sep='\t', encoding='UTF-8', index=False)