In [138]:
import pandas as pd
import numpy as np

In [139]:
from google.colab import drive
# Colab-only: mount Google Drive at /drive so the '/drive/MyDrive/...' paths
# used by the later cells resolve.
drive.mount('/drive')

Mounted at /drive


In [165]:
# Read the fresh review dump once. quoting=3 is csv.QUOTE_NONE, so quote
# characters inside reviews are treated as ordinary text.
df_new = pd.read_csv('/drive/MyDrive/Sentiment_analysis/a2_RestaurantReviews_FreshDump.tsv',delimiter = '\t',quoting = 3)
# Independent working copy for the cleaning steps; df_new keeps the raw text
# so the predictions can be exported next to the original reviews.
df = df_new.copy()

In [166]:
df.head()

Unnamed: 0,Review
0,Spend your money elsewhere.
1,Their regular toasted bread was equally satisf...
2,The Buffet at Bellagio was far from what I ant...
3,"And the drinks are WEAK, people!"
4,-My order was not correct.


# Data Cleaning

In [167]:
import re
import nltk

from nltk.corpus import stopwords
from nltk.stem import PorterStemmer

# Shared Porter stemmer instance used by the stemming helper.
ps = PorterStemmer()
# Fetch the stop-word corpus before it is read below.
nltk.download('stopwords')

# English stop words with 'not' removed, so negations such as "not correct"
# survive stop-word filtering.
all_stopword = stopwords.words('english')
all_stopword.remove('not')

[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


In [168]:
import string

punc = string.punctuation
# Build the deletion table once instead of on every call; remove_punc is
# applied to each review row.
_PUNC_TABLE = str.maketrans("", "", punc)

def remove_punc(text):
  """Return `text` with every character listed in `punc` deleted.

  Characters are removed, not replaced by spaces, so "hadn't" becomes "hadnt".
  """
  return text.translate(_PUNC_TABLE)

In [169]:
# Strip punctuation from every review, element by element.
df['Review'] = df['Review'].map(remove_punc)

In [170]:
df['Review'][0]

'Spend your money elsewhere'

In [171]:
## lower

# Lowercase every review so later word comparisons are case-insensitive
# (the stop-word check in stop_word() is an exact string match).
df['Review']=df['Review'].str.lower()

In [172]:
### Tokenization

from nltk.tokenize import word_tokenize
nltk.download('punkt')


def word_token(text):
  """Split `text` into tokens with NLTK's `word_tokenize` and return the result."""
  return word_tokenize(text)

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


In [173]:
## Tokenization Apply on review column

# Replace each review string with its token list.
df['Review'] = df['Review'].map(word_token)

In [174]:
## Stemming

def stemming(text):
  """Stem every token in `text` with the shared stemmer `ps` and join the stems with single spaces."""
  stems = map(ps.stem, text)
  return ' '.join(stems)

In [175]:
# NOTE(review): the stemmed Series returned here is only displayed, never
# assigned, so df['Review'] still holds the unstemmed token lists afterwards
# and stemming never reaches the model input.
# Assigning it as-is would hand stop_word() a joined string (it iterates
# tokens), so if stemming is intended it has to run after stop-word removal.
# TODO confirm against the preprocessing the vectorizer was trained with.
df['Review'].apply(stemming)

0                             spend your money elsewher
1     their regular toast bread wa equal satisfi wit...
2     the buffet at bellagio wa far from what i anticip
3                          and the drink are weak peopl
4                               my order wa not correct
                            ...                        
95    i think food should have flavor and textur and...
96                               appetit instantli gone
97        overal i wa not impress and would not go back
98    the whole experi wa underwhelm and i think wel...
99    then as if i hadnt wast enough of my life ther...
Name: Review, Length: 100, dtype: object

In [176]:
### Stop-word removal

def stop_word(text, stop_words=None):
  """Drop stop words from a token sequence and return the rest as one string.

  Args:
    text: iterable of word tokens (e.g. the list produced by `word_token`).
    stop_words: optional collection of words to drop; when omitted, the
      notebook-level `all_stopword` list is used.

  Returns:
    The surviving tokens joined by single spaces ('' if none survive).
  """
  if stop_words is None:
    stop_words = all_stopword
  # One set build per call gives constant-time membership checks instead of
  # scanning the whole list for every token.
  blocked = set(stop_words)
  # Skip stop words outright; substituting '' placeholders would leave
  # leading, trailing and doubled spaces in the joined string.
  return ' '.join(word for word in text if word not in blocked)


In [177]:
# Filter stop words out of each token list; every review becomes a single
# space-joined string again.
df['Review'] = df['Review'].map(stop_word)

In [178]:
df.head()

Unnamed: 0,Review
0,spend money elsewhere
1,regular toasted bread equally satisfying o...
2,buffet bellagio far anticipated
3,drinks weak people
4,order not correct


# Data Transformation

In [179]:
from sklearn.feature_extraction.text import CountVectorizer
import pickle

path = '/drive/MyDrive/Sentiment_analysis/c1_BoW_Sentiment_Model.pkl'
# NOTE(security): pickle.load can execute arbitrary code embedded in the file;
# only load artifacts you produced yourself or otherwise trust.
# The context manager closes the file handle as soon as loading finishes.
with open(path, 'rb') as vectorizer_file:
  # Presumably the fitted CountVectorizer saved at training time; verify.
  cv = pickle.load(vectorizer_file)

In [180]:
# Encode the cleaned reviews with the loaded vectorizer, then convert the
# result to a dense array for the classifier.
bow_matrix = cv.transform(df['Review'])
X_new = bow_matrix.toarray()

In [181]:
## Prediction

import joblib

# Load the previously saved sentiment classifier from Drive.
classifier_path = '/drive/MyDrive/Sentiment_analysis/c2_Classifier_Sentiment_Model'
classifier = joblib.load(classifier_path)

In [182]:
# One predicted label per row of X_new. The results table below shows 0/1
# values; presumably 1 = positive sentiment. TODO confirm against the training labels.
pred = classifier.predict(X_new)

In [183]:
# Attach the labels to the raw (uncleaned) frame. Rows line up positionally
# because df and df_new hold the same rows in the same order; none of the
# cleaning steps above drops or reorders rows.
# NOTE(review): the column is spelled 'predication'; renaming it would change
# the header of the exported file.
df_new['predication'] = pred.tolist()

In [185]:
df_new

Unnamed: 0,Review,predication
0,Spend your money elsewhere.,0
1,Their regular toasted bread was equally satisf...,1
2,The Buffet at Bellagio was far from what I ant...,1
3,"And the drinks are WEAK, people!",1
4,-My order was not correct.,0
...,...,...
95,I think food should have flavor and texture an...,0
96,Appetite instantly gone.,0
97,Overall I was not impressed and would not go b...,0
98,"The whole experience was underwhelming, and I ...",1


In [188]:
### Export File 

# Write the raw reviews plus their predicted labels as a tab-separated file,
# leaving out the DataFrame index.
export_path = "/drive/MyDrive/Sentiment_analysis/c3_Predicted_Sentiments_Fresh_Dump.tsv"
df_new.to_csv(export_path, sep='\t', encoding='UTF-8', index=False)