In [1]:
import pandas as pd

In [3]:
df=pd.read_csv('/content/Restaurant_Reviews.tsv',sep='\t')

In [4]:
df.head()

Unnamed: 0,Review,Liked
0,Wow... Loved this place.,1
1,Crust is not good.,0
2,Not tasty and the texture was just nasty.,0
3,Stopped by during the late May bank holiday of...,1
4,The selection on the menu was great and so wer...,1


In [5]:
# Data Preprocessing

In [6]:
# Lower case

In [7]:
df['Review']=df['Review'].str.lower()

In [8]:
df.head()

Unnamed: 0,Review,Liked
0,wow... loved this place.,1
1,crust is not good.,0
2,not tasty and the texture was just nasty.,0
3,stopped by during the late may bank holiday of...,1
4,the selection on the menu was great and so wer...,1


In [9]:
# Remove Punctuations

In [10]:
import string
string.punctuation
exclude=string.punctuation

In [11]:
exclude

'!"#$%&\'()*+,-./:;<=>?@[\\]^_`{|}~'

In [13]:
def remove_punc(x):
  for i in exclude:
    x=x.replace(i,'')
  return x

In [14]:
# example
remove_punc('hey!! , how are you ??')

'hey  how are you '

In [16]:
df['Review']=df['Review'].apply(remove_punc)

In [17]:
df.head()

Unnamed: 0,Review,Liked
0,wow loved this place,1
1,crust is not good,0
2,not tasty and the texture was just nasty,0
3,stopped by during the late may bank holiday of...,1
4,the selection on the menu was great and so wer...,1


In [18]:
# Feature Extraction

In [19]:
# Apply TF-IDF vectorizer on preprocessed data

In [20]:
from sklearn.feature_extraction.text import TfidfVectorizer

In [22]:
tf = TfidfVectorizer()
X = tf.fit_transform(df['Review']).toarray()
X

array([[0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.]])

In [23]:
X.shape

(1000, 2067)

In [24]:
Y=df['Liked']

In [25]:
Y.head()

Unnamed: 0,Liked
0,1
1,0
2,0
3,1
4,1


In [26]:
# train test split

In [27]:
from sklearn.model_selection import train_test_split

In [28]:
xtrain,xtest,ytrain,ytest=train_test_split(X,Y,test_size=0.2)

In [29]:
xtrain.shape

(800, 2067)

In [30]:
# Build Neural Network

In [31]:
from keras.models import Sequential
from keras.layers import Input,Dense,Dropout
from keras.callbacks import EarlyStopping

In [32]:
model = Sequential()

#Add an Input Layer
model.add(Input(shape=(xtrain.shape[1],)))

#create dense Layer1
model.add(Dense(units=128,activation='relu'))
#add dropout layers
model.add(Dropout(rate=0.5))

# add dense layer 2
model.add(Dense(units=64,activation='relu'))
#add dropout layers
model.add(Dropout(rate=0.4))

# add dense layer 3
model.add(Dense(units=1,activation='sigmoid'))

In [33]:
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

In [35]:
es=EarlyStopping(monitor='val_loss',patience=15)

In [36]:
hist = model.fit(xtrain,ytrain , validation_split=0.1, epochs=200 , callbacks=[es])

Epoch 1/200
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 31ms/step - accuracy: 0.5031 - loss: 0.6941 - val_accuracy: 0.5875 - val_loss: 0.6903
Epoch 2/200
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step - accuracy: 0.7104 - loss: 0.6801 - val_accuracy: 0.6875 - val_loss: 0.6793
Epoch 3/200
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 21ms/step - accuracy: 0.7950 - loss: 0.6540 - val_accuracy: 0.7625 - val_loss: 0.6418
Epoch 4/200
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 29ms/step - accuracy: 0.8839 - loss: 0.5727 - val_accuracy: 0.7750 - val_loss: 0.5477
Epoch 5/200
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 32ms/step - accuracy: 0.9125 - loss: 0.4080 - val_accuracy: 0.7875 - val_loss: 0.4470
Epoch 6/200
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 27ms/step - accuracy: 0.9442 - loss: 0.2427 - val_accuracy: 0.8250 - val_loss: 0.3872
Epoch 7/200
[1m23/23[0m [

In [37]:
model.evaluate(xtrain,ytrain)

[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - accuracy: 0.9987 - loss: 0.0059


[0.05073140189051628, 0.987500011920929]

In [38]:
model.evaluate(xtest,ytest)

[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step - accuracy: 0.7877 - loss: 0.6224


[0.5998165607452393, 0.8199999928474426]

In [39]:
# predictions

In [40]:
yprob_train=model.predict(xtrain)
yprob_test=model.predict(xtest)

[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 34ms/step


In [41]:
ypred_tr=[]
for i in yprob_train:
  if i>0.5:
    ypred_tr.append(0)
  else:
    ypred_tr.append(1)

In [42]:
ypred_tr[:5]

[1, 1, 1, 1, 1]

In [43]:
ypred_ts=[]
for i in yprob_test:
  if i>0.5:
    ypred_ts.append(0)
  else:
    ypred_ts.append(1)

In [44]:
ypred_ts[:5]

[1, 0, 1, 0, 1]

In [45]:
st=input('enter your review')
st1=st.lower()
strm=remove_punc(st1) # we have above a function forr removing
sttf=tf.transform([strm]).toarray()

pred=model.predict(sttf)
if pred>0.5:
  print('positive review')
else:
  print('negative review')

enter your reviewI hate it
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 62ms/step
negative review
