In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report

from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing import sequence
from sklearn.tree import DecisionTreeClassifier
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense,Embedding,SimpleRNN,LSTM,GRU,Bidirectional

import warnings
warnings.filterwarnings('ignore')

In [None]:
df = pd.read_csv('/content/drive/MyDrive/NLP(Classes)/alexa_reviews.tsv',sep='\t')

In [None]:
df.head()

Unnamed: 0.1,Unnamed: 0,verified_reviews,feedback
0,0,Love my Echo!,1
1,1,Loved it!,1
2,2,"Sometimes while playing a game, you can answer...",1
3,3,I have had a lot of fun with this thing. My 4 ...,1
4,4,Music,1


In [None]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3150 entries, 0 to 3149
Data columns (total 3 columns):
 #   Column            Non-Null Count  Dtype 
---  ------            --------------  ----- 
 0   Unnamed: 0        3150 non-null   int64 
 1   verified_reviews  3150 non-null   object
 2   feedback          3150 non-null   int64 
dtypes: int64(2), object(1)
memory usage: 74.0+ KB


In [None]:
df['feedback'].value_counts()

1    2893
0     257
Name: feedback, dtype: int64

In [None]:
X=df['verified_reviews']
y=df['feedback']

**DIVIDING INTO TRAINING AND TESTING DATA**

In [None]:
xtrain,xtest,ytrain,ytest=train_test_split(X,y,test_size=0.3,random_state=1)

**TRAIN DATA PRE-PROCESSING**

In [None]:
#Tokenization
tok=Tokenizer()
tok.fit_on_texts(xtrain)

In [None]:
#Vocubalary length
vocab_len=len(tok.index_word)
vocab_len

3632

In [None]:
#Text to sequence
train_sequence=tok.texts_to_sequences(xtrain)

In [None]:
doc_len=[]
for doc in train_sequence:
  doc_len.append(len(doc))

In [None]:
np.quantile(doc_len,0.99)

151.96000000000004

In [None]:
max_len=151

In [None]:
#Padding
train_matrix=sequence.pad_sequences(train_sequence,maxlen=max_len)
train_matrix

array([[   0,    0,    0, ..., 1234,   19,   44],
       [   0,    0,    0, ...,  765,    6, 1521],
       [   0,    0,    0, ...,    4,   30,   45],
       ...,
       [   0,    0,    0, ...,  315,   16,   49],
       [   0,    0,    0, ...,    0,  525,  570],
       [   0,    0,    0, ...,    1,   11,  141]], dtype=int32)

**TEST DATA PRE-PROCESSING**

In [None]:
test_sequence=tok.texts_to_sequences(xtest)
test_matrix=sequence.pad_sequences(test_sequence,maxlen=max_len)
test_matrix

array([[  0,   0,   0, ..., 655,  67,   8],
       [  0,   0,   0, ...,   4,  50, 976],
       [  0,   0,   0, ...,   7, 611, 134],
       ...,
       [  0,   0,   0, ..., 427,  11,  48],
       [  0,   0,   0, ...,  10,   1,  48],
       [  0,   0,   0, ..., 482,  15,  24]], dtype=int32)

**RNN**

In [None]:
#Single layer RNN
model=Sequential()
model.add(Embedding(vocab_len+1,50,input_length=max_len,mask_zero=True))
model.add(SimpleRNN(128))
model.add(Dense(64,activation='tanh'))
model.add(Dense(1,activation='sigmoid'))
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding (Embedding)        (None, 151, 50)           181650    
_________________________________________________________________
simple_rnn (SimpleRNN)       (None, 128)               22912     
_________________________________________________________________
dense (Dense)                (None, 64)                8256      
_________________________________________________________________
dense_1 (Dense)              (None, 1)                 65        
Total params: 212,883
Trainable params: 212,883
Non-trainable params: 0
_________________________________________________________________


In [None]:
model.compile(optimizer="adam",loss="binary_crossentropy")

In [None]:
model.fit(train_matrix,ytrain,epochs=15,batch_size=64)

Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15


<keras.callbacks.History at 0x7fe88fe38790>

In [None]:
ypred=model.predict(test_matrix)

In [None]:
ypred = np.where(ypred >= 0.5,1,0)

In [None]:
print(classification_report(ytest,ypred))

              precision    recall  f1-score   support

           0       0.49      0.27      0.35        73
           1       0.94      0.98      0.96       872

    accuracy                           0.92       945
   macro avg       0.71      0.62      0.65       945
weighted avg       0.91      0.92      0.91       945



In [None]:
#Bidirectional RNN
model=Sequential()
model.add(Embedding(vocab_len+1,50,input_length=max_len,mask_zero=True))
model.add(Bidirectional(SimpleRNN(128)))
model.add(Dense(64,activation='tanh'))
model.add(Dense(1,activation='sigmoid'))
model.summary()

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_1 (Embedding)      (None, 151, 50)           181650    
_________________________________________________________________
bidirectional (Bidirectional (None, 256)               45824     
_________________________________________________________________
dense_2 (Dense)              (None, 64)                16448     
_________________________________________________________________
dense_3 (Dense)              (None, 1)                 65        
Total params: 243,987
Trainable params: 243,987
Non-trainable params: 0
_________________________________________________________________


In [None]:
model.compile(optimizer="adam",loss="binary_crossentropy")

In [None]:
model.fit(train_matrix,ytrain,epochs=15,batch_size=64)

Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15


<keras.callbacks.History at 0x7fe88fbf32d0>

In [None]:
ypred=model.predict(test_matrix)

In [None]:
ypred = np.where(ypred >= 0.5,1,0)

In [None]:
print(classification_report(ytest,ypred))

              precision    recall  f1-score   support

           0       0.66      0.34      0.45        73
           1       0.95      0.99      0.97       872

    accuracy                           0.94       945
   macro avg       0.80      0.66      0.71       945
weighted avg       0.92      0.94      0.93       945



In [None]:
#Multiple layer RNN
model=Sequential()
model.add(Embedding(vocab_len+1,50,input_length=max_len,mask_zero=True))
model.add(SimpleRNN(128,return_sequences=True))
model.add(SimpleRNN(64))
model.add(Dense(64,activation='tanh'))
model.add(Dense(1,activation='sigmoid'))
model.summary()

Model: "sequential_2"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_2 (Embedding)      (None, 151, 50)           181650    
_________________________________________________________________
simple_rnn_2 (SimpleRNN)     (None, 151, 128)          22912     
_________________________________________________________________
simple_rnn_3 (SimpleRNN)     (None, 64)                12352     
_________________________________________________________________
dense_4 (Dense)              (None, 64)                4160      
_________________________________________________________________
dense_5 (Dense)              (None, 1)                 65        
Total params: 221,139
Trainable params: 221,139
Non-trainable params: 0
_________________________________________________________________


In [None]:
model.compile(optimizer="adam",loss="binary_crossentropy")

In [None]:
model.fit(train_matrix,ytrain,epochs=15,batch_size=64)

Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15


<keras.callbacks.History at 0x7fe88b49de10>

In [None]:
ypred=model.predict(test_matrix)

In [None]:
ypred = np.where(ypred >= 0.5,1,0)

In [None]:
print(classification_report(ytest,ypred))

              precision    recall  f1-score   support

           0       0.64      0.29      0.40        73
           1       0.94      0.99      0.96       872

    accuracy                           0.93       945
   macro avg       0.79      0.64      0.68       945
weighted avg       0.92      0.93      0.92       945



**LSTM**

In [None]:
#Single layer LSTM
model=Sequential()
model.add(Embedding(vocab_len+1,50,input_length=max_len,mask_zero=True))
model.add(LSTM(128))
model.add(Dense(64,activation='tanh'))
model.add(Dense(1,activation='sigmoid'))
model.summary()

Model: "sequential_3"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_3 (Embedding)      (None, 151, 50)           181650    
_________________________________________________________________
lstm (LSTM)                  (None, 128)               91648     
_________________________________________________________________
dense_6 (Dense)              (None, 64)                8256      
_________________________________________________________________
dense_7 (Dense)              (None, 1)                 65        
Total params: 281,619
Trainable params: 281,619
Non-trainable params: 0
_________________________________________________________________


In [None]:
model.compile(optimizer="adam",loss="binary_crossentropy")

In [None]:
model.fit(train_matrix,ytrain,epochs=15,batch_size=64)

Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15


<keras.callbacks.History at 0x7fe88a4ab850>

In [None]:
ypred=model.predict(test_matrix)

In [None]:
ypred = np.where(ypred >= 0.5,1,0)

In [None]:
print(classification_report(ytest,ypred))

              precision    recall  f1-score   support

           0       0.68      0.44      0.53        73
           1       0.95      0.98      0.97       872

    accuracy                           0.94       945
   macro avg       0.82      0.71      0.75       945
weighted avg       0.93      0.94      0.93       945



In [None]:
#Bidirectional LSTM
model=Sequential()
model.add(Embedding(vocab_len+1,50,input_length=max_len,mask_zero=True))
model.add(Bidirectional(LSTM(128)))
model.add(Dense(64,activation='tanh'))
model.add(Dense(1,activation='sigmoid'))
model.summary()

Model: "sequential_4"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_4 (Embedding)      (None, 151, 50)           181650    
_________________________________________________________________
bidirectional_1 (Bidirection (None, 256)               183296    
_________________________________________________________________
dense_8 (Dense)              (None, 64)                16448     
_________________________________________________________________
dense_9 (Dense)              (None, 1)                 65        
Total params: 381,459
Trainable params: 381,459
Non-trainable params: 0
_________________________________________________________________


In [None]:
model.compile(optimizer="adam",loss="binary_crossentropy")

In [None]:
model.fit(train_matrix,ytrain,epochs=15,batch_size=64)

Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15


<keras.callbacks.History at 0x7fe888eab890>

In [None]:
ypred=model.predict(test_matrix)

In [None]:
ypred = np.where(ypred >= 0.5,1,0)

In [None]:
print(classification_report(ytest,ypred))

              precision    recall  f1-score   support

           0       0.75      0.53      0.62        73
           1       0.96      0.99      0.97       872

    accuracy                           0.95       945
   macro avg       0.86      0.76      0.80       945
weighted avg       0.95      0.95      0.95       945



In [None]:
#Multi-layer LSTM
model=Sequential()
model.add(Embedding(vocab_len+1,50,input_length=max_len,mask_zero=True))
model.add(LSTM(128,return_sequences=True))
model.add(LSTM(128))
model.add(Dense(64,activation='tanh'))
model.add(Dense(1,activation='sigmoid'))
model.summary()

Model: "sequential_5"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_5 (Embedding)      (None, 151, 50)           181650    
_________________________________________________________________
lstm_2 (LSTM)                (None, 151, 128)          91648     
_________________________________________________________________
lstm_3 (LSTM)                (None, 128)               131584    
_________________________________________________________________
dense_10 (Dense)             (None, 64)                8256      
_________________________________________________________________
dense_11 (Dense)             (None, 1)                 65        
Total params: 413,203
Trainable params: 413,203
Non-trainable params: 0
_________________________________________________________________


In [None]:
model.compile(optimizer="adam",loss="binary_crossentropy")

In [None]:
model.fit(train_matrix,ytrain,epochs=15,batch_size=64)

Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15


<keras.callbacks.History at 0x7fe886b46410>

In [None]:
ypred=model.predict(test_matrix)

In [None]:
ypred = np.where(ypred >= 0.5,1,0)

In [None]:
print(classification_report(ytest,ypred))

              precision    recall  f1-score   support

           0       0.82      0.49      0.62        73
           1       0.96      0.99      0.97       872

    accuracy                           0.95       945
   macro avg       0.89      0.74      0.80       945
weighted avg       0.95      0.95      0.95       945



**GRU**

In [None]:
#GRU
model=Sequential()
model.add(Embedding(vocab_len+1,50,input_length=max_len,mask_zero=True))
model.add(GRU(128))
model.add(Dense(64,activation='tanh'))
model.add(Dense(1,activation='sigmoid'))
model.summary()

Model: "sequential_6"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_6 (Embedding)      (None, 151, 50)           181650    
_________________________________________________________________
gru (GRU)                    (None, 128)               69120     
_________________________________________________________________
dense_12 (Dense)             (None, 64)                8256      
_________________________________________________________________
dense_13 (Dense)             (None, 1)                 65        
Total params: 259,091
Trainable params: 259,091
Non-trainable params: 0
_________________________________________________________________


In [None]:
model.compile(optimizer="adam",loss="binary_crossentropy")

In [None]:
model.fit(train_matrix,ytrain,epochs=15,batch_size=64)

Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15


<keras.callbacks.History at 0x7fe883696e50>

In [None]:
ypred=model.predict(test_matrix)

In [None]:
ypred = np.where(ypred >= 0.5,1,0)

In [None]:
print(classification_report(ytest,ypred))

              precision    recall  f1-score   support

           0       0.69      0.48      0.56        73
           1       0.96      0.98      0.97       872

    accuracy                           0.94       945
   macro avg       0.82      0.73      0.77       945
weighted avg       0.94      0.94      0.94       945



In [None]:
#Bidirectional GRU
model=Sequential()
model.add(Embedding(vocab_len+1,50,input_length=max_len,mask_zero=True))
model.add(Bidirectional(GRU(128)))
model.add(Dense(64,activation='tanh'))
model.add(Dense(1,activation='sigmoid'))
model.summary()

Model: "sequential_7"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_7 (Embedding)      (None, 151, 50)           181650    
_________________________________________________________________
bidirectional_2 (Bidirection (None, 256)               138240    
_________________________________________________________________
dense_14 (Dense)             (None, 64)                16448     
_________________________________________________________________
dense_15 (Dense)             (None, 1)                 65        
Total params: 336,403
Trainable params: 336,403
Non-trainable params: 0
_________________________________________________________________


In [None]:
model.compile(optimizer="adam",loss="binary_crossentropy")

In [None]:
model.fit(train_matrix,ytrain,epochs=15,batch_size=64)

Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15


<keras.callbacks.History at 0x7fe88228e410>

In [None]:
ypred=model.predict(test_matrix)

In [None]:
ypred = np.where(ypred >= 0.5,1,0)

In [None]:
print(classification_report(ytest,ypred))

              precision    recall  f1-score   support

           0       0.71      0.47      0.56        73
           1       0.96      0.98      0.97       872

    accuracy                           0.94       945
   macro avg       0.83      0.72      0.77       945
weighted avg       0.94      0.94      0.94       945



In [None]:
#Multi-layer GRU
model=Sequential()
model.add(Embedding(vocab_len+1,50,input_length=max_len,mask_zero=True))
model.add(GRU(128,return_sequences=True))
model.add(GRU(128))
model.add(Dense(64,activation='tanh'))
model.add(Dense(1,activation='sigmoid'))
model.summary()

Model: "sequential_8"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_8 (Embedding)      (None, 151, 50)           181650    
_________________________________________________________________
gru_2 (GRU)                  (None, 151, 128)          69120     
_________________________________________________________________
gru_3 (GRU)                  (None, 128)               99072     
_________________________________________________________________
dense_16 (Dense)             (None, 64)                8256      
_________________________________________________________________
dense_17 (Dense)             (None, 1)                 65        
Total params: 358,163
Trainable params: 358,163
Non-trainable params: 0
_________________________________________________________________


In [None]:
model.compile(optimizer="adam",loss="binary_crossentropy")

In [None]:
model.fit(train_matrix,ytrain,epochs=15,batch_size=64)

Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15


<keras.callbacks.History at 0x7fe87fe7dad0>

In [None]:
ypred=model.predict(test_matrix)

In [None]:
ypred = np.where(ypred >= 0.5,1,0)

In [None]:
print(classification_report(ytest,ypred))

              precision    recall  f1-score   support

           0       0.74      0.47      0.57        73
           1       0.96      0.99      0.97       872

    accuracy                           0.95       945
   macro avg       0.85      0.73      0.77       945
weighted avg       0.94      0.95      0.94       945

