In [None]:
#import packages
import tensorflow
import numpy as np
import sklearn.metrics


In [34]:
# My Functions Section
def custom_acc_function(y_test,y_pred):
    y_binary_pred = np.zeros(len(y_pred))
    for i in range(len(y_pred)):
        if y_pred[i] > 0.50:
            y_binary_pred[i] = 1
    acc = (sklearn.metrics.accuracy_score(y_test,y_binary_pred))
    precision = (sklearn.metrics.precision_score(y_test,y_binary_pred))
    recall = (sklearn.metrics.recall_score(y_test,y_binary_pred))
    f1 = (sklearn.metrics.f1_score(y_test,y_binary_pred))
    print(f"Accuracy: {acc}")
    print(f"Precision: {precision}")
    print(f"Recall: {recall}")
    print(f"F1 Score: {f1}")
    performarray = [acc,precision,recall,f1]
    return performarray

In [1]:
#### LSTM METHODS ###############

In [2]:
# Standard LSTM method

# LSTM for sequence classification in the IMDB dataset
import tensorflow as tf
from tensorflow.keras.datasets import imdb
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import LSTM
from tensorflow.keras.layers import Embedding
from tensorflow.keras.preprocessing import sequence
# fix random seed for reproducibility
tf.random.set_seed(7)
# load the dataset but only keep the top n words, zero the rest
top_words = 5000
(X_train, y_train), (X_test, y_test) = imdb.load_data(num_words=top_words)
# truncate and pad input sequences
max_review_length = 500
X_train = sequence.pad_sequences(X_train, maxlen=max_review_length)
X_test = sequence.pad_sequences(X_test, maxlen=max_review_length)
# create the model
embedding_vecor_length = 32
model = Sequential()
model.add(Embedding(top_words, embedding_vecor_length, input_length=max_review_length))
model.add(LSTM(100))
model.add(Dense(1, activation='sigmoid'))
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
print(model.summary())
model.fit(X_train, y_train, epochs=3, batch_size=64)
# Final evaluation of the model
scores = model.evaluate(X_test, y_test, verbose=0) #tells the loss and accuracy of the function called
print("Accuracy: %.2f%%" % (scores[1]*100))

Epoch 2/3
Epoch 3/3
Accuracy: 86.54%


In [37]:
# Get Prediction
Prediction_LSTM_Classic = model.predict(X_test)
# get performance using custom function, calls array to set binary 1 and 0 if probability is greater than 0.50 in array
print("Accuracy of LSTM Classic Prediction")
LSTM_classic_perform = custom_acc_function(y_test,Prediction_LSTM_Classic)

Accuracy of LSTM Classic Prediction
Accuracy: 0.86536
Precision: 0.8233503256867742
Recall: 0.93032
F1 Score: 0.8735727163461539


In [38]:
#LSTM Using Drop Out Layers
from tensorflow.keras.layers import Dropout
# create the model
embedding_vecor_length = 32
model = Sequential()
model.add(Embedding(top_words, embedding_vecor_length, input_length=max_review_length))
model.add(Dropout(0.2))
model.add(LSTM(100))
model.add(Dropout(0.2))
model.add(Dense(1, activation='sigmoid'))
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
print(model.summary())
model.fit(X_train, y_train, epochs=3, batch_size=64)
# Final evaluation of the model
scores = model.evaluate(X_test, y_test, verbose=0)
print("Accuracy: %.2f%%" % (scores[1]*100))



Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding_1 (Embedding)     (None, 500, 32)           160000    
                                                                 
 dropout (Dropout)           (None, 500, 32)           0         
                                                                 
 lstm_1 (LSTM)               (None, 100)               53200     
                                                                 
 dropout_1 (Dropout)         (None, 100)               0         
                                                                 
 dense_1 (Dense)             (None, 1)                 101       
                                                                 
Total params: 213,301
Trainable params: 213,301
Non-trainable params: 0
_________________________________________________________________
None
Epoch 1/3
Epoch 2/3
Epoch 3/3
Accuracy: 87.

In [39]:
# Get Prediction
Prediction_LSTM_dropout = model.predict(X_test)
# get performance using custom function, calls array to set binary 1 and 0 if probability is greater than 0.50 in array
print("Accuracy of LSTM Dropout Prediction")
LSTM_Dropout_perform = custom_acc_function(y_test,Prediction_LSTM_dropout)

Accuracy of LSTM Dropout Prediction
Accuracy: 0.8768
Precision: 0.8606984224230357
Recall: 0.89912
F1 Score: 0.8794897879333282


In [40]:
#Finally LSTM using Bi-directional Methods

# LSTM with dropout for sequence classification in the IMDB dataset
from tensorflow.keras.layers import Bidirectional
from tensorflow.keras.layers import Embedding
from tensorflow.keras.preprocessing import sequence
# fix random seed for reproducibility
tf.random.set_seed(7)
# load the dataset but only keep the top n words, zero the rest
top_words = 5000
(X_train, y_train), (X_test, y_test) = imdb.load_data(num_words=top_words)
# truncate and pad input sequences
max_review_length = 500
X_train = sequence.pad_sequences(X_train, maxlen=max_review_length)
X_test = sequence.pad_sequences(X_test, maxlen=max_review_length)
# create the model
embedding_vecor_length = 32
model = Sequential()
model.add(Embedding(top_words, embedding_vecor_length, input_length=max_review_length))
model.add(Bidirectional(LSTM(100, dropout=0.2, recurrent_dropout=0.2)))
model.add(Dense(1, activation='sigmoid'))
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
print(model.summary())
model.fit(X_train, y_train, epochs=3, batch_size=64)
# Final evaluation of the model
scores = model.evaluate(X_test, y_test, verbose=0)
print("Accuracy: %.2f%%" % (scores[1]*100))


Model: "sequential_2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding_2 (Embedding)     (None, 500, 32)           160000    
                                                                 
 bidirectional (Bidirectiona  (None, 200)              106400    
 l)                                                              
                                                                 
 dense_2 (Dense)             (None, 1)                 201       
                                                                 
Total params: 266,601
Trainable params: 266,601
Non-trainable params: 0
_________________________________________________________________
None
Epoch 1/3
Epoch 2/3
Epoch 3/3
Accuracy: 87.52%


In [41]:
# Get Prediction
Prediction_LSTM_bidirectional = model.predict(X_test)
# get performance using custom function, calls array to set binary 1 and 0 if probability is greater than 0.50 in array
print("Accuracy of LSTM Bidirectional Prediction")
LSTM_bidirectional_perform = custom_acc_function(y_test,Prediction_LSTM_bidirectional)


Accuracy of LSTM Bidirectional Prediction
Accuracy: 0.87524
Precision: 0.8782356261591807
Recall: 0.87128
F1 Score: 0.8747439861852938


In [42]:
#### RNN Methods ########

In [46]:
# Standard RNN method

# RNN for sequence classification in the IMDB dataset
import tensorflow as tf
from tensorflow.keras.datasets import imdb
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import SimpleRNN
from tensorflow.keras.layers import Embedding
from tensorflow.keras.preprocessing import sequence
# fix random seed for reproducibility
tf.random.set_seed(7)
# load the dataset but only keep the top n words, zero the rest
top_words = 5000
(X_train, y_train), (X_test, y_test) = imdb.load_data(num_words=top_words)
# truncate and pad input sequences
max_review_length = 500
X_train = sequence.pad_sequences(X_train, maxlen=max_review_length)
X_test = sequence.pad_sequences(X_test, maxlen=max_review_length)
# create the model
embedding_vecor_length = 32
model = Sequential()
model.add(Embedding(top_words, embedding_vecor_length, input_length=max_review_length))
model.add(SimpleRNN(100))
model.add(Dense(1, activation='sigmoid'))
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
print(model.summary())
model.fit(X_train, y_train, epochs=3, batch_size=64)
# Final evaluation of the model
scores = model.evaluate(X_test, y_test, verbose=0) #tells the loss and accuracy of the function called
print("Accuracy: %.2f%%" % (scores[1]*100))



Model: "sequential_6"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding_6 (Embedding)     (None, 500, 32)           160000    
                                                                 
 simple_rnn (SimpleRNN)      (None, 100)               13300     
                                                                 
 dense_3 (Dense)             (None, 1)                 101       
                                                                 
Total params: 173,401
Trainable params: 173,401
Non-trainable params: 0
_________________________________________________________________
None
Epoch 1/3
Epoch 2/3
Epoch 3/3
Accuracy: 64.84%


In [47]:
# Get Prediction RNN Classic
Prediction_RNN_Classic = model.predict(X_test)
# get performance using custom function, calls array to set binary 1 and 0 if probability is greater than 0.50 in array
print("Accuracy of RNN Classic Prediction")
RNN_classic_perform = custom_acc_function(y_test,Prediction_RNN_Classic)


Accuracy of RNN Classic Prediction
Accuracy: 0.6484
Precision: 0.5917136359141699
Recall: 0.95744
F1 Score: 0.7314062213530527


In [48]:
# RNN with dropout
#RNN Using Drop Out Layers

# create the model
embedding_vecor_length = 32
model = Sequential()
model.add(Embedding(top_words, embedding_vecor_length, input_length=max_review_length))
model.add(Dropout(0.2))
model.add(SimpleRNN(100))
model.add(Dropout(0.2))
model.add(Dense(1, activation='sigmoid'))
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
print(model.summary())
model.fit(X_train, y_train, epochs=3, batch_size=64)
# Final evaluation of the model
scores = model.evaluate(X_test, y_test, verbose=0)
print("Accuracy: %.2f%%" % (scores[1] * 100))

Model: "sequential_7"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding_7 (Embedding)     (None, 500, 32)           160000    
                                                                 
 dropout_2 (Dropout)         (None, 500, 32)           0         
                                                                 
 simple_rnn_1 (SimpleRNN)    (None, 100)               13300     
                                                                 
 dropout_3 (Dropout)         (None, 100)               0         
                                                                 
 dense_4 (Dense)             (None, 1)                 101       
                                                                 
Total params: 173,401
Trainable params: 173,401
Non-trainable params: 0
_________________________________________________________________
None
Epoch 1/3
Epoch 2/3
Epoch 3/3
Accuracy: 71.

In [49]:
# Get Prediction RNN Dropout
Prediction_RNN_dropout = model.predict(X_test)
# get performance using custom function, calls array to set binary 1 and 0 if probability is greater than 0.50 in array
print("Accuracy of RNN Dropout Prediction")
RNN_Dropout_perform = custom_acc_function(y_test, Prediction_RNN_dropout)

Accuracy of RNN Dropout Prediction
Accuracy: 0.71772
Precision: 0.7037203383486788
Recall: 0.75208
F1 Score: 0.7270969488379287


In [50]:
#Finally RNN using Bi-directional Methods

# RNN with dropout for sequence classification in the IMDB dataset
# fix random seed for reproducibility
tf.random.set_seed(7)
# load the dataset but only keep the top n words, zero the rest
top_words = 5000
(X_train, y_train), (X_test, y_test) = imdb.load_data(num_words=top_words)
# truncate and pad input sequences
max_review_length = 500
X_train = sequence.pad_sequences(X_train, maxlen=max_review_length)
X_test = sequence.pad_sequences(X_test, maxlen=max_review_length)
# create the model
embedding_vecor_length = 32
model = Sequential()
model.add(Embedding(top_words, embedding_vecor_length, input_length=max_review_length))
model.add(Bidirectional(SimpleRNN(100, dropout=0.2, recurrent_dropout=0.2)))
model.add(Dense(1, activation='sigmoid'))
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
print(model.summary())
model.fit(X_train, y_train, epochs=3, batch_size=64)
# Final evaluation of the model
scores = model.evaluate(X_test, y_test, verbose=0)
print("Accuracy: %.2f%%" % (scores[1] * 100))

Model: "sequential_8"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding_8 (Embedding)     (None, 500, 32)           160000    
                                                                 
 bidirectional_1 (Bidirectio  (None, 200)              26600     
 nal)                                                            
                                                                 
 dense_5 (Dense)             (None, 1)                 201       
                                                                 
Total params: 186,801
Trainable params: 186,801
Non-trainable params: 0
_________________________________________________________________
None
Epoch 1/3
Epoch 2/3
Epoch 3/3
Accuracy: 49.80%


In [51]:
# Get Prediction RNN Bidirectional
Prediction_RNN_bidirectional = model.predict(X_test)
# get performance using custom function, calls array to set binary 1 and 0 if probability is greater than 0.50 in array
print("Accuracy of RNN Bidirectional Prediction")
RNN_bidirectional_perform = custom_acc_function(y_test, Prediction_RNN_bidirectional)

Accuracy of RNN Bidirectional Prediction
Accuracy: 0.498
Precision: 0.4989567684860624
Recall: 0.95656
F1 Score: 0.655824923211935


In [52]:
#### GRU Methods ########

In [53]:
# Standard GRU method

# GRU for sequence classification in the IMDB dataset
import tensorflow as tf
from tensorflow.keras.datasets import imdb
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import GRU
from tensorflow.keras.layers import Embedding
from tensorflow.keras.preprocessing import sequence

# fix random seed for reproducibility
tf.random.set_seed(7)
# load the dataset but only keep the top n words, zero the rest
top_words = 5000
(X_train, y_train), (X_test, y_test) = imdb.load_data(num_words=top_words)
# truncate and pad input sequences
max_review_length = 500
X_train = sequence.pad_sequences(X_train, maxlen=max_review_length)
X_test = sequence.pad_sequences(X_test, maxlen=max_review_length)
# create the model
embedding_vecor_length = 32
model = Sequential()
model.add(Embedding(top_words, embedding_vecor_length, input_length=max_review_length))
model.add(GRU(100))
model.add(Dense(1, activation='sigmoid'))
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
print(model.summary())
model.fit(X_train, y_train, epochs=3, batch_size=64)
# Final evaluation of the model
scores = model.evaluate(X_test, y_test, verbose=0)  #tells the loss and accuracy of the function called
print("Accuracy: %.2f%%" % (scores[1] * 100))


Model: "sequential_9"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding_9 (Embedding)     (None, 500, 32)           160000    
                                                                 
 gru (GRU)                   (None, 100)               40200     
                                                                 
 dense_6 (Dense)             (None, 1)                 101       
                                                                 
Total params: 200,301
Trainable params: 200,301
Non-trainable params: 0
_________________________________________________________________
None
Epoch 1/3
Epoch 2/3
Epoch 3/3
Accuracy: 88.22%


In [54]:
# Get Prediction RNN Classic
Prediction_GRU_Classic = model.predict(X_test)
# get performance using custom function, calls array to set binary 1 and 0 if probability is greater than 0.50 in array
print("Accuracy of GRU Classic Prediction")
GRU_classic_perform = custom_acc_function(y_test, Prediction_GRU_Classic)


Accuracy of GRU Classic Prediction
Accuracy: 0.8822
Precision: 0.8597785977859779
Recall: 0.91336
F1 Score: 0.8857597269094999


In [55]:
# GRU with dropout
#GRU Using Drop Out Layers

# create the model
embedding_vecor_length = 32
model = Sequential()
model.add(Embedding(top_words, embedding_vecor_length, input_length=max_review_length))
model.add(Dropout(0.2))
model.add(GRU(100))
model.add(Dropout(0.2))
model.add(Dense(1, activation='sigmoid'))
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
print(model.summary())
model.fit(X_train, y_train, epochs=3, batch_size=64)
# Final evaluation of the model
scores = model.evaluate(X_test, y_test, verbose=0)
print("Accuracy: %.2f%%" % (scores[1] * 100))

Model: "sequential_10"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding_10 (Embedding)    (None, 500, 32)           160000    
                                                                 
 dropout_4 (Dropout)         (None, 500, 32)           0         
                                                                 
 gru_1 (GRU)                 (None, 100)               40200     
                                                                 
 dropout_5 (Dropout)         (None, 100)               0         
                                                                 
 dense_7 (Dense)             (None, 1)                 101       
                                                                 
Total params: 200,301
Trainable params: 200,301
Non-trainable params: 0
_________________________________________________________________
None
Epoch 1/3
Epoch 2/3
Epoch 3/3
Accuracy: 87

In [56]:
# Get Prediction GRU Dropout
Prediction_GRU_dropout = model.predict(X_test)
# get performance using custom function, calls array to set binary 1 and 0 if probability is greater than 0.50 in array
print("Accuracy of GRU Dropout Prediction")
GRU_Dropout_perform = custom_acc_function(y_test, Prediction_GRU_dropout)

Accuracy of GRU Dropout Prediction
Accuracy: 0.87908
Precision: 0.8551832696199685
Recall: 0.91272
F1 Score: 0.8830153631825393


In [57]:
#Finally GRU using Bi-directional Methods

# GRU with dropout for sequence classification in the IMDB dataset
# fix random seed for reproducibility
tf.random.set_seed(7)
# load the dataset but only keep the top n words, zero the rest
top_words = 5000
(X_train, y_train), (X_test, y_test) = imdb.load_data(num_words=top_words)
# truncate and pad input sequences
max_review_length = 500
X_train = sequence.pad_sequences(X_train, maxlen=max_review_length)
X_test = sequence.pad_sequences(X_test, maxlen=max_review_length)
# create the model
embedding_vecor_length = 32
model = Sequential()
model.add(Embedding(top_words, embedding_vecor_length, input_length=max_review_length))
model.add(Bidirectional(GRU(100, dropout=0.2, recurrent_dropout=0.2)))
model.add(Dense(1, activation='sigmoid'))
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
print(model.summary())
model.fit(X_train, y_train, epochs=3, batch_size=64)
# Final evaluation of the model
scores = model.evaluate(X_test, y_test, verbose=0)
print("Accuracy: %.2f%%" % (scores[1] * 100))

Model: "sequential_11"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding_11 (Embedding)    (None, 500, 32)           160000    
                                                                 
 bidirectional_2 (Bidirectio  (None, 200)              80400     
 nal)                                                            
                                                                 
 dense_8 (Dense)             (None, 1)                 201       
                                                                 
Total params: 240,601
Trainable params: 240,601
Non-trainable params: 0
_________________________________________________________________
None
Epoch 1/3
Epoch 2/3
Epoch 3/3
Accuracy: 86.36%


In [58]:
# Get Prediction GRU Bidirectional
Prediction_GRU_bidirectional = model.predict(X_test)
# get performance using custom function, calls array to set binary 1 and 0 if probability is greater than 0.50 in array
print("Accuracy of GRU Bidirectional Prediction")
GRU_bidirectional_perform = custom_acc_function(y_test, Prediction_GRU_bidirectional)

Accuracy of GRU Bidirectional Prediction
Accuracy: 0.8636
Precision: 0.8480624904273243
Recall: 0.88592
F1 Score: 0.8665779794976134
