In [1]:
import pandas as pd
import numpy as np
from pprint import pprint
import seaborn as sns
from sklearn.model_selection import train_test_split
import os
os.chdir('d:\\vscode_machineLearning\\internship\\sentiment-Analysis-fellowship.ai')
import random

In [2]:
df = pd.read_csv('sentiment-analysis-dataset/IMDB_clean_data.csv',index_col=False)
df = df.drop('Unnamed: 0',axis=1) # droping the Unnamed: 0
df.dropna(inplace=True) # I had one NaN value in my dataframe.
df = df.reset_index()

In [3]:
padded_docs = pd.read_csv('sentiment-analysis-dataset/padded_docs.csv')
padded_docs = padded_docs.drop(columns='Unnamed: 0')

In [4]:
padded_docs = np.array(padded_docs)

## train test split

In [5]:
X = df ['review']
y = df['sentiment']

X_train, X_test, y_train, y_test = train_test_split(
     np.array(padded_docs), np.array(y), test_size=0.23, random_state=42)

In [6]:
print("X_train shape:", X_train.shape)
print("X_test shape:", X_test.shape)
print("y_train shape:", y_train.shape)
print("y_test shape:", y_test.shape)

X_train shape: (38499, 250)
X_test shape: (11500, 250)
y_train shape: (38499,)
y_test shape: (11500,)


## model training

In [7]:
from keras.layers import BatchNormalization, Dropout, Bidirectional, LSTM, Embedding, Dense
from keras.losses import binary_crossentropy,categorical_crossentropy
from tensorflow import keras
from keras.callbacks import LearningRateScheduler , EarlyStopping
from keras.activations import relu , softmax , sigmoid
from keras.initializers import he_normal
from tensorflow.keras.optimizers.schedules import ExponentialDecay
from keras.optimizers import Adam


In [8]:
model_features = 100 # for embedding layer
input_len = 250
model = keras.Sequential(name='LSTM_model')

model.add(Embedding(
    input_dim=56942,  # The "+1" accounts for the reserved index 0 in the word index (since word indices start from 1 and not 0)
    output_dim=model_features, input_length=input_len,name = 'input_layer'
))
model.add(Bidirectional(
    LSTM(units=64,activation=relu,return_sequences=True),
    name='LSTM_1'
))
model.add(BatchNormalization())
model.add(Bidirectional(
    LSTM(units=32,activation=relu,return_sequences=False),
    name='LSTM_2'
))

model.add(Dense(
    units=128,activation=relu,name='fully_connected_layer'
))
model.add(Dense(
    units=1,activation=sigmoid,name='output_layer'
))

In [9]:
model.summary()

Model: "LSTM_model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_layer (Embedding)     (None, 250, 100)          5694200   
                                                                 
 LSTM_1 (Bidirectional)      (None, 250, 128)          84480     
                                                                 
 batch_normalization (BatchN  (None, 250, 128)         512       
 ormalization)                                                   
                                                                 
 LSTM_2 (Bidirectional)      (None, 64)                41216     
                                                                 
 fully_connected_layer (Dens  (None, 128)              8320      
 e)                                                              
                                                                 
 output_layer (Dense)        (None, 1)                 1

In [10]:
def lr_schedule(epoch, lr):
    if epoch < 1:
        return lr
    else:
        return lr * np.exp(-0.1)

# learning rate scheduler callback to descrese the learning rate gradually as the epochs increases So that my alogrithm could not jump out of Global minima.
lr_scheduler = LearningRateScheduler(lr_schedule)

# Early stopping to stop the Neural Network when we get same Validation accuracy
early_stopping = EarlyStopping(
    monitor="accuracy",
    min_delta=0.00001,
    patience=5,
    verbose=1,
    mode="auto",
    baseline=None,
    restore_best_weights=False
)

In [11]:
optimizer = keras.optimizers.Adam(learning_rate=0.0005)

In [12]:
model.compile(optimizer=optimizer, # Used Adam because this has not any major disadvantages with custom learning rate because the convergence was very unstable.
               loss=binary_crossentropy, # because solving the classification problem
                 metrics=['accuracy'])  # I don't need to write about this you know.

In [13]:
history = model.fit(X_train, y_train, validation_data=(X_test, y_test), epochs=20,
                    batch_size=32, # I had tried different batch sizes but this has given my best results
                      callbacks=[lr_scheduler, early_stopping]) # these to prevent the NN from overfitting and scheduling learning rate to get optimum solution.

Epoch 1/20
Epoch 2/20
 254/1204 [=====>........................] - ETA: 7:19 - loss: nan - accuracy: 0.6454

In [None]:
# model.save('sentimentAnalysisModel.H5')