<a href="https://colab.research.google.com/github/SisekoC/My-Notebooks/blob/main/LSTM_bidirectional_IMDB_data.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Bidirectional LSTM on the IMDB sentiment classification task.
### Dr. Tirthajyoti Sarkar, Fremont, CA

IMDB dataset: https://keras.io/datasets/#imdb-movie-reviews-sentiment-classification

In [2]:
from __future__ import print_function
import numpy as np
import keras
from keras.preprocessing import sequence
from keras.optimizers import Adam
from keras.models import Sequential
from keras.layers import Dense, Dropout, Embedding, LSTM, Bidirectional
from keras.callbacks import ModelCheckpoint, LearningRateScheduler, ReduceLROnPlateau
from keras.datasets import imdb
keras.datasets.mnist.load_data(path="mnist.npz")

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz
[1m11490434/11490434[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step


((array([[[0, 0, 0, ..., 0, 0, 0],
          [0, 0, 0, ..., 0, 0, 0],
          [0, 0, 0, ..., 0, 0, 0],
          ...,
          [0, 0, 0, ..., 0, 0, 0],
          [0, 0, 0, ..., 0, 0, 0],
          [0, 0, 0, ..., 0, 0, 0]],
  
         [[0, 0, 0, ..., 0, 0, 0],
          [0, 0, 0, ..., 0, 0, 0],
          [0, 0, 0, ..., 0, 0, 0],
          ...,
          [0, 0, 0, ..., 0, 0, 0],
          [0, 0, 0, ..., 0, 0, 0],
          [0, 0, 0, ..., 0, 0, 0]],
  
         [[0, 0, 0, ..., 0, 0, 0],
          [0, 0, 0, ..., 0, 0, 0],
          [0, 0, 0, ..., 0, 0, 0],
          ...,
          [0, 0, 0, ..., 0, 0, 0],
          [0, 0, 0, ..., 0, 0, 0],
          [0, 0, 0, ..., 0, 0, 0]],
  
         ...,
  
         [[0, 0, 0, ..., 0, 0, 0],
          [0, 0, 0, ..., 0, 0, 0],
          [0, 0, 0, ..., 0, 0, 0],
          ...,
          [0, 0, 0, ..., 0, 0, 0],
          [0, 0, 0, ..., 0, 0, 0],
          [0, 0, 0, ..., 0, 0, 0]],
  
         [[0, 0, 0, ..., 0, 0, 0],
          [0, 0, 0, ..., 0, 0, 0

### How many features you will consider and how long embedding is to be used
Note, these choices will determine the size of the parameter space of your neural net.

In [3]:
max_features = 2000
# cut texts after this number of words
# (among top max_features most common words)
maxlen = 100

### Load the data

Keras IMDB data sometimes gives trouble while loading. <br>
See this Stakoverflow thread for possible resolution,

https://stackoverflow.com/questions/55890813/how-to-fix-object-arrays-cannot-be-loaded-when-allow-pickle-false-for-imdb-loa/56062555

In [4]:
print('Loading data...')
(x_train, y_train), (x_test, y_test) = imdb.load_data(num_words=max_features)
print(len(x_train), 'train sequences')
print(len(x_test), 'test sequences')

Loading data...
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/imdb.npz
[1m17464789/17464789[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step
25000 train sequences
25000 test sequences


In [5]:
print('Pad sequences (samples x time)')
x_train = sequence.pad_sequences(x_train, maxlen=maxlen)
x_test = sequence.pad_sequences(x_test, maxlen=maxlen)
print('x_train shape:', x_train.shape)
print('x_test shape:', x_test.shape)
y_train = np.array(y_train)
y_test = np.array(y_test)

Pad sequences (samples x time)
x_train shape: (25000, 100)
x_test shape: (25000, 100)


### Build your model
Feel free to experiment with number of neurons and add more LSTM layers. But the irreducible error bar of the dataset can be reched pretty easily even with a simple network.

In [6]:
model = Sequential()
model.add(Embedding(max_features, 128, input_length=maxlen))
model.add(Bidirectional(LSTM(64)))
model.add(Dropout(0.2))
model.add(Dense(1, activation='sigmoid'))



### Optimizer and compilation

In [10]:
opti_ = Adam(learning_rate=0.0025)
model.compile(optimizer=opti_, loss='binary_crossentropy', metrics=['accuracy'])

In [11]:
model.summary()

### Fix a batch size, number of epochs to train, and off you go...

In [12]:
batch_size = 64
epochs = 20

In [13]:
from time import time

In [14]:
t1=time()
model.fit(x_train, y_train,
          batch_size=batch_size,
          epochs=epochs,
          validation_data=[x_test, y_test])
t2=time()

Epoch 1/20
[1m391/391[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m135s[0m 331ms/step - accuracy: 0.7224 - loss: 0.5185 - val_accuracy: 0.8422 - val_loss: 0.3580
Epoch 2/20
[1m391/391[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m144s[0m 369ms/step - accuracy: 0.8609 - loss: 0.3231 - val_accuracy: 0.8511 - val_loss: 0.3378
Epoch 3/20
[1m391/391[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m206s[0m 377ms/step - accuracy: 0.8903 - loss: 0.2645 - val_accuracy: 0.8452 - val_loss: 0.3441
Epoch 4/20
[1m391/391[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m133s[0m 341ms/step - accuracy: 0.9176 - loss: 0.2115 - val_accuracy: 0.8380 - val_loss: 0.3651
Epoch 5/20
[1m391/391[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m143s[0m 343ms/step - accuracy: 0.9374 - loss: 0.1674 - val_accuracy: 0.8388 - val_loss: 0.4022
Epoch 6/20
[1m391/391[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m143s[0m 346ms/step - accuracy: 0.9478 - loss: 0.1385 - val_accuracy: 0.8390 - val_loss: 0.4745
Epoc

In [15]:
t_delta = round((t2-t1)/60,3)
print(f"{epochs} took total {t_delta} minutes.")

20 took total 50.845 minutes.


### Some fancy things to try - `LearningRateScheduler` and `ReduceLROnPlateau` Callbacks
These are very useful callbacks to dynamically adjust the learning rate but they don't seem to impact the performance for this dataset.

In [16]:
def lr_schedule(epoch):
    """Learning Rate Schedule

    Learning rate is scheduled to be reduced after 80, 120, 160, 180 epochs.
    Called automatically every epoch as part of callbacks during training.

    # Arguments
        epoch (int): The number of epochs

    # Returns
        lr (float32): learning rate
    """
    lr = 0.005
    if epoch >= 3:
        lr *= 0.5
    if epoch >= 7:
        lr *= 0.25
    if epoch >= 11:
        lr *= 0.5
    if epoch >= 16:
        lr *= 0.5

    print('Learning rate: ', lr)
    return lr

In [17]:
lr_scheduler = LearningRateScheduler(lr_schedule)

In [18]:
lr_reducer = ReduceLROnPlateau(factor=np.sqrt(0.1),
                               cooldown=0,
                               patience=5,
                               min_lr=0.5e-6)

In [19]:
callbacks = [lr_reducer, lr_scheduler]
#callbacks = [lr_scheduler]

In [20]:
model = Sequential()
model.add(Embedding(max_features, 128, input_length=maxlen))
model.add(Bidirectional(LSTM(64)))
model.add(Dropout(0.2))
model.add(Dense(1, activation='sigmoid'))



In [22]:
opti_ = Adam(learning_rate=lr_schedule(0))
model.compile(optimizer=opti_, loss='binary_crossentropy', metrics=['accuracy'])

Learning rate:  0.005


In [23]:
t1=time()
model.fit(x_train, y_train,
          batch_size=batch_size,
          epochs=epochs,
          validation_data=[x_test, y_test],
          callbacks=callbacks)
t2=time()

Learning rate:  0.005
Epoch 1/20
[1m391/391[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m137s[0m 335ms/step - accuracy: 0.6903 - loss: 0.5679 - val_accuracy: 0.8408 - val_loss: 0.3604 - learning_rate: 0.0050
Learning rate:  0.005
Epoch 2/20
[1m391/391[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m132s[0m 337ms/step - accuracy: 0.8634 - loss: 0.3233 - val_accuracy: 0.8566 - val_loss: 0.3329 - learning_rate: 0.0050
Learning rate:  0.005
Epoch 3/20
[1m391/391[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m142s[0m 338ms/step - accuracy: 0.8932 - loss: 0.2573 - val_accuracy: 0.8544 - val_loss: 0.3364 - learning_rate: 0.0050
Learning rate:  0.0025
Epoch 4/20
[1m391/391[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m132s[0m 337ms/step - accuracy: 0.9281 - loss: 0.1902 - val_accuracy: 0.8478 - val_loss: 0.3821 - learning_rate: 0.0025
Learning rate:  0.0025
Epoch 5/20
[1m391/391[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m141s[0m 336ms/step - accuracy: 0.9504 - loss: 0.1366 - va