<a href="https://colab.research.google.com/github/MoizAhmed2517/nlp-genai-notebooks/blob/main/RNN/Movies_sentiment_RNN_Model.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.datasets import imdb
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.layers import Embedding, SimpleRNN, Dense

## Loading IMDB dataset

In [None]:
max_features = 10000
(x_train, y_train), (x_test, y_test) = imdb.load_data(num_words=max_features)

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/imdb.npz
[1m17464789/17464789[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step


In [None]:
x_train.shape, x_test.shape

((25000,), (25000,))

## Inspect sample review and its label

In [None]:
sample_review = x_train[1]
sample_label = y_train[1]

In [None]:
sample_review, sample_label

### Mapping of word index back to words for understanding

In [None]:
word_index = imdb.get_word_index()

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/imdb_word_index.json
[1m1641221/1641221[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step


In [None]:
reverse_word_index = dict([(value, key) for (key, value) in word_index.items()])

In [None]:
for word_index in sample_review:
    print(reverse_word_index.get(word_index, '?'), end=' ')

the thought solid thought senator do making to is spot nomination assumed while he of jack in where picked as getting on was did hands fact characters to always life thrillers not as me can't in at are br of sure your way of little it strongly random to view of love it so principles of guy it used producer of where it of here icon film of outside to don't all unique some like of direction it if out her imagination below keep of queen he diverse to makes this stretch and of solid it thought begins br senator and budget worthwhile though ok and awaiting for ever better were and diverse for budget look kicked any to of making it out and follows for effects show to show cast this family us scenes more it severe making senator to and finds tv tend to of emerged these thing wants but and an beckinsale cult as it is video do you david see scenery it in few those are of ship for with of wild to one is very work dark they don't do dvd with those them 

## Padding the lenghts

In [None]:
max_len=500
X_train = pad_sequences(x_train, maxlen=max_len)
X_test = pad_sequences(x_test, maxlen=max_len)

In [None]:
X_train

array([[   0,    0,    0, ...,   19,  178,   32],
       [   0,    0,    0, ...,   16,  145,   95],
       [   0,    0,    0, ...,    7,  129,  113],
       ...,
       [   0,    0,    0, ...,    4, 3586,    2],
       [   0,    0,    0, ...,   12,    9,   23],
       [   0,    0,    0, ...,  204,  131,    9]], dtype=int32)

## Experiment#1:

In [None]:
model=Sequential()
model.add(Embedding(max_features,128)) ## Embedding Layers
model.add(SimpleRNN(128,activation='relu'))
model.add(Dense(1,activation="sigmoid"))

In [None]:
model.compile(optimizer='adam',loss='binary_crossentropy',metrics=['accuracy'])

In [None]:
earlystopping=EarlyStopping(monitor='val_loss',patience=5, restore_best_weights=True)

In [None]:
history=model.fit(X_train,y_train,epochs=10,batch_size=64,validation_split=0.2,callbacks=[earlystopping])

Epoch 1/10
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m99s[0m 307ms/step - accuracy: 0.5812 - loss: 639714.3750 - val_accuracy: 0.6350 - val_loss: 0.6442
Epoch 2/10
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m95s[0m 304ms/step - accuracy: 0.6924 - loss: 0.6271 - val_accuracy: 0.6392 - val_loss: 0.6317
Epoch 3/10
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m142s[0m 304ms/step - accuracy: 0.7104 - loss: 0.6005 - val_accuracy: 0.6416 - val_loss: 0.6217
Epoch 4/10
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m142s[0m 304ms/step - accuracy: 0.7339 - loss: 0.5669 - val_accuracy: 0.6582 - val_loss: 0.6121
Epoch 5/10
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m143s[0m 308ms/step - accuracy: 0.7643 - loss: 0.5093 - val_accuracy: 0.7220 - val_loss: 0.5688
Epoch 6/10
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m142s[0m 307ms/step - accuracy: 0.8323 - loss: 0.4188 - val_accuracy: 0.7452 - val_loss: 0.5347
E

In [None]:
model.save('./best_model.h5')



## Using same checkpoint for furthur training of model

In [None]:
from tensorflow.keras.models import load_model
model = load_model('./best_model.h5', compile=False)

In [None]:
model.compile(optimizer='adam',loss='binary_crossentropy',metrics=['accuracy'])

In [34]:
history_1=model.fit(X_train,y_train,epochs=15,batch_size=64,validation_split=0.2,callbacks=[earlystopping])

Epoch 1/15
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m92s[0m 293ms/step - accuracy: 0.9300 - loss: 0.1851 - val_accuracy: 0.8074 - val_loss: 0.5238
Epoch 2/15
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m143s[0m 296ms/step - accuracy: 0.9527 - loss: 0.1342 - val_accuracy: 0.8076 - val_loss: 0.5599
Epoch 3/15
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m147s[0m 311ms/step - accuracy: 0.9662 - loss: 0.0999 - val_accuracy: 0.8124 - val_loss: 0.6395
Epoch 4/15
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m139s[0m 304ms/step - accuracy: 0.9657 - loss: 0.1017 - val_accuracy: 0.8088 - val_loss: 0.7277
Epoch 5/15
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m140s[0m 297ms/step - accuracy: 0.9819 - loss: 0.0554 - val_accuracy: 0.7974 - val_loss: 0.7336


In [35]:
model.save('./best_model.h5')

