## End-to-End Deep Learning Project Using Simple RNN

In [None]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.datasets import imdb
from tensorflow.keras.preprocessing import sequence 
from tensorflow.keras.models import Sequential # for sequential model
from tensorflow.keras.layers import Embedding,SimpleRNN,Dense # for embedding, RNN and Dense layers

In [None]:
## Load the IMDB dataset (it is downloaded automatically and then kept in cache)

max_features = 10000  # vocabulary size: maximum number of distinct words to consider
(X_train, y_train), (X_test, y_test) = imdb.load_data(num_words=max_features)

# Print the shape of the features and labels for each split
print(f'Training data shape: {X_train.shape}, Training labels shape: {y_train.shape}')
print(f'Testing data shape: {X_test.shape}, Testing labels shape: {y_test.shape}')

In [None]:
# Show the first training review (a sequence of integer word indices) and its label
X_train[0],y_train[0]

In [None]:
## Pick the first training example: the encoded review and the label that goes with it
sample_review, sample_label = X_train[0], y_train[0]

print(f"Sample review (as integers):{sample_review}")
print(f'Sample label: {sample_label}')


In [None]:
### Mapping from each word to its integer index (for our understanding)
word_index=imdb.get_word_index()
word_index


In [None]:
# Invert word_index so that an integer index can be looked up to recover its word
reverse_word_index = {index: word for word, index in word_index.items()}
reverse_word_index

In [None]:
# Indices 0, 1 and 2 are reserved for padding, start of sequence and unknown words,
# so the encoded review is shifted by 3 relative to word_index: subtract 3 before
# each lookup. Any index without an entry is rendered as '?'.
decoded_words = (reverse_word_index.get(token - 3, '?') for token in sample_review)
decoded_review = ' '.join(decoded_words)
decoded_review

In [None]:
from tensorflow.keras.preprocessing import sequence

# Every review is brought to exactly max_len tokens so the model receives fixed-size input
max_len = 500

# Reviews shorter than 500 words are padded with zeros at the beginning ('pre').
# Reviews longer than 500 words are truncated to 500 words, dropping tokens from
# the beginning ('pre'). Both settings are the library defaults, written out here
# so the behaviour is visible in the code.
X_train = sequence.pad_sequences(
    X_train,
    maxlen=max_len,
    padding='pre',
    truncating='pre',
)
X_test = sequence.pad_sequences(
    X_test,
    maxlen=max_len,
    padding='pre',
    truncating='pre',
)
X_train

In [None]:
# Show the first training review after padding
X_train[0]

In [None]:
## Train Simple RNN

## Architecture: Embedding -> SimpleRNN -> Dense
##   * Embedding turns each integer word index into a dense 128-dimensional vector
##   * SimpleRNN reads the sequence of vectors; tanh is used because RNNs typically
##     perform better and are more stable with tanh or sigmoid inside the recurrent layer
##   * Dense with a sigmoid activation produces the single final output
model = Sequential([
    Embedding(max_features, 128, input_length=max_len),
    SimpleRNN(128, activation='tanh'),
    Dense(1, activation="sigmoid"),
])

In [None]:
# Configure training: Adam optimizer, binary cross-entropy loss, accuracy as the reported metric
model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

In [None]:
## Create an instance of the EarlyStopping callback
from tensorflow.keras.callbacks import EarlyStopping

# Monitor the validation loss with a patience of 5 epochs, and restore the
# best weights seen during training when stopping.
earlystopping = EarlyStopping(
    monitor='val_loss',
    patience=5,
    restore_best_weights=True,
)
earlystopping

In [None]:
## Train the model with early stopping; 20% of the training data is held out for validation
history = model.fit(
    X_train,
    y_train,
    validation_split=0.2,
    batch_size=32,
    epochs=10,
    callbacks=[earlystopping],
)

In [None]:
# Print a summary of the model
model.summary()

In [None]:
## Save the model to an HDF5 (.h5) file
model.save('simple_rnn_imdb.h5')
# Alternative: save using the .keras file format instead
# model.save('my_model.keras')