In [84]:
#imports

# keras imports
from keras.layers import Embedding, SimpleRNN, Flatten, Dense
from keras.preprocessing import sequence
from keras.models import Sequential
from keras.datasets import imdb
import keras.backend as K
import keras
import os

# general imports
from IPython.display import display, Markdown #just to display markdown
import numpy as np
import pandas as pd

In [85]:
display(Markdown("## USING IMDB DATA FOR MODEL TRAINING "))

# pre-processing initializations
max_features = 10000  # number of words to consider as features
maxlen = 50  # cut texts after this number of words (among top max_features most common words)
batch_size = 32

# data pre-prcessing
### IMDB data preparation was extensively cover in "text_pre-processing_basic model_building"(earlier module) ###


print('Loading data...')
(input_train, y_train), (input_test, y_test) = imdb.load_data(num_words=max_features)

# cutting sentences to max length of 50
input_train = sequence.pad_sequences(input_train, maxlen=maxlen)
input_test = sequence.pad_sequences(input_test, maxlen=maxlen)

print ("after data preprocessing")
print('input_train shape:', input_train.shape)
print('input_test shape:', input_test.shape, "\n")

print("\nImdb review 1 sample input data\n")
print(input_train[:1]) 

print("\nImdb review data lables")
print(y_train[:2])
print(
"""
Note:  
0 : "Negative review"
1 : "Positive review"
post_padding in input data helps gragient in LSTM flow better 

"""
)

## USING IMDB DATA FOR MODEL TRAINING 

Loading data...
after data preprocessing
input_train shape: (25000, 50)
input_test shape: (25000, 50) 


Imdb review 1 sample input data

[[2071   56   26  141    6  194 7486   18    4  226   22   21  134  476
    26  480    5  144   30 5535   18   51   36   28  224   92   25  104
     4  226   65   16   38 1334   88   12   16  283    5   16 4472  113
   103   32   15   16 5345   19  178   32]]

Imdb review data lables
[1 0]

Note:  
0 : "Negative review"
1 : "Positive review"
post_padding in input data helps gragient in LSTM flow better 




In [86]:
keras.callbacks.ModelCheckpoint
keras.callbacks.EarlyStopping
keras.callbacks.LearningRateScheduler
keras.callbacks.ReduceLROnPlateau
keras.callbacks.CSVLogger

keras.callbacks.CSVLogger

## WRITING THE PRE-DEFINED CALLBACKS FOR MODEL TRAINING

In [87]:
# This callback will save all the training logs in a file after every epoch
dir_path = "./"
csv_logger = keras.callbacks.CSVLogger(
    # This is the path to the file where logs data will be stored
    os.path.join(dir_path,"training.log")
)


# This callback will interrupt training when we have stopped improving
early_stopping = keras.callbacks.EarlyStopping(
    # This callback will monitor the validation accuracy of the model
    monitor='val_acc', 
    # Min improvement requirement in val_accuracy
    min_delta=0,
    # Training will be interrupted when the accuracy
    # has stopped improving for *more* than 3 epochs
    patience=3,
    # Logs the earlystaging activities
    verbose=1, 
    # Automatically decide if loss/acc as to be min/maximized for evaluation
    mode='auto',
    # Save only the best weights from the current epoch
    restore_best_weights=True
)

reduce_learning_rate = keras.callbacks.ReduceLROnPlateau(
    # This callback will monitor the validation loss of the model
    monitor='val_loss',
    # It will divide the learning by half when it gets triggered
    factor=0.5,
    # It will get triggered after the validation loss has stopped improving
    # for at least 2 epochs
    patience=2,
    # Logs the ReduceLROnPlateau activities 
    verbose=1,
    # Note that since the callback will be monitor validation loss,
    # we need to pass some `validation_data` to our call to `fit`.
    
    )

# This callback will save the current weights after every epoch
# The name of weight file contains epoch number, val accuracy
file_path="./stored_weights/weights-improvement-{epoch:02d}-{val_acc:.2f}.hdf5"
checkpoints = keras.callbacks.ModelCheckpoint(
        filepath=file_path,  # Path to the destination model file
        # The two arguments below mean that we will not overwrite the
        # model file unless `val_loss` has improved, which
        # allows us to keep the best model every seen during training.
        monitor='val_loss',
        save_best_only=True,
    )

## WRITING A CUSTOM CALLING FOR TEST EVALUATION DURING TRAINING

Here is a simple example of a custom callback, where we save to disk (as Numpy arrays)
the activations of every layer of the model at the end of every epoch, computed on the 
first sample of the validation set.

In [88]:
# Called at the start of every epoch
# -->on_epoch_begin

# Called at the end of every epoch
# -->on_epoch_end

# Called right before processing each batch
# -->on_batch_begin

# Called right after processing each batch
# -->on_batch_end

# Called at the start of training
# -->on_train_begin

# Called at the end of training
# -->on_train_end

class TestAccuracyCalculator(keras.callbacks.Callback):
    
    def __init__(self, test_data=None,test_lables=None):
        # Passing test data separately
        # to test accuracy changes while training
        self.test_data = test_data
        self.test_lables = test_lables
        
    def set_model(self, model):
        # This method is called by the parent model
        # before training, to inform the callback
        # of what model will be calling it
        self.model = model
    
    def calculate_binary_accuracy(self,true_lables,pred_lables):
        #accuracy calculation
        accuracy = ((np.sum(true_lables == pred_lables))/(len(pred_lables)))*100
        return round(accuracy,2)
        
    def on_epoch_end(self, epoch, logs=None):
    
        if (type(self.test_data) !=  type(None)) and (type(self.test_lables) !=  type(None)):
            # Using current best weight from this epoch
            # to test on provided test data-set
            y_pred = self.model.predict(self.test_data)
                
            # restructuring predicted vector for accuracy calculation
            y_pred_ = np.where(y_pred > 0.5, 1, 0)
            y_pred_ = y_pred_.flatten()
            accuracy = self.calculate_binary_accuracy(self.test_lables,y_pred_)
            print ("Test accuracy after {} epocs is {}% \n".format(epoch+1,accuracy))
        else:
            pass

test_accuracy_calculator = TestAccuracyCalculator(test_data=input_test, test_lables=y_test)

In [89]:
# Callbacks are passed to the model fit the `callbacks` argument in `fit`,
# which takes a list of callbacks. You can pass any number of callbacks.
callbacks_list = [
                  test_accuracy_calculator
                  csv_logger, early_stopping,
                  reduce_learning_rate,checkpoints,
                 ]

In [90]:
display(Markdown("## TRAINING MODEL WITH CALLBACKS "))

# model architecture and training
print("starting model training...")
model = Sequential()
model.add(Embedding(max_features, 32))
model.add(SimpleRNN(32))
model.add(Dense(1, activation='sigmoid'))
model.summary()
print ("\n")
model.compile(optimizer='rmsprop', loss='binary_crossentropy', metrics=['acc'])
history = model.fit(input_train, y_train,
                    epochs=10,
                    batch_size=128,
                    validation_split=0.2,
                    callbacks=callbacks_list)

## TRAINING MODEL WITH CALLBACKS 

starting model training...
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_15 (Embedding)     (None, None, 32)          320000    
_________________________________________________________________
simple_rnn_15 (SimpleRNN)    (None, 32)                2080      
_________________________________________________________________
dense_15 (Dense)             (None, 1)                 33        
Total params: 322,113
Trainable params: 322,113
Non-trainable params: 0
_________________________________________________________________


Train on 20000 samples, validate on 5000 samples
Epoch 1/10
Test accuracy after 1 epocs is 74.28% 

Epoch 2/10
Test accuracy after 2 epocs is 80.16% 

Epoch 3/10
Test accuracy after 3 epocs is 80.47% 

Epoch 4/10
Test accuracy after 4 epocs is 79.17% 

Epoch 5/10

Epoch 00005: ReduceLROnPlateau reducing learning rate to 0.0005000000237487257.
Test accuracy after 5 epoc

## Custom callback result

As we can see custom call back is printing the accuracy on test after every epoch. We can directly save this accuracy in a seperate log file

In [102]:
display(Markdown("## CSVLogger callback results"))
log_data = pd.read_csv("training.log")

print("""
CSVLogger has saved all the accuracies and losses to "training.log" 
""")
display(Markdown("### results"))
print (log_data.head().to_string(index=False))

## CSVLogger callback results


CSVLogger has saved all the accuracies and losses to "training.log" 



### results

epoch      acc      loss  val_acc  val_loss
    0  0.71830  0.542800   0.7964  0.452406
    1  0.84550  0.364096   0.8132  0.413793
    2  0.88630  0.281825   0.7938  0.444432
    3  0.93035  0.191171   0.7750  0.591774
    4  0.97125  0.098036   0.7716  0.572219


## EarlyStopping callback result

Best validation accuracy was achieved during 3rd epoch. After that as per the "patience=3" parameter in EarlyStopping definition, the callback waited for "val_acc" improvement. Since there was no improvement better than best val_acc, after 6th epoch model training was stopped. 

Note: In model training we have mentioned number of epochs to be 10. As per earlystopping model training stopped after 6th epoch 

## ReduceLROnPlateau callback result

Best validation accuracy was achieved during 3rd epoch. As per the "patience=2" parameter in ReduceLROnPlateau definition, the callback waited for "val_acc" improvement for 2 more epochs. Then reduced the learning rate to half (factor=0.5) for further iterations.

In [103]:
display(Markdown("## ModelCheckpoint callback results"))

print("""ModelCheckpoint has saved all the model weights to "stored_weights" folder""")
display(Markdown("### results"))
print (os.listdir("stored_weights/"))

## ModelCheckpoint callback results

ModelCheckpoint has saved all the model weights to "stored_weights" folder


### results

['.DS_Store', 'weights-improvement-01-0.74.hdf5', 'weights-improvement-03-0.80.hdf5', 'weights-improvement-02-0.80.hdf5', 'weights-improvement-02-0.81.hdf5']
