In [1]:
import tensorflow as tf
import numpy as np
from tensorflow.keras.models import Model
from utils import creat_datasets, reset_graph, grid_serach,  creating_val_data
from utils import creating_train_val_test_datasets
from layers import  bln_layer, dense_layer
from callbacks import bln_callback , tensorboard_callback, create_callback_list, save_best_model_callback


from tensorflow.keras.preprocessing import sequence
from tensorflow.keras.layers import Embedding
from tensorflow.keras.layers import LSTM
from tensorflow.keras.datasets import imdb

In [2]:
# Make tf.function-decorated code run eagerly instead of as a traced graph.
# NOTE(review): presumably required by the custom bln_layer / bln_callback -- confirm.
tf.config.experimental_run_functions_eagerly(True)

In [3]:
# Display the TensorFlow version this notebook was run with.
tf.__version__

'2.1.0'

### Loading data

In [4]:
# --- Reproducibility ---
random_seed = 100

# --- Input pipeline ---
minibatch = 20                 # samples per batch
buffersize = 60000             # buffer size handed to the dataset builder
number_valid_sampels = 5000    # number of validation samples

# --- Training schedule ---
epochs = 5
learning_rate = 0.001
number_batches_train = 500     # batches used for training, each of size `minibatch`
number_batches_valid = 50      # batches used for validation, each of size `minibatch`

# --- Model output ---
num_classes = 1

# --- Text preprocessing ---
max_features = 20000           # keep only the `max_features` most common words
maxlen = 80                    # cut texts after this number of words


In [5]:
# Load the IMDB reviews, restricted to the `max_features` most common words.
print('Loading data...')
(x_train, y_train), (x_test, y_test) = imdb.load_data(num_words=max_features)
print(len(x_train), 'train sequences')
print(len(x_test), 'test sequences')

# Pad/truncate both splits to `maxlen` tokens in a single pass.
print('Pad sequences (samples x time)')
x_train, x_test = (
    sequence.pad_sequences(reviews, maxlen=maxlen)
    for reviews in (x_train, x_test)
)
print('x_train shape:', x_train.shape)
print('x_test shape:', x_test.shape)

Loading data...


  x_train, y_train = np.array(xs[:idx]), np.array(labels[:idx])
  x_test, y_test = np.array(xs[idx:]), np.array(labels[idx:])


25000 train sequences
25000 test sequences
Pad sequences (samples x time)
x_train shape: (25000, 80)
x_test shape: (25000, 80)


In [6]:
# Carve a validation split out of the training data.
# NOTE: x_train / y_train are rebound here, so re-running this cell without
# re-running the loading cell feeds the already-split arrays back in.
x_train, y_train, x_valid, y_valid = creating_val_data(
    x_train, y_train,
    number_valid_sampels=number_valid_sampels,
    random_seed=random_seed,
)

# Wrap the three splits as batched datasets of size `minibatch`.
train_dataset, valid_dataset, test_dataset = creating_train_val_test_datasets(
    x_train, y_train,
    x_test, y_test,
    x_valid, y_valid,
    minibatch=minibatch,
    buffersize=buffersize,
    random_seed=random_seed,
)

In [7]:
# Inspect the element shapes and dtypes of the training dataset.
train_dataset

<PrefetchDataset shapes: ((20, 80), (20,)), types: (tf.int32, tf.int64)>

# 1. Using the Custom Batch and Layer Normalization Layer (cBLN)

In [8]:
def BLNLayer_model(inputshape=(80,), max_features=20000, embed_size=128, random_seed=100,
                   lstm_unit=128, dense_units=1,
                   batch_size=10,
                   b_mm=True, b_mv=True,
                   f_mm=False, f_mv=False):
    """Build the Embedding -> LSTM -> bln_layer -> LSTM -> dense_layer model.

    Args:
        inputshape: Per-sample input shape, excluding the batch dimension.
            A bare int (e.g. ``(80)``, which is just ``80``) is accepted and
            treated as the 1-tuple ``(80,)``.
        max_features: Input dimension of the Embedding layer.
        embed_size: Output dimension of the Embedding layer.
        random_seed: Seed given to every GlorotUniform initializer and to
            ``dense_layer``.
        lstm_unit: Units of the first LSTM; the second LSTM uses
            ``lstm_unit // 2``.
        dense_units: ``units`` argument of the final ``dense_layer``.
        batch_size: Fixed batch size of the input layer; also forwarded to
            ``bln_layer`` as ``batchsize``.
        b_mm: Forwarded to ``bln_layer`` as ``batch_moving_mean``.
        b_mv: Forwarded to ``bln_layer`` as ``batch_moving_var``.
        f_mm: Forwarded to ``bln_layer`` as ``feature_moving_mean``.
        f_mv: Forwarded to ``bln_layer`` as ``feature_moving_var``.

    Returns:
        An uncompiled ``tf.keras.Model`` with one input and one output.
    """
    # `(80)` is the int 80, not a tuple; normalise so tf.keras.Input always
    # receives a proper shape tuple.
    if isinstance(inputshape, int):
        inputshape = (inputshape,)

    def _glorot():
        # Fresh initializer instance per weight, all sharing the same seed.
        return tf.keras.initializers.GlorotUniform(seed=random_seed)

    input_lyr = tf.keras.Input(shape=inputshape, batch_size=batch_size, name='input')

    x = Embedding(max_features, embed_size,
                  embeddings_initializer=_glorot())(input_lyr)

    # The first LSTM returns the whole sequence so the normalisation layer
    # receives a 3-D tensor.
    x = LSTM(lstm_unit, dropout=0.2, recurrent_dropout=0.2,
             recurrent_initializer=_glorot(),
             kernel_initializer=_glorot(),
             return_sequences=True)(x)

    x = bln_layer(stateful=True, batchsize=batch_size, name='bn1',
                  batch_moving_mean=b_mm, batch_moving_var=b_mv,
                  feature_moving_mean=f_mm, feature_moving_var=f_mv)(x)

    # The second LSTM collapses the sequence to its last output.
    x = LSTM(lstm_unit // 2, dropout=0.2, recurrent_dropout=0.2,
             recurrent_initializer=_glorot(),
             kernel_initializer=_glorot(),
             return_sequences=False)(x)

    output_lyr = dense_layer(units=dense_units, name='dense1', random_seed=random_seed)(x)
    return tf.keras.Model(inputs=[input_lyr], outputs=[output_lyr])


In [9]:
# Instantiate the cBLN model. The input shape is derived from `maxlen` (the
# padding length used for the data) rather than a second hard-coded 80, and is
# passed as a real tuple -- `(80)` is just the int 80.
model_bln_layer = BLNLayer_model(
    inputshape=(maxlen,), max_features=max_features,
    embed_size=128, random_seed=random_seed,
    lstm_unit=128, dense_units=num_classes,
    batch_size=minibatch,
    b_mm=True, b_mv=True,
    f_mm=False, f_mv=False,
)



In [10]:
# Print the layer-by-layer output shapes and parameter counts.
model_bln_layer.summary()

Model: "model"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input (InputLayer)           [(20, 80)]                0         
_________________________________________________________________
embedding (Embedding)        (20, 80, 128)             2560000   
_________________________________________________________________
lstm (LSTM)                  (20, 80, 128)             131584    
_________________________________________________________________
bn1 (bln_layer)              (20, 80, 128)             23938     
_________________________________________________________________
lstm_1 (LSTM)                (20, 64)                  49408     
_________________________________________________________________
dense1 (dense_layer)         (20, 1)                   65        
Total params: 2,764,995
Trainable params: 2,741,313
Non-trainable params: 23,682
______________________________________________

# Callbacks

In [11]:
# Callbacks for saving the best model and for TensorBoard logging.
# `folder_name` is handed to both callback helpers and is reused later to build
# the weights path and the evaluation path.
# NOTE(review): 'TTFF' presumably encodes b_mm/b_mv/f_mm/f_mv = True/True/False/False -- confirm.
folder_name = str(number_batches_train) + '_model_bln_layer_TTFF_imdb'
save_bm_cb = save_best_model_callback(folder_name)
tb_cb = tensorboard_callback(folder_name)

# Callback for resetting moving mean and variances at the end of each epoch
bln_layer_cb = bln_callback()

# Bundle the three callbacks into the list passed to fit().
bln_layer_cb_list = create_callback_list(save_bm_cb, tb_cb, bln_layer_cb)

In [12]:
# Compile with Adam, binary cross-entropy configured with from_logits=True,
# and binary accuracy as the tracked metric.
model_bln_layer.compile(optimizer = tf.keras.optimizers.Adam(learning_rate=learning_rate),
                               loss = tf.keras.losses.BinaryCrossentropy(from_logits=True),
                               metrics = [tf.keras.metrics.BinaryAccuracy()])

In [13]:
# Train for `epochs` epochs on `number_batches_train` batches, validating on
# `number_batches_valid` batches (both limited with Dataset.take).
# NOTE(review): Keras documents `shuffle` as ignored for tf.data.Dataset inputs,
# so `shuffle=True` likely has no effect here -- confirm.
model_bln_layer_history =  model_bln_layer.fit(train_dataset.take(number_batches_train), epochs=epochs,
                                                verbose=1, callbacks=bln_layer_cb_list,
                                                validation_data=valid_dataset.take(number_batches_valid),
                                                shuffle=True)

Train for 500 steps, validate for 50 steps
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


In [14]:
# Evaluate on the test dataset; the result is [loss, binary accuracy].
model_bln_layer.evaluate(test_dataset)



[0.6609152596205473, 0.77308]

In [15]:
# Save the trained weights; `weights_path` is later passed to the grid search.
weights_path = "./models/" + folder_name + '/'+ str(number_batches_train) +"_pretrained_weights_TTFF.h5"
model_bln_layer.save_weights(weights_path)

In [16]:
# Drop the references to the model, its callbacks and the training history,
# then call reset_graph() before the next experiment.
del model_bln_layer, save_bm_cb, tb_cb, bln_layer_cb, bln_layer_cb_list, model_bln_layer_history
reset_graph()

session is clear


In [17]:
# Run grid_serach over BLNLayer_model on the test dataset using the saved weights.
# Judging by the printed output, each Bmm/Bmv/Fmm/Fmv True/False combination is
# evaluated and reported as [loss, accuracy].
# NOTE(review): presumably the sorted result is written to `save_eval_path` -- confirm.
# NOTE: relies on `folder_name` still holding the cBLN value; it is rebound in
# the later Keras BN / LN sections.
save_eval_path = "./logs/" + folder_name + '/'+ str(number_batches_train) +"_sorted_evaluation.pkl"
evaluation = grid_serach(BLNLayer_model, test_dataset,
                         batch_size = minibatch, sort=True,
                         save_eval_path = save_eval_path,
                         weights_path = weights_path,
                         loss = tf.keras.losses.BinaryCrossentropy(from_logits=True),
                         metrics = tf.keras.metrics.BinaryAccuracy())

{'Bmm_True Bmv_True Fmm_True Fmv_True': [0.6582831666946412, 0.77328]}
session is clear
{'Bmm_True Bmv_True Fmm_True Fmv_True': [0.6582831666946412, 0.77328], 'Bmm_True Bmv_True Fmm_True Fmv_False': [0.6608336184233427, 0.77304]}
session is clear
{'Bmm_True Bmv_True Fmm_True Fmv_True': [0.6582831666946412, 0.77328], 'Bmm_True Bmv_True Fmm_True Fmv_False': [0.6608336184233427, 0.77304], 'Bmm_True Bmv_True Fmm_False Fmv_True': [0.6583203714460134, 0.77328]}
session is clear
{'Bmm_True Bmv_True Fmm_True Fmv_True': [0.6582831666946412, 0.77328], 'Bmm_True Bmv_True Fmm_True Fmv_False': [0.6608336184233427, 0.77304], 'Bmm_True Bmv_True Fmm_False Fmv_True': [0.6583203714460134, 0.77328], 'Bmm_True Bmv_True Fmm_False Fmv_False': [0.6609152596205473, 0.77308]}
session is clear
{'Bmm_True Bmv_True Fmm_True Fmv_True': [0.6582831666946412, 0.77328], 'Bmm_True Bmv_True Fmm_True Fmv_False': [0.6608336184233427, 0.77304], 'Bmm_True Bmv_True Fmm_False Fmv_True': [0.6583203714460134, 0.77328], 'Bmm_Tru

{'Bmm_True Bmv_True Fmm_True Fmv_True': [0.6582831666946412, 0.77328], 'Bmm_True Bmv_True Fmm_True Fmv_False': [0.6608336184233427, 0.77304], 'Bmm_True Bmv_True Fmm_False Fmv_True': [0.6583203714460134, 0.77328], 'Bmm_True Bmv_True Fmm_False Fmv_False': [0.6609152596205473, 0.77308], 'Bmm_True Bmv_False Fmm_True Fmv_True': [0.6825969819128513, 0.77044], 'Bmm_True Bmv_False Fmm_True Fmv_False': [0.6850093473628164, 0.77052], 'Bmm_True Bmv_False Fmm_False Fmv_True': [0.6826354142576456, 0.77044], 'Bmm_True Bmv_False Fmm_False Fmv_False': [0.6850927071258426, 0.77048], 'Bmm_False Bmv_True Fmm_True Fmv_True': [0.6904619035258889, 0.7738], 'Bmm_False Bmv_True Fmm_True Fmv_False': [0.6922044619083405, 0.7732], 'Bmm_False Bmv_True Fmm_False Fmv_True': [0.6904719975620508, 0.7738], 'Bmm_False Bmv_True Fmm_False Fmv_False': [0.6922299877732992, 0.77308], 'Bmm_False Bmv_False Fmm_True Fmv_True': [0.7137897999718785, 0.77124], 'Bmm_False Bmv_False Fmm_True Fmv_False': [0.7155631319105625, 0.7708]

In [18]:
# Show the (configuration, [loss, accuracy]) pairs returned by the grid search.
evaluation

[('Bmm_True Bmv_True Fmm_True Fmv_True', [0.6582831666946412, 0.77328]),
 ('Bmm_True Bmv_True Fmm_False Fmv_True', [0.6583203714460134, 0.77328]),
 ('Bmm_True Bmv_True Fmm_True Fmv_False', [0.6608336184233427, 0.77304]),
 ('Bmm_True Bmv_True Fmm_False Fmv_False', [0.6609152596205473, 0.77308]),
 ('Bmm_True Bmv_False Fmm_True Fmv_True', [0.6825969819128513, 0.77044]),
 ('Bmm_True Bmv_False Fmm_False Fmv_True', [0.6826354142576456, 0.77044]),
 ('Bmm_True Bmv_False Fmm_True Fmv_False', [0.6850093473628164, 0.77052]),
 ('Bmm_True Bmv_False Fmm_False Fmv_False', [0.6850927071258426, 0.77048]),
 ('Bmm_False Bmv_True Fmm_True Fmv_True', [0.6904619035258889, 0.7738]),
 ('Bmm_False Bmv_True Fmm_False Fmv_True', [0.6904719975620508, 0.7738]),
 ('Bmm_False Bmv_True Fmm_True Fmv_False', [0.6922044619083405, 0.7732]),
 ('Bmm_False Bmv_True Fmm_False Fmv_False', [0.6922299877732992, 0.77308]),
 ('Bmm_False Bmv_False Fmm_True Fmv_True', [0.7137897999718785, 0.77124]),
 ('Bmm_False Bmv_False Fmm_False

# 2. Using the Batch Normalization layer implemented in Keras

In [19]:
def bn_keras_model(inputshape=(80,), max_features=20000, embed_size=128, random_seed=100,
                   lstm_unit=128, dense_units=1,
                   batch_size=60):
    """Build the Embedding -> LSTM -> BatchNormalization -> LSTM -> dense_layer model.

    Args:
        inputshape: Per-sample input shape, excluding the batch dimension.
            A bare int (e.g. ``(80)``, which is just ``80``) is accepted and
            treated as the 1-tuple ``(80,)``.
        max_features: Input dimension of the Embedding layer.
        embed_size: Output dimension of the Embedding layer.
        random_seed: Seed given to every GlorotUniform initializer and to
            ``dense_layer``.
        lstm_unit: Units of the first LSTM; the second LSTM uses
            ``lstm_unit // 2``.
        dense_units: ``units`` argument of the final ``dense_layer``.
        batch_size: Fixed batch size of the input layer.

    Returns:
        An uncompiled ``tf.keras.Model`` with one input and one output.
    """
    # `(80)` is the int 80, not a tuple; normalise so tf.keras.Input always
    # receives a proper shape tuple.
    if isinstance(inputshape, int):
        inputshape = (inputshape,)

    def _glorot():
        # Fresh initializer instance per weight, all sharing the same seed.
        return tf.keras.initializers.GlorotUniform(seed=random_seed)

    input_lyr = tf.keras.Input(shape=inputshape, batch_size=batch_size, name='input')

    x = Embedding(max_features, embed_size,
                  embeddings_initializer=_glorot())(input_lyr)

    # The first LSTM returns the whole sequence so the normalisation layer
    # receives a 3-D tensor.
    x = LSTM(lstm_unit, dropout=0.2, recurrent_dropout=0.2,
             recurrent_initializer=_glorot(),
             kernel_initializer=_glorot(),
             return_sequences=True)(x)

    x = tf.keras.layers.BatchNormalization(momentum=0.99, name='bn1')(x)

    # The second LSTM collapses the sequence to its last output.
    x = LSTM(lstm_unit // 2, dropout=0.2, recurrent_dropout=0.2,
             recurrent_initializer=_glorot(),
             kernel_initializer=_glorot(),
             return_sequences=False)(x)

    output_lyr = dense_layer(units=dense_units, name='dense1', random_seed=random_seed)(x)
    return tf.keras.Model(inputs=[input_lyr], outputs=[output_lyr])


In [20]:
# Build the Keras BatchNormalization baseline. The input shape is derived from
# `maxlen` (the padding length used for the data) rather than a second
# hard-coded 80, and is passed as a real tuple -- `(80)` is just the int 80.
model_bn_keras = bn_keras_model(
    inputshape=(maxlen,), max_features=max_features,
    embed_size=128, random_seed=random_seed,
    lstm_unit=128, dense_units=num_classes,
    batch_size=minibatch,
)
model_bn_keras.summary()

# Compile with Adam, binary cross-entropy configured with from_logits=True,
# and binary accuracy as the tracked metric.
model_bn_keras.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=learning_rate),
    loss=tf.keras.losses.BinaryCrossentropy(from_logits=True),
    metrics=[tf.keras.metrics.BinaryAccuracy()],
)

Model: "model"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input (InputLayer)           [(20, 80)]                0         
_________________________________________________________________
embedding (Embedding)        (20, 80, 128)             2560000   
_________________________________________________________________
lstm (LSTM)                  (20, 80, 128)             131584    
_________________________________________________________________
bn1 (BatchNormalization)     (20, 80, 128)             512       
_________________________________________________________________
lstm_1 (LSTM)                (20, 64)                  49408     
_________________________________________________________________
dense1 (dense_layer)         (20, 1)                   65        
Total params: 2,741,569
Trainable params: 2,741,313
Non-trainable params: 256
_________________________________________________

In [21]:
# Callbacks for saving best model and tensorboard
# NOTE: rebinds `folder_name` (and the callback names) used by the earlier cBLN section.
folder_name = str(number_batches_train) + '_bn_Keras_imdb'
save_bm_cb = save_best_model_callback(folder_name)
tb_cb = tensorboard_callback(folder_name)


# Plain list of the two callbacks passed to fit().
bn_keras_cb_list = [save_bm_cb, tb_cb]

In [22]:
# Train for `epochs` epochs on `number_batches_train` batches, validating on
# `number_batches_valid` batches (both limited with Dataset.take).
# NOTE(review): Keras documents `shuffle` as ignored for tf.data.Dataset inputs,
# so `shuffle = True` likely has no effect here -- confirm.
model_bn_keras_history =  model_bn_keras.fit(train_dataset.take(number_batches_train),
                                             epochs = epochs, verbose=1, 
                                             callbacks = bn_keras_cb_list,
                                             validation_data = valid_dataset.take(number_batches_valid),
                                             shuffle = True)

Train for 500 steps, validate for 50 steps
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


In [23]:
# Evaluate on the test dataset; the result is [loss, binary accuracy].
model_bn_keras.evaluate(test_dataset)



[0.8220648340150714, 0.7768]

In [24]:
# Release everything that still references the trained model before resetting.
# The History object returned by fit() keeps a reference to the model, so it is
# deleted too -- matching the cleanup done after the cBLN experiment.
del model_bn_keras, save_bm_cb, tb_cb, bn_keras_cb_list, model_bn_keras_history
reset_graph()

session is clear


# 3. Using the Layer Normalization layer implemented in Keras

In [25]:
def ln_keras_model(inputshape=(80,), max_features=20000, embed_size=128, random_seed=100,
                   lstm_unit=128, dense_units=1,
                   batch_size=60):
    """Build the Embedding -> LSTM -> LayerNormalization -> LSTM -> dense_layer model.

    Args:
        inputshape: Per-sample input shape, excluding the batch dimension.
            A bare int (e.g. ``(80)``, which is just ``80``) is accepted and
            treated as the 1-tuple ``(80,)``.
        max_features: Input dimension of the Embedding layer.
        embed_size: Output dimension of the Embedding layer.
        random_seed: Seed given to every GlorotUniform initializer and to
            ``dense_layer``.
        lstm_unit: Units of the first LSTM; the second LSTM uses
            ``lstm_unit // 2``.
        dense_units: ``units`` argument of the final ``dense_layer``.
        batch_size: Fixed batch size of the input layer.

    Returns:
        An uncompiled ``tf.keras.Model`` with one input and one output.
    """
    # `(80)` is the int 80, not a tuple; normalise so tf.keras.Input always
    # receives a proper shape tuple.
    if isinstance(inputshape, int):
        inputshape = (inputshape,)

    def _glorot():
        # Fresh initializer instance per weight, all sharing the same seed.
        return tf.keras.initializers.GlorotUniform(seed=random_seed)

    input_lyr = tf.keras.Input(shape=inputshape, batch_size=batch_size, name='input')

    x = Embedding(max_features, embed_size,
                  embeddings_initializer=_glorot())(input_lyr)

    # The first LSTM returns the whole sequence so the normalisation layer
    # receives a 3-D tensor.
    x = LSTM(lstm_unit, dropout=0.2, recurrent_dropout=0.2,
             recurrent_initializer=_glorot(),
             kernel_initializer=_glorot(),
             return_sequences=True)(x)

    x = tf.keras.layers.LayerNormalization()(x)

    # The second LSTM collapses the sequence to its last output.
    x = LSTM(lstm_unit // 2, dropout=0.2, recurrent_dropout=0.2,
             recurrent_initializer=_glorot(),
             kernel_initializer=_glorot(),
             return_sequences=False)(x)

    output_lyr = dense_layer(units=dense_units, name='dense1', random_seed=random_seed)(x)
    return tf.keras.Model(inputs=[input_lyr], outputs=[output_lyr])



In [26]:
# Build the Keras LayerNormalization baseline. The input shape is derived from
# `maxlen` (the padding length used for the data) rather than a second
# hard-coded 80, and is passed as a real tuple -- `(80)` is just the int 80.
model_ln_keras = ln_keras_model(
    inputshape=(maxlen,), max_features=max_features,
    embed_size=128, random_seed=random_seed,
    lstm_unit=128, dense_units=num_classes,
    batch_size=minibatch,
)
model_ln_keras.summary()

# Compile with Adam, binary cross-entropy configured with from_logits=True,
# and binary accuracy as the tracked metric.
model_ln_keras.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=learning_rate),
    loss=tf.keras.losses.BinaryCrossentropy(from_logits=True),
    metrics=[tf.keras.metrics.BinaryAccuracy()],
)

Model: "model"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input (InputLayer)           [(20, 80)]                0         
_________________________________________________________________
embedding (Embedding)        (20, 80, 128)             2560000   
_________________________________________________________________
lstm (LSTM)                  (20, 80, 128)             131584    
_________________________________________________________________
layer_normalization (LayerNo (20, 80, 128)             256       
_________________________________________________________________
lstm_1 (LSTM)                (20, 64)                  49408     
_________________________________________________________________
dense1 (dense_layer)         (20, 1)                   65        
Total params: 2,741,313
Trainable params: 2,741,313
Non-trainable params: 0
___________________________________________________

In [27]:
# Callbacks for saving best model and tensorboard
# NOTE: rebinds `folder_name` (and the callback names) used by the earlier sections.
folder_name = str(number_batches_train) + '_ln_Keras_imdb'
save_bm_cb = save_best_model_callback(folder_name)
tb_cb = tensorboard_callback(folder_name)

# Plain list of the two callbacks passed to fit().
ln_keras_cb_list = [save_bm_cb, tb_cb]

In [28]:
# Train for `epochs` epochs on `number_batches_train` batches, validating on
# `number_batches_valid` batches (both limited with Dataset.take).
# NOTE(review): Keras documents `shuffle` as ignored for tf.data.Dataset inputs,
# so `shuffle=True` likely has no effect here -- confirm.
model_ln_keras_history =  model_ln_keras.fit(train_dataset.take(number_batches_train),
                                             epochs=epochs, verbose=1, 
                                             callbacks=ln_keras_cb_list,
                                             validation_data=valid_dataset.take(number_batches_valid),
                                             shuffle=True)

Train for 500 steps, validate for 50 steps
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


In [29]:
# Evaluate on the test dataset; the result is [loss, binary accuracy].
model_ln_keras.evaluate(test_dataset)



[0.8189312511198222, 0.76284]

In [31]:
# Release everything that still references the trained model before resetting.
# `tb_cb` and the History object returned by fit() are deleted too, matching the
# cleanup done after the cBLN experiment; the History keeps a reference to the model.
del model_ln_keras, save_bm_cb, tb_cb, ln_keras_cb_list, model_ln_keras_history
reset_graph()

session is clear
