In [2]:
from keras.datasets import imdb
import matplotlib.pyplot as plt
import numpy as np
import keras

INFO:tensorflow:Enabling eager execution
INFO:tensorflow:Enabling v2 tensorshape
INFO:tensorflow:Enabling resource variables
INFO:tensorflow:Enabling tensor equality
INFO:tensorflow:Enabling control flow v2


# 0) Data preparation

In [4]:
def data_prepare(sequences, num_words=10001):
    """Multi-hot encode sequences of integer word indices.

    Row ``i`` of the result has a 1 in column ``k - 1`` when index ``k``
    (with ``1 <= k < num_words``) occurs at least once in ``sequences[i]``.
    Index 0 is written to a scratch column that is dropped before returning,
    so it never shows up as a feature.

    Parameters
    ----------
    sequences : sequence of sequences of int
        One list of word indices per sample. Empty sequences are allowed and
        produce an all-zero row.
    num_words : int, default 10001
        Exclusive upper bound on the word indices (index 0 included), i.e. the
        width of the intermediate matrix. The result has ``num_words - 1`` columns.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(len(sequences), num_words - 1)`` holding 0/1 values in
        NumPy's default integer dtype (memory-hungry for large inputs).
    """
    res = np.zeros(shape=(len(sequences), num_words), dtype=int)
    for i, sequence in enumerate(sequences):
        # Force an integer index array: an empty list would otherwise become a
        # float64 array, which NumPy refuses to use as an index.
        res[i, np.asarray(sequence, dtype=np.intp)] = 1
    # Drop the scratch column for index 0; copy so the result is contiguous.
    return res[:, 1:].copy()

# Load the IMDB reviews as lists of word indices. With index_from=0 the word
# indices are not shifted, and index 0 is shared by the start marker and the
# out-of-vocabulary marker, so indices 1..10000 remain for actual words.
(X_train_raw, y_train), (X_test_raw, y_test) = imdb.load_data(
    num_words=10001, start_char=0, oov_char=0, index_from=0
)

# Turn every review into a fixed-width 0/1 vector (one column per word index).
X_test = data_prepare(X_test_raw)
X_train = data_prepare(X_train_raw)

# Sanity check: both splits should share the same feature width.
print("X_test.shape =", X_test.shape)
print("X_train.shape =", X_train.shape)

X_test.shape = (25000, 10000)
X_train.shape = (25000, 10000)


# 1) Base Model (from lab 6) 

### Define model

In [5]:
# Baseline classifier: three ReLU hidden layers (100 -> 50 -> 10 units)
# followed by a single sigmoid unit for the binary sentiment label.
layer1 = keras.layers.Dense(100, activation='relu')
layer2 = keras.layers.Dense(50, activation='relu')
layer3 = keras.layers.Dense(10, activation='relu')
output_layer = keras.layers.Dense(1, activation='sigmoid')

# Stack the layers in order, input side first.
model1 = keras.Sequential([layer1, layer2, layer3, output_layer])

### Compile Model

In [6]:
# Binary cross-entropy pairs with the single sigmoid output; accuracy is
# tracked so it can be reported in the summary.
model1.compile(loss='binary_crossentropy', optimizer='rmsprop', metrics=['accuracy'])

### Fit model & plot result

In [7]:
# Train for a fixed 8 epochs with no callbacks; batch_size is left at its default.
# NOTE(review): the test split doubles as validation data here — confirm this is intended.
history1 = model1.fit(
    X_train,
    y_train,
    epochs=8,
    validation_data=(X_test, y_test),
)

Epoch 1/8
Epoch 2/8
Epoch 3/8
Epoch 4/8
Epoch 5/8
Epoch 6/8
Epoch 7/8
Epoch 8/8


# 2) Model with early stopping

In [8]:
# Same architecture as the baseline; only the training procedure differs.
layer1 = keras.layers.Dense(100, activation='relu')
layer2 = keras.layers.Dense(50, activation='relu')
layer3 = keras.layers.Dense(10, activation='relu')
output_layer = keras.layers.Dense(1, activation='sigmoid')

model2 = keras.Sequential([layer1, layer2, layer3, output_layer])
model2.compile(loss='binary_crossentropy', optimizer='rmsprop', metrics=['accuracy'])

# Monitor validation loss with a patience of 2 epochs and roll back to the
# best weights seen once training stops.
early_stop_cb = keras.callbacks.EarlyStopping(
    monitor='val_loss', patience=2, restore_best_weights=True
)

# NOTE(review): early stopping is driven by the test split, so the test accuracy
# reported later is not from fully held-out data — consider a separate validation split.
history2 = model2.fit(
    X_train,
    y_train,
    epochs=8,
    validation_data=(X_test, y_test),
    callbacks=[early_stop_cb],
)

Epoch 1/8
Epoch 2/8
Epoch 3/8


# 3) Model with early stopping
# and batch normalization

## For batch size of 32

In [14]:
# Baseline architecture with one batch-normalization layer inserted after the
# first hidden layer.
layer1 = keras.layers.Dense(100, activation='relu')
layer2 = keras.layers.BatchNormalization(epsilon=0.001)
layer3 = keras.layers.Dense(50, activation='relu')
layer4 = keras.layers.Dense(10, activation='relu')
output_layer = keras.layers.Dense(1, activation='sigmoid')

model3 = keras.Sequential([layer1, layer2, layer3, layer4, output_layer])
model3.compile(loss='binary_crossentropy', optimizer='rmsprop', metrics=['accuracy'])

# Monitor validation loss with a patience of 2 epochs and roll back to the
# best weights seen once training stops.
early_stop_cb2 = keras.callbacks.EarlyStopping(
    monitor='val_loss', patience=2, restore_best_weights=True
)

# batch_size is not passed, so Keras' default applies (32 per the section heading).
# NOTE(review): early stopping is driven by the test split — consider a separate validation split.
history3 = model3.fit(
    X_train,
    y_train,
    epochs=8,
    validation_data=(X_test, y_test),
    callbacks=[early_stop_cb2],
)

Epoch 1/8
Epoch 2/8
Epoch 3/8
Epoch 4/8


## Batch size of 16

In [15]:
# Same network as the batch-size-32 variant (batch normalization after the
# first hidden layer); only the batch size used for training changes.
layer1 = keras.layers.Dense(100, activation='relu')
layer2 = keras.layers.BatchNormalization(epsilon=0.001)
layer3 = keras.layers.Dense(50, activation='relu')
layer4 = keras.layers.Dense(10, activation='relu')
output_layer = keras.layers.Dense(1, activation='sigmoid')

model4 = keras.Sequential([layer1, layer2, layer3, layer4, output_layer])
model4.compile(loss='binary_crossentropy', optimizer='rmsprop', metrics=['accuracy'])

# Monitor validation loss with a patience of 2 epochs and roll back to the
# best weights seen once training stops.
early_stop_cb3 = keras.callbacks.EarlyStopping(
    monitor='val_loss', patience=2, restore_best_weights=True
)

# NOTE(review): early stopping is driven by the test split — consider a separate validation split.
history4 = model4.fit(
    X_train,
    y_train,
    batch_size=16,
    epochs=8,
    validation_data=(X_test, y_test),
    callbacks=[early_stop_cb3],
)

Epoch 1/8
Epoch 2/8
Epoch 3/8


# 4) Model with early stopping
# and batch normalization of all hidden layers in the network

In [16]:
# Every hidden Dense layer (with its ReLU already applied) is followed by its
# own batch-normalization layer.
layer1 = keras.layers.Dense(100, activation='relu')
layer2 = keras.layers.BatchNormalization(epsilon=0.001)
layer3 = keras.layers.Dense(50, activation='relu')
layer4 = keras.layers.BatchNormalization(epsilon=0.001)
layer5 = keras.layers.Dense(10, activation='relu')
layer6 = keras.layers.BatchNormalization(epsilon=0.001)
output_layer = keras.layers.Dense(1, activation='sigmoid')

model5 = keras.Sequential(
    [layer1, layer2, layer3, layer4, layer5, layer6, output_layer]
)
model5.compile(loss='binary_crossentropy', optimizer='rmsprop', metrics=['accuracy'])

# Monitor validation loss with a patience of 2 epochs and roll back to the
# best weights seen once training stops.
early_stop_cb4 = keras.callbacks.EarlyStopping(
    monitor='val_loss', patience=2, restore_best_weights=True
)

# NOTE(review): early stopping is driven by the test split — consider a separate validation split.
history5 = model5.fit(
    X_train,
    y_train,
    epochs=8,
    validation_data=(X_test, y_test),
    callbacks=[early_stop_cb4],
)

Epoch 1/8
Epoch 2/8
Epoch 3/8


# 5) Model with early stopping
# and batch normalization before the ReLU activation of each hidden layer

In [17]:
# Each hidden block is Dense (no activation) -> BatchNormalization -> ReLU, so
# normalization is applied to the pre-activation values.
layer1 = keras.layers.Dense(100)
layer2 = keras.layers.BatchNormalization(epsilon=0.001)
layer3 = keras.layers.ReLU()
layer4 = keras.layers.Dense(50)
layer5 = keras.layers.BatchNormalization(epsilon=0.001)
layer6 = keras.layers.ReLU()
layer7 = keras.layers.Dense(10)
layer8 = keras.layers.BatchNormalization(epsilon=0.001)
layer9 = keras.layers.ReLU()
output_layer = keras.layers.Dense(1, activation='sigmoid')

model6 = keras.Sequential(
    [
        layer1, layer2, layer3,
        layer4, layer5, layer6,
        layer7, layer8, layer9,
        output_layer,
    ]
)
model6.compile(loss='binary_crossentropy', optimizer='rmsprop', metrics=['accuracy'])

# Monitor validation loss with a patience of 2 epochs and roll back to the
# best weights seen once training stops.
early_stop_cb5 = keras.callbacks.EarlyStopping(
    monitor='val_loss', patience=2, restore_best_weights=True
)

# NOTE(review): early stopping is driven by the test split — consider a separate validation split.
history6 = model6.fit(
    X_train,
    y_train,
    epochs=8,
    validation_data=(X_test, y_test),
    callbacks=[early_stop_cb5],
)

Epoch 1/8
Epoch 2/8
Epoch 3/8


# 6) Summary

In [18]:
def _accuracy(model, X, y):
    """Return the accuracy that ``model.evaluate`` reports for ``(X, y)``.

    The models here are compiled with ``metrics=['accuracy']``, so ``evaluate``
    returns ``[loss, accuracy]``; the second entry is picked by index.
    """
    # Indexing (instead of unpacking the loss into `_`) avoids rebinding `_`,
    # which IPython uses as its last-output variable.
    return model.evaluate(X, y, verbose=0)[1]


# Train/test accuracy of every model, printed in the order the models were built.
trainAcc1 = _accuracy(model1, X_train, y_train)
testAcc1 = _accuracy(model1, X_test, y_test)
print('Base Model (from lab 6):')
print('Train:', trainAcc1,'\tTest:',testAcc1,'\n')

trainAcc2 = _accuracy(model2, X_train, y_train)
testAcc2 = _accuracy(model2, X_test, y_test)
print('Model in early stopping:')
print('Train:', trainAcc2,'\tTest:',testAcc2,'\n')


# The two single-batch-normalization models differ only in training batch size.
print('Model in early stopping and batch normalization:')
trainAcc3 = _accuracy(model3, X_train, y_train)
testAcc3 = _accuracy(model3, X_test, y_test)
print('Batch size of 32:\nTrain:', trainAcc3,'\tTest:',testAcc3)
trainAcc4 = _accuracy(model4, X_train, y_train)
testAcc4 = _accuracy(model4, X_test, y_test)
print('Batch size of 16:\nTrain:', trainAcc4,'\tTest:',testAcc4,'\n')


trainAcc5 = _accuracy(model5, X_train, y_train)
testAcc5 = _accuracy(model5, X_test, y_test)
print('Model in early stopping and batch normalization of all hidden layers in the network:')
print('Train:', trainAcc5,'\tTest:',testAcc5,'\n')


trainAcc6 = _accuracy(model6, X_train, y_train)
testAcc6 = _accuracy(model6, X_test, y_test)
print('Model in early stopping and batch normalization before RLU activation of each hidden layer:')
print('Train:', trainAcc6,'\tTest:',testAcc6,'\n')

Base Model (from lab 6):
Train: 0.9976000189781189 	Test: 0.8645200133323669 

Model in early stopping:
Train: 0.9330400228500366 	Test: 0.8841599822044373 

Model in early stopping and batch normalization:
Batch size of 32:
Train: 0.9693599939346313 	Test: 0.8728399872779846
Batch size of 16:
Train: 0.9323199987411499 	Test: 0.8796799778938293 

Model in early stopping and batch normalization of all hidden layers in the network:
Train: 0.9373599886894226 	Test: 0.8805599808692932 

Model in early stopping and batch normalization before RLU activation of each hidden layer:
Train: 0.937279999256134 	Test: 0.8727200031280518 



The base model achieved the highest accuracy on the training data (0.998) and the lowest accuracy on the test data (0.865) of all the models, which indicates overfitting. \
All the other models used early stopping, which shortened training because they did not need all 8 epochs; 3–4 epochs were usually enough. In addition, their accuracy on the test data improved. \
Adding batch normalization, whether after one hidden layer or after all of them, and whether before or after the activation, does not cause significant differences in accuracy on either the training or the test data.  \
One can only observe that with a smaller batch size (16 instead of 32) the training accuracy is lower (0.932 vs. 0.969), while the test accuracy is slightly higher (0.880 vs. 0.873).