# Exercises: CIFAR10 training

In [None]:
import tensorflow as tf
from tensorflow import keras
import numpy as np
import matplotlib.pyplot as plt

## Model

In [22]:
keras.backend.clear_session()
tf.random.set_seed(42)
np.random.seed(42)


model = keras.models.Sequential()

model.add(keras.layers.Flatten(input_shape=[32, 32, 3]))
for _ in range(20):
    model.add(
        keras.layers.Dense(
            units=100,
            activation='elu',
            kernel_initializer='he_normal'
        )
    )
model.add(keras.layers.Dense(10, activation='softmax'))

## Data

In [23]:
(X_train_full, y_train_full), (X_test, y_test) = keras.datasets.cifar10.load_data()
X_train_full.shape, y_train_full.shape

((50000, 32, 32, 3), (50000, 1))

## Train

In [24]:
from sklearn.model_selection import train_test_split

X_train_full = X_train_full / 255.0
X_test = X_test / 255.0

X_train = X_train_full[5000:]
y_train = y_train_full[5000:]
X_valid = X_train_full[:5000]
y_valid = y_train_full[:5000]

In [25]:
def get_logs_dir():
    now = time.strftime('%d_%m_%Y-%H_%M_%H')
    return os.path.join('./logs', now)

gpu_name = tf.test.gpu_device_name()
logs_dir = get_logs_dir()
tensorboard_cb = keras.callbacks.TensorBoard(logs_dir)
early_stopping_cb = keras.callbacks.EarlyStopping(patience=20)

with tf.device(gpu_name):
    optimizer = keras.optimizers.Nadam(lr=5e-5)
    model.compile(
        loss='sparse_categorical_crossentropy',
        optimizer = optimizer,
        metrics='accuracy'
    )

    history_1 = model.fit(
        X_train,
        y_train,
        epochs=1000,
        validation_data=(X_valid, y_valid),
        callbacks=[
            tensorboard_cb,
            early_stopping_cb
        ]
    )

Epoch 1/1000
Epoch 2/1000
Epoch 3/1000
Epoch 4/1000
Epoch 5/1000
Epoch 6/1000
Epoch 7/1000
Epoch 8/1000
Epoch 9/1000
Epoch 10/1000
Epoch 11/1000
Epoch 12/1000
Epoch 13/1000
Epoch 14/1000
Epoch 15/1000
Epoch 16/1000
Epoch 17/1000
Epoch 18/1000
Epoch 19/1000
Epoch 20/1000
Epoch 21/1000
Epoch 22/1000
Epoch 23/1000
Epoch 24/1000
Epoch 25/1000
Epoch 26/1000
Epoch 27/1000
Epoch 28/1000
Epoch 29/1000
Epoch 30/1000
Epoch 31/1000
Epoch 32/1000
Epoch 33/1000
Epoch 34/1000
Epoch 35/1000
Epoch 36/1000
Epoch 37/1000
Epoch 38/1000
Epoch 39/1000
Epoch 40/1000
Epoch 41/1000
Epoch 42/1000
Epoch 43/1000
Epoch 44/1000
Epoch 45/1000


In [28]:
model.save('model_1.h5')
model.evaluate(X_valid, y_valid)



[1.55025053024292, 0.5077999830245972]

## Train with batch normalization

In [32]:
keras.backend.clear_session()
tf.random.set_seed(42)
np.random.seed(42)
logs_dir = get_logs_dir()

# Model
model_bn = keras.models.Sequential()

model_bn.add(keras.layers.Flatten(input_shape=[32, 32, 3]))
model_bn.add(keras.layers.BatchNormalization())
for _ in range(20):
    model_bn.add(
        keras.layers.Dense(
            units=100,
            kernel_initializer='he_normal'
        )
    )
    model_bn.add(
        keras.layers.BatchNormalization()
    )
    model_bn.add(
        keras.layers.Activation('elu')
    )
model_bn.add(keras.layers.Dense(10, activation='softmax'))

# Training
tensorboard_cb = keras.callbacks.TensorBoard(logs_dir + '_bn_1')
early_stopping_cb = keras.callbacks.EarlyStopping(patience=20)
model_checkpoint_cb = keras.callbacks.ModelCheckpoint("bn_model.h5", save_best_only=True)

with tf.device(gpu_name):
    optimizer = keras.optimizers.Nadam(lr=5e-4)
    model_bn.compile(
        loss='sparse_categorical_crossentropy',
        optimizer = optimizer,
        metrics='accuracy'
    )

    history_bn = model_bn.fit(
        X_train,
        y_train,
        epochs=1000,
        validation_data=(X_valid, y_valid),
        callbacks=[
            early_stopping_cb,
            model_checkpoint_cb,
            tensorboard_cb,
        ]
    )

model_bn.save('bn_model.h5')
print(model_bn.evaluate(X_valid, y_valid))

Epoch 1/1000
Epoch 2/1000
Epoch 3/1000
Epoch 4/1000
Epoch 5/1000
Epoch 6/1000
Epoch 7/1000
Epoch 8/1000
Epoch 9/1000
Epoch 10/1000
Epoch 11/1000
Epoch 12/1000
Epoch 13/1000
Epoch 14/1000
Epoch 15/1000
Epoch 16/1000
Epoch 17/1000
Epoch 18/1000
Epoch 19/1000
Epoch 20/1000
Epoch 21/1000
Epoch 22/1000
Epoch 23/1000
Epoch 24/1000
Epoch 25/1000
Epoch 26/1000
Epoch 27/1000
Epoch 28/1000
Epoch 29/1000
Epoch 30/1000
Epoch 31/1000
Epoch 32/1000
Epoch 33/1000
Epoch 34/1000
Epoch 35/1000
Epoch 36/1000
Epoch 37/1000
Epoch 38/1000
Epoch 39/1000
Epoch 40/1000
Epoch 41/1000
Epoch 42/1000
Epoch 43/1000
Epoch 44/1000
[1.3839503526687622, 0.5365999937057495]


## Train self-normalized network

In [35]:
keras.backend.clear_session()
tf.random.set_seed(42)
np.random.seed(42)
logs_dir = get_logs_dir()

# Model
model_snn = keras.models.Sequential()

model_snn.add(keras.layers.Flatten(input_shape=[32, 32, 3]))
for _ in range(20):
    model_snn.add(
        keras.layers.Dense(
            units=100,
            kernel_initializer='lecun_normal',
            activation='selu'
        )
    )
model_snn.add(keras.layers.Dense(10, activation='softmax'))

# Training
tensorboard_cb = keras.callbacks.TensorBoard(logs_dir + '_snn_2')
early_stopping_cb = keras.callbacks.EarlyStopping(patience=20)
model_checkpoint_cb = keras.callbacks.ModelCheckpoint("snn_model.h5", save_best_only=True)

X_means = X_train.mean(axis=0)
X_stds = X_train.std(axis=0)
X_train_scaled = (X_train - X_means) / X_stds
X_valid_scaled = (X_valid - X_means) / X_stds
X_test_scaled = (X_test - X_means) / X_stds

with tf.device(gpu_name):
    optimizer = keras.optimizers.Nadam(lr=7e-4)
    model_snn.compile(
        loss='sparse_categorical_crossentropy',
        optimizer = optimizer,
        metrics='accuracy'
    )

    history_snn = model_snn.fit(
        X_train_scaled,
        y_train,
        epochs=1000,
        validation_data=(X_valid_scaled, y_valid),
        callbacks=[
            early_stopping_cb,
            model_checkpoint_cb,
            tensorboard_cb,
        ]
    )

model_snn.save('snn_model.h5')
print(model_snn.evaluate(X_valid_scaled, y_valid))

Epoch 1/1000
Epoch 2/1000
Epoch 3/1000
Epoch 4/1000
Epoch 5/1000
Epoch 6/1000
Epoch 7/1000
Epoch 8/1000
Epoch 9/1000
Epoch 10/1000
Epoch 11/1000
Epoch 12/1000
Epoch 13/1000
Epoch 14/1000
Epoch 15/1000
Epoch 16/1000
Epoch 17/1000
Epoch 18/1000
Epoch 19/1000
Epoch 20/1000
Epoch 21/1000
Epoch 22/1000
Epoch 23/1000
Epoch 24/1000
Epoch 25/1000
Epoch 26/1000
Epoch 27/1000
Epoch 28/1000
Epoch 29/1000
Epoch 30/1000
[1.566311240196228, 0.492000013589859]


In [38]:
keras.backend.clear_session()
tf.random.set_seed(42)
np.random.seed(42)
logs_dir = get_logs_dir()

# Model
model_snn_da = keras.models.Sequential()

model_snn_da.add(keras.layers.Flatten(input_shape=[32, 32, 3]))
for _ in range(20):
    model_snn_da.add(
        keras.layers.Dense(
            units=100,
            kernel_initializer='lecun_normal',
            activation='selu'
        )
    )
model_snn_da.add(keras.layers.AlphaDropout(rate=0.1))
model_snn_da.add(keras.layers.Dense(10, activation='softmax'))

# Training
tensorboard_cb = keras.callbacks.TensorBoard(logs_dir + '_snn_da_1')
early_stopping_cb = keras.callbacks.EarlyStopping(patience=20)
model_checkpoint_cb = keras.callbacks.ModelCheckpoint("snn_da_model.h5", save_best_only=True)

X_means = X_train.mean(axis=0)
X_stds = X_train.std(axis=0)
X_train_scaled = (X_train - X_means) / X_stds
X_valid_scaled = (X_valid - X_means) / X_stds
X_test_scaled = (X_test - X_means) / X_stds

with tf.device(gpu_name):
    optimizer = keras.optimizers.Nadam(lr=5e-4)
    model_snn_da.compile(
        loss='sparse_categorical_crossentropy',
        optimizer = optimizer,
        metrics='accuracy'
    )

    history_snn_da = model_snn_da.fit(
        X_train_scaled,
        y_train,
        epochs=1000,
        validation_data=(X_valid_scaled, y_valid),
        callbacks=[
            early_stopping_cb,
            model_checkpoint_cb,
            tensorboard_cb,
        ]
    )

model_snn_da.save('snn_da_model.h5')
print(model_snn_da.evaluate(X_valid_scaled, y_valid))

Epoch 1/1000
Epoch 2/1000
Epoch 3/1000
Epoch 4/1000
Epoch 5/1000
Epoch 6/1000
Epoch 7/1000
Epoch 8/1000
Epoch 9/1000
Epoch 10/1000
Epoch 11/1000
Epoch 12/1000
Epoch 13/1000
Epoch 14/1000
Epoch 15/1000
Epoch 16/1000
Epoch 17/1000
Epoch 18/1000
Epoch 19/1000
Epoch 20/1000
Epoch 21/1000
Epoch 22/1000
Epoch 23/1000
Epoch 24/1000
Epoch 25/1000
Epoch 26/1000
Epoch 27/1000
Epoch 28/1000
Epoch 29/1000
Epoch 30/1000
[1.8539845943450928, 0.5004000067710876]


In [39]:
class MCAlphaDropout(keras.layers.AlphaDropout):
    def call(self, inputs):
        return super().call(inputs, training=True)

model_snn_mcda = keras.models.Sequential([
    MCAlphaDropout(layer.rate)
    if isinstance(layer, keras.layers.AlphaDropout)
    else layer
    for layer in model_snn_da.layers
])

In [44]:
def mc_dropout_predict_probas(mc_model, X, n_samples=10):
    predictions = [mc_model.predict(X) for _ in range(n_samples)]
    return np.mean(predictions, axis=0)

def mc_dropout_predict_classes(mc_model, X, n_samples=10):
    probas = mc_dropout_predict_probas(mc_model, X, n_samples)
    return np.argmax(probas, axis=1)

In [45]:
keras.backend.clear_session()
tf.random.set_seed(42)
np.random.seed(42)

y_pred = mc_dropout_predict_classes(model_snn_mcda, X_valid_scaled)
accuracy = np.mean(y_pred == y_valid[:, 0])
accuracy

0.5006