In [1]:
import tensorflow as tf
from tensorflow import keras

print(tf.__version__)

2.10.0


## *Exercise 8.* Deep Learning on CIFAR10 dataset

### a. Build a DNN with 20 hidden layers of 100 neurons each (that's too many, but it's the point of this exercise). Use He initialization and the ELU activation function.

In [2]:
model = keras.models.Sequential()

model.add(keras.layers.Flatten(input_shape=(32, 32, 3)))

for _ in range(20):
    model.add(keras.layers.Dense(100, activation="elu", kernel_initializer="he_normal"))

### b. Using Nadam optimization and early stopping, train the network on the CIFAR10 dataset. You can load it with `keras.datasets.cifar10.load_data()`. The dataset is composed of 60,000 32 × 32–pixel color images (50,000 for training, 10,000 for testing) with 10 classes, so you'll need a softmax output layer with 10 neurons. Remember to search for the right learning rate each time you change the model's architecture or hyperparameters.

In [3]:
(x_train, y_train), (x_test, y_test) = keras.datasets.cifar10.load_data()

assert x_train.shape == (50000, 32, 32, 3)
assert x_test.shape == (10000, 32, 32, 3)
assert y_train.shape == (50000, 1)
assert y_test.shape == (10000, 1)

In [4]:
model.add(keras.layers.Dense(10, activation="softmax"))

In [5]:
optimizer = keras.optimizers.Nadam(learning_rate=1e-2)  # 3e-5, 1e-4, 3e-4, 1e-3, 3e-3 and 1e-2
model.compile(loss="sparse_categorical_crossentropy",
              optimizer=optimizer,
              metrics=['accuracy'])

In [6]:
from sklearn.model_selection import train_test_split

X_train, X_val, y_train, y_val = train_test_split(x_train, y_train, test_size=0.2, random_state=42)
X_test = x_test

In [7]:
import os

early_stop_cb = keras.callbacks.EarlyStopping(patience=20)
checkpoint_cb = keras.callbacks.ModelCheckpoint("model.h5", save_best_only=True)
run_index = 5  # Increment with each run
run_logdir = os.path.join(os.curdir, "logdir", "run_{:03d}".format(run_index))
tensorboard_cb = keras.callbacks.TensorBoard(run_logdir)
bn_callbacks = [early_stop_cb, checkpoint_cb, tensorboard_cb]

In [8]:
model.fit(X_train, y_train, epochs=100,
          validation_data=(X_val, y_val),
          callbacks=bn_callbacks)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100


<keras.callbacks.History at 0x17024b84e50>

In [9]:
model = keras.models.load_model("model.h5")
model.evaluate(X_test, y_test)



[1.8668041229248047, 0.2971999943256378]

### c. Now try adding Batch Normalization and compare the learning curves: Is it converging faster than before? Does it produce a better model? How does it affect training speed?

In [14]:
bn_model = keras.models.Sequential()

bn_model.add(keras.layers.Flatten(input_shape=(32, 32, 3)))
bn_model.add(keras.layers.BatchNormalization())
for _ in range(20):
    bn_model.add(keras.layers.Dense(100, kernel_initializer="he_normal"))
    bn_model.add(keras.layers.BatchNormalization())
    bn_model.add(keras.layers.Activation(activation="elu"))
bn_model.add(keras.layers.Dense(10, activation="softmax"))

bn_model.compile(loss="sparse_categorical_crossentropy",
                 optimizer=keras.optimizers.Nadam(learning_rate=5e-4),
                 metrics=["accuracy"])

bn_early_stop_cb = keras.callbacks.EarlyStopping(patience=10)
bn_checkpoint_cb = keras.callbacks.ModelCheckpoint("bn_model.h5", save_best_only=True)
bn_run_index = 1  # Increment with each run
bn_run_logdir = os.path.join(os.curdir, "logdir", "run_bn_{:03d}".format(bn_run_index))
bn_tensorboard_cb = keras.callbacks.TensorBoard(bn_run_logdir)
bn_callbacks = [bn_early_stop_cb, bn_checkpoint_cb, bn_tensorboard_cb]

bn_model.fit(X_train, y_train,
             epochs=100,
             validation_data=(X_val, y_val),
             callbacks=bn_callbacks)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100


<keras.callbacks.History at 0x1704a453130>

In [16]:
bn_model = keras.models.load_model("bn_model.h5")
bn_model.evaluate(X_test, y_test)



[1.3469046354293823, 0.5245000123977661]

### d. Try replacing Batch Normalization with SELU, and make the necessary adjustements to ensure the network self-normalizes (i.e., standardize the input features, use LeCun normal initialization, make sure the DNN contains only a sequence of dense layers, etc.)

In [17]:
selu_model = keras.models.Sequential()
selu_model.add(keras.layers.Flatten(input_shape=(32, 32, 3)))

for _ in range(20):
    selu_model.add(keras.layers.Dense(100,
                                      kernel_initializer="lecun_normal",
                                      activation="selu"))
selu_model.add(keras.layers.Dense(10, activation="softmax"))

optimizer = keras.optimizers.Nadam(learning_rate=7e-4)
selu_model.compile(loss="sparse_categorical_crossentropy",
                   optimizer=optimizer,
                   metrics=["accuracy"])

selu_early_stopping_cb = keras.callbacks.EarlyStopping(patience=10)
selu_model_checkpoint_cb = keras.callbacks.ModelCheckpoint("selu_model.h5", save_best_only=True)
selu_run_index = 1  # increment every time you train the model
selu_run_logdir = os.path.join(os.curdir, "logdir", "run_selu_{:03d}".format(selu_run_index))
selu_tensorboard_cb = keras.callbacks.TensorBoard(selu_run_logdir)
selu_callbacks = [selu_early_stopping_cb, selu_model_checkpoint_cb, selu_tensorboard_cb]

X_means = X_train.mean(axis=0)
X_stds = X_train.std(axis=0)
X_train_scaled = (X_train - X_means) / X_stds
X_val_scaled = (X_val - X_means) / X_stds
X_test_scaled = (X_test - X_means) / X_stds

selu_model.fit(X_train_scaled, y_train,
               epochs=100,
               validation_data=(X_val_scaled, y_val),
               callbacks=selu_callbacks)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100


<keras.callbacks.History at 0x170495ca550>

In [18]:
selu_model = keras.models.load_model("selu_model.h5")
selu_model.evaluate(X_test_scaled, y_test)



[1.485149621963501, 0.48420000076293945]

### e. Try regularizing the model with alpha dropout. Then, without retraining your model, see if you can achieve better accuracy using MC Dropout.

In [19]:
alpha_dropout_model = keras.models.Sequential()
alpha_dropout_model.add(keras.layers.Flatten(input_shape=[32, 32, 3]))
for _ in range(20):
    alpha_dropout_model.add(keras.layers.Dense(100,
                                               kernel_initializer="lecun_normal",
                                               activation="selu"))

alpha_dropout_model.add(keras.layers.AlphaDropout(rate=0.1))
alpha_dropout_model.add(keras.layers.Dense(10, activation="softmax"))

optimizer = keras.optimizers.Nadam(learning_rate=5e-4)
alpha_dropout_model.compile(loss="sparse_categorical_crossentropy",
                            optimizer=optimizer,
                            metrics=["accuracy"])

alpha_dropout_early_stopping_cb = keras.callbacks.EarlyStopping(patience=10)
alpha_dropout_model_checkpoint_cb = keras.callbacks.ModelCheckpoint("alpha_dropout_model.h5", save_best_only=True)
alpha_dropout_run_index = 1  # increment every time you train the model
alpha_dropout_run_logdir = os.path.join(os.curdir, "logdir", "run_alpha_dropout_{:03d}".format(alpha_dropout_run_index))
alpha_dropout_tensorboard_cb = keras.callbacks.TensorBoard(alpha_dropout_run_logdir)
alpha_dropout_callbacks = [alpha_dropout_early_stopping_cb, alpha_dropout_model_checkpoint_cb,
                           alpha_dropout_tensorboard_cb]

X_means = X_train.mean(axis=0)
X_stds = X_train.std(axis=0)
X_train_scaled = (X_train - X_means) / X_stds
X_val_scaled = (X_val - X_means) / X_stds
X_test_scaled = (X_test - X_means) / X_stds

alpha_dropout_model.fit(X_train_scaled, y_train, epochs=100,
                        validation_data=(X_val_scaled, y_val),
                        callbacks=alpha_dropout_callbacks)

alpha_dropout_model = keras.models.load_model("alpha_dropout_model.h5")
alpha_dropout_model.evaluate(X_test_scaled, y_test)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100


[1.4961271286010742, 0.4977000057697296]

In [20]:
class MCAlphaDropout(keras.layers.AlphaDropout):
    def call(self, inputs):
        return super().call(inputs, training=True)

mc_model = keras.models.Sequential([
    MCAlphaDropout(layer.rate) if isinstance(layer, keras.layers.AlphaDropout) else layer
    for layer in model.layers
])

In [21]:
import numpy as np

def mc_dropout_predict_probas(mc_model, X, n_samples=10):
    Y_probas = [mc_model.predict(X) for _ in range(n_samples)]
    return np.mean(Y_probas, axis=0)

def mc_dropout_predict_classes(mc_model, X, n_samples=10):
    Y_probas = mc_dropout_predict_probas(mc_model, X, n_samples)
    return np.argmax(Y_probas, axis=1)

In [23]:
y_pred = mc_dropout_predict_classes(mc_model, X_val_scaled)
accuracy = np.mean(y_pred == y_val[:, 0])
accuracy



0.1015