In [31]:
import tensorflow as tf
import tensorboard
import numpy as np
from pathlib import Path
import keras.layers

In [None]:
# model

model = tf.keras.Sequential()
model.add(tf.keras.layers.Input(shape=(32, 32, 3)))
model.add(tf.keras.layers.Flatten())
for _ in range(20):
    model.add(
        tf.keras.layers.Dense(100, activation="swish", kernel_initializer="he_normal")
    )
model.add(tf.keras.layers.Dense(10, activation="softmax"))

optimizer = tf.keras.optimizers.Nadam(learning_rate=5e-5)
model.compile(
    loss="sparse_categorical_crossentropy", optimizer=optimizer, metrics=["accuracy"]
)

In [6]:
cifar_10 = tf.keras.datasets.cifar10.load_data()
(x_train_full, y_train_full), (x_test, y_test) = cifar_10

x_train = x_train_full[5000:]
y_train = y_train_full[5000:]
x_valid = x_train_full[:5000]
y_valid = y_train_full[:5000]

Downloading data from https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz
[1m170498071/170498071[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m77s[0m 0us/step


In [23]:
# callbacks
early_stopping_cb = tf.keras.callbacks.EarlyStopping(
    patience=20, restore_best_weights=True
)
model_checkpoint_cb = tf.keras.callbacks.ModelCheckpoint(
    "my_cifar_10_model.keras", save_best_only=True
)
run_index = 1  # increment everytime you train the model
run_logdir = Path() / "my_cifar10_logs" / f"run_{run_index}:03d"
tensorboard_cb = tf.keras.callbacks.TensorBoard(run_logdir)
callbacks = [early_stopping_cb, model_checkpoint_cb, tensorboard_cb]

In [24]:
%load_ext tensorboard
%tensorboard --logdir=./my_cifar10_logs

In [25]:
# model fitting

model.fit(
    x_train,
    y_train,
    epochs=100,
    validation_data=(x_valid, y_valid),
    callbacks=callbacks,
)

Epoch 1/100
[1m1407/1407[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 5ms/step - accuracy: 0.1256 - loss: 18.9208 - val_accuracy: 0.1952 - val_loss: 2.2330
Epoch 2/100
[1m1407/1407[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 5ms/step - accuracy: 0.2080 - loss: 2.1659 - val_accuracy: 0.2534 - val_loss: 2.0229
Epoch 3/100
[1m1407/1407[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 5ms/step - accuracy: 0.2536 - loss: 2.0305 - val_accuracy: 0.2786 - val_loss: 1.9452
Epoch 4/100
[1m1407/1407[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 5ms/step - accuracy: 0.2804 - loss: 1.9398 - val_accuracy: 0.3114 - val_loss: 1.8850
Epoch 5/100
[1m1407/1407[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 5ms/step - accuracy: 0.3117 - loss: 1.8857 - val_accuracy: 0.3158 - val_loss: 1.9149
Epoch 6/100
[1m1407/1407[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 5ms/step - accuracy: 0.3394 - loss: 1.8182 - val_accuracy: 0.3498 - val_loss: 1.7753
Epoch 7/

<keras.src.callbacks.history.History at 0x316824bf0>

In [26]:
model.evaluate(x_valid, y_valid)

[1m157/157[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.4788 - loss: 1.5196


[1.5131490230560303, 0.47380000352859497]

In [None]:
# adding batch normalization

model = tf.keras.Sequential()
model.add(tf.keras.layers.Input(shape=(32, 32, 3)))
model.add(tf.keras.layers.Flatten())
for _ in range(20):
    model.add(tf.keras.layers.Dense(100, kernel_initializer="he_normal"))
    model.add(tf.keras.layers.BatchNormalization())
    model.add(tf.keras.layers.Activation("swish"))

model.add(tf.keras.layers.Dense(10, activation="softmax"))

optimizer = tf.keras.optimizers.Nadam(learning_rate=5e-4)
model.compile(
    loss="sparse_categorical_crossentropy", optimizer=optimizer, metrics=["accuracy"]
)

# callbacks

early_stopping_cb = tf.keras.callbacks.EarlyStopping(
    patience=10, restore_best_weights=True
)
run_index = 1  # increment every time the model is run
run_logdir = Path() / "my_cifar10_logs_bn" / f"run_{run_index:03d}"
tensorboard_cb_bn = tf.keras.callbacks.TensorBoard(run_logdir)
callbacks = [early_stopping_cb, model_checkpoint_cb, tensorboard_cb_bn]

model.fit(
    x_train,
    y_train,
    epochs=100,
    validation_data=(x_valid, y_valid),
    callbacks=callbacks,
)

model.evaluate(x_valid, y_valid)

Epoch 1/100
[1m1407/1407[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m18s[0m 7ms/step - accuracy: 0.1979 - loss: 2.1950 - val_accuracy: 0.3038 - val_loss: 1.9209
Epoch 2/100
[1m1407/1407[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 7ms/step - accuracy: 0.3553 - loss: 1.7968 - val_accuracy: 0.3276 - val_loss: 1.8665
Epoch 3/100
[1m1407/1407[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 7ms/step - accuracy: 0.3990 - loss: 1.6839 - val_accuracy: 0.3404 - val_loss: 1.9136
Epoch 4/100
[1m1407/1407[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 7ms/step - accuracy: 0.4212 - loss: 1.6184 - val_accuracy: 0.3880 - val_loss: 1.7043
Epoch 5/100
[1m1407/1407[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 7ms/step - accuracy: 0.4427 - loss: 1.5656 - val_accuracy: 0.3902 - val_loss: 1.7118
Epoch 6/100
[1m1407/1407[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 7ms/step - accuracy: 0.4626 - loss: 1.5138 - val_accuracy: 0.3938 - val_loss: 1.6807
Epoc

[1.434305191040039, 0.5005999803543091]

In [29]:
# bn with selu and le cun initializer

model = tf.keras.Sequential()
model.add(tf.keras.layers.Input(shape=(32, 32, 3)))
model.add(tf.keras.layers.Flatten())
for _ in range(20):
    model.add(
        tf.keras.layers.Dense(100, kernel_initializer="lecun_normal", activation="selu")
    )

model.add(tf.keras.layers.Dense(10, activation="softmax"))

optimizer = tf.keras.optimizers.Nadam(learning_rate=7e-4)
model.compile(
    loss="sparse_categorical_crossentropy", optimizer=optimizer, metrics=["accuracy"]
)

model_checkpoint_cb = tf.keras.callbacks.ModelCheckpoint(
    filepath="my_cifar10_selu_model.keras", save_best_only=True
)

# callbacks

early_stopping_cb = tf.keras.callbacks.EarlyStopping(
    patience=20, restore_best_weights=True
)

run_index = 1  # increment every time the model is run
run_logdir = Path() / "my_cifar10_logs" / f"run_selu{run_index:03d}"
tensorboard_cb_bn = tf.keras.callbacks.TensorBoard(run_logdir)
callbacks = [early_stopping_cb, model_checkpoint_cb, tensorboard_cb_bn]


x_means = x_train.mean(axis=0)
x_stds = x_train.std(axis=0)
x_train_scaled = (x_train - x_means) / x_stds
x_valid_scaled = (x_valid - x_means) / x_stds
x_test_scaled = (x_test - x_means) / x_stds


model.fit(
    x_train_scaled,
    y_train,
    epochs=100,
    validation_data=(x_valid_scaled, y_valid),
    callbacks=callbacks,
)

model.evaluate(x_valid_scaled, y_valid)

Epoch 1/100
[1m1407/1407[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 6ms/step - accuracy: 0.2764 - loss: 2.0251 - val_accuracy: 0.3864 - val_loss: 1.7309
Epoch 2/100
[1m1407/1407[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 6ms/step - accuracy: 0.3910 - loss: 1.7166 - val_accuracy: 0.4072 - val_loss: 1.6704
Epoch 3/100
[1m1407/1407[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 6ms/step - accuracy: 0.4296 - loss: 1.6241 - val_accuracy: 0.4288 - val_loss: 1.6412
Epoch 4/100
[1m1407/1407[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 6ms/step - accuracy: 0.4579 - loss: 1.5462 - val_accuracy: 0.4354 - val_loss: 1.6265
Epoch 5/100
[1m1407/1407[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 6ms/step - accuracy: 0.4774 - loss: 1.4901 - val_accuracy: 0.4556 - val_loss: 1.5571
Epoch 6/100
[1m1407/1407[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 6ms/step - accuracy: 0.4841 - loss: 1.4629 - val_accuracy: 0.4486 - val_loss: 1.5862
Epoch 7/1

[1.4850661754608154, 0.49639999866485596]

In [32]:
# adding dropout in layers using mc dropout
tf.random.set_seed(42)

model = tf.keras.Sequential()
model.add(tf.keras.layers.Input(shape=(32, 32, 3)))
model.add(tf.keras.layers.Flatten())
for _ in range(20):
    model.add(
        tf.keras.layers.Dense(100, kernel_initializer="lecun_normal", activation="selu")
    )

model.add(tf.keras.layers.AlphaDropout(rate=0.1))
model.add(tf.keras.layers.Dense(10, activation="softmax"))

optimizer = tf.keras.optimizers.Nadam(learning_rate=5e-4)
model.compile(
    loss="sparse_categorical_crossentropy", optimizer=optimizer, metrics=["accuracy"]
)

model_checkpoint_cb = tf.keras.callbacks.ModelCheckpoint(
    filepath="my_cifar10_alpha_dropout_model.keras", save_best_only=True
)

# callbacks

early_stopping_cb = tf.keras.callbacks.EarlyStopping(
    patience=20, restore_best_weights=True
)

run_index = 2  # increment every time the model is run
run_logdir = Path() / "my_cifar10_logs" / f"run_alpha_dropout_{run_index:03d}"
tensorboard_cb_bn = tf.keras.callbacks.TensorBoard(run_logdir)
callbacks = [early_stopping_cb, model_checkpoint_cb, tensorboard_cb_bn]


x_means = x_train.mean(axis=0)
x_stds = x_train.std(axis=0)
x_train_scaled = (x_train - x_means) / x_stds
x_valid_scaled = (x_valid - x_means) / x_stds
x_test_scaled = (x_test - x_means) / x_stds


model.fit(
    x_train_scaled,
    y_train,
    epochs=100,
    validation_data=(x_valid_scaled, y_valid),
    callbacks=callbacks,
)

model.evaluate(x_valid_scaled, y_valid)

Epoch 1/100
[1m1407/1407[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 6ms/step - accuracy: 0.2784 - loss: 2.0520 - val_accuracy: 0.3952 - val_loss: 1.7297
Epoch 2/100
[1m1407/1407[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 6ms/step - accuracy: 0.3967 - loss: 1.7029 - val_accuracy: 0.4302 - val_loss: 1.6768
Epoch 3/100
[1m1407/1407[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 6ms/step - accuracy: 0.4396 - loss: 1.5975 - val_accuracy: 0.4548 - val_loss: 1.5967
Epoch 4/100
[1m1407/1407[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 6ms/step - accuracy: 0.4658 - loss: 1.5258 - val_accuracy: 0.4576 - val_loss: 1.6492
Epoch 5/100
[1m1407/1407[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 6ms/step - accuracy: 0.4866 - loss: 1.4720 - val_accuracy: 0.4728 - val_loss: 1.6005
Epoch 6/100
[1m1407/1407[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 6ms/step - accuracy: 0.5073 - loss: 1.4186 - val_accuracy: 0.4662 - val_loss: 1.6430
Epoch 7/1

[1.5756477117538452, 0.49300000071525574]

In [34]:
# using mc dropout


class MCAlphaDropout(tf.keras.layers.AlphaDropout):
    def call(self, inputs):
        return super().call(inputs, training=True)

In [35]:
mc_model = tf.keras.Sequential(
    [
        (
            MCAlphaDropout(layer.rate)
            if isinstance(layer, tf.keras.layers.AlphaDropout)
            else layer
        )
        for layer in model.layers
    ]
)

In [38]:
def mc_dropout_predict_probas(mc_model, x, n_samples=10):
    y_probas = [mc_model.predict(x) for sample in range(n_samples)]
    return np.mean(y_probas, axis=0)


def mc_dropout_predict_classes(mc_model, x, n_samples=10):
    y_probas = mc_dropout_predict_probas(mc_model, x, n_samples)
    return y_probas.argmax(axis=1)

In [39]:
tf.random.set_seed(42)

y_preds = mc_dropout_predict_classes(mc_model, x_valid_scaled)
accuracy = (y_preds == y_valid[:, 0]).mean()
accuracy

[1m157/157[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
[1m157/157[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step
[1m157/157[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step
[1m157/157[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step
[1m157/157[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step
[1m157/157[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step
[1m157/157[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
[1m157/157[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step
[1m157/157[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step
[1m157/157[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step


0.4916

In [None]:
# retaining a model using 1cycle scheduling

tf.random.set_seed(42)

model = tf.keras.Sequential()
model.add(tf.keras.layers.Input(shape=(32, 32, 3)))
model.add(tf.keras.layers.Flatten())

for _ in range(20):
    model.add(
        tf.keras.layers.Dense(100, kernel_initializer="lecun_normal", activation="selu")
    )

model.add(keras.layers.AlphaDropout(rate=0.1))
model.add(tf.keras.layers.Dense(10, activation="softmax"))

optimizer = tf.keras.optimizers.SGD()
model.compile(
    loss="sparse_categorical_crossentropy", optimizer=optimizer, metrics=["accuracy"]
)