In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import tensorflow as tf
from tensorflow import keras

# Load Fashion-MNIST through Keras; it arrives already split into train and test.
(x_train_full, y_train_full), (x_test, y_test) = keras.datasets.fashion_mnist.load_data()

# Divide by 255.0 (presumably to bring 8-bit intensities into [0, 1] -- confirm).
x_train_full, x_test = x_train_full / 255.0, x_test / 255.0

# The first 5000 training samples are held out for validation, the rest are used for training.
x_valid, y_valid = x_train_full[:5000], y_train_full[:5000]
x_train, y_train = x_train_full[5000:], y_train_full[5000:]

# Per-pixel statistics are computed on the training split only and then
# reused to standardise all three splits.
pixel_means = x_train.mean(axis=0, keepdims=True)
pixel_stds = x_train.std(axis=0, keepdims=True)
x_train_scaled, x_valid_scaled, x_test_scaled = (
    (split - pixel_means) / pixel_stds
    for split in (x_train, x_valid, x_test)
)

$\ell_1$ and $\ell_2$ Regularization

In [2]:
# Three stand-alone examples of attaching a weight penalty to a Dense layer.
# Each assignment rebinds `layer`, so only the last one survives the cell.

# l1 regularization
layer = keras.layers.Dense(
    units=100,
    activation='elu',
    kernel_initializer='he_normal',
    kernel_regularizer=keras.regularizers.l1(0.1),
)

# l2 regularization
layer = keras.layers.Dense(
    units=100,
    activation='elu',
    kernel_initializer='he_normal',
    kernel_regularizer=keras.regularizers.l2(0.01),
)

# l1 and l2 regularization combined
layer = keras.layers.Dense(
    units=100,
    activation='elu',
    kernel_initializer='he_normal',
    kernel_regularizer=keras.regularizers.l1_l2(l1=0.1, l2=0.01),
)

In [3]:
# Classifier in which every Dense kernel carries its own l2(0.01) penalty,
# spelled out layer by layer.
model = keras.models.Sequential()
model.add(keras.layers.Flatten(input_shape=[28, 28]))
model.add(keras.layers.Dense(300, activation='elu',
                             kernel_initializer='he_normal',
                             kernel_regularizer=keras.regularizers.l2(0.01)))
model.add(keras.layers.Dense(100, activation='elu',
                             kernel_initializer='he_normal',
                             kernel_regularizer=keras.regularizers.l2(0.01)))
# The output layer keeps the default initializer; only the penalty is added.
model.add(keras.layers.Dense(10, activation='softmax',
                             kernel_regularizer=keras.regularizers.l2(0.01)))

model.compile(optimizer='nadam',
              loss='sparse_categorical_crossentropy',
              metrics=['acc'])

n_epochs = 2
history = model.fit(
    x_train_scaled,
    y_train,
    validation_data=(x_valid_scaled, y_valid),
    epochs=n_epochs,
)

Epoch 1/2
Epoch 2/2


In [4]:
from functools import partial

# Factory for Dense layers that share the hidden-layer defaults used in this
# notebook: ELU activation, He-normal initialisation and an l2(0.01) kernel
# penalty. Any keyword passed at call time overrides the preset value.
RegularizedDense = partial(keras.layers.Dense,
                           activation='elu',
                           kernel_initializer='he_normal',
                           kernel_regularizer=keras.regularizers.l2(0.01))

model = keras.models.Sequential([
    keras.layers.Flatten(input_shape=[28, 28]),
    RegularizedDense(300),
    RegularizedDense(100),
    # The softmax output must override BOTH preset hidden-layer settings.
    # Without the explicit initializer it would silently inherit 'he_normal'
    # from the partial, unlike the hand-written version of this model, which
    # leaves the output layer on the Dense default ('glorot_uniform').
    RegularizedDense(10, activation='softmax',
                     kernel_initializer='glorot_uniform')
])

model.compile(loss='sparse_categorical_crossentropy',
              optimizer='nadam', metrics=['acc'])
n_epochs = 2
history = model.fit(x_train_scaled, y_train, epochs=n_epochs,
                    validation_data=(x_valid_scaled, y_valid))

Epoch 1/2
Epoch 2/2


Dropout

In [5]:
# Same architecture as before, but regularised with dropout instead of a
# weight penalty: a Dropout layer with rate 0.2 sits in front of every Dense layer.
model = keras.models.Sequential([
    keras.layers.Flatten(input_shape=[28, 28]),
    keras.layers.Dropout(0.2),
    keras.layers.Dense(units=300, kernel_initializer='he_normal', activation='elu'),
    keras.layers.Dropout(0.2),
    keras.layers.Dense(units=100, kernel_initializer='he_normal', activation='elu'),
    keras.layers.Dropout(0.2),
    keras.layers.Dense(units=10, activation='softmax'),
])

model.compile(
    optimizer='nadam',
    loss='sparse_categorical_crossentropy',
    metrics=['acc'],
)

n_epochs = 2
history = model.fit(
    x_train_scaled,
    y_train,
    validation_data=(x_valid_scaled, y_valid),
    epochs=n_epochs,
)

Epoch 1/2
Epoch 2/2


Alpha Dropout

In [6]:
# Seed both the NumPy and the TensorFlow global random generators with 42
# before building and training the next model.
np.random.seed(42)
tf.random.set_seed(42)

In [7]:
# SELU network: AlphaDropout (rate 0.2) in front of every Dense layer, with
# 'lecun_normal' initialisation on the two SELU hidden layers.
model = keras.models.Sequential()
model.add(keras.layers.Flatten(input_shape=[28, 28]))
model.add(keras.layers.AlphaDropout(0.2))
model.add(keras.layers.Dense(300, kernel_initializer='lecun_normal', activation='selu'))
model.add(keras.layers.AlphaDropout(0.2))
model.add(keras.layers.Dense(100, kernel_initializer='lecun_normal', activation='selu'))
model.add(keras.layers.AlphaDropout(0.2))
model.add(keras.layers.Dense(10, activation='softmax'))

# SGD with Nesterov momentum.
optimizer = keras.optimizers.SGD(
    learning_rate=0.01,
    momentum=0.9,
    nesterov=True,
)
model.compile(
    optimizer=optimizer,
    loss='sparse_categorical_crossentropy',
    metrics=['acc'],
)

n_epochs = 20
history = model.fit(
    x_train_scaled,
    y_train,
    validation_data=(x_valid_scaled, y_valid),
    epochs=n_epochs,
)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [8]:
# Loss and 'acc' metric of the trained model on the standardised test split.
model.evaluate(x=x_test_scaled, y=y_test)



[0.48730704188346863, 0.8565999865531921]

In [9]:
# Same evaluation on the training split, for comparison with the test figures.
model.evaluate(x=x_train_scaled, y=y_train)



[0.36193376779556274, 0.8800545334815979]

In [10]:
# Continue training the same model. No `epochs` or validation data are
# passed, so Keras falls back to its defaults. This rebinds `history`.
history = model.fit(x=x_train_scaled, y=y_train)



MC Dropout

In [11]:
# Re-seed NumPy and TensorFlow with 42 before the stochastic forward passes below.
np.random.seed(42)
tf.random.set_seed(42)

In [12]:
# Run 100 forward passes over the test set with training=True, so the dropout
# layers stay active, and stack the results along a new leading axis.
y_probas = np.stack([
    model(x_test_scaled, training=True)
    for _ in range(100)
])

# Mean and standard deviation across the 100 stochastic passes.
y_proba = np.mean(y_probas, axis=0)
y_std = np.std(y_probas, axis=0)

In [13]:
# Ordinary (non-stochastic) prediction for the first test sample, rounded to 2 decimals.
model.predict(x_test_scaled[:1]).round(2)

array([[0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.01, 0.  , 0.99]],
      dtype=float32)

In [14]:
# Every stochastic pass for the first test sample, rounded to 2 decimals.
y_probas[:, :1].round(2)

array([[[0.  , 0.  , 0.  , 0.  , 0.  , 0.01, 0.  , 0.57, 0.  , 0.42]],

       [[0.  , 0.  , 0.  , 0.  , 0.  , 0.01, 0.  , 0.96, 0.  , 0.03]],

       [[0.  , 0.  , 0.  , 0.  , 0.  , 0.03, 0.  , 0.  , 0.  , 0.97]],

       [[0.  , 0.  , 0.  , 0.  , 0.  , 0.01, 0.  , 0.2 , 0.  , 0.79]],

       [[0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.6 , 0.  , 0.39]],

       [[0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.49, 0.  , 0.51]],

       [[0.  , 0.  , 0.  , 0.  , 0.  , 0.08, 0.  , 0.49, 0.  , 0.43]],

       [[0.  , 0.  , 0.  , 0.  , 0.  , 0.08, 0.  , 0.24, 0.  , 0.68]],

       [[0.  , 0.  , 0.  , 0.  , 0.  , 0.24, 0.  , 0.04, 0.  , 0.71]],

       [[0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.99]],

       [[0.  , 0.  , 0.  , 0.  , 0.  , 0.02, 0.  , 0.1 , 0.  , 0.88]],

       [[0.  , 0.  , 0.  , 0.  , 0.  , 0.1 , 0.  , 0.15, 0.  , 0.75]],

       [[0.  , 0.  , 0.  , 0.  , 0.  , 0.01, 0.  , 0.27, 0.  , 0.72]],

       [[0.  , 0.  , 0.  , 0.  , 0.  , 0.17, 0.  , 0.13, 0.  , 0

In [15]:
# Class probabilities for the first test sample, averaged over the stochastic passes.
y_proba[:1].round(2)

array([[0.  , 0.  , 0.  , 0.  , 0.  , 0.09, 0.  , 0.2 , 0.  , 0.7 ]],
      dtype=float32)

In [16]:
# Spread of the stochastic passes for the first test sample.
y_std[:1].round(2)

array([[0.  , 0.  , 0.  , 0.  , 0.  , 0.17, 0.01, 0.22, 0.  , 0.28]],
      dtype=float32)

In [17]:
# Predicted class = index of the largest averaged probability in each row.
y_pred = y_proba.argmax(axis=1)

In [18]:
# Fraction of test samples whose averaged-probability prediction matches the label.
accuracy = (y_pred == y_test).mean()
accuracy

0.8647

In [19]:
class MCDropout(keras.layers.Dropout):
    """Dropout layer that stays active at inference time (Monte Carlo Dropout).

    Behaves like ``keras.layers.Dropout`` except that the forward pass always
    runs in training mode, whatever the caller asks for.
    """

    def call(self, inputs, training=None):
        """Apply dropout to ``inputs`` with training mode forced on.

        Args:
            inputs: Tensor passed through to the parent ``Dropout.call``.
            training: Accepted so the override stays signature-compatible with
                the parent layer's ``call``; its value is deliberately ignored.

        Returns:
            Whatever the parent ``call`` returns when ``training=True``.
        """
        # Always force training mode so dropout is applied during prediction too.
        return super().call(inputs, training=True)
    
class MCAlphaDropout(keras.layers.AlphaDropout):
    """AlphaDropout layer that stays active at inference time (Monte Carlo Dropout).

    Behaves like ``keras.layers.AlphaDropout`` except that the forward pass
    always runs in training mode, whatever the caller asks for.
    """

    def call(self, inputs, training=None):
        """Apply alpha dropout to ``inputs`` with training mode forced on.

        Args:
            inputs: Tensor passed through to the parent ``AlphaDropout.call``.
            training: Accepted so the override stays signature-compatible with
                the parent layer's ``call``; its value is deliberately ignored.

        Returns:
            Whatever the parent ``call`` returns when ``training=True``.
        """
        # Always force training mode so dropout is applied during prediction too.
        return super().call(inputs, training=True)

In [20]:
# Re-seed NumPy and TensorFlow with 42 before building the MC-dropout model.
np.random.seed(42)
tf.random.set_seed(42)

In [21]:
# Rebuild the trained network with every AlphaDropout layer swapped for an
# MCAlphaDropout of the same rate. All other layers are reused as-is: the very
# same layer objects end up in both `model` and `mc_model`.
mc_model = keras.models.Sequential(
    [
        lyr if not isinstance(lyr, keras.layers.AlphaDropout) else MCAlphaDropout(lyr.rate)
        for lyr in model.layers
    ]
)

mc_model.summary()

Model: "sequential_4"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
flatten_3 (Flatten)          (None, 784)               0         
_________________________________________________________________
mc_alpha_dropout (MCAlphaDro (None, 784)               0         
_________________________________________________________________
dense_12 (Dense)             (None, 300)               235500    
_________________________________________________________________
mc_alpha_dropout_1 (MCAlphaD (None, 300)               0         
_________________________________________________________________
dense_13 (Dense)             (None, 100)               30100     
_________________________________________________________________
mc_alpha_dropout_2 (MCAlphaD (None, 100)               0         
_________________________________________________________________
dense_14 (Dense)             (None, 10)               

In [22]:
# Compile the MC-dropout model with a fresh Nesterov-momentum SGD optimizer.
optimizer = keras.optimizers.SGD(
    learning_rate=0.01,
    momentum=0.9,
    nesterov=True,
)
mc_model.compile(
    optimizer=optimizer,
    loss='sparse_categorical_crossentropy',
    metrics=['acc'],
)

In [23]:
# Copy the trained model's weights into the MC-dropout model.
mc_model.set_weights(weights=model.get_weights())

In [24]:
# Average 100 predictions for the first test sample. Dropout is always on in
# mc_model, so each call to predict() is a different stochastic pass.
np.stack(
    [mc_model.predict(x_test_scaled[:1]) for _ in range(100)]
).mean(axis=0).round(2)

array([[0.  , 0.  , 0.  , 0.  , 0.  , 0.13, 0.  , 0.23, 0.  , 0.65]],
      dtype=float32)

Max-Norm Regularization

In [25]:
# Dense layer whose kernel is constrained with max_norm(1.).
layer = keras.layers.Dense(
    units=100,
    activation='selu',
    kernel_initializer='lecun_normal',
    kernel_constraint=keras.constraints.max_norm(1.),
)

In [26]:
# Factory for SELU Dense layers with 'lecun_normal' initialisation and a
# max_norm(1.) kernel constraint; only the unit count needs to be passed.
MaxNormDense = partial(
    keras.layers.Dense,
    kernel_constraint=keras.constraints.max_norm(1.),
    kernel_initializer='lecun_normal',
    activation='selu',
)

In [27]:
# Classifier whose two hidden layers are max-norm constrained; the softmax
# output layer is a plain Dense layer without the constraint.
model = keras.models.Sequential()
model.add(keras.layers.Flatten(input_shape=[28, 28]))
model.add(MaxNormDense(300))
model.add(MaxNormDense(100))
model.add(keras.layers.Dense(10, activation='softmax'))

model.compile(
    optimizer='nadam',
    loss='sparse_categorical_crossentropy',
    metrics=['acc'],
)

n_epochs = 2
history = model.fit(
    x_train_scaled,
    y_train,
    validation_data=(x_valid_scaled, y_valid),
    epochs=n_epochs,
)

Epoch 1/2
Epoch 2/2
