In [None]:
import functools

import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

## Load Data

In [None]:
# Load CIFAR-10 and unpack it into the full training pair and the test pair (inputs X, labels y).
(X_train_full, y_train_full), (X_test, y_test) = tf.keras.datasets.cifar10.load_data()

In [None]:
# Array shapes of the full training split and the test split, one per line.
print(
    X_train_full.shape,
    y_train_full.shape,
    X_test.shape,
    y_test.shape,
    sep="\n",
)

In [None]:
# Hold out the last 10,000 examples of the full training split for validation;
# everything before them stays in the training set.
X_train, y_train = X_train_full[:-10000], y_train_full[:-10000]
X_valid, y_valid = X_train_full[-10000:], y_train_full[-10000:]

In [None]:
# Array shapes after the train/validation split, one per line.
print(
    X_train.shape,
    y_train.shape,
    X_valid.shape,
    y_valid.shape,
    sep="\n",
)

## Some Data Exploration

In [None]:
# Training examples per category: one row per class, columns are (label, count).
np.column_stack(np.unique(y_train, return_counts=True))

In [None]:
# Class names in label order: class_names[k] is the name of integer label k.
class_names = [
    'airplane', 'automobile', 'bird', 'cat', 'deer',
    'dog', 'frog', 'horse', 'ship', 'truck',
]

# Preview the first 25 training images in a 5x5 grid, each titled with its class name.
plt.figure(figsize=(10, 10))

for i in range(25):
  ax = plt.subplot(5, 5, i + 1)
  ax.imshow(X_train[i])
  # Each label is a length-1 row, hence the extra [0].
  ax.set_title(f"{class_names[y_train[i][0]]}")
  ax.axis("off")

## Build a model

In [None]:
# Conv2D factory with the settings shared by every convolution in this notebook:
# 3x3 kernels, stride 1, 'same' padding and ReLU activation. Callers supply `filters`
# (and may override any of these defaults by keyword).
DefaultConv2D = functools.partial(
    tf.keras.layers.Conv2D,
    kernel_size=(3, 3),
    strides=(1, 1),
    padding='same',
    activation='relu',
)

# Clear the Keras backend session before defining the baseline model factory.
tf.keras.backend.clear_session()
def get_model():
  """Build the (uncompiled) baseline CNN for 32x32x3 inputs.

  Architecture: inputs scaled by 1/255, two [conv, conv, max-pool] blocks with
  32 and then 64 filters, a 128-unit ReLU dense layer and a 10-way softmax output.

  Returns:
    A new `tf.keras.models.Sequential` instance.
  """
  return tf.keras.models.Sequential([
      tf.keras.layers.Rescaling(scale=1./255, input_shape=(32,32,3)),

      # Block 1: 32 filters
      DefaultConv2D(filters=32),
      DefaultConv2D(filters=32),
      tf.keras.layers.MaxPool2D(pool_size=2),

      # Block 2: 64 filters
      DefaultConv2D(filters=64),
      DefaultConv2D(filters=64),
      tf.keras.layers.MaxPool2D(pool_size=2),

      # Classifier head
      tf.keras.layers.Flatten(),
      tf.keras.layers.Dense(units=128, activation='relu'),
      tf.keras.layers.Dense(units=10, activation='softmax'),
  ])

In [None]:
# Instantiate the baseline CNN and print its layer-by-layer summary.
model = get_model()
model.summary()

## Compile the model

In [None]:
# Adam with learning rate 1e-3. The labels are integer class ids (not one-hot),
# hence the *sparse* categorical cross-entropy loss.
optimizer = tf.keras.optimizers.Adam(learning_rate=0.001)
model.compile(
    loss='sparse_categorical_crossentropy',
    optimizer=optimizer,
    metrics=['accuracy'],
)

In [None]:
# Early stopping on validation accuracy: min_delta=0.001, patience=5 epochs,
# restoring the best weights seen. This callback object is reused by the later fits.
early_stopping = tf.keras.callbacks.EarlyStopping(
    monitor="val_accuracy",
    patience=5,
    min_delta=0.001,
    restore_best_weights=True,
)

# Train the baseline for at most 100 epochs; early stopping decides the actual length.
history = model.fit(
    x=X_train,
    y=y_train,
    batch_size=32,
    epochs=100,
    validation_data=(X_valid, y_valid),
    callbacks=[early_stopping],
)

In [None]:
def plot_learning_curves(history):
  """Plot the recorded training curves against the 1-based epoch number.

  Args:
    history: object with a `.history` dict (metric name -> per-epoch values,
      must contain 'loss') and an `.epoch` sequence of epoch indices, such as
      the value returned by `model.fit`.

  Only the first four entries of `history.history` are drawn, because they are
  paired with four fixed line styles. The y-axis is fixed to [0, 1], so values
  above 1 fall outside the visible area.
  """
  line_styles = ["r-o", "r-*", "b-o", "b-*"]
  # Shift to 1-based epoch numbers once, instead of once per curve.
  epoch_numbers = np.array(history.epoch) + 1

  plt.figure(figsize=(8, 5))
  for (metric_name, values), line_style in zip(history.history.items(), line_styles):
    plt.plot(epoch_numbers, values, line_style, label=metric_name)
  plt.xlabel("Epoch")
  plt.axis([1, len(history.history['loss']), 0., 1])
  plt.legend(loc="lower left")
  plt.grid()

In [None]:
# Learning curves of the baseline model's training run.
plot_learning_curves(history)

### Evaluate the model

In [None]:
# Loss and accuracy of the baseline model on the test set.
model.evaluate(X_test, y_test)

In [None]:
# Loss and accuracy of the baseline model on the validation set.
model.evaluate(X_valid, y_valid)

In [None]:
def evaluate_model(X, predictions, y_true):
  """Collect the examples the model classified incorrectly.

  Args:
    X: array of shape (n, *image_shape) holding the images.
    predictions: array of shape (n, num_classes) with per-class scores; the
      predicted class is the argmax over the last axis.
    y_true: actual integer labels, either of shape (n, 1) or flat (n,).

  Returns:
    Three lists of equal length, in dataset order:
    incorrectly_classified_images, predicted_labels, expected_labels.

  Raises:
    AssertionError: if the inputs disagree on n, or y_true does not hold
      exactly one label per example.
  """
  y_true = np.asarray(y_true)
  assert X.shape[0] == predictions.shape[0] == y_true.shape[0], "dimensions wrong"

  # Flatten so that (n, 1) column labels and (n,) flat labels are handled alike.
  y_expected = y_true.reshape(-1)
  assert y_expected.shape[0] == X.shape[0], "y_true must have shape (n,) or (n, 1)"

  y_predicted = np.argmax(predictions, axis=-1)

  # Indices of the misclassified examples, in increasing order.
  wrong_indices = np.flatnonzero(y_predicted != y_expected)

  incorrectly_classified_images = [X[i] for i in wrong_indices]
  predicted_labels = [y_predicted[i] for i in wrong_indices]
  expected_labels = [y_expected[i] for i in wrong_indices]

  return incorrectly_classified_images, predicted_labels, expected_labels

In [None]:
# Per-class softmax outputs of the baseline model for every test image.
y_test_predictions = model.predict(X_test)

In [None]:
# Misclassified test images together with their predicted and expected labels.
incorrectly_classified_images, predicted_labels, expected_labels = evaluate_model(
    X_test,
    y_test_predictions,
    y_test,
)

In [None]:
# Show up to 25 misclassified test images in a 5x5 grid.
# Title format: "p: <predicted class> e: <expected class>".
plt.figure(figsize=(12,12))

# Cap at the number of misclassified images so a model with fewer than 25 errors
# does not raise an IndexError.
n_shown = min(25, len(incorrectly_classified_images))

for i in range(n_shown):
  ax = plt.subplot(5, 5, i + 1)
  plt.imshow(incorrectly_classified_images[i])
  plt.title(f"p: {class_names[predicted_labels[i]]} e: {class_names[expected_labels[i]]}")
  plt.axis("off")

In [None]:
# Predicted class id per test image: the index of the highest softmax output.
y_test_class_predictions = y_test_predictions.argmax(axis=-1)
y_test_class_predictions.shape

In [None]:
# Shape of the true test labels, for comparison with the predictions' shape shown in the previous cell.
y_test.shape

In [None]:
# Confusion matrix computed by TensorFlow: rows are true labels, columns are predictions.
cm = tf.math.confusion_matrix(
    labels=y_test.squeeze(),
    predictions=y_test_class_predictions,
)

In [None]:
# Draw the confusion matrix as an annotated heatmap.
ax = plt.subplot()
# annot=True writes the count into each cell; fmt='g' disables scientific notation.
sns.heatmap(cm, annot=True, fmt='g', ax=ax, cbar=False)

# Axis labels, title and class-name tick labels.
ax.set(xlabel='Predicted labels', ylabel='True labels', title='Confusion Matrix')
ax.set_xticklabels(class_names, rotation=-45)
ax.set_yticklabels(class_names, rotation=0);

In [None]:
# Sanity check of the heatmap's axis labelling: rebuild the confusion matrix by hand,
# counting each (true label, predicted label) pair with rows = true, columns = predicted.
my_cm = np.zeros(shape=(10, 10), dtype=np.int32)
# np.add.at accumulates repeated index pairs, unlike plain fancy-index assignment.
np.add.at(my_cm, (y_test.squeeze(), y_test_class_predictions), 1)

my_cm

### Improve the model

This part of the assignment is a lot more open-ended than the parts above.



In [None]:
# Clear the Keras backend session before defining the next model variant.
tf.keras.backend.clear_session()
def get_model_additional_block():
  """Build the baseline CNN extended with a third [conv, conv, max-pool] block.

  The three blocks use 32, 64 and 128 filters; the classifier head (128-unit
  ReLU dense layer followed by a 10-way softmax) is the same as in `get_model`.

  Returns:
    A new, uncompiled `tf.keras.models.Sequential` instance.
  """
  layers = [tf.keras.layers.Rescaling(scale=1./255, input_shape=(32,32,3))]

  for n_filters in (32, 64, 128):
    layers.append(DefaultConv2D(filters=n_filters))
    layers.append(DefaultConv2D(filters=n_filters))
    layers.append(tf.keras.layers.MaxPool2D(pool_size=2))

  layers.append(tf.keras.layers.Flatten())
  layers.append(tf.keras.layers.Dense(units=128, activation='relu'))
  layers.append(tf.keras.layers.Dense(units=10, activation='softmax'))

  return tf.keras.models.Sequential(layers)

In [None]:
# Train the three-block CNN with the same optimizer settings, loss, batch size and
# early-stopping callback as the baseline. Note that `history` is overwritten.
model2 = get_model_additional_block()
optimizer = tf.keras.optimizers.Adam(learning_rate=0.001)
model2.compile(
    loss='sparse_categorical_crossentropy',
    optimizer=optimizer,
    metrics=['accuracy'],
)

history = model2.fit(
    x=X_train,
    y=y_train,
    batch_size=32,
    epochs=100,
    validation_data=(X_valid, y_valid),
    callbacks=[early_stopping],
)

The extra convolutional block doesn't seem to make much of a difference on the validation set.

In [None]:
# Clear the Keras backend session before defining the next model variant.
tf.keras.backend.clear_session()
def get_model_he():
  """Build the baseline CNN with He-uniform kernel initialization.

  Same layer stack as `get_model`, except that every ReLU layer (the four
  convolutions and the 128-unit dense layer) uses `kernel_initializer="he_uniform"`.
  The softmax output layer keeps its default initializer.

  Returns:
    A new, uncompiled `tf.keras.models.Sequential` instance.
  """
  # DefaultConv2D with the initializer pre-bound, so it is stated only once.
  HeConv2D = functools.partial(DefaultConv2D, kernel_initializer="he_uniform")

  return tf.keras.models.Sequential([
      tf.keras.layers.Rescaling(scale=1./255, input_shape=(32,32,3)),

      HeConv2D(filters=32),
      HeConv2D(filters=32),
      tf.keras.layers.MaxPool2D(pool_size=2),

      HeConv2D(filters=64),
      HeConv2D(filters=64),
      tf.keras.layers.MaxPool2D(pool_size=2),

      tf.keras.layers.Flatten(),
      tf.keras.layers.Dense(units=128, activation='relu',
                            kernel_initializer="he_uniform"),
      tf.keras.layers.Dense(units=10, activation='softmax'),
  ])

In [None]:
# Train the He-initialized baseline with the same optimizer settings, loss, batch size
# and early-stopping callback as the other models. Note that `history` is overwritten.
model3 = get_model_he()
optimizer = tf.keras.optimizers.Adam(learning_rate=0.001)
model3.compile(
    loss='sparse_categorical_crossentropy',
    optimizer=optimizer,
    metrics=['accuracy'],
)

history = model3.fit(
    x=X_train,
    y=y_train,
    batch_size=32,
    epochs=100,
    validation_data=(X_valid, y_valid),
    callbacks=[early_stopping],
)

In [None]:
# Validation-set evaluation (loss and accuracy) of every model trained so far, one line each.
for description, candidate in (
    ("Base model", model),
    ("Extra conv block model", model2),
    ("Base model with he initialization", model3),
):
  print(f"{description} {candidate.evaluate(X_valid, y_valid)}")


In [None]:
# Three-block CNN with a dropout layer after each max-pooling layer.
tf.keras.backend.clear_session()
def get_model_additional_block_with_dropout():
  """Build the three-block CNN with dropout (rate 0.2) after every max-pool.

  Each block is [conv, conv, max-pool, dropout] with 32, 64 and 128 filters,
  followed by a 128-unit ReLU dense layer and a 10-way softmax output.

  Returns:
    A new, uncompiled `tf.keras.models.Sequential` instance.
  """
  layers = [tf.keras.layers.Rescaling(scale=1./255, input_shape=(32,32,3))]

  for n_filters in (32, 64, 128):
    layers.append(DefaultConv2D(filters=n_filters))
    layers.append(DefaultConv2D(filters=n_filters))
    layers.append(tf.keras.layers.MaxPool2D(pool_size=2))
    layers.append(tf.keras.layers.Dropout(rate=0.2))

  layers.append(tf.keras.layers.Flatten())
  layers.append(tf.keras.layers.Dense(units=128, activation='relu'))
  layers.append(tf.keras.layers.Dense(units=10, activation='softmax'))

  return tf.keras.models.Sequential(layers)

In [None]:
# Train the three-block CNN with dropout, using the same optimizer settings, loss,
# batch size and early-stopping callback as before. Note that `history` is overwritten.
model4 = get_model_additional_block_with_dropout()
optimizer = tf.keras.optimizers.Adam(learning_rate=0.001)
model4.compile(
    loss='sparse_categorical_crossentropy',
    optimizer=optimizer,
    metrics=['accuracy'],
)

history = model4.fit(
    x=X_train,
    y=y_train,
    batch_size=32,
    epochs=100,
    validation_data=(X_valid, y_valid),
    callbacks=[early_stopping],
)

In [None]:
# Validation-set evaluation (loss and accuracy) of every model trained so far, one line each.
for description, candidate in (
    ("Base model", model),
    ("Extra conv block model", model2),
    ("Base model with he initialization", model3),
    ("Extra conv block model with dropout", model4),
):
  print(f"{description} {candidate.evaluate(X_valid, y_valid)}")


Adding dropout increased performance by 2 percentage points on the validation dataset.
Let's try to add some more dropout, e.g. after the dense layer.

In [None]:
# Three-block CNN with dropout after each max-pooling layer AND after the hidden dense layer.
tf.keras.backend.clear_session()
def get_model_additional_block_with_dropout2():
  """Build the three-block dropout CNN with one extra dropout layer in the head.

  Each block is [conv, conv, max-pool, dropout] with 32, 64 and 128 filters.
  The head is a 128-unit ReLU dense layer, a further dropout layer (rate 0.2)
  and a 10-way softmax output.

  Returns:
    A new, uncompiled `tf.keras.models.Sequential` instance.
  """
  layers = [tf.keras.layers.Rescaling(scale=1./255, input_shape=(32,32,3))]

  for n_filters in (32, 64, 128):
    layers.append(DefaultConv2D(filters=n_filters))
    layers.append(DefaultConv2D(filters=n_filters))
    layers.append(tf.keras.layers.MaxPool2D(pool_size=2))
    layers.append(tf.keras.layers.Dropout(rate=0.2))

  layers.append(tf.keras.layers.Flatten())
  layers.append(tf.keras.layers.Dense(units=128, activation='relu'))
  layers.append(tf.keras.layers.Dropout(rate=0.2))  # More dropout

  layers.append(tf.keras.layers.Dense(units=10, activation='softmax'))

  return tf.keras.models.Sequential(layers)

In [None]:
# Train the variant with the extra dropout layer after the dense layer.
# This must use the `...dropout2` factory: calling `get_model_additional_block_with_dropout()`
# here would just retrain the model4 architecture, and the comparison would measure nothing.
model5 = get_model_additional_block_with_dropout2()
optimizer = tf.keras.optimizers.Adam(learning_rate=0.001)
model5.compile(
    optimizer=optimizer,
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy']
)

# Same training setup as the other models. Note that `history` is overwritten.
history = model5.fit(X_train, y_train,
          validation_data=(X_valid, y_valid),
          epochs=100,
          batch_size=32,
          callbacks=[early_stopping]
          )

In [None]:
# Validation-set evaluation (loss and accuracy) of every model trained so far, one line each.
for description, candidate in (
    ("Base model", model),
    ("Extra conv block model", model2),
    ("Base model with he initialization", model3),
    ("Extra conv block model with dropout", model4),
    ("Extra conv block model with dropout2", model5),
):
  print(f"{description} {candidate.evaluate(X_valid, y_valid)}")

Adding additional dropout didn't seem to help a lot.

In [None]:
# Fine-tune `model4`: keep training from its current weights, with the learning rate
# lowered by a factor of 0.1. The model is deliberately NOT re-instantiated; it is only
# recompiled with a fresh optimizer.
optimizer = tf.keras.optimizers.Adam(learning_rate=0.001 * 0.1)
model4.compile(
    loss='sparse_categorical_crossentropy',
    optimizer=optimizer,
    metrics=['accuracy'],
)

history = model4.fit(
    x=X_train,
    y=y_train,
    batch_size=32,
    epochs=100,
    validation_data=(X_valid, y_valid),
    callbacks=[early_stopping],
)

In [None]:
# Validation-set evaluation of model4 after the additional low-learning-rate training.
print(f"Extra conv block model with dropout train longer {model4.evaluate(X_valid, y_valid)}")
# From 78.32% to 80.50%: an extra 2 percentage points.

Let's try Monte Carlo dropout. It makes inference slower, but it may also improve accuracy.


In [None]:
# Let's try 100 samples
# Each pass calls model4 with training=True so its Dropout layers stay active; the 100
# stochastic outputs are stacked along a new leading axis.
y_probas = np.stack([model4(X_valid, training=True) for _ in range(100)])
y_probas.shape

In [None]:
# Average the stochastic passes (leading axis) into one prediction per validation image.
y_proba = np.mean(y_probas, axis=0)
y_proba.shape

In [None]:
# Predicted class per validation image: the index of the highest averaged output.
y_valid_pred_mc = y_proba.argmax(axis=-1)
y_valid_pred_mc.shape

In [None]:

# Fraction of validation images whose MC-averaged prediction equals the true label.
print(f"Accuracy with MC dropout on validation set: {np.mean(y_valid_pred_mc == y_valid.squeeze())}")

Monte Carlo dropout gained us another 0.8 percentage points.

Let's evaluate with MC dropout on the test set.

In [None]:
# Same MC-dropout procedure on the test set: 100 passes with training=True (dropout
# active), averaged over the passes, then the argmax over classes.
y_test_pred_mc = np.mean(
    np.stack([model4(X_test, training=True) for _ in range(100)]),
    axis=0,
).argmax(axis=-1)

print(f"Accuracy with MC dropout on test set: {np.mean(y_test_pred_mc == y_test.squeeze())}")


In [None]:
# Evaluate model on the test set without MC dropout, as a reference point for the
# MC-dropout test accuracy printed in the previous cell.
model4.evaluate(X_test, y_test)