In [None]:
import matplotlib.pyplot as plt

import tensorflow as tf

In [None]:
import functools


# Conv2D factory carrying this notebook's defaults: 1x1 kernel, stride 1,
# 'same' padding and ReLU activation. Any default can be overridden per call
# (e.g. kernel_size for the 3x3 and 5x5 branches).
DefaultConv = functools.partial(
    tf.keras.layers.Conv2D,
    kernel_size=(1, 1),
    strides=(1, 1),
    padding="same",
    activation="relu",
)

class InceptionModule(tf.keras.layers.Layer):
  """Inception block with four parallel branches concatenated on the channel axis.

  Branches, all applied to the same input:
    1. 1x1 convolution
    2. 1x1 "reduce" convolution followed by a 3x3 convolution
    3. 1x1 "reduce" convolution followed by a 5x5 convolution
    4. 3x3 max pooling (stride 1) followed by a 1x1 projection convolution

  Every branch uses stride 1 and 'same' padding, so the spatial size is kept
  and the output has
  filters11 + filters33 + filters55 + filters_pool_proj channels.

  Args:
    filters11: number of filters of the 1x1 branch.
    filters33_reduce: number of filters of the 1x1 conv before the 3x3 conv.
    filters33: number of filters of the 3x3 conv.
    filters55_reduce: number of filters of the 1x1 conv before the 5x5 conv.
    filters55: number of filters of the 5x5 conv.
    filters_pool_proj: number of filters of the 1x1 conv after max pooling.
    use_batch_norm: if True, batch-normalize the concatenated output.
    **kwargs: forwarded to tf.keras.layers.Layer (e.g. name).
  """

  def __init__(self, filters11,
                     filters33_reduce, filters33,
                     filters55_reduce, filters55,
                     filters_pool_proj,
                     use_batch_norm=True,
                     **kwargs):
    super().__init__(**kwargs)

    # Keep the constructor arguments so get_config() can report them.
    self.filters11 = filters11
    self.filters33_reduce = filters33_reduce
    self.filters33 = filters33
    self.filters55_reduce = filters55_reduce
    self.filters55 = filters55
    self.filters_pool_proj = filters_pool_proj
    self.use_batch_norm = use_batch_norm

    self.conv11 = DefaultConv(filters=filters11)

    self.conv33_reduce = DefaultConv(filters=filters33_reduce)
    self.conv33 = DefaultConv(filters=filters33, kernel_size=(3,3))

    self.conv55_reduce = DefaultConv(filters=filters55_reduce)
    self.conv55 = DefaultConv(filters=filters55, kernel_size=(5,5))

    self.max_pool = tf.keras.layers.MaxPool2D(
        pool_size=(3,3), strides=1, padding='same')
    self.pool_proj = DefaultConv(filters=filters_pool_proj)

    # Always define the attribute so it exists in both configurations.
    self.batch_norm = (
        tf.keras.layers.BatchNormalization() if use_batch_norm else None)

  def call(self, x, training=None):
    """Run the four branches on `x` and concatenate them on the last axis.

    Args:
      x: input tensor fed to every branch.
      training: Keras training flag; only the optional batch normalization
        depends on it.

    Returns:
      The concatenated branch outputs, batch-normalized when
      `use_batch_norm` is True.
    """
    path1 = self.conv11(x)

    path2 = self.conv33_reduce(x)
    path2 = self.conv33(path2)

    path3 = self.conv55_reduce(x)
    path3 = self.conv55(path3)

    path4 = self.max_pool(x)
    path4 = self.pool_proj(path4)

    concatenation = tf.keras.layers.concatenate([path1, path2, path3, path4])

    if self.use_batch_norm:
      return self.batch_norm(concatenation, training=training)
    return concatenation

  def get_config(self):
    """Return the constructor arguments so the layer can be saved and re-created."""
    config = super().get_config()
    config.update({
        "filters11": self.filters11,
        "filters33_reduce": self.filters33_reduce,
        "filters33": self.filters33,
        "filters55_reduce": self.filters55_reduce,
        "filters55": self.filters55,
        "filters_pool_proj": self.filters_pool_proj,
        "use_batch_norm": self.use_batch_norm,
    })
    return config


In [None]:
## Test dimensions

In [None]:
# Dummy all-zero input used only to check output shapes.
X = tf.zeros(shape=(1, 28, 28, 192))  # batch of one

In [None]:
# Stand-alone module (no batch normalization) used for the shape and
# parameter-count checks.
inception3a = InceptionModule(
    filters11=64,
    filters33_reduce=96,
    filters33=128,
    filters55_reduce=16,
    filters55=32,
    filters_pool_proj=32,
    use_batch_norm=False,
)

In [None]:
# Output shape of the module for the dummy batch X (calling it also builds its weights).
inception3a(X).shape

In [None]:
# Number of scalar parameters: sum of the sizes of all weight arrays.
total = sum(weights.size for weights in inception3a.get_weights())
print(total)



In [None]:
import functools

# MaxPool2D factory for the downsampling steps of the network:
# 3x3 window, stride 2, 'same' padding.
DefaultMaxPool = functools.partial(
    tf.keras.layers.MaxPool2D,
    pool_size=(3, 3),
    strides=(2, 2),
    padding="same",
)
def get_googlenet_model(input_shape, num_classes, use_batch_norm=True, **kwargs):
  """Build a GoogLeNet-style classifier as an uncompiled Sequential model.

  Layout: convolutional stem, three stages of InceptionModule layers separated
  by stride-2 max pooling, global average pooling, dropout and a softmax
  Dense layer.

  Args:
    input_shape: shape of one input sample, passed to the first Conv2D layer.
    num_classes: number of units of the final softmax layer.
    use_batch_norm: if True, add BatchNormalization in the stem and enable it
      in every InceptionModule.
    **kwargs: forwarded to tf.keras.Sequential (e.g. name).

  Returns:
    The tf.keras.Sequential model (not compiled).
  """
  # One tuple per Inception module, in constructor order:
  # (filters11, filters33_reduce, filters33,
  #  filters55_reduce, filters55, filters_pool_proj).
  # Each inner list is one stage; a max pool is inserted between stages.
  inception_stages = [
      [(64, 96, 128, 16, 32, 32),
       (128, 128, 192, 32, 96, 64)],
      [(192, 96, 208, 16, 48, 64),
       (160, 112, 224, 24, 64, 64),
       (128, 128, 256, 24, 64, 64),
       (112, 144, 288, 32, 64, 64),
       (256, 160, 320, 32, 128, 128)],
      [(256, 160, 320, 32, 128, 128),
       (384, 192, 384, 48, 128, 128)],
  ]

  model = tf.keras.Sequential(**kwargs)

  # Stem: 7x7 stride-2 conv -> pool -> 1x1 conv -> 3x3 conv -> pool.
  model.add(tf.keras.layers.Conv2D(
      filters=64, kernel_size=7, strides=2, padding='same',
      activation='relu',
      input_shape=input_shape))

  if use_batch_norm:
    model.add(tf.keras.layers.BatchNormalization())

  model.add(DefaultMaxPool())

  model.add(tf.keras.layers.Conv2D(
      filters=64, kernel_size=1, strides=1, padding='same', activation='relu'))

  model.add(tf.keras.layers.Conv2D(
      filters=192, kernel_size=3, strides=1, padding='same', activation='relu'))

  if use_batch_norm:
    model.add(tf.keras.layers.BatchNormalization())

  model.add(DefaultMaxPool())

  # Inception stages; the last stage is followed by global pooling instead of
  # a max pool, so the pool is added *before* every stage except the first.
  for stage_index, stage in enumerate(inception_stages):
    if stage_index > 0:
      model.add(DefaultMaxPool())
    for filter_spec in stage:
      model.add(InceptionModule(*filter_spec, use_batch_norm=use_batch_norm))

  # Classification head.
  model.add(tf.keras.layers.GlobalAveragePooling2D())

  model.add(tf.keras.layers.Dropout(0.4))

  model.add(tf.keras.layers.Dense(units=num_classes, activation='softmax'))

  return model



In [None]:
# Load the CIFAR-10 training and test splits through the Keras dataset helper.
(X_train_full, y_train_full), (X_test, y_test) = tf.keras.datasets.cifar10.load_data()

In [None]:
# CIFAR-10 was already loaded in the previous cell, so it is not loaded again
# here. Hold out the last VALIDATION_SIZE training samples for validation.
VALIDATION_SIZE = 10_000
X_train, X_valid = X_train_full[:-VALIDATION_SIZE], X_train_full[-VALIDATION_SIZE:]
y_train, y_valid = y_train_full[:-VALIDATION_SIZE], y_train_full[-VALIDATION_SIZE:]

In [None]:
# Spatial size the images are resized to before they enter the network.
HEIGHT, WIDTH = 128, 128

In [None]:
# Preprocessing shared by all models: resize the 32x32x3 inputs to
# HEIGHT x WIDTH, then rescale with x / 127.5 - 1 (i.e. [0, 255] -> [-1, 1],
# assuming 8-bit pixel values).
preprocess_layer = tf.keras.Sequential()
preprocess_layer.add(
    tf.keras.layers.Resizing(HEIGHT, WIDTH, input_shape=(32, 32, 3)))
preprocess_layer.add(
    tf.keras.layers.Rescaling(scale=1/127.5, offset=-1))

In [None]:
# Baseline: GoogLeNet without batch normalization.
tf.keras.backend.clear_session()
model = get_googlenet_model(
    # Must match the output size of the Resizing layer in preprocess_layer.
    input_shape=(HEIGHT, WIDTH, 3), num_classes=10,
    use_batch_norm=False,
    name="GoogLeNet")
model.summary()
# Prepend the preprocessing so the full model accepts the raw images.
full_model = tf.keras.Sequential([
    preprocess_layer,
    model
])

In [None]:
# Summary of the combined model (preprocessing followed by the network).
full_model.summary()

In [None]:
# Adam with learning rate 0.001; the sparse cross-entropy loss expects
# integer class labels rather than one-hot vectors.
optimizer = tf.keras.optimizers.Adam(learning_rate=0.001)
full_model.compile(
    loss="sparse_categorical_crossentropy",
    optimizer=optimizer,
    metrics=["accuracy"],
)


In [None]:
# Stop once validation accuracy has not improved by at least 0.001 for
# 5 consecutive epochs, and roll back to the best weights seen.
early_stopping = tf.keras.callbacks.EarlyStopping(
    monitor="val_accuracy",
    patience=5,
    min_delta=0.001,
    restore_best_weights=True,
)

# Train for at most 100 epochs; early stopping normally ends the run sooner.
history = full_model.fit(
    X_train, y_train,
    batch_size=128,
    epochs=100,
    validation_data=(X_valid, y_valid),
    callbacks=[early_stopping],
)

We get 79.03% validation accuracy after 16 epochs.

Introduce batch normalization

In [None]:
# Same architecture, now with batch normalization enabled.
tf.keras.backend.clear_session()
model_with_bn = get_googlenet_model(
    # Must match the output size of the Resizing layer in preprocess_layer.
    input_shape=(HEIGHT, WIDTH, 3), num_classes=10,
    use_batch_norm=True,
    name="GoogLeNetWithBN")
model_with_bn.summary()
full_model_with_bn = tf.keras.Sequential([
    preprocess_layer,
    model_with_bn
])

In [None]:
# Same optimizer settings and loss as the baseline, with a fresh Adam instance.
optimizer = tf.keras.optimizers.Adam(learning_rate=0.001)
full_model_with_bn.compile(
    loss="sparse_categorical_crossentropy",
    optimizer=optimizer,
    metrics=["accuracy"],
)

In [None]:
# Train the batch-normalized model with the same schedule and early stopping.
history = full_model_with_bn.fit(
    X_train, y_train,
    batch_size=128,
    epochs=100,
    validation_data=(X_valid, y_valid),
    callbacks=[early_stopping],
)

82.70% validation accuracy after 20 epochs.

Try to add data augmentation

In [None]:
# Random augmentations: horizontal flips, small rotations
# (factor 0.05) and contrast changes (factor 0.2), each seeded with 42.
data_augmentation = tf.keras.Sequential()
data_augmentation.add(tf.keras.layers.RandomFlip(mode="horizontal", seed=42))
data_augmentation.add(tf.keras.layers.RandomRotation(factor=0.05, seed=42))
data_augmentation.add(tf.keras.layers.RandomContrast(factor=0.2, seed=42))

In [None]:
# Fresh batch-normalized network, this time trained on augmented images.
tf.keras.backend.clear_session()
model_with_bn = get_googlenet_model(
    # Must match the output size of the Resizing layer in preprocess_layer.
    input_shape=(HEIGHT, WIDTH, 3), num_classes=10,
    use_batch_norm=True,
    name="GoogLeNetWithBN")
model_with_bn.summary()
# Augmentation runs first, on the raw images, before resizing and rescaling.
full_model_with_bn_and_da = tf.keras.Sequential([
    data_augmentation,
    preprocess_layer,
    model_with_bn
])

In [None]:
# Same optimizer settings and loss as before, with a fresh Adam instance.
optimizer = tf.keras.optimizers.Adam(learning_rate=0.001)
full_model_with_bn_and_da.compile(
    loss="sparse_categorical_crossentropy",
    optimizer=optimizer,
    metrics=["accuracy"],
)

In [None]:
# Cell was accidentally started a second time, but was stopped again quickly.
history = full_model_with_bn_and_da.fit(
    X_train, y_train,
    batch_size=128,
    epochs=100,
    validation_data=(X_valid, y_valid),
    callbacks=[early_stopping],
)

88% validation accuracy after 38 epochs (approx. 50 seconds per epoch!).

Now lower the learning rate and see whether the result improves further.
Do NOT recreate the model: training must continue from the current weights.

In [None]:
# Recompile the already-trained model with a 10x smaller learning rate.
# Recompiling replaces the optimizer but leaves the model weights untouched.
optimizer = tf.keras.optimizers.Adam(learning_rate=0.001 * 0.1)
full_model_with_bn_and_da.compile(
    loss="sparse_categorical_crossentropy",
    optimizer=optimizer,
    metrics=["accuracy"],
)


In [None]:
# Continue training with the reduced learning rate.
history = full_model_with_bn_and_da.fit(
    X_train, y_train,
    batch_size=128,
    epochs=100,
    validation_data=(X_valid, y_valid),
    callbacks=[early_stopping],
)

90.74% validation accuracy after 9 epochs.

In [None]:
# Lower the learning rate once more, by another factor of 10.
optimizer = tf.keras.optimizers.Adam(learning_rate=0.001 * 0.1 * 0.1)
full_model_with_bn_and_da.compile(
    loss="sparse_categorical_crossentropy",
    optimizer=optimizer,
    metrics=["accuracy"],
)

In [None]:
# Final fine-tuning run with the smallest learning rate.
history = full_model_with_bn_and_da.fit(
    X_train, y_train,
    batch_size=128,
    epochs=100,
    validation_data=(X_valid, y_valid),
    callbacks=[early_stopping],
)

In [None]:
# Evaluate on the test data; returns the loss followed by the compiled
# metrics (here: accuracy).
full_model_with_bn_and_da.evaluate(X_test, y_test)

90.10% accuracy on the test data.

In [None]:
# With Monte Carlo dropout.
# We remove the data augmentation layer, because later we will set training to True
# and the data augmentation layer should not be active during testing.
model_mc_dropout = tf.keras.Sequential([
    preprocess_layer,
    model_with_bn])

In [None]:
import numpy as np

# Monte Carlo dropout: average the outputs of 100 stochastic forward passes
# (training=True) and take the arg-max class.
# NOTE(review): each pass feeds the entire test set in a single call;
# according to the note following this cell, this ran out of memory.
y_test_pred_mc = np.argmax(
    np.stack([model_mc_dropout(X_test, training=True) for _ in range(100)]).mean(axis=0),
    axis=-1)

print(f"Accuracy with MC dropout on test set: {np.mean(y_test_pred_mc == y_test.squeeze())}")

The previous cell results in an out-of-memory error, so the next cell processes the test set in batches.

In [None]:
# Monte Carlo dropout evaluated batch by batch to keep memory bounded.
accuracies = []
TEST_BATCH_SIZE = 500

n_test_batches = X_test.shape[0] // TEST_BATCH_SIZE
for batch_nr in range(n_test_batches):
  batch_slice = slice(batch_nr * TEST_BATCH_SIZE, (batch_nr + 1) * TEST_BATCH_SIZE)
  X_batch = X_test[batch_slice]
  y_batch = y_test[batch_slice]

  # Restrict to 20 samples for speed: average the 20 stochastic outputs,
  # then pick the most probable class.
  mc_outputs = np.stack([model_mc_dropout(X_batch, training=True) for _ in range(20)])
  y_test_pred_mc = mc_outputs.mean(axis=0).argmax(axis=-1)

  accuracies.append(np.mean(y_test_pred_mc == y_batch.squeeze()))

  # Progress indicator: one dot per processed batch.
  print(".", end='', flush=True)




In [None]:
# Mean of the per-batch accuracies collected in `accuracies`.
np.mean(accuracies)

In [None]:
# MC Dropout *with* data augementation
accuracies = []
TEST_BATCH_SIZE = 500

for batch_nr in range(X_test.shape[0] // TEST_BATCH_SIZE):
  X_batch = X_test[batch_nr * TEST_BATCH_SIZE: (batch_nr + 1) * TEST_BATCH_SIZE]
  y_batch = y_test[batch_nr * TEST_BATCH_SIZE: (batch_nr + 1) * TEST_BATCH_SIZE]

  # Restrict to 20 samples for speed
  y_test_pred_mc = np.argmax(
    np.stack([full_model_with_bn_and_da(X_batch, training=True) for _ in range(20)]).mean(axis=0),
    axis=-1)

  accuracies.append(np.mean(y_test_pred_mc == y_batch.squeeze()))

  print(".", end='', flush=True)

In [None]:
# Mean of the per-batch accuracies collected in `accuracies`.
np.mean(accuracies)

In [None]:
## 90.79% with data augmentation and MC dropout!