**Authored by Team 4**<br>
**Last Modified:** Aug 11, 2022 <br>
**Description:** Implementing a ConvMixer (convolutional) model for image classification. <br>
**Dataset:** CIFAR-100

# Setup

In [1]:
from tensorflow.keras import layers
from tensorflow import keras

import matplotlib.pyplot as plt
import tensorflow_addons as tfa
import tensorflow as tf
import numpy as np

## Prepare the data

In [2]:
# Load the CIFAR-100 train/test arrays through Keras.
(x_train, y_train), (x_test, y_test) = keras.datasets.cifar100.load_data()

# Hold out the leading `val_split` fraction of the training arrays for
# validation. Despite its name, `val_indices` is a single slice boundary
# (the number of validation samples), not a list of indices.
val_split = 0.1
val_indices = int(val_split * len(x_train))
x_val, x_train = x_train[:val_indices], x_train[val_indices:]
y_val, y_train = y_train[:val_indices], y_train[val_indices:]

# Report the size of each split.
print(f"Training data: {len(x_train)}")
print(f"Validation data: {len(x_val)}")
print(f"Test data: {len(x_test)}")

Training data: 45000
Validation data: 5000
Test data: 10000


## Configure the hyperparameters

In [3]:
# Input geometry: side length of the square input images
# (used by the augmentation pipeline and the model input).
image_size = 32

# Training schedule.
batch_size = 128
num_epochs = 100

# Optimizer settings, passed to AdamW in run_experiment.
learning_rate = 1e-3
weight_decay = 1e-4

# Prepare Dataset objects

In [4]:
# Shared AUTOTUNE handle; data_prep passes it to `map` and `prefetch`.
auto = tf.data.AUTOTUNE

# Augmentation pipeline, assembled layer by layer. data_prep applies it to
# training batches only.
data_augmentation = keras.Sequential(name="data_augmentation")
# NOTE(review): this Normalization layer is never adapt()-ed and is given no
# explicit mean/variance anywhere in this notebook — confirm that is intended.
data_augmentation.add(layers.Normalization())
# The crop target is image_size in both dimensions.
data_augmentation.add(layers.RandomCrop(image_size, image_size))
data_augmentation.add(layers.RandomFlip("horizontal"))
data_augmentation.add(layers.RandomRotation(factor=0.02))

def data_prep(images, labels, is_train=False):
    """Wrap image/label arrays in a batched, prefetched `tf.data.Dataset`.

    Args:
        images: Image array; sliced along its first axis together with `labels`.
        labels: Label array aligned with `images`.
        is_train: When True, the samples are shuffled before batching and
            every batch is passed through `data_augmentation`.

    Returns:
        A `tf.data.Dataset` yielding `(images, labels)` batches of `batch_size`.
    """
    pipeline = tf.data.Dataset.from_tensor_slices((images, labels))

    # Evaluation splits: plain batching, no shuffling or augmentation.
    if not is_train:
        return pipeline.batch(batch_size).prefetch(auto)

    def augment(image_batch, label_batch):
        # Augmentation runs on whole batches; labels pass through untouched.
        return data_augmentation(image_batch), label_batch

    # Shuffle with a buffer of ten batches' worth of samples, then batch.
    pipeline = pipeline.shuffle(batch_size * 10).batch(batch_size)
    return pipeline.map(augment, num_parallel_calls=auto).prefetch(auto)

# Only the training split is shuffled and augmented; the validation and test
# splits rely on data_prep's default `is_train=False`.
training_data = data_prep(images=x_train, labels=y_train, is_train=True)
vd_data = data_prep(x_val, y_val)
ts_data = data_prep(x_test, y_test)

# Construct the ConvMixer Model

In [5]:
# ConvMixer architecture settings, read by convMixer_Model.

# Stem: kernel size and stride of the patch-embedding convolution.
patch_size = 2
# Channel width used by the patch embedding and every pointwise convolution.
filter_num = 256

# Mixer stack: number of repeated blocks and the depthwise kernel size.
depth = 8
kernel_size = 10

# Head: number of output classes.
class_num = 100

In [6]:
def _gelu_batchnorm(features):
    """Apply a GELU activation followed by batch normalization."""
    activated = layers.Activation("gelu")(features)
    return layers.BatchNormalization()(activated)


def convMixer_Model():
    """Build the (uncompiled) ConvMixer classifier.

    Reads the notebook-level settings `image_size`, `patch_size`,
    `filter_num`, `depth`, `kernel_size` and `class_num`.

    Returns:
        A `keras.Model` taking `(image_size, image_size, 3)` images and
        producing `class_num` softmax probabilities.
    """
    inputs = keras.Input((image_size, image_size, 3))
    # Rescale the input pixel values by 1/255.
    rescaled = layers.Rescaling(scale=1.0 / 255)(inputs)

    # Patch embedding: convolution whose kernel and stride both equal patch_size.
    features = layers.Conv2D(
        filter_num, kernel_size=patch_size, strides=patch_size
    )(rescaled)
    features = _gelu_batchnorm(features)

    # ConvMixer blocks, repeated `depth` times.
    for _ in range(depth):
        # Depthwise convolution, wrapped in a residual connection.
        residual = features
        features = layers.DepthwiseConv2D(
            kernel_size=kernel_size, padding="same"
        )(features)
        features = _gelu_batchnorm(features)
        features = layers.Add()([features, residual])

        # Pointwise (1x1) convolution.
        features = layers.Conv2D(filter_num, kernel_size=1)(features)
        features = _gelu_batchnorm(features)

    # Classification head.
    pooled = layers.GlobalAvgPool2D()(features)
    # Use the Dropout *layer* rather than tf.nn.dropout: the raw op has no
    # training flag, so it would keep dropping units during evaluate/predict
    # as well. The layer is only active while training.
    dropped = layers.Dropout(0.3)(pooled)
    outputs = layers.Dense(class_num, activation="softmax")(dropped)

    return keras.Model(inputs, outputs)

# Compile, train, and evaluate the model

In [7]:
def run_experiment(model):
    """Compile, train and evaluate `model`, returning the training history.

    The model is compiled with AdamW and sparse categorical cross-entropy,
    trained on `training_data` for `num_epochs` epochs with `vd_data` as
    validation data, and finally scored on `ts_data` using the weights from
    the checkpoint with the best validation accuracy.

    Args:
        model: An uncompiled Keras model.

    Returns:
        A `(history, model)` tuple: the object returned by `model.fit` and the
        same model instance with the checkpointed weights loaded.
    """
    checkpoint_filepath = "/tmp/checkpoint"

    # Weights-only checkpoint, overwritten only when val_accuracy improves.
    best_weights_saver = keras.callbacks.ModelCheckpoint(
        filepath=checkpoint_filepath,
        monitor="val_accuracy",
        save_best_only=True,
        save_weights_only=True,
    )

    model.compile(
        optimizer=tfa.optimizers.AdamW(
            learning_rate=learning_rate, weight_decay=weight_decay
        ),
        loss="sparse_categorical_crossentropy",
        metrics=["accuracy"],
    )

    history = model.fit(
        x=training_data,
        validation_data=vd_data,
        epochs=num_epochs,
        callbacks=[best_weights_saver],
    )

    # Restore the best checkpoint before measuring test performance.
    model.load_weights(checkpoint_filepath)
    test_loss, accuracy = model.evaluate(ts_data)
    print(f"Test accuracy: {round(accuracy * 100, 2)}%")

    return history, model

In [None]:
# Build a fresh ConvMixer and run the full compile/train/evaluate cycle.
cm_model = convMixer_Model()
# run_experiment returns the model it was given, so `conv_mixer_model` and
# `cm_model` refer to the same object; `history` feeds the accuracy plot below.
history, conv_mixer_model = run_experiment(cm_model)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
 15/352 [>.............................] - ETA: 1:15 - loss: 0.8599 - accuracy: 0.7490

# Plot the training history

In [None]:
# Training vs. validation accuracy per epoch, drawn on an explicit Axes.
fig, ax = plt.subplots()
ax.plot(history.history['accuracy'], label='accuracy')
ax.plot(history.history['val_accuracy'], label='validation accuracy')
ax.set_xlabel('Epoch')
ax.set_ylabel('Accuracy')
ax.set_ylim([0.01, 1])
ax.legend(loc='lower right')
plt.show()