Convmixer Image Classfication


In [2]:
pip install -U -q tensorflow-addons

## Imports

In [3]:
from tensorflow.keras import layers
from tensorflow import keras

import matplotlib.pyplot as plt
import tensorflow_addons as tfa
import tensorflow as tf
import numpy as np


TensorFlow Addons (TFA) has ended development and introduction of new features.
TFA has entered a minimal maintenance and release mode until a planned end of life in May 2024.
Please modify downstream libraries to take dependencies from other repositories in our TensorFlow community (e.g. Keras, Keras-CV, and Keras-NLP). 

For more information see: https://github.com/tensorflow/addons/issues/2807 



In [4]:
learning_rate = 0.001
weight_decay = 0.0001
batch_size = 32
num_epochs = 2

In [4]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [6]:
import os
import cv2
import numpy as np

data_dir = "/content/drive/MyDrive/PreprocessSeg/"

# Initialize empty lists to store the images and labels
images = []
labels = []

# Loop through all the subfolders and their files
for subdir, _, files in os.walk(data_dir):
    for file in files:
        # Load the image using OpenCV
        image = cv2.imread(os.path.join(subdir, file))
        # Resize the image to the desired size
        image = cv2.resize(image, (64, 64))
        # Normalize the image pixel values to be between 0 and 1
        image = image / 255.0
        # Append the image to the list of images
        images.append(image)
        # Extract the label from the folder name
        label = os.path.basename(subdir)
        # Append the label to the list of labels
        labels.append(label)

# Convert the lists of images and labels to numpy arrays
images = np.array(images)
num_classes = 4
label_map = {'Mild_Demented': 0, 'Moderate_Demented': 1, 'Non_Demented': 2, 'Very_Mild_Demented': 3}
labels_int = np.array([label_map[label] for label in labels])
labels_one_hot = np.eye(num_classes)[labels_int]

# Split the data into training, validation, and testing sets
val_split = 0.1
test_split = 0.1
num_val = int(len(images) * val_split)
num_test = int(len(images) * test_split)
num_train = len(images) - num_val - num_test

x_train, y_train = images[:num_train], labels_one_hot[:num_train]
x_val, y_val = images[num_train:num_train+num_val], labels_one_hot[num_train:num_train+num_val]
x_test, y_test = images[num_train+num_val:], labels_one_hot[num_train+num_val:]

len(x_train)

1537

In [None]:
#(x_train, y_train), (x_test, y_test) = keras.datasets.cifar10.load_data()
val_split = 0.1

val_indices = int(len(x_train) * val_split)
new_x_train, new_y_train = x_train[val_indices:], y_train[val_indices:]
x_val, y_val = x_train[:val_indices], y_train[:val_indices]

print(f"Training data samples: {len(new_x_train)}")
print(f"Validation data samples: {len(x_val)}")
print(f"Test data samples: {len(x_test)}")

Training data samples: 29
Validation data samples: 3
Test data samples: 32


In [None]:
import tensorflow as tf
import numpy as np

# Assume y_train and y_test have a shape of [?, 4]
# Convert them to integer labels
y_train_int = np.argmax(y_train, axis=1)
y_test_int = np.argmax(y_test, axis=1)

# Now you can convert them to one-hot encoded vectors with num_classes=4
y_train = tf.keras.utils.to_categorical(y_train_int, num_classes=4)
y_test = tf.keras.utils.to_categorical(y_test_int, num_classes=4)

y_train = y_train.squeeze()
y_test = y_test.squeeze()


## ConvMixer utilities

The following figure (taken from the original paper) depicts the ConvMixer model:

![](https://i.imgur.com/yF8actg.png)

ConvMixer is very similar to the MLP-Mixer, model with the following key
differences:

* Instead of using fully-connected layers, it uses standard convolution layers.
* Instead of LayerNorm (which is typical for ViTs and MLP-Mixers), it uses BatchNorm.

Two types of convolution layers are used in ConvMixer. **(1)**: Depthwise convolutions,
for mixing spatial locations of the images, **(2)**: Pointwise convolutions (which follow
the depthwise convolutions), for mixing channel-wise information across the patches.
Another keypoint is the use of *larger kernel sizes* to allow a larger receptive field.

In [10]:

def activation_block(x):
    x = layers.Activation("gelu")(x)
    return layers.BatchNormalization()(x)


def conv_stem(x, filters: int, patch_size: int):
    x = layers.Conv2D(filters, kernel_size=patch_size, strides=patch_size)(x)
    return activation_block(x)


def conv_mixer_block(x, filters: int, kernel_size: int):
    # Depthwise convolution.
    x0 = x
    x = layers.DepthwiseConv2D(kernel_size=kernel_size, padding="same")(x)
    x = layers.Add()([activation_block(x), x0])  # Residual.

    # Pointwise convolution.
    x = layers.Conv2D(filters, kernel_size=1)(x)
    x = activation_block(x)

    return x


def get_conv_mixer_256_8(
    image_size=64, filters=256, depth=8, kernel_size=5, patch_size=2, num_classes=4
):
    """ConvMixer-256/8: https://openreview.net/pdf?id=TVHS5Y4dNvM.
    The hyperparameter values are taken from the paper.
    """
    inputs = keras.Input((image_size, image_size, 3))
    x = layers.Rescaling(scale=1.0 / 255)(inputs)

    # Extract patch embeddings.
    x = conv_stem(x, filters, patch_size)

    # ConvMixer blocks.
    for _ in range(depth):
        x = conv_mixer_block(x, filters, kernel_size)

    # Classification block.
    x = layers.GlobalAvgPool2D()(x)
    outputs = layers.Dense(num_classes, activation="softmax")(x)

    return keras.Model(inputs, outputs)


## Model training and evaluation utility

In [11]:
def run_experiment(model):
    optimizer = tfa.optimizers.AdamW(
        learning_rate=learning_rate, weight_decay=weight_decay
    )

    model.compile(
        optimizer=optimizer,
        loss="categorical_crossentropy", 
        metrics=["accuracy"],
    )

    checkpoint_filepath = "/tmp/checkpoint"
    checkpoint_callback = keras.callbacks.ModelCheckpoint(
        checkpoint_filepath,
        monitor="val_accuracy",
        save_best_only=True,
        save_weights_only=True,
    )

    history = model.fit(
        x_train, 
        y_train,  
        epochs=num_epochs,
        validation_split=0.2,
        callbacks=[checkpoint_callback],
    )

    model.load_weights(checkpoint_filepath)
    _, accuracy = model.evaluate(x_test,y_test)
    print(f"Test accuracy: {round(accuracy * 100, 2)}%")

    return history, model


## Train and evaluate model

In [None]:
conv_mixer_model = get_conv_mixer_256_8()
history, conv_mixer_model = run_experiment(conv_mixer_model)

Epoch 1/10
Epoch 2/10
Epoch 3/10