# Image classification with Vision Transformer

**Based on:** [Image classification with Vision Transformer](https://keras.io/examples/vision/image_classification_with_vision_transformer/)<br>

## Setup

In [None]:
pip install -U tensorflow-addons #required for the AdamW optimiser

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [None]:
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
import tensorflow_addons as tfa
import shutil
import os

In [None]:
from google.colab import drive

# Mount at an absolute path: the dataset paths in this notebook are written as
# "/content/Mydrive/...", so a relative mount point would only match when the
# kernel's working directory happens to be /content.
drive.mount('/content/Mydrive')

Drive already mounted at Mydrive; to attempt to forcibly remount, call drive.mount("Mydrive", force_remount=True).


## Hyperparameters

In [None]:
# --- Optimisation ---
learning_rate = 0.001
weight_decay = 0.0001
batch_size = 64
num_epochs = 50

# --- Input geometry ---
image_size = 224  # We'll resize input images to this size
patch_size = 16  # Size of the patches to be extracted from the input images
num_patches = (image_size // patch_size) ** 2

# --- Transformer ---
projection_dim = 64
num_heads = 4
transformer_units = [
    projection_dim * 2,
    projection_dim,
]  # Size of the transformer layers
transformer_layers = 3
mlp_head_units = [256, 128]  # Size of the dense layers of the final classifier

# --- Data locations ---
# Single project root so the train/test paths cannot drift apart.
data_root = "/content/Mydrive/MyDrive/Colab Notebooks/AML_project"
train_path = data_root + "/train"
test_path = data_root + "/test"

# Upper-case aliases kept for backward compatibility; derived from the values
# above instead of repeating the literals.
RESOLUTION = image_size
PATCH_SIZE = patch_size

num_classes = 100
input_shape = (image_size, image_size, 3)

## Getting the dataset

In [None]:
# Build a lookup from plant class to species name.
# Each non-blank line of species_list.txt is split on "; ": the first field is
# used as the class key and the second field as the species value.
with open('/content/Mydrive/MyDrive/Colab Notebooks/AML_project/list/species_list.txt') as txt_file:
    # Skip blank lines (e.g. empty lines at the end of the file); they would
    # otherwise raise IndexError when the second field is read below.
    lines = [x.strip() for x in txt_file if x.strip()]

# Split every line once and reuse the fields for both lists.
fields = [x.split('; ') for x in lines]
plant_class = [parts[0] for parts in fields]
plant_species = [parts[1] for parts in fields]

# If a class appears more than once, the last occurrence wins.
plant_species_dict = dict(zip(plant_class, plant_species))

#plant_species_dict

In [None]:
# Both splits are loaded with identical shuffling, batching and resizing options.
loader_options = dict(
    shuffle=True,
    batch_size=batch_size,
    image_size=(image_size, image_size),
)

# The training images are read from the "herbarium" sub-folder of train_path.
x_train = tf.keras.utils.image_dataset_from_directory(
    train_path + "/herbarium", **loader_options
)

x_test = tf.keras.utils.image_dataset_from_directory(test_path, **loader_options)




Found 3018 files belonging to 100 classes.
Found 1933 files belonging to 100 classes.


## Use data augmentation

In [None]:
# Preprocessing / augmentation pipeline that is applied inside the model.
data_augmentation = keras.Sequential(
    [
        layers.Normalization(),
        layers.RandomFlip("horizontal"),
        layers.RandomRotation(factor=0.02),
    ],
    name="data_augmentation",
)
# Compute the mean and the variance of the training data for normalization.
# Without this call the Normalization layer keeps its initial statistics and
# the raw pixel values reach the model unscaled.
# x_train yields (images, labels) batches, so the labels are dropped first.
data_augmentation.layers[0].adapt(x_train.map(lambda images, labels: images))


## Implement multilayer perceptron (MLP)

In [None]:

def mlp(x, hidden_units, dropout_rate):
    """Apply a stack of Dense + Dropout layers to ``x``.

    Args:
        x: Input tensor.
        hidden_units: Iterable giving the number of units of each Dense layer.
        dropout_rate: Dropout rate applied after every Dense layer.

    Returns:
        The output of the last Dropout layer, or ``x`` unchanged when
        ``hidden_units`` is empty.
    """
    for units in hidden_units:
        # GELU is an element-wise non-linearity. The previous softmax
        # normalised every hidden vector to sum to 1 across its units, which
        # squashes the features passed on to the next layer.
        x = layers.Dense(units, activation=tf.nn.gelu)(x)
        x = layers.Dropout(dropout_rate)(x)
    return x


## Implement patch creation as a layer

In [None]:

class Patches(layers.Layer):
    """Cut a batch of images into square patches and flatten each patch.

    Patches are ``patch_size`` x ``patch_size`` pixels, taken with a stride of
    ``patch_size`` (so they do not overlap) and without padding.
    """

    def __init__(self, patch_size, **kwargs):
        """Store the patch side length.

        Args:
            patch_size: Side length, in pixels, of each square patch.
            **kwargs: Standard Keras layer arguments (e.g. ``name``).
        """
        super().__init__(**kwargs)
        self.patch_size = patch_size

    def call(self, images):
        """Return the flattened patches of ``images``.

        Args:
            images: Batch of images; the first axis is the batch axis.

        Returns:
            Tensor of shape ``[batch, patches_per_image, patch_dims]`` where
            ``patch_dims`` is the last dimension produced by
            ``tf.image.extract_patches``.
        """
        # The batch size is read dynamically because it is unknown at graph build time.
        batch_size = tf.shape(images)[0]
        patches = tf.image.extract_patches(
            images=images,
            sizes=[1, self.patch_size, self.patch_size, 1],
            strides=[1, self.patch_size, self.patch_size, 1],
            rates=[1, 1, 1, 1],
            padding="VALID",
        )
        patch_dims = patches.shape[-1]
        # Collapse the patch grid into a single sequence axis.
        patches = tf.reshape(patches, [batch_size, -1, patch_dims])
        return patches

    def get_config(self):
        """Return the layer configuration so the layer can be serialised."""
        config = super().get_config()
        config.update({"patch_size": self.patch_size})
        return config


## Implement the patch encoding layer

In [None]:

class PatchEncoder(layers.Layer):
    """Project flattened patches linearly and add a learned position embedding."""

    def __init__(self, num_patches, projection_dim, **kwargs):
        """Create the projection and position-embedding sub-layers.

        Args:
            num_patches: Number of patches per image, i.e. the number of
                positions in the embedding table.
            projection_dim: Size of the vector each patch is projected to.
            **kwargs: Standard Keras layer arguments (e.g. ``name``).
        """
        super().__init__(**kwargs)
        self.num_patches = num_patches
        # Kept so that get_config can rebuild the layer.
        self.projection_dim = projection_dim
        self.projection = layers.Dense(units=projection_dim)
        self.position_embedding = layers.Embedding(
            input_dim=num_patches, output_dim=projection_dim
        )

    def call(self, patch):
        """Return the projected patches plus their position embeddings.

        Args:
            patch: Tensor of flattened patches.

        Returns:
            The sum of the projected patches and the position embeddings for
            positions ``0 .. num_patches - 1``.
        """
        positions = tf.range(start=0, limit=self.num_patches, delta=1)
        # The position embeddings carry no batch axis; the addition broadcasts them.
        encoded = self.projection(patch) + self.position_embedding(positions)
        return encoded

    def get_config(self):
        """Return the layer configuration so the layer can be serialised."""
        config = super().get_config()
        config.update(
            {
                "num_patches": self.num_patches,
                "projection_dim": self.projection_dim,
            }
        )
        return config


## Build the ViT model

In [None]:
def create_vit_classifier():
    """Build the Vision Transformer classifier.

    The model normalises and augments the input, splits it into patches,
    encodes the patches, runs ``transformer_layers`` Transformer blocks and
    classifies the flattened result with a small dense head.

    Returns:
        An uncompiled ``keras.Model`` mapping images of shape ``input_shape``
        to ``num_classes`` logits.
    """
    inputs = layers.Input(shape=input_shape)
    # Augment data.
    augmented = data_augmentation(inputs)
    # Create patches.
    patches = Patches(patch_size)(augmented)
    # Encode patches.
    encoded_patches = PatchEncoder(num_patches, projection_dim)(patches)

    # Create multiple layers of the Transformer block.
    for _ in range(transformer_layers):
        # Layer normalization 1.
        x1 = layers.LayerNormalization()(encoded_patches)
        # Create a multi-head attention layer.
        attention_output = layers.MultiHeadAttention(
            num_heads=num_heads, key_dim=projection_dim, dropout=0.1
        )(x1, x1)
        # Skip connection 1.
        x2 = layers.Add()([attention_output, encoded_patches])
        # Layer normalization 2.
        x3 = layers.LayerNormalization()(x2)
        # MLP.
        x3 = mlp(x3, hidden_units=transformer_units, dropout_rate=0.1)
        # Skip connection 2.
        encoded_patches = layers.Add()([x3, x2])

    # Create a [batch_size, num_patches * projection_dim] tensor.
    representation = layers.LayerNormalization()(encoded_patches)
    representation = layers.Flatten()(representation)

    # Classification head. Each Dense layer needs a non-linearity: two Dense
    # layers without activations collapse into a single linear map, so the
    # extra layer would add parameters but no capacity.
    features = representation
    for units in mlp_head_units:
        features = layers.Dense(units, activation=tf.nn.gelu)(features)
    features = layers.Dropout(0.2)(features)

    # Classify outputs. The layer emits raw logits (no softmax).
    logits = layers.Dense(num_classes, kernel_regularizer=tf.keras.regularizers.l2(0.1))(features)
    # Create the Keras model
    model = keras.Model(inputs=inputs, outputs=logits)
    return model

## Compile, train, and evaluate the model

In [None]:

def run_experiment(model, train_data=None, validation_data=None, test_data=None):
    """Compile, train and evaluate ``model``.

    The weights with the best validation accuracy are checkpointed during
    training and restored before the final evaluation.

    Args:
        model: Keras model that outputs logits.
        train_data: Training dataset. Defaults to the notebook-level ``x_train``.
        validation_data: Dataset monitored during training and used to select
            the checkpoint. Defaults to ``test_data``.
        test_data: Dataset used for the final evaluation. Defaults to the
            notebook-level ``x_test``.

    Returns:
        The ``History`` object returned by ``model.fit``.
    """
    # Fall back to the notebook-level datasets so run_experiment(model) keeps working.
    if train_data is None:
        train_data = x_train
    if test_data is None:
        test_data = x_test
    if validation_data is None:
        # NOTE: with no separate validation set the test set also selects the
        # checkpoint, so the reported test accuracy is optimistically biased.
        # Pass a held-out validation dataset to avoid this.
        validation_data = test_data

    optimizer = tfa.optimizers.AdamW(
        learning_rate=learning_rate, weight_decay=weight_decay
    )
    model.compile(
        optimizer=optimizer,
        # The model outputs logits, hence from_logits=True.
        loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True),
        metrics=[
            keras.metrics.SparseCategoricalAccuracy(name="accuracy"),
            keras.metrics.SparseTopKCategoricalAccuracy(5, name="top-5-accuracy"),
        ],
    )

    # Keep only the weights of the epoch with the best validation accuracy.
    checkpoint_filepath = "/tmp/checkpoint"
    checkpoint_callback = keras.callbacks.ModelCheckpoint(
        checkpoint_filepath,
        monitor="val_accuracy",
        save_best_only=True,
        save_weights_only=True,
    )

    history = model.fit(
        train_data,
        epochs=num_epochs,
        validation_data=validation_data,
        callbacks=[checkpoint_callback],
    )

    # Restore the best checkpoint before the final evaluation.
    model.load_weights(checkpoint_filepath)
    # evaluate() returns the loss followed by the metrics in compile order.
    _, accuracy, top_5_accuracy = model.evaluate(test_data)
    print(f"Test accuracy: {round(accuracy * 100, 2)}%")
    print(f"Test top 5 accuracy: {round(top_5_accuracy * 100, 2)}%")

    return history


vit_classifier = create_vit_classifier()
history = run_experiment(vit_classifier)


Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50
Test accuracy: 13.61%
Test top 5 accuracy: 30.63%


In [None]:
# Evaluate the trained classifier on the training split, then the test split.
# Each result holds the loss followed by the compiled metrics.
train_acc, test_acc = [vit_classifier.evaluate(split) for split in (x_train, x_test)]



In [None]:
# Index 1 of each evaluation result is the accuracy metric (index 0 is the loss).
for split_name, split_metrics in (("Train", train_acc), ("Test", test_acc)):
    print(f'{split_name} accuracy: {split_metrics[1]*100}')

Train accuracy: 74.4532823562622
Test accuracy: 13.605794310569763
