# Mount drive

In [None]:
# Mount Google Drive into the Colab filesystem at /content/gdrive/.
# The Kaggle setup cell further down copies kaggle.json from this mount.
from google.colab import drive
drive.mount('/content/gdrive/')

# Imports, Constants and Functions

## Constants

In [2]:
#@markdown Dataset
#@markdown ---
# Side length in pixels; every image is resized to IMG_DIMS x IMG_DIMS.
IMG_DIMS = 72 #@param {type:"slider", min:16, max:256, step:1}
# Number of channels in the model's input tensor.
NUM_CHANS = 3 #@param {type:"slider", min:1, max:4, step:1}
# Maximum number of images loaded from the dataset (keeps RAM usage bounded).
MAX_DATA_SIZE = 7000 #@param {type:"slider", min:4000, max:35000, step:1000}
# Number of output units of the final classification layer.
NUM_CLASSES = 8 #@param {type:"slider", min:2, max:20, step:1}
# Whether part of the data is held out as a test set.
SPLIT_TRAIN_TEST = True #@param {type:"boolean"}
# Percentage of the data assigned to the training set when splitting.
TRAIN_RATIO = 95 #@param {type:"slider", min:60, max:95, step:5}
#@markdown ---

#@markdown Resizing
#@markdown ---
# Whether the resized, normalised arrays are cached on disk as .npy files.
SAVE_RESIZED = True #@param {type:"boolean"}
# Directory that holds the cached arrays.
RESIZED_DIR = "/content/natural-images" #@param {type:"string"}
# Build the cache file names INSIDE RESIZED_DIR. Plain concatenation without a
# separator produced "/content/natural-imagestrain_x_..." (a sibling of the
# directory). The names encode the image size and dataset cap so that changing
# either setting does not reuse a stale cache.
RESIZED_X = "{}/train_x_{}_{}.npy".format(RESIZED_DIR.rstrip("/"), IMG_DIMS, MAX_DATA_SIZE)
RESIZED_Y = "{}/train_y_{}_{}.npy".format(RESIZED_DIR.rstrip("/"), IMG_DIMS, MAX_DATA_SIZE)
# 'x' -> image array file, 'y' -> encoded label array file.
RESIZED_PATH = {
    'x': RESIZED_X,
    'y': RESIZED_Y
}
#@markdown ---


#@markdown Hyperparameters
#@markdown ---
# Number of training epochs (default: 50).
NUM_EPOCHS = 50 #@param {type:"slider", min:10, max:300, step:5}
# Mini-batch size used during training (default: 128).
BATCH_SIZE = 256 #@param {type:"slider", min:32, max:512, step:32}
# Learning rate handed to the Adam optimizer.
INIT_LR = 0.001 #@param {type:"number"}
#@markdown ---

#@markdown Benchmarks, tests and Outputs
#@markdown ---
# Root directory that is walked recursively to find the dataset images.
IMG_DIR = "/content/natural-images/data" #@param {type:"string"}
# NOTE(review): not referenced in the visible cells; presumably where results are written - confirm.
OUTPUT_PATH = "/content/gdrive/MyDrive/Colab Files/vision transformer/classification/natural-images/" #@param {type:"string"}
#@markdown ---


#@markdown Transformer Parameters
#@markdown ---
patch_size = 6 #@param {type:"slider", min:2, max:20, step:1} # Side length, in pixels, of the square patches extracted from the input images
# Width of the patch embedding; also used as the attention key dimension.
projection_dim = 64 #@param {type:"slider", min:16, max:96, step:16}
# Number of attention heads in every transformer block.
num_heads = 4 #@param {type:"slider", min:1, max:10, step:1}
# Number of stacked transformer blocks.
transformer_layers = 8 #@param {type:"slider", min:4, max:16, step:1}

# weight_decay = 0.0001
# Patches are non-overlapping, so an image yields (IMG_DIMS // patch_size) patches per side.
num_patches = (IMG_DIMS // patch_size) ** 2
transformer_units = [
    projection_dim * 2,
    projection_dim,
]  # Widths of the two dense layers of each block's feed-forward MLP
mlp_head_units = [2048, 1024]  # Size of the dense layers of the final classifier

## Imports

In [3]:
from tensorflow.keras.layers import BatchNormalization
from tensorflow.keras.layers import Conv2D
from tensorflow.keras.layers import Conv2DTranspose
from tensorflow.keras.layers import LeakyReLU
from tensorflow.keras.layers import Activation
from tensorflow.keras.layers import Flatten
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import Reshape
from tensorflow.keras.layers import Input
from tensorflow.keras.layers import Lambda
from tensorflow.keras.models import Model
from tensorflow.keras import backend as K
K.clear_session()
import numpy as np

import matplotlib
matplotlib.use("Agg")

from tensorflow.keras.optimizers import Adam
import matplotlib.pyplot as plt
import cv2

import os
import random
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import Embedding
from tensorflow.keras.layers import MultiHeadAttention
from tensorflow.keras.layers import Flatten
from tensorflow.keras.layers import Dropout
from tensorflow.keras.layers import LayerNormalization
from tensorflow.keras.layers import Add
from tensorflow.keras import Model
from sklearn.preprocessing import LabelEncoder
import tensorflow.keras

import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
import tensorflow_addons as tfa

## Custom Layers

In [4]:
def mlp(x, hidden_units, dropout_rate):
    """Stack Dense(GELU) + Dropout pairs on top of ``x``.

    Args:
        x: Input Keras tensor.
        hidden_units: Iterable with the width of each dense layer, in order.
        dropout_rate: Dropout rate applied after every dense layer.

    Returns:
        The output tensor of the last dropout layer, or ``x`` unchanged when
        ``hidden_units`` is empty.
    """
    out = x
    for width in hidden_units:
        hidden = layers.Dense(width, activation=tf.nn.gelu)(out)
        out = layers.Dropout(dropout_rate)(hidden)
    return out

In [5]:
class Patches(layers.Layer):
    """Split a batch of images into flattened, non-overlapping square patches.

    Args:
        patch_size: Side length, in pixels, of each square patch.
        **kwargs: Standard Keras layer arguments (``name``, ``dtype``, ...),
            forwarded to the base class so the layer can be rebuilt from its
            config.
    """

    def __init__(self, patch_size, **kwargs):
        super(Patches, self).__init__(**kwargs)
        self.patch_size = patch_size

    def call(self, images):
        """Return the patches of ``images`` as a ``[batch, n_patches, patch_dims]`` tensor."""
        batch_size = tf.shape(images)[0]
        # Stride == patch size and VALID padding: patches tile the image
        # without overlapping, and any incomplete border patch is dropped.
        patches = tf.image.extract_patches(
            images=images,
            sizes=[1, self.patch_size, self.patch_size, 1],
            strides=[1, self.patch_size, self.patch_size, 1],
            rates=[1, 1, 1, 1],
            padding="VALID",
        )
        # Last axis holds one flattened patch; collapse the patch grid into a sequence.
        patch_dims = patches.shape[-1]
        patches = tf.reshape(patches, [batch_size, -1, patch_dims])
        return patches

    def get_config(self):
        """Return the constructor arguments so Keras can serialize and reload the layer."""
        config = super(Patches, self).get_config()
        config.update({"patch_size": self.patch_size})
        return config


In [6]:
class PatchEncoder(layers.Layer):
    """Linearly project patches and add a learned position embedding.

    Args:
        num_patches: Number of patches per image (length of the patch sequence).
        projection_dim: Width of the projected patch vectors and of the
            position embeddings.
        **kwargs: Standard Keras layer arguments (``name``, ``dtype``, ...),
            forwarded to the base class so the layer can be rebuilt from its
            config.
    """

    def __init__(self, num_patches, projection_dim, **kwargs):
        super(PatchEncoder, self).__init__(**kwargs)
        self.num_patches = num_patches
        # Kept so get_config() can report it.
        self.projection_dim = projection_dim
        self.projection = Dense(units=projection_dim)
        self.position_embedding = Embedding(
            input_dim=num_patches, output_dim=projection_dim
        )

    def call(self, patch):
        """Return the projected patches plus the embedding of their positions."""
        positions = tf.range(start=0, limit=self.num_patches, delta=1)
        # The position embeddings broadcast over the batch axis.
        encoded = self.projection(patch) + self.position_embedding(positions)
        return encoded

    def get_config(self):
        """Return the constructor arguments so Keras can serialize and reload the layer."""
        config = super(PatchEncoder, self).get_config()
        config.update(
            {
                "num_patches": self.num_patches,
                "projection_dim": self.projection_dim,
            }
        )
        return config


## Prepare Kaggle-cli

In [7]:
%%shell

#!/bin/bash
# Install the Kaggle CLI and copy the API token from Google Drive, unless the
# token is already in place.

# Use $HOME instead of a quoted "~": tilde is not expanded inside quotes, so the
# previous check never matched and the setup was repeated on every run.
KAGGLE_JSON="$HOME/.kaggle/kaggle.json"

if [ -e "$KAGGLE_JSON" ]
then
    echo "kaggle.json already exists"
    echo "Skipping to next cell..."
else
    echo "Preparing Kaggle-cli..."
    pip install -q kaggle
    pip install -q kaggle-cli

    mkdir -p "$HOME/.kaggle"
    cp "/content/gdrive/MyDrive/Colab Files/Kaggle/kaggle.json" "$KAGGLE_JSON"
    # The token is intentionally NOT printed: echoing it would store the API key
    # in the notebook output.
    # The Kaggle client expects the token to be readable by the owner only.
    chmod 600 "$KAGGLE_JSON"
fi

Preparing Kaggle-cli...
[K     |████████████████████████████████| 81kB 5.6MB/s 
[K     |████████████████████████████████| 5.3MB 9.6MB/s 
[K     |████████████████████████████████| 51kB 8.5MB/s 
[K     |████████████████████████████████| 112kB 52.2MB/s 
[K     |████████████████████████████████| 143kB 61.8MB/s 
[?25h  Building wheel for kaggle-cli (setup.py) ... [?25l[?25hdone
  Building wheel for PrettyTable (setup.py) ... [?25l[?25hdone
  Building wheel for pyperclip (setup.py) ... [?25l[?25hdone
{"username":"<redacted>","key":"<redacted>"}



# Data Preprocessing

## Download and Unzip The DataSet



In [None]:
%%shell

#!/bin/bash
# Download the natural-images dataset from Kaggle and unzip it, once.
if [ -e "/content/natural-images.zip" ]
then
    echo "Dataset already downloaded"
    echo "Skipping to next cell..."
else
    echo "Downloading Dataset..."
    # -p pins the download directory so the archive lands where the check above
    # and the unzip below expect it, whatever the current directory is.
    # Stop on failure instead of carrying on to print "Done.".
    kaggle datasets download -d prasunroy/natural-images -p /content || exit 1
    echo "Unzipping Dataset..."
    # -q keeps the per-file listing out of the notebook output.
    unzip -q "/content/natural-images.zip" -d "/content/natural-images" || exit 1
    echo "Done."
fi

## Prepare and Save The DataSet

In [9]:
def _split_train_test(images, labels, split, train_ratio):
    """Slice ``images``/``labels`` into ``(train, test)`` pairs.

    The split point is derived from the number of samples actually present, so
    the requested ratio holds even when fewer images were loaded than the
    configured maximum. Without splitting, everything goes to the training pair
    and the test pair consists of empty slices.
    """
    total = len(images)
    if split:
        cut = int(total * train_ratio / 100)
        print ("train/test = ", cut, "/", total - cut)
    else:
        cut = total
    return (images[:cut], labels[:cut]), (images[cut:], labels[cut:])


def prepare_dataset(ds_exists = False, img_dim = IMG_DIMS, save_resized = SAVE_RESIZED, resized_path = RESIZED_PATH,
                    max_data_size = MAX_DATA_SIZE, split_train_test = SPLIT_TRAIN_TEST, train_ratio = TRAIN_RATIO,
                    img_dir = IMG_DIR):
    """Load the image dataset, from the .npy cache or from the image files.

    Args:
        ds_exists: When True, load the arrays stored at ``resized_path``
            instead of reading the image files.
        img_dim: Side length every image is resized to.
        save_resized: When True, store the freshly built arrays at ``resized_path``.
        resized_path: Dict with the .npy file paths under keys ``'x'`` (images)
            and ``'y'`` (labels).
        max_data_size: Maximum number of images to load from ``img_dir``.
        split_train_test: When True, split the data into train and test parts.
        train_ratio: Percentage of the samples assigned to the training part.
        img_dir: Root directory that is walked recursively for image files.

    Returns:
        ``(x_train, y_train), (x_test, y_test)``. Images are float arrays scaled
        by 1/255; labels are the integer ids produced by ``LabelEncoder`` from
        each image's directory path. The test arrays are empty when
        ``split_train_test`` is False.

    Raises:
        ValueError: If no readable image is found under ``img_dir``.
    """
    if ds_exists:
        totalImages = np.load(resized_path['x'])
        totalY = np.load(resized_path['y'])
        return _split_train_test(totalImages, totalY, split_train_test, train_ratio)

    all_img_paths = [os.path.join(dp, f) for dp, _, fn in os.walk(img_dir) for f in fn]
    # Shuffle before truncating so the kept subset (and the later split) mixes all classes.
    random.shuffle(all_img_paths)

    images = []
    labels = []
    for image_path in all_img_paths:
        if len(images) >= max_data_size:
            break
        img = cv2.imread(image_path)
        if img is None:
            # cv2.imread returns None for files it cannot decode; skip them
            # instead of letting cv2.resize fail.
            continue
        images.append(cv2.resize(img, (img_dim, img_dim)))
        # The containing directory identifies the class of the image.
        labels.append(os.path.dirname(image_path))
    del all_img_paths

    if not images:
        raise ValueError("No readable images found under {}".format(img_dir))

    totalImages = np.array(images)
    del images  # release the Python list before allocating the float copy
    totalImages = totalImages.astype("float")/255
    totalY = LabelEncoder().fit_transform(np.array(labels))

    if save_resized:
        np.save(resized_path['x'], totalImages)
        np.save(resized_path['y'], totalY)
        print("Resized images saved in path: ", resized_path)

    return _split_train_test(totalImages, totalY, split_train_test, train_ratio)

In [None]:
# Reuse the cached arrays only when BOTH files are present; with just the label
# file on disk, loading the image array would fail.
cache_ready = all(os.path.isfile(path) for path in RESIZED_PATH.values())
if cache_ready:
    print(".npy file already exists")
    print("Reading the existing file...")
    (x_train, y_train), (x_test, y_test) = prepare_dataset(ds_exists = True)
else:
    (x_train, y_train), (x_test, y_test) = prepare_dataset()
    print("Dataset prepared")

print(f"x_train shape: {x_train.shape} - y_train shape: {y_train.shape}")
print(f"x_test shape: {x_test.shape} - y_test shape: {y_test.shape}")

# Transformer Model

In [11]:
def create_vit_classifier():
    """Assemble the Vision Transformer classifier as a Keras functional model.

    The network patches the input image, embeds the patches, runs them through
    ``transformer_layers`` blocks (self-attention and an MLP, each followed by a
    residual connection and layer normalisation) and classifies the flattened
    sequence with a dense head.

    Returns:
        An uncompiled Keras ``Model`` mapping images of shape
        ``(IMG_DIMS, IMG_DIMS, NUM_CHANS)`` to ``NUM_CLASSES`` logits.
    """
    image_input = Input((IMG_DIMS, IMG_DIMS, NUM_CHANS))

    # Image -> sequence of patches -> sequence of embedded tokens.
    patch_sequence = Patches(patch_size)(image_input)
    tokens = PatchEncoder(num_patches, projection_dim)(patch_sequence)

    for _ in range(transformer_layers):
        # Self-attention sub-layer with residual connection, then normalisation.
        attended = MultiHeadAttention(
            num_heads=num_heads, key_dim=projection_dim, dropout=0.1
        )(tokens, tokens)
        attn_residual = Add()([attended, tokens])
        attn_normed = LayerNormalization(epsilon=1e-6)(attn_residual)

        # Feed-forward sub-layer with residual connection, then normalisation.
        feed_forward = mlp(attn_normed, hidden_units=transformer_units, dropout_rate=0.1)
        ff_residual = Add()([attn_normed, feed_forward])
        tokens = LayerNormalization(epsilon=1e-6)(ff_residual)

    # Flatten the whole token sequence into one vector per image.
    flat_tokens = Flatten()(tokens)
    flat_tokens = Dropout(0.5)(flat_tokens)
    head_features = mlp(flat_tokens, hidden_units=mlp_head_units, dropout_rate=0.5)

    # Raw class scores; the softmax is applied inside the loss.
    class_logits = Dense(NUM_CLASSES)(head_features)
    return Model(inputs=image_input, outputs=class_logits)


# Training

In [12]:
# Build, compile, train and evaluate the Vision Transformer.
transformer = create_vit_classifier()

transformer.compile(
    # `learning_rate` is the supported keyword; `lr` is a deprecated alias that
    # newer Keras releases reject.
    optimizer=Adam(learning_rate=INIT_LR),
    # Labels are integer class ids and the model outputs raw logits.
    loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True),
    metrics=[
        keras.metrics.SparseCategoricalAccuracy(name="accuracy"),
    ],
)
# 10% of the training data is held out for validation during fitting.
history = transformer.fit(x=x_train, y=y_train, batch_size=BATCH_SIZE, epochs=NUM_EPOCHS, shuffle=True, validation_split=0.1,)
print("\nEvaluating System...")
_, accuracy = transformer.evaluate(x_test, y_test)
print("Accuracy: ", round(accuracy * 100, 2), "%")

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50

Evaluating System...
Accuracy:  92.37 %
