<a href="https://colab.research.google.com/github/MorningStarTM/Transformers-in-Vision/blob/main/ViT_for_classification.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!pip install patchify

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [2]:
import os
import numpy as np
import matplotlib.pyplot as plt
import cv2
import random
from glob import glob
import tensorflow as tf
from sklearn.utils import shuffle
from patchify import patchify
from tensorflow.keras.layers import *
from tensorflow.keras.models import Model
from tensorflow.keras.callbacks import *
from sklearn.model_selection import train_test_split
from sklearn import metrics

In [3]:
#Hyperparameter
hp = {}
hp['image_size'] = 200
hp['num_channel'] = 3
hp['patch_size'] = 25
hp['num_patches'] = (hp['image_size']**2) // (hp['patch_size']**2)
hp['flat_patches_shape'] = (hp['num_patches'], hp['patch_size']*hp['patch_size']*hp['num_channel'])

hp['batch_size'] = 16
hp['lr'] = 1e-4
hp['num_epochs'] = 100 #500
hp['num_classes'] = 9
hp['class_names'] = ["Ant", "Butterfly", "Cockroach", "Frog", "Grasshopper", "Honey bee", "Spider", "dragonfly", "lizard"]

hp["num_layers"] = 12
hp["hidden_dim"] = 500 #768
hp["mlp_dim"] = 3072
hp["num_heads"] = 12
hp["dropout_rate"] = 0.1

In [4]:
#!unzip "/content/drive/MyDrive/archive podiwije.zip" -d "/content/drive/MyDrive/DataSet/Insects/"

In [5]:
dataset_path = "/content/drive/MyDrive/DataSet/Insects/Reptiles-Insects"

In [6]:
def create_dir(path):
  if not os.path.exists(path):
    os.makedirs(path)

In [7]:
#function for load the data file
def load_data(path, split=0.1):
  images = shuffle(glob(os.path.join(path, "*", "*.jpg")))
  
  split_size = int(len(images) * split)
  #split the data 
  train_x, valid_x = train_test_split(images, test_size=split_size, random_state=42)
  train_x, test_x = train_test_split(train_x, test_size=split_size, random_state=42)

  return train_x, valid_x, test_x

In [8]:
def process_image_label(path):
  path = path.decode()
  #read image
  image = cv2.imread(path, cv2.IMREAD_COLOR)
  #resize the image
  image = cv2.resize(image, (hp['image_size'], hp['image_size']))
  #scale the image
  image = image/255.0
  

  #image into patch
  patch_shape = (hp['patch_size'], hp['patch_size'], hp['num_channel'])
  patches = patchify(image, patch_shape, hp['patch_size'])

  patches = np.reshape(patches, hp['flat_patches_shape'])
  patches = patches.astype(np.float32)
  

  #labeling
  class_name = path.split('/')[-2]
  class_idx = hp['class_names'].index(class_name)
  class_idx = np.array(class_idx, dtype=np.int32)
  

  return patches, class_idx

In [9]:
#we used opencv to read images not tensorflow. so we need to use tf.numpy_function to use these function in tensorflow
def parse(path):
  patches, labels = tf.numpy_function(process_image_label, [path], [tf.float32, tf.int32])
  labels = tf.one_hot(labels, hp['num_classes'])

  patches.set_shape(hp['flat_patches_shape'])
  labels.set_shape(hp['num_classes'])

  return patches, labels

In [10]:
def tf_dataset(images, batch=32):
  dataset = tf.data.Dataset.from_tensor_slices((images))
  dataset = dataset.map(parse).batch(batch).prefetch(8)
  return dataset

In [11]:
train_x, valid_x, test_x = load_data(dataset_path)

In [12]:
print(f"Train: {len(train_x)} Valid: {len(valid_x)} Test: {len(test_x)}")

Train: 680 Valid: 85 Test: 85


In [13]:
train_dataset = tf_dataset(train_x, batch=hp['batch_size'])
valid_dataset = tf_dataset(valid_x, batch=hp['batch_size'])

#Model

##Transformer Encoder

In [14]:
#Configuratin parameters
config = {}
config['num_layers'] = 12
config['hidden_dim'] = 768
config['mlp_dim'] = 3072
config['num_heads'] = 12
config['dropout_rate'] = 0.1
config['num_patches'] = 256
config['patch_size'] = 32
config['num_channels'] = 3
config["num_classes"] = 9

In [15]:
def mlp(inputs, config):
  inputs = Dense(config["mlp_dim"], activation='gelu')(inputs)
  inputs = Dropout(config["dropout_rate"])(inputs)
  inputs = Dense(config["hidden_dim"])(inputs)
  inputs = Dropout(config["dropout_rate"])(inputs)
  return inputs  

In [16]:
def transformer_encoder(inputs, config):
  skip_1 = inputs
  inputs = LayerNormalization()(inputs)
  inputs = MultiHeadAttention(
      num_heads=config["num_heads"],
      key_dim=config["hidden_dim"]
  )(inputs, inputs)
  inputs = Add()([inputs, skip_1])

  skip_2 = inputs
  inputs = LayerNormalization()(inputs)
  inputs = mlp(inputs, config)
  inputs = Add()([inputs, skip_2])

  return inputs

##Embedding

In [17]:
class ClassToken(Layer):
  def __init__(self):
    super().__init__()

  def build(self, input_shape):
    w_init = tf.random_normal_initializer()
    self.w = tf.Variable(
        initial_value = w_init(shape=(1, 1, input_shape[-1]), dtype=tf.float32), 
        trainable = True
    )

  def call(self, inputs):
    batch_size = tf.shape(inputs)[0]
    hidden_dim = self.w.shape[-1]

    #reshape
    cls = tf.broadcast_to(self.w, [batch_size, 1, hidden_dim])
    #change data type
    cls = tf.cast(cls, dtype=inputs.dtype)
    return cls

In [18]:
def ViT(config):
  #input layer
  input_shape = (hp['num_patches'], hp['patch_size']*hp['patch_size']*hp['num_channel'])
  inputs = Input(input_shape)
  
  #patch and position embedding
  patch_embedding = Dense(config['hidden_dim'])(inputs)
  
  positions = tf.range(start=0, limit=hp['num_patches'], delta=1)
  position_embedding = Embedding(input_dim=hp['num_patches'], output_dim=config['hidden_dim'])(positions)
  embed = patch_embedding + position_embedding
  
  #Adding class token
  token = ClassToken()(embed)
  concate = Concatenate(axis=1)([token, embed])
  for _ in range(config["num_layers"]):
    concate = transformer_encoder(concate, config)
  
  #Classification Head
  mlp_head_input = LayerNormalization()(concate)
  mlp_head_input = mlp_head_input[:, 0, :]
  mlp_head_input = Dense(config["num_classes"], activation="softmax")(mlp_head_input)

  model = Model(inputs, mlp_head_input)
  return model

In [19]:
model_path = "/content/drive/MyDrive/CNN_Models/ViT_for_insect_classification.h5"
csv_path = "/content/drive/MyDrive/Model CSV/ViT_for_insect_classification.csv"

In [20]:
model = ViT(hp)
model.summary()

Model: "model"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_1 (InputLayer)           [(None, 64, 1875)]   0           []                               
                                                                                                  
 dense (Dense)                  (None, 64, 500)      938000      ['input_1[0][0]']                
                                                                                                  
 tf.__operators__.add (TFOpLamb  (None, 64, 500)     0           ['dense[0][0]']                  
 da)                                                                                              
                                                                                                  
 class_token (ClassToken)       (None, 1, 500)       500         ['tf.__operators__.add[0][0]'

In [21]:
model.compile(
    loss="categorical_crossentropy",
    optimizer=tf.keras.optimizers.Adam(hp["lr"], clipvalue=1.0),
    metrics=['acc']
)

In [22]:
callbacks = [
    ModelCheckpoint(model_path, monitor='val_loss', verbose=1, save_best_only=True),
    ReduceLROnPlateau(monitor='val_loss', factor=0.1,   patience=10, min_lr=1e-10),
    CSVLogger(csv_path),
    EarlyStopping(monitor='val_loss', patience=50, restore_best_weights=False)
]

In [23]:
model.fit(
    train_dataset,
    epochs=hp["num_epochs"],
    validation_data=valid_dataset,
    callbacks=callbacks
)

Epoch 1/100
Epoch 1: val_loss improved from inf to 2.47605, saving model to /content/drive/MyDrive/CNN_Models/ViT_for_insect_classification.h5
Epoch 2/100
Epoch 2: val_loss improved from 2.47605 to 2.34911, saving model to /content/drive/MyDrive/CNN_Models/ViT_for_insect_classification.h5
Epoch 3/100
Epoch 3: val_loss improved from 2.34911 to 2.30035, saving model to /content/drive/MyDrive/CNN_Models/ViT_for_insect_classification.h5
Epoch 4/100
Epoch 4: val_loss improved from 2.30035 to 2.27159, saving model to /content/drive/MyDrive/CNN_Models/ViT_for_insect_classification.h5
Epoch 5/100
Epoch 5: val_loss improved from 2.27159 to 2.23623, saving model to /content/drive/MyDrive/CNN_Models/ViT_for_insect_classification.h5
Epoch 6/100
Epoch 6: val_loss improved from 2.23623 to 2.14624, saving model to /content/drive/MyDrive/CNN_Models/ViT_for_insect_classification.h5
Epoch 7/100
Epoch 7: val_loss improved from 2.14624 to 2.05248, saving model to /content/drive/MyDrive/CNN_Models/ViT_for_

<keras.callbacks.History at 0x7f6610ec9bb0>

#Prediction

In [24]:
test_dataset = tf_dataset(test_x, batch=hp['batch_size'])

In [25]:
model.evaluate(test_dataset)



[3.0083696842193604, 0.364705890417099]