<a href="https://colab.research.google.com/github/Nobel712/Deep-learning-project/blob/main/ViT_flower_clf.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import os
os.environ['TF_CPP_MIN_LOG_LEVEL']='2'
import numpy as np
import cv2 
from glob import glob
from sklearn.utils import shuffle
from sklearn.model_selection import train_test_split
from patchify import patchify
import tensorflow as tf
from tensorflow.keras.callbacks import ModelCheckpoint,CSVLogger,ReduceLROnPlateau,EarlyStopping
from tensorflow.keras.layers import*
from tensorflow.keras.models import Model

In [2]:

from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [3]:
!cd /content/drive/MyDrive/Deep_Db/
!tar -xvzf flower_photos.tgz

tar (child): flower_photos.tgz: Cannot open: No such file or directory
tar (child): Error is not recoverable: exiting now
tar: Child returned status 2
tar: Error is not recoverable: exiting now


Cofigure

In [4]:
hp = {}
hp["image_size"] = 200
hp["num_channels"] = 3
hp["patch_size"] = 25
hp["num_patches"] = (hp["image_size"]**2) // (hp["patch_size"]**2)
hp["flat_patches_shape"] = (hp["num_patches"], hp["patch_size"]*hp["patch_size"]*hp["num_channels"])

hp["batch_size"] = 32
hp["lr"] = 1e-4
hp["num_epochs"] = 500
hp["num_classes"] = 5
hp["class_names"] = ["daisy", "dandelion", "roses", "sunflowers", "tulips"]

hp["num_layers"] = 12
hp["hidden_dim"] = 768
hp["mlp_dim"] = 3072
hp["num_heads"] = 12
hp["dropout_rate"] = 0.1
hp['num_heads']=12

VIT 

In [5]:
class ClassToken(Layer):
  def __init__(self):
    super().__init__()
  def build(self,input_shape):
    w_init=tf.random_normal_initializer()
    self.w=tf.Variable(initial_value=w_init(shape=(1,1,input_shape[-1]),dtype=tf.float32),
                       trainable=True)
  def call(self,inputs):
    batch_size=tf.shape(inputs)[0]
    hidden_dim=self.w.shape[-1]
    cls=tf.broadcast_to(self.w,[batch_size,1,hidden_dim])
    cls=tf.cast(cls,dtype=inputs.dtype)
    return cls

In [6]:
def mlp(x,cf):
  x=Dense(cf['mlp_dim'],activation='gelu')(x)
  x=Dropout(cf['dropout_rate'])(x)
  x=Dense(cf['hidden_dim'])(x)
  return x

In [7]:
def transformer_encoder(x ,cf):
  skip_1=x
  x=LayerNormalization()(x)
  x=MultiHeadAttention(
      num_heads=cf['num_heads'],key_dim=cf['hidden_dim']
  )(x,x)
  x=Add()([x,skip_1])
  skip_2=x
  x=LayerNormalization()(x)
  x=mlp(x,cf)
  x=Add()([x,skip_2])
  return x

In [8]:
def ViT(cf):
  input_shape=(cf['num_patches'],cf['patch_size']*cf['patch_size']*cf['num_channels'])
  inputs=Input(input_shape)
  patch_embed=Dense(cf['hidden_dim'])(inputs)
  positions=tf.range(start=0,limit=cf['num_patches'],delta=1)
  pos_embed=Embedding(input_dim=cf['num_patches'],output_dim=cf['hidden_dim'])(positions)
  embed=patch_embed+pos_embed ##(none,256,768)

  #adding class token
  token=ClassToken()(embed)
  x=Concatenate(axis=1)([token,embed]) ##(None, 257, 768)
  for _ in range(cf['num_layers']):
    x=transformer_encoder(x,cf) ##(None, 257, 768)
  x=LayerNormalization()(x)
  x=x[:,0,:]##(None, 768)
  x=Dense(cf['num_classes'],activation='softmax')(x)
  model=Model(inputs,x)
  return model

data load and processing

In [9]:

def creat_dir(path):
  if not os.path.exists(path):
    os.makedirs(path)

In [10]:
def load_data(path,split=0.1):
  images=shuffle(glob(os.path.join(path,'*',"*jpg")))
  split_size=int(len(images)*split)
  xtrain,xvalid=train_test_split(images,test_size=split_size,random_state=42)
  xtrain,xtest=train_test_split(xtrain,test_size=.1,random_state=42)
  return xtrain,xvalid,xtest


In [11]:
def process_image_label(path):
    """ Reading images """
    path = path.decode()
    image = cv2.imread(path, cv2.IMREAD_COLOR)
    image = cv2.resize(image, (hp["image_size"], hp["image_size"]))
    image = image/255.0

    """ Preprocessing to patches """
    patch_shape = (hp["patch_size"], hp["patch_size"], hp["num_channels"])
    patches = patchify(image, patch_shape, hp["patch_size"])

    # patches = np.reshape(patches, (64, 25, 25, 3))
    # for i in range(64):
    #     cv2.imwrite(f"files/{i}.png", patches[i])

    patches = np.reshape(patches, hp["flat_patches_shape"])
    patches = patches.astype(np.float32)

    """ Label """
    class_name = path.split("/")[-2]
    class_idx = hp["class_names"].index(class_name)
    class_idx = np.array(class_idx, dtype=np.int32)

    return patches, class_idx


In [12]:
def parse(path):
    patches, labels = tf.numpy_function(process_image_label, [path], [tf.float32, tf.int32])
    labels = tf.one_hot(labels, hp["num_classes"])

    patches.set_shape(hp["flat_patches_shape"])
    labels.set_shape(hp["num_classes"])

    return patches, labels

def tf_dataset(images, batch=32):
    ds = tf.data.Dataset.from_tensor_slices((images))
    ds = ds.map(parse).batch(batch).prefetch(8)
    return ds

In [13]:
if __name__=='__main__':
  np.random.seed(42)
  tf.random.set_seed(42)
  creat_dir('files')
  dataset_path='/content/drive/MyDrive/Deep_Db/flower_photos'
  model_path=os.path.join('files','model.h5')
  csv_path=os.path.join('files','log.csv')
  xtrain,xvalid,xtest=load_data(dataset_path)
  train_ds=tf_dataset(xtrain,batch=hp['batch_size'])
  valid_ds=tf_dataset(xvalid,batch=hp['batch_size'])
  

In [14]:
model=ViT(hp)
model.summary()

Model: "model"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_1 (InputLayer)           [(None, 64, 1875)]   0           []                               
                                                                                                  
 dense (Dense)                  (None, 64, 768)      1440768     ['input_1[0][0]']                
                                                                                                  
 tf.__operators__.add (TFOpLamb  (None, 64, 768)     0           ['dense[0][0]']                  
 da)                                                                                              
                                                                                                  
 class_token (ClassToken)       (None, 1, 768)       768         ['tf.__operators__.add[0][0]'

In [15]:
model.compile(
    loss='categorical_crossentropy',
    optimizer=tf.keras.optimizers.Adam(hp['lr'],clipvalue=1.0),
    metrics=['acc']
)
callbacks=[
    ModelCheckpoint(model_path,monitor='val_loss',verbose=1,save_best_only=True),
    ReduceLROnPlateau(monitor='val_loss',factor=0.1,patience=10,min_lr=1e-4),
    CSVLogger(csv_path),
    EarlyStopping(monitor='val_loss',patience=50,restore_best_weights=False)
]

In [None]:
model.fit(train_ds,epochs=10,validation_data=valid_ds,callbacks=callbacks)

Epoch 1/10
 1/93 [..............................] - ETA: 4:19:27 - loss: 2.3541 - acc: 0.0625