In [None]:
import tensorflow as tf
import matplotlib.pyplot as plt
import tensorflow_datasets as tfds
import tensorflow_probability as tfp
from tensorflow.keras.layers import Conv2D, MaxPool2D, Dense, Flatten, InputLayer,BatchNormalization,Input,Layer,Dropout,Resizing,Rescaling,RandomRotation,RandomFlip,RandomContrast,Embedding,LayerNormalization,MultiHeadAttention,Add,Permute
from tensorflow.keras.models import Model
from tensorflow.keras import Sequential
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.losses import BinaryCrossentropy,CategoricalCrossentropy
from tensorflow.keras.metrics import BinaryAccuracy , FalsePositives,FalseNegatives,TruePositives,TrueNegatives,Precision,Recall,AUC,CategoricalAccuracy,TopKCategoricalAccuracy
from tensorflow.keras.callbacks import Callback,CSVLogger,EarlyStopping,ModelCheckpoint,ReduceLROnPlateau,TensorBoard
from tensorflow.keras.regularizers import L1,L2
from tensorflow.image import flip_left_right,random_flip_up_down,rot90,adjust_brightness,random_saturation,central_crop,adjust_saturation,crop_to_bounding_box,pad_to_bounding_box
from tensorflow.train import BytesList,FloatList,Int64List,Example,Features,Feature
import sklearn as sl
from sklearn.metrics import confusion_matrix,roc_curve
import seaborn as sns
import numpy as np
import cv2
from google.colab import drive

In [None]:
!pip install -q kaggle

In [None]:
! mkdir ~/.kaggle
! cp kaggle.json ~/.kaggle/

In [None]:
!chmod 600 /root/.kaggle/kaggle.json

In [None]:
# Download the dataset archive with the Kaggle CLI
# (the recorded output shows it landing in /content).
!kaggle datasets download -d muhammadhananasghar/human-emotions-datasethes

Downloading human-emotions-datasethes.zip to /content
 97% 299M/309M [00:02<00:00, 120MB/s] 
100% 309M/309M [00:02<00:00, 115MB/s]


In [None]:
!unzip "/content/human-emotions-datasethes.zip" -d "/content/dataset/"

In [None]:
# Locations of the extracted image folders. Both splits live under the same
# root; the "test" folder is the one loaded as validation data.
_DATASET_ROOT = "/content/dataset/Emotions Dataset/Emotions Dataset"
train_dic = f"{_DATASET_ROOT}/train"
val_dic = f"{_DATASET_ROOT}/test"

# Central place for every tunable value used by the notebook.
CONFIGURATION = dict(
    # --- input pipeline ---
    BATCH_SIZE=32,
    IMG_SIZE=256,
    # --- optimisation ---
    LEARNING_RATE=1e-3,
    N_EPOCHS=20,
    DROPOUT_RATE=0.0,
    REGULARIZATION_RATE=0.0,
    # --- convolution / dense-head settings ---
    N_FILTERS=6,
    KERNEL_SIZE=3,
    N_STRIDES=1,
    POOL_SIZE=2,
    N_DENSE_1=1024,
    N_DENSE_2=128,
    # --- task ---
    NUM_CLASSES=3,
    # --- patching / projection ---
    PATCH_SIZE=16,
    PROJ_DIM=768,
    # Order here fixes the class-index order used for the labels.
    CLASS_NAMES=["angry", "happy", "sad"],
)

In [None]:
# Training split: batched, shuffled (fixed seed) RGB images resized to a
# square of IMG_SIZE pixels, with labels taken from the sub-folder names in
# the order given by CLASS_NAMES.
train_data = tf.keras.utils.image_dataset_from_directory(
    directory=train_dic,
    labels="inferred",
    label_mode="categorical",
    class_names=CONFIGURATION["CLASS_NAMES"],
    color_mode="rgb",
    batch_size=CONFIGURATION["BATCH_SIZE"],
    image_size=(CONFIGURATION["IMG_SIZE"],) * 2,
    shuffle=True,
    seed=99,
)

Found 6799 files belonging to 3 classes.


In [None]:
# Validation split, loaded with exactly the same settings as the training
# split (same class order, image size, batch size, shuffle seed).
val_data = tf.keras.utils.image_dataset_from_directory(
    directory=val_dic,
    labels="inferred",
    label_mode="categorical",
    class_names=CONFIGURATION["CLASS_NAMES"],
    color_mode="rgb",
    batch_size=CONFIGURATION["BATCH_SIZE"],
    image_size=(CONFIGURATION["IMG_SIZE"],) * 2,
    shuffle=True,
    seed=99,
)

Found 2278 files belonging to 3 classes.


In [None]:
# Let tf.data pick the prefetch buffer size for the training pipeline.
train_data = train_data.prefetch(buffer_size=tf.data.AUTOTUNE)

In [None]:
# Let tf.data pick the prefetch buffer size for the validation pipeline.
val_data = val_data.prefetch(buffer_size=tf.data.AUTOTUNE)

In [None]:
class PatchEncoder(Layer):
  """Cut images into square patches and embed each patch.

  Every non-overlapping patch is flattened, passed through a Dense projection
  of width ``HIDDEN_SIZE`` and summed with a learned embedding indexed by the
  patch position ``0 .. N_PATCHES-1``.

  Args:
    N_PATCHES: number of position embeddings. It has to equal the number of
      patches extracted from one image, otherwise the sum in ``call`` cannot
      broadcast.
    HIDDEN_SIZE: output width of the projection and of the position embedding.
    PATCH_SIZE: side length of a patch in pixels. ``None`` (default) falls
      back to ``CONFIGURATION["PATCH_SIZE"]``, which is what the layer always
      used before this parameter existed.
  """
  def __init__ (self,N_PATCHES,HIDDEN_SIZE,PATCH_SIZE=None):
    super(PatchEncoder,self).__init__(name= "Patch_Encoder")
    self.liner_projection = Dense(HIDDEN_SIZE)
    self.Embedding_projection = Embedding(N_PATCHES,HIDDEN_SIZE)
    self.N_PATCHES = N_PATCHES
    # Resolve the patch size once so the layer no longer depends on the
    # global being unchanged between construction and call.
    self.PATCH_SIZE = CONFIGURATION["PATCH_SIZE"] if PATCH_SIZE is None else PATCH_SIZE
  def call(self,x):
    """Return the embedded patches of ``x``.

    ``x`` is handed to ``tf.image.extract_patches``, so it is expected to be a
    4-D image batch (assumed channels-last — TODO confirm against callers).
    """
    # Window and stride are identical, so the patches tile the image without overlap.
    window = [1,self.PATCH_SIZE,self.PATCH_SIZE,1]
    patches = tf.image.extract_patches(
              images=x,
              sizes = window,
              strides = window,
              rates = [1,1,1,1],
              padding='VALID')
    # Collapse the patch grid into one sequence axis: (batch, n_patches, patch_values).
    patches = tf.reshape(patches,(tf.shape(patches)[0],-1,patches.shape[-1]))
    positions = tf.range(0,self.N_PATCHES,1)
    # The position embedding has no batch axis and broadcasts over the batch.
    output = self.liner_projection(patches) + self.Embedding_projection(positions)
    return output

In [None]:
class TransformerEncoder(Layer):
  """One pre-norm transformer encoder block.

  Computes::

      y   = input + MultiHeadAttention(LayerNorm(input))
      out = y     + MLP(LayerNorm(y))

  where the MLP is two GELU Dense layers of width ``HIDDEN_SIZE``.

  Args:
    N_HEADS: number of attention heads.
    HIDDEN_SIZE: width of the MLP layers; also passed as the per-head
      ``key_dim`` of the attention layer.
    name: optional layer name. Left as ``None`` Keras generates a unique
      name per instance, so stacked encoders no longer share one name.
  """
  def __init__ (self,N_HEADS,HIDDEN_SIZE,name=None):
    super(TransformerEncoder,self).__init__(name=name)

    self.layer_norm_1 = LayerNormalization()
    self.layer_norm_2 = LayerNormalization()

    # NOTE(review): key_dim is the size of EACH head, so this allocates
    # N_HEADS * HIDDEN_SIZE attention units. The usual ViT choice is
    # HIDDEN_SIZE // N_HEADS — confirm this is intentional.
    self.mult_head_att = MultiHeadAttention(N_HEADS,HIDDEN_SIZE)

    self.dense_1 = Dense(HIDDEN_SIZE,activation = tf.nn.gelu)
    self.dense_2 = Dense(HIDDEN_SIZE,activation = tf.nn.gelu)
  def call(self,input):
    """Apply self-attention and the MLP, each wrapped in a residual connection."""
    # Self-attention sub-block: normalise, attend, add the untouched input back.
    attn_in = self.layer_norm_1(input)
    attn_out = self.mult_head_att(attn_in,attn_in)
    x_1 = attn_out + input

    # MLP sub-block. The normalised tensor is kept in its own variable so the
    # skip connection below carries the un-normalised stream x_1 (previously
    # x_1 was overwritten by its LayerNorm before being added back).
    mlp_in = self.layer_norm_2(x_1)
    x_2 = self.dense_1(mlp_in)
    x_2 = self.dense_2(x_2)
    output = x_2 + x_1

    return output



In [None]:
class ViT(Model):
  """Vision-transformer classifier built from the notebook's custom layers.

  Pipeline: PatchEncoder -> ``N_LAYERS`` TransformerEncoder blocks -> flatten
  all tokens -> two GELU Dense layers -> softmax over
  ``CONFIGURATION["NUM_CLASSES"]`` classes.

  Args:
    N_HEADS: attention heads per encoder block.
    HIDDEN_SIZE: embedding width used by the patch encoder and the blocks.
    N_PATCHES: number of patches per image (forwarded to PatchEncoder).
    N_LAYERS: number of stacked encoder blocks.
    N_DENSE_UNITS: width of the two Dense layers in the classification head.
  """
  def __init__(self,N_HEADS,HIDDEN_SIZE,N_PATCHES,N_LAYERS,N_DENSE_UNITS):
    super(ViT,self).__init__(name="ViT")
    self.patch_encoder = PatchEncoder(N_PATCHES,HIDDEN_SIZE)
    self.trans_encoder = [TransformerEncoder(N_HEADS,HIDDEN_SIZE) for _ in range(N_LAYERS)]
    self.N_LAYERS = N_LAYERS
    # Created once here instead of on every forward pass.
    self.flatten = Flatten()
    self.dense_1 = Dense(N_DENSE_UNITS,activation =tf.nn.gelu )
    self.dense_2 = Dense(N_DENSE_UNITS,activation =tf.nn.gelu )
    self.dense_3 = Dense(CONFIGURATION["NUM_CLASSES"], activation = 'softmax')
  def call(self,input,training = None):
    """Return class probabilities for a batch of images.

    ``training`` defaults to ``None`` so Keras decides the mode (fit vs.
    predict / direct call); a ``True`` default would mark plain ``model(x)``
    calls as training steps. The flag is not read in this method.
    """
    x = self.patch_encoder(input)

    for encoder in self.trans_encoder:
      x = encoder(x)

    # Every token feeds the head (no class token / pooling).
    x = self.flatten(x)
    x = self.dense_1(x)
    x = self.dense_2(x)

    return self.dense_3(x)

In [None]:
# Derive the geometry from CONFIGURATION instead of repeating literals:
# N_PATCHES must match the patch grid produced by the patch encoder, so it is
# computed from IMG_SIZE and PATCH_SIZE rather than typed in by hand.
_patches_per_side = CONFIGURATION["IMG_SIZE"] // CONFIGURATION["PATCH_SIZE"]
vit  = ViT(
    N_HEADS = 8,
    HIDDEN_SIZE = CONFIGURATION["PROJ_DIM"],
    N_PATCHES = _patches_per_side ** 2,
    N_LAYERS = 4,
    N_DENSE_UNITS = 1024,
)
# One forward pass on a dummy batch creates the weights so summary() works.
vit(tf.zeros([CONFIGURATION["BATCH_SIZE"],CONFIGURATION["IMG_SIZE"],CONFIGURATION["IMG_SIZE"],3]))

<tf.Tensor: shape=(32, 3), dtype=float32, numpy=
array([[0.45988318, 0.3327576 , 0.20735922],
       [0.45988318, 0.3327576 , 0.20735922],
       [0.45988318, 0.3327576 , 0.20735922],
       [0.45988318, 0.3327576 , 0.20735922],
       [0.45988318, 0.3327576 , 0.20735922],
       [0.45988318, 0.3327576 , 0.20735922],
       [0.45988318, 0.3327576 , 0.20735922],
       [0.45988318, 0.3327576 , 0.20735922],
       [0.45988318, 0.3327576 , 0.20735922],
       [0.45988318, 0.3327576 , 0.20735922],
       [0.45988318, 0.3327576 , 0.20735922],
       [0.45988318, 0.3327576 , 0.20735922],
       [0.45988318, 0.3327576 , 0.20735922],
       [0.45988318, 0.3327576 , 0.20735922],
       [0.45988318, 0.3327576 , 0.20735922],
       [0.45988318, 0.3327576 , 0.20735922],
       [0.45988318, 0.3327576 , 0.20735922],
       [0.45988318, 0.3327576 , 0.20735922],
       [0.45988318, 0.3327576 , 0.20735922],
       [0.45988318, 0.3327576 , 0.20735922],
       [0.45988318, 0.3327576 , 0.20735922],
      

In [None]:
# Print the layer list and parameter counts of the custom ViT.
vit.summary()

Model: "ViT"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 Patch_Encoder (PatchEncode  multiple                  787200    
 r)                                                              
                                                                 
 Transfomer_Encoder (Transf  multiple                  20077824  
 ormerEncoder)                                                   
                                                                 
 Transfomer_Encoder (Transf  multiple                  20077824  
 ormerEncoder)                                                   
                                                                 
 Transfomer_Encoder (Transf  multiple                  20077824  
 ormerEncoder)                                                   
                                                                 
 Transfomer_Encoder (Transf  multiple                  20077824

In [None]:
# Configure training for the custom ViT. CategoricalCrossentropy matches the
# one-hot labels produced by label_mode='categorical'.
# metrics is passed as a list: Keras documents it as a list of metrics, and
# newer Keras releases reject a bare string.
vit.compile(
    optimizer = Adam(learning_rate=CONFIGURATION["LEARNING_RATE"]),
    loss = CategoricalCrossentropy(),
    metrics = ['accuracy']
)

In [None]:
# Train the custom ViT. val_data is evaluated at the end of every epoch so the
# history contains val_loss / val_accuracy next to the training metrics.
history = vit.fit(
    train_data,
    validation_data = val_data,
    epochs = CONFIGURATION["N_EPOCHS"],
    verbose = 1
)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20

In [None]:
!pip install transformers

In [None]:
# Pre-processing for the Hugging Face ViT:
#   1. resize to the 224x224 resolution of the checkpoint,
#   2. map pixel values from [0, 255] to [-1, 1] — the checkpoint's image
#      processor normalises with mean 0.5 / std 0.5, so rescaling to [0, 1]
#      alone leaves the inputs off-distribution,
#   3. move channels first, (H, W, C) -> (C, H, W), as the HF model expects.
resize_resacle_hf_vit = Sequential([
    Resizing(224,224),
    Rescaling(1./127.5, offset=-1),
    Permute((3,1,2))
])

In [None]:
# Imported here because the package is installed by an earlier cell of this notebook.
from transformers import AutoImageProcessor, TFViTModel

base_model = TFViTModel.from_pretrained("google/vit-base-patch16-224-in21k")

# Input side length comes from CONFIGURATION so it stays in sync with the
# image_size used when loading the datasets.
inputs = Input(shape= (CONFIGURATION["IMG_SIZE"],CONFIGURATION["IMG_SIZE"],3))
x = resize_resacle_hf_vit(inputs)
# [0] takes the first output of the ViT main layer (presumably
# last_hidden_state — confirm); [:,0,:] keeps only the token at sequence
# position 0 (presumably the [CLS] token) as the image representation.
x = base_model.vit(x)[0][:,0,:]
output = Dense(CONFIGURATION["NUM_CLASSES"], activation = 'softmax')(x)
hf_vit_model = tf.keras.Model(inputs = inputs,outputs = output)

In [None]:
# Print the layer list and parameter counts of the fine-tuning model.
hf_vit_model.summary()

In [None]:
# Configure fine-tuning of the pretrained ViT with its own, smaller learning
# rate (5e-5, versus CONFIGURATION["LEARNING_RATE"] for the scratch model).
# metrics is passed as a list: Keras documents it as a list of metrics, and
# newer Keras releases reject a bare string.
hf_vit_model.compile(
    optimizer = Adam(learning_rate=5e-5),
    loss = CategoricalCrossentropy(),
    metrics = ['accuracy']
)

In [None]:
# Fine-tune the pretrained ViT. val_data is evaluated at the end of every
# epoch so the history contains val_loss / val_accuracy.
# NOTE: this rebinds `history`, replacing the History of the custom ViT run.
history = hf_vit_model.fit(
    train_data,
    validation_data = val_data,
    epochs = 10,
    verbose = 1
)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
