# HUGGING FACE TRANSFORMER FINE TUNING

In [1]:
# ! pip install transformers
# ! pip install datasets

In [2]:
import os
import cv2
import numpy as np 
import pandas as pd 
import matplotlib.pyplot as plt
import seaborn as sns
import tensorflow as tf
from tensorflow.keras.layers import Input, Normalization, Conv2D, MaxPooling2D, Dense, Flatten, BatchNormalization, Dropout
from tensorflow.keras.metrics import BinaryAccuracy, FalsePositives, FalseNegatives, TrueNegatives, TruePositives, Precision, Recall, F1Score, AUC
from tensorflow.keras.regularizers import L2
import sklearn
from transformers import ViTFeatureExtractor, TFViTModel
import wandb
from wandb.integration.keras import WandbMetricsLogger, WandbModelCheckpoint

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
wandb.init(name = "Hugging Face Transformer", project="Emotion-Detection", entity="amanjn2003-santa-clara-university", settings=wandb.Settings(init_timeout=300))

[34m[1mwandb[0m: Currently logged in as: [33mamanjn2003[0m ([33mamanjn2003-santa-clara-university[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin
[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.


In [4]:
wandb.config = {
    "CLASS_NAMES" : ['angry', 'happy', 'sad'],
    "BATCH_SIZE" : 32,
    "IMAGE_SIZE" : 224,
    "LEARNING_RATE" : 5e-5, # Very low LR for Fine-Tuning
    "N_EPOCHS" : 20,
    "DROPOUT_RATE": 0.0,
    "REGULARIZATION_RATE" : 0.0,
    "N_FILTERS" : 6,
    "KERNEL_SIZE" : 3,
    "N_STRIDES" : 1,
    "POOL_SIZE" : 2,
    "N_DENSE_1" : 128,
    "N_DENSE_2" : 128,
    "NUM_CLASSES" : 3,
    "PATCH_SIZE" : 16,
}

CONFIGURATION = wandb.config

lossFunction = tf.keras.losses.CategoricalCrossentropy() # If Labels as One-Hot-Encodings
METRICS = [tf.keras.metrics.CategoricalAccuracy(name="accuracy"), tf.keras.metrics.TopKCategoricalAccuracy(k=2, name="top_k_accuracy")]

trainDirectory = "/Users/aman/Documents/Work/Machine Learning/Computer-Vision-TensorFlow/Human-Emotions-Detection/Dataset/Emotions Dataset/Emotions Dataset/train"
testDirectory = "/Users/aman/Documents/Work/Machine Learning/Computer-Vision-TensorFlow/Human-Emotions-Detection/Dataset/Emotions Dataset/Emotions Dataset/test"

trainDataset = tf.keras.utils.image_dataset_from_directory(
    trainDirectory,
    labels='inferred',
    label_mode='categorical',
    class_names=CONFIGURATION["CLASS_NAMES"],
    color_mode='rgb',
    batch_size=CONFIGURATION["BATCH_SIZE"],
    image_size=(CONFIGURATION["IMAGE_SIZE"], CONFIGURATION["IMAGE_SIZE"]),
    shuffle=True,
    seed=99,
    validation_split=0.2,
    subset='training',
)

valDataset = tf.keras.utils.image_dataset_from_directory(
    trainDirectory,
    labels='inferred',
    label_mode='categorical',
    class_names=CONFIGURATION["CLASS_NAMES"],
    color_mode='rgb',
    batch_size=CONFIGURATION["BATCH_SIZE"],
    image_size=(CONFIGURATION["IMAGE_SIZE"], CONFIGURATION["IMAGE_SIZE"]),
    shuffle=True,
    seed=99,
    validation_split=0.2,
    subset='validation',
)

testDataset = tf.keras.utils.image_dataset_from_directory(
    testDirectory,
    labels='inferred',
    label_mode='categorical',
    class_names=CONFIGURATION["CLASS_NAMES"],
    color_mode='rgb',
    batch_size=CONFIGURATION["BATCH_SIZE"],
    image_size=(CONFIGURATION["IMAGE_SIZE"], CONFIGURATION["IMAGE_SIZE"]),
    shuffle=True,
    seed=99,
    validation_split=None,
    subset=None,
)

trainDataset = trainDataset.prefetch(tf.data.AUTOTUNE)
testDataset = testDataset.prefetch(tf.data.AUTOTUNE)
valDataset = valDataset.prefetch(tf.data.AUTOTUNE)

Found 6799 files belonging to 3 classes.
Using 5440 files for training.


2025-02-01 23:17:20.310354: I metal_plugin/src/device/metal_device.cc:1154] Metal device set to: Apple M3 Pro
2025-02-01 23:17:20.310381: I metal_plugin/src/device/metal_device.cc:296] systemMemory: 18.00 GB
2025-02-01 23:17:20.310388: I metal_plugin/src/device/metal_device.cc:313] maxCacheSize: 6.00 GB
2025-02-01 23:17:20.310418: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:306] Could not identify NUMA node of platform GPU ID 0, defaulting to 0. Your kernel may not have been built with NUMA support.
2025-02-01 23:17:20.310433: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:272] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 0 MB memory) -> physical PluggableDevice (device: 0, name: METAL, pci bus id: <undefined>)


Found 6799 files belonging to 3 classes.
Using 1359 files for validation.
Found 2278 files belonging to 3 classes.


In [5]:
resizeRescaleHuggingFace = tf.keras.Sequential([
    tf.keras.layers.Resizing(224, 224, interpolation='bilinear', name='resize'),
    tf.keras.layers.Rescaling(1./255, name='rescale'),
    tf.keras.layers.Permute((3, 1, 2), name='permute')
])

## Loading Base Pre-Trained Model & Adding CLassifier Layers to it

In [6]:
baseModel= TFViTModel.from_pretrained("google/vit-base-patch16-224-in21k") # Feature Extractor

input = tf.keras.layers.Input(shape=(224, 224, 3), name="image")
x = resizeRescaleHuggingFace(input)
x = baseModel.vit(x)[0][:, 0, :]

output = tf.keras.layers.Dense(CONFIGURATION["NUM_CLASSES"], activation="softmax")(x)

HFModel = tf.keras.Model(input, output)

All PyTorch model weights were used when initializing TFViTModel.

All the weights of TFViTModel were initialized from the PyTorch model.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFViTModel for predictions without further training.


In [7]:
testImage = cv2.imread("/Users/aman/Documents/Work/Machine Learning/Computer-Vision-TensorFlow/Human-Emotions-Detection/Dataset/Emotions Dataset/Emotions Dataset/test/happy/2705.jpg_rotation_1.jpg")
testImage = cv2.resize(testImage, (CONFIGURATION["IMAGE_SIZE"], CONFIGURATION["IMAGE_SIZE"]))

HFModel.predict(tf.expand_dims(testImage, axis=0))

2025-02-01 23:17:23.675469: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:117] Plugin optimizer for device_type GPU is enabled.




array([[0.4328826 , 0.2650262 , 0.30209118]], dtype=float32)

In [8]:
HFModel.summary()

Model: "model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 image (InputLayer)          [(None, 224, 224, 3)]     0         
                                                                 
 sequential (Sequential)     (None, 3, 224, 224)       0         
                                                                 
 vit (TFViTMainLayer)        TFBaseModelOutputWithPo   86389248  
                             oling(last_hidden_state             
                             =(None, 197, 768),                  
                              pooler_output=(None, 7             
                             68),                                
                              hidden_states=None, at             
                             tentions=None)                      
                                                                 
 tf.__operators__.getitem (  (None, 768)               0     

In [9]:
HFModel.compile(
    optimizer=tf.keras.optimizers.legacy.Adam(learning_rate=CONFIGURATION["LEARNING_RATE"]),
    loss=lossFunction,
    metrics=METRICS
)

In [10]:
class logConfusionMatrix(tf.keras.callbacks.Callback):
    def on_epoch_end(self, epoch, logs):
        y_true = []
        y_pred = []
        for x, y in valDataset:
            y_true.extend(np.argmax(y, axis=1))
            y_pred.extend(np.argmax(self.model.predict(x), axis=1))
        wandb.log({"Confusion Matrix": wandb.plot.confusion_matrix(y_true=y_true,
                                                                    preds=y_pred,
                                                                    class_names=CONFIGURATION["CLASS_NAMES"])})

In [11]:
class logPredictionTable(tf.keras.callbacks.Callback):
    def on_epoch_end(self, epoch, logs):
        columns = ['Image', 'Label', 'Prediction']
        table = wandb.Table(columns=columns)
        for batch in valDataset.take(1):
            for image, label in zip(*batch):
                im = wandb.Image(image)
                y_pred= np.argmax(self.model.predict(tf.expand_dims(image, axis=0)), axis=1)[0]
                row = [im, CONFIGURATION['CLASS_NAMES'][np.argmax(label)], CONFIGURATION['CLASS_NAMES'][y_pred]]
                table.add_data(*row)
        wandb.log({"Predictions": table})

In [12]:
history = HFModel.fit(trainDataset,
                  validation_data = valDataset, 
                  epochs=CONFIGURATION["N_EPOCHS"],
                  verbose=1,
                  callbacks = [WandbMetricsLogger(), WandbModelCheckpoint("Models/EmotionDetectionViT.keras"), logConfusionMatrix(), logPredictionTable()]
)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [13]:
HFModel.evaluate(testDataset)



[0.5522751808166504, 0.8748902678489685, 0.9679543375968933]

In [14]:
wandb.run

In [15]:
wandb.finish()

0,1
epoch/accuracy,▁▅▇▇▇███████████████
epoch/epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
epoch/learning_rate,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch/loss,█▄▃▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch/top_k_accuracy,▁▅▇▇████████████████
epoch/val_accuracy,▁▆▆▇▄▅▇▅▇▅▆▆▆▆▆▇▇▆█▇
epoch/val_loss,▄▁▂▂▄▅▅▆▆█▆█▇▇▅▅▆▆▆▆
epoch/val_top_k_accuracy,▁▅█▇▄▆▄▆▅▂▅▆▅▄▅███▇▇

0,1
epoch/accuracy,0.97978
epoch/epoch,19.0
epoch/learning_rate,5e-05
epoch/loss,0.03924
epoch/top_k_accuracy,0.99871
epoch/val_accuracy,0.88595
epoch/val_loss,0.49485
epoch/val_top_k_accuracy,0.97057
