In [1]:
# #In lambdalabs jupyter lab instance, run these:
# pip install transformers
# pip install tf-keras
# pip install --upgrade "numpy<2"
# pip install datasets
# pip install --upgrade datasets pillow
# pip install --upgrade "accelerate>=0.26.0"
# #then check for dependency conflicts
# pip check
# #if pip check reports any issues, run:
# pip install debugpy
# pip install --upgrade argcomplete
# sudo apt-get install python3-cairo

In [4]:
import os
import numpy as np
import subprocess
import tensorflow as tf
import torch
import torchvision.transforms as T
from datasets import load_dataset, Image as DatasetsImage
from functools import partial
from io import BytesIO
from transformers import (
    AutoImageProcessor, 
    AutoModelForImageClassification, 
    EarlyStoppingCallback,
    TrainingArguments, 
    Trainer
)
from torch import nn
from PIL import Image, ImageOps, ExifTags, UnidentifiedImageError

In [6]:
# --------------------------
# 0. GPU Environment Setup (restrict this process to a single GPU)
# --------------------------
# Expose only physical GPU index 1 to CUDA-based libraries in this process.
# NOTE(review): this assignment runs after `import tensorflow` / `import torch`;
# it presumably only takes effect because neither library has initialised CUDA
# yet — confirm, or move it above those imports.
os.environ["CUDA_VISIBLE_DEVICES"] = "1"
print("Process restricted to GPUs:", os.environ["CUDA_VISIBLE_DEVICES"])

# Ensure pip-installed executables are available.
# Only prepend when the directory is missing so that re-running this cell does
# not keep growing PATH, and tolerate PATH being unset.
local_bin = os.path.expanduser("~/.local/bin")
current_path = os.environ.get("PATH", "")
if local_bin not in current_path.split(os.pathsep):
    os.environ["PATH"] = (
        f"{local_bin}{os.pathsep}{current_path}" if current_path else local_bin
    )

# Enable memory growth for TensorFlow so it does not reserve all GPU memory up front.
gpus = tf.config.list_physical_devices('GPU')
if gpus:
    try:
        for gpu in gpus:
            tf.config.experimental.set_memory_growth(gpu, True)
        print("Memory growth enabled on GPUs.")
    except RuntimeError as e:
        # Reported instead of raised so the rest of the cell still runs.
        print("Error configuring GPUs:", e)
print("GPUs available to this process (as seen by TensorFlow):", tf.config.list_physical_devices('GPU'))

# Optional: monitor current GPU usage. Because this is purely informational,
# a missing or failing `nvidia-smi` is reported rather than aborting the cell.
try:
    gpu_usage = subprocess.check_output(["nvidia-smi"]).decode("utf-8")
except (OSError, subprocess.CalledProcessError) as e:
    gpu_usage = f"(nvidia-smi unavailable: {e})"
print("Current GPU usage:\n", gpu_usage)

Process restricted to GPUs: 1
Memory growth enabled on GPUs.
GPUs available to this process (as seen by TensorFlow): [PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]
Current GPU usage:
 Sat Mar 22 18:30:10 2025       
+-----------------------------------------------------------------------------------------+
| NVIDIA-SMI 570.124.06             Driver Version: 570.124.06     CUDA Version: 12.8     |
|-----------------------------------------+------------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id          Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |           Memory-Usage | GPU-Util  Compute M. |
|                                         |                        |               MIG M. |
|   0  NVIDIA RTX A6000               On  |   00000000:07:00.0 Off |                  Off |
| 30%   35C    P8             27W /  300W |       5MiB /  49140MiB |      0%      Default |
|                           

In [7]:
# --------------------------
# 1. Reload Pretrained Model and Processor
# --------------------------
checkpoint = "trpakov/vit-face-expression"
processor = AutoImageProcessor.from_pretrained(checkpoint, use_fast=True)
model = AutoModelForImageClassification.from_pretrained(checkpoint)
# Load fine-tuned model weights on top of the pretrained architecture.
#   map_location="cpu"  -> deserialise onto the CPU so loading does not depend on
#                          the GPU index the checkpoint happened to be saved from.
#   weights_only=True   -> restrict unpickling to tensors/plain containers, which
#                          is all a state_dict needs and avoids executing
#                          arbitrary pickled code.
model.load_state_dict(
    torch.load(
        "/home/ubuntu/MLexpressionsStorage/final_model_V1.pth",
        map_location="cpu",
        weights_only=True,
    )
)
# Put the model into evaluation mode (disables dropout etc. for consistent results).
# NOTE(review): the later trainer.train() call presumably switches the model back
# to training mode, so this only matters for inference done before training.
model.eval()

KeyboardInterrupt: 

In [7]:
# --------------------------
# 2. Prepare Dataset
# --------------------------
# Load the image-folder dataset rooted at the given directory ("train" split).
dataset = load_dataset(
    "imagefolder",
    data_dir="/home/ubuntu/MLexpressionsStorage/img_datasets/combo_ferckja_dataset",
    split="train",
)

# Lower-cased dataset label -> label name used for the pre-trained model.
# Note that 'contempt' is folded into 'Disgust'.
label_mapping = {
    'anger': 'Angry',
    'contempt': 'Disgust',
    'disgust': 'Disgust',
    'fear': 'Fear',
    'happiness': 'Happy',
    'sadness': 'Sad',
    'surprise': 'Surprise',
    'neutral': 'Neutral',
}

# Label name -> integer class id (the position in the list below).
# NOTE(review): confirm this ordering matches the checkpoint's own
# label2id / id2label configuration.
num_mapping = {
    label_name: class_id
    for class_id, label_name in enumerate(
        ['Angry', 'Disgust', 'Fear', 'Happy', 'Sad', 'Surprise', 'Neutral']
    )
}

def reconcile_labels(example):
    """Rewrite ``example["label"]`` as the integer class id used for training.

    An integer label is first converted to its string name through
    ``dataset.features["label"].int2str``; a string label is used as is.
    The name is stripped and lower-cased, translated via ``label_mapping``
    and then converted to an id via ``num_mapping``. Names missing from
    ``label_mapping`` are marked with ``-1`` so they can be filtered out.

    Args:
        example: a dataset row (dict-like) containing a ``"label"`` entry.

    Returns:
        The same ``example`` object, with ``"label"`` overwritten.
    """
    raw_label = example["label"]
    if isinstance(raw_label, int):
        # Integer ids are resolved against the module-level `dataset` features.
        raw_label = dataset.features["label"].int2str(raw_label)
    normalized = raw_label.strip().lower()

    mapped_name = label_mapping.get(normalized)
    # -1 flags an unrecognised label; otherwise store the numeric class id.
    example["label"] = -1 if mapped_name is None else num_mapping[mapped_name]
    return example

# Apply the reconciliation function to every example in the dataset.
dataset = dataset.map(reconcile_labels)
# Drop examples whose label was marked as unrecognised (-1).
# input_columns=["label"] hands only the label value to the predicate, so the
# other columns (notably the image) are not read just to inspect the label.
dataset = dataset.filter(lambda label: label != -1, input_columns=["label"])
print("Total examples after filtering:", len(dataset))

Map: 100%|██████████| 37081/37081 [00:03<00:00, 10619.98 examples/s]
Filter: 100%|██████████| 37081/37081 [00:47<00:00, 788.52 examples/s]

Total examples after filtering: 37081





In [8]:
# --------------------------
# 3. Define Data Augmentation and Preprocessing Transformation
# --------------------------

# Lightweight torchvision augmentation pipeline; the steps run in the order listed.
data_augment = T.Compose(
    [
        # Mirror the image left/right with probability 0.5.
        T.RandomHorizontalFlip(p=0.5),
        # Rotate by a random angle within ±10 degrees.
        T.RandomRotation(degrees=10),
        # Slightly perturb brightness and contrast.
        T.ColorJitter(brightness=0.1, contrast=0.1),
    ]
)

def transform_function(example, processor):
    """Augment one example's image and turn it into model inputs.

    Steps: make sure ``example["image"]`` is an RGB PIL image (opening it with
    ``Image.open`` and/or converting it when necessary, and writing the result
    back into ``example``), run it through ``data_augment``, then through
    ``processor`` with ``return_tensors="pt"``.

    Args:
        example: dataset row with an ``"image"`` and a ``"label"`` entry.
        processor: callable image processor returning a mapping of tensors.

    Returns:
        A plain dict with every processor output squeezed along dim 0, plus a
        ``"labels"`` entry copied from ``example["label"]``.
    """
    image = example["image"]
    # Anything that is not already a PIL image is handed to Image.open.
    if not isinstance(image, Image.Image):
        image = Image.open(image)
    # Normalise the colour mode to RGB.
    if image.mode != "RGB":
        image = image.convert("RGB")
    # Keep the (possibly converted) image on the example, as callers may read it.
    example["image"] = image

    encoded = processor(data_augment(image), return_tensors="pt")

    # Remove the leading batch dimension the processor adds for a single image.
    features = {}
    for name, tensor in encoded.items():
        features[name] = tensor.squeeze(0)

    # Attach the target under the key name "labels".
    features["labels"] = example["label"]
    return features

# Run transform_function over every example, supplying the processor as a
# keyword argument.
# NOTE(review): map() computes the result once, so each image presumably gets a
# single fixed "random" augmentation, and it is applied before the later
# train/validation split (so evaluation images are augmented too) — confirm
# this is intended.
dataset = dataset.map(transform_function, fn_kwargs={"processor": processor})

Map: 100%|██████████| 37081/37081 [05:25<00:00, 113.84 examples/s]


In [9]:
# --------------------------
# 4. Train-Validation Split
# --------------------------
# Hold out 20% of the examples for evaluation. The fixed seed makes the shuffle —
# and therefore which examples land in the evaluation set — reproducible
# across kernel restarts, so accuracy numbers from different runs are comparable.
split_dataset = dataset.train_test_split(test_size=0.2, seed=42)
train_dataset = split_dataset["train"]
eval_dataset = split_dataset["test"]

In [10]:
# --------------------------
# 5. Define Training Arguments for Robust Fine-Tuning
# --------------------------
training_args = TrainingArguments(
    # --- where things are written ---
    output_dir="./finetuned_vit_model",    # checkpoints and the final model
    logging_dir="./logs",                  # TensorBoard logs
    # --- optimisation schedule ---
    num_train_epochs=5,                    # a few epochs of fine-tuning (adjust as needed)
    learning_rate=5e-5,                    # conservative learning rate for fine-tuning
    per_device_train_batch_size=8,         # per-device batch size; lower it if memory runs out
    per_device_eval_batch_size=8,
    # --- evaluation / checkpointing: both once per epoch ---
    eval_strategy="epoch",
    save_strategy="epoch",
    # --- best-model selection ---
    metric_for_best_model="accuracy",      # the metric compared between checkpoints
    load_best_model_at_end=True,           # reload the best checkpoint when training finishes
)

In [11]:
# --------------------------
# 6. Define a Compute Metrics Function for Evaluation
# --------------------------
def compute_metrics(eval_pred):
    """Compute top-1 accuracy from a ``(logits, labels)`` pair.

    Args:
        eval_pred: an object that unpacks into ``(logits, labels)``; the class
            scores are expected along the last axis of ``logits``.

    Returns:
        ``{"accuracy": fraction of examples whose arg-max class equals the label}``.
    """
    scores, targets = eval_pred
    # The predicted class is the index of the largest score on the last axis.
    predicted = np.argmax(scores, axis=-1)
    return {"accuracy": np.mean(predicted == targets)}

In [12]:
# --------------------------
# 7. Initialize and Run Trainer for Fine-Tuning
# --------------------------
# Wire the model, the training configuration, both dataset splits and the
# accuracy metric into a Trainer.
# NOTE(review): EarlyStoppingCallback is imported at the top of the notebook but
# is not passed here, so all configured epochs are run.
trainer = Trainer(
    args=training_args,
    model=model,
    compute_metrics=compute_metrics,
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,
)

# Fine-tune the model; as the cell's last expression, the returned training
# summary is displayed.
trainer.train()



Epoch,Training Loss,Validation Loss,Accuracy
1,0.4672,0.400362,0.861669
2,0.1554,0.428464,0.870568
3,0.0464,0.535104,0.866388
4,0.0148,0.584571,0.869085
5,0.0049,0.585619,0.872994




TrainOutput(global_step=4635, training_loss=0.13368147416472306, metrics={'train_runtime': 14035.5202, 'train_samples_per_second': 10.567, 'train_steps_per_second': 0.33, 'total_flos': 1.1494126967676273e+19, 'train_loss': 0.13368147416472306, 'epoch': 5.0})

In [13]:
# --------------------------
# 8. Save Final Independent Model
# --------------------------
# Self-contained export directory: model via save_pretrained ...
model.save_pretrained("/home/ubuntu/MLexpressionsStorage/vit_final_independent_V4")
# ... plus a bare state_dict file with just the weights ...
torch.save(model.state_dict(), '/home/ubuntu/MLexpressionsStorage/final_model_V4.pth')
# ... and the processor next to the exported model (kept last so its return
# value remains the cell output).
processor.save_pretrained("/home/ubuntu/MLexpressionsStorage/vit_final_independent_V4")

['/home/ubuntu/MLexpressionsStorage/vit_final_independent_V4/preprocessor_config.json']

In [14]:
# ----------------------------------------------------------------------
# 9. (Optional) Monitor GPU Usage
# ----------------------------------------------------------------------
import subprocess
# Run nvidia-smi, capture its stdout and fail on a non-zero exit status.
gpu_usage = subprocess.run(
    ["nvidia-smi"], stdout=subprocess.PIPE, check=True
).stdout.decode("utf-8")
print("Current GPU usage:\n", gpu_usage)

Current GPU usage:
 Fri Mar 21 22:51:48 2025       
+-----------------------------------------------------------------------------------------+
| NVIDIA-SMI 570.124.06             Driver Version: 570.124.06     CUDA Version: 12.8     |
|-----------------------------------------+------------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id          Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |           Memory-Usage | GPU-Util  Compute M. |
|                                         |                        |               MIG M. |
|   0  Tesla V100-SXM2-16GB           On  |   00000000:07:00.0 Off |                  Off |
| N/A   45C    P0             63W /  300W |    5670MiB /  16384MiB |      0%      Default |
|                                         |                        |                  N/A |
+-----------------------------------------+------------------------+----------------------+
|   1  Tesla V100-SXM2-16GB 