In [1]:
# #In lambdalabs jupyter lab instance, run these:
# pip install transformers
# pip install seaborn
# pip install tf-keras
# pip install --upgrade "numpy<2"
# pip install datasets
# pip install --upgrade datasets pillow
# pip install --upgrade "accelerate>=0.26.0"
# #then check dependency warnings
# pip check
# #if any issues run these SEPARATELY!
# pip install debugpy
# pip install --upgrade argcomplete
# # then install these
# sudo apt-get update
# sudo apt-get install python3-cairo

In [2]:
import matplotlib.pyplot as plt
import numpy as np
import os
import seaborn as sns
import subprocess
import tensorflow as tf
import torch
import torchvision.transforms as T
from collections import Counter
from datasets import load_dataset, Image as DatasetsImage
from datasets import Dataset, concatenate_datasets
from datetime import datetime
from functools import partial
from io import BytesIO
from pathlib import Path
from PIL import Image, ImageOps, ExifTags, UnidentifiedImageError
from sklearn.metrics import confusion_matrix, classification_report
from torch import nn
from torch.nn import functional as F
from torchvision.transforms import ToPILImage
from tqdm import tqdm
from transformers import (
    AutoImageProcessor, 
    AutoModelForImageClassification, 
    EarlyStoppingCallback,
    TrainingArguments, 
    Trainer
)

2025-03-25 21:13:46.539043: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-03-25 21:13:46.559725: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:467] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1742937226.579072   28305 cuda_dnn.cc:8579] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1742937226.585333   28305 cuda_blas.cc:1407] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
W0000 00:00:1742937226.603387   28305 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking 

In [3]:
# --------------------------
# GPU Environment Setup for Multi-GPU Optimization (GPUs 0-n)
# --------------------------
# Limit process to specific GPUs
os.environ["CUDA_VISIBLE_DEVICES"] = "0,1" #0, 1, 2, 3,...n
print("Process restricted to GPUs:", os.environ["CUDA_VISIBLE_DEVICES"])

# Ensure pip executables are available
os.environ["PATH"] = f"{os.path.expanduser('~/.local/bin')}:" + os.environ["PATH"]

# Enable memory growth for TensorFlow
gpus = tf.config.list_physical_devices('GPU')
if gpus:
    try:
        for gpu in gpus:
            tf.config.experimental.set_memory_growth(gpu, True)
        print("Memory growth enabled on GPUs.")
    except RuntimeError as e:
        print("Error configuring GPUs:", e)
print("GPUs available to this process (as seen by TensorFlow):", tf.config.list_physical_devices('GPU'))

# Optional: Monitor current GPU usage
gpu_usage = subprocess.check_output(["nvidia-smi"]).decode("utf-8")
print("Current GPU usage:\n", gpu_usage)

Process restricted to GPUs: 0,1
Memory growth enabled on GPUs.
GPUs available to this process (as seen by TensorFlow): [PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU'), PhysicalDevice(name='/physical_device:GPU:1', device_type='GPU')]
Current GPU usage:
 Tue Mar 25 21:13:59 2025       
+-----------------------------------------------------------------------------------------+
| NVIDIA-SMI 570.124.06             Driver Version: 570.124.06     CUDA Version: 12.8     |
|-----------------------------------------+------------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id          Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |           Memory-Usage | GPU-Util  Compute M. |
|                                         |                        |               MIG M. |
|   0  Tesla V100-SXM2-16GB           On  |   00000000:07:00.0 Off |                    0 |
| N/A   35C    P0             42W /  300W |       4M

In [4]:
# --------------------------
# 0. Global Configuration
# --------------------------
RUN_INFERENCE = True  # Toggle this off to disable running inference
IMAGE_DIR = "/home/ubuntu/MLexpressionsStorage/img_datasets/combo_ferckja_dataset"

In [5]:
# --------------------------
# 1. Load Pretrained Model and Processor
# --------------------------
model_path = "/home/ubuntu/MLexpressionsStorage/vit_final_independent_V4"

# Load the full model with its config and architecture
model = AutoModelForImageClassification.from_pretrained(model_path)
processor = AutoImageProcessor.from_pretrained(model_path)  

# Set model to eval mode
model.eval()

ViTForImageClassification(
  (vit): ViTModel(
    (embeddings): ViTEmbeddings(
      (patch_embeddings): ViTPatchEmbeddings(
        (projection): Conv2d(3, 768, kernel_size=(16, 16), stride=(16, 16))
      )
      (dropout): Dropout(p=0.0, inplace=False)
    )
    (encoder): ViTEncoder(
      (layer): ModuleList(
        (0-11): 12 x ViTLayer(
          (attention): ViTAttention(
            (attention): ViTSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
            )
            (output): ViTSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.0, inplace=False)
            )
          )
          (intermediate): ViTIntermediate(
            (dense): Linear(in_features=768, out_features=3072, bias=True)
            (intermed

In [6]:
# --------------------------
# 2. Load and Prepare Dataset
# --------------------------
dataset = load_dataset("imagefolder", data_dir="/home/ubuntu/MLexpressionsStorage/img_datasets/combo_ferckja_dataset", split="train")

# Update mapping using lowercase keys
label_mapping = {
    'anger': 'Angry', 'contempt': 'Disgust', 'disgust': 'Disgust',
    'fear': 'Fear', 'happiness': 'Happy', 'sadness': 'Sad',
    'surprise': 'Surprise', 'neutral': 'Neutral'
}

# Numerical mapping for the pre-trained model's labels.
num_mapping = {
    'Angry': 0, 'Disgust': 1, 'Fear': 2, 'Happy': 3,
    'Sad': 4, 'Surprise': 5, 'Neutral': 6
}

def reconcile_labels(example):
    # If the label is already an integer, convert it to a string using the dataset features.
    if isinstance(example["label"], int):
        # Use dataset.features["label"].int2str to get the string label.
        original_label = dataset.features["label"].int2str(example["label"]).strip().lower()
    else:
        original_label = example["label"].strip().lower()
    
    # Map the lowercased label to the pre-trained model's expected label.
    pretrain_label = label_mapping.get(original_label)
    
    if pretrain_label is None:
        # If not recognized, mark it for filtering.
        example["label"] = -1
    else:
        # Convert the mapped label to its corresponding integer.
        example["label"] = num_mapping[pretrain_label]
    return example

# Apply reconciliation function to dataset.
dataset = dataset.map(reconcile_labels)
# Filter out any examples that were marked as unrecognized.
dataset = dataset.filter(lambda x: x["label"] != -1)
print("Total examples after filtering:", len(dataset))

Total examples after filtering: 37081


In [7]:
# --------------------------
# 3. Define Data Augmentation and Preprocessing Transformation
# --------------------------

# Use torchvision transforms for lightweight CPU-based augmentation.
data_augment = T.Compose([
    T.RandomHorizontalFlip(),                # Random horizontal flip
    T.RandomRotation(10),                      # Random rotation within ±10 degrees
    T.ColorJitter(brightness=0.1, contrast=0.1)  # Slight brightness and contrast changes
])

def transform_function(example, processor):
    label = example["label"]

    # Heavier augmentation for rare classes (e.g. Disgust)
    if label == 1:  # Disgust (minority class)
        aug_pipeline = T.Compose([
            T.RandomResizedCrop(224, scale=(0.7, 1.0)),
            T.RandomHorizontalFlip(p=0.7),
            T.RandomRotation(20),
            T.ColorJitter(0.3, 0.3, 0.3, 0.1),
            T.RandomGrayscale(p=0.2)
        ])
    else:
        aug_pipeline = data_augment

    if example["image"].mode != "RGB":
        example["image"] = example["image"].convert("RGB")

    augmented_image = aug_pipeline(example["image"])
    inputs = processor(augmented_image, return_tensors="pt")
    inputs = {k: v.squeeze(0) for k, v in inputs.items()}
    inputs["labels"] = example["label"]
    return inputs

# Map the transformation to every example in the dataset.
dataset = dataset.map(partial(transform_function, processor=processor))

# def transform_function(example, processor):
#     # Ensure the image is loaded as a PIL image.
#     if not isinstance(example["image"], Image.Image):
#         example["image"] = Image.open(example["image"])
    
#     # Convert image to RGB mode if it isn't already.
#     if example["image"].mode != "RGB":
#         example["image"] = example["image"].convert("RGB")
    
#     # Apply data augmentation.
#     augmented_image = data_augment(example["image"])
    
#     # Process the augmented image using the pre-trained processor.
#     inputs = processor(augmented_image, return_tensors="pt")
#     inputs = {k: v.squeeze(0) for k, v in inputs.items()}
    
#     # Add the label (ensure the label is in the proper format, e.g. integer).
#     inputs["labels"] = example["label"]
#     return inputs

In [8]:
# --------------------------
# 4. Train-Validation Split
# --------------------------
split_dataset = dataset.train_test_split(test_size=0.2)
train_dataset = split_dataset["train"]
eval_dataset = split_dataset["test"]

In [9]:
# --------------------------
# 4 Oversample Underrepresented Classes
# --------------------------
def oversample_dataset(dataset):
    label_counts = Counter(dataset['label'])
    max_count = max(label_counts.values())
    label_datasets = []

    for label in sorted(label_counts):
        subset = dataset.filter(lambda x: x['label'] == label, num_proc=8)
        multiplier = max_count // len(subset)
        remainder = max_count % len(subset)
        oversampled = concatenate_datasets([subset] * multiplier + [subset.select(range(remainder))])
        label_datasets.append(oversampled)

    return concatenate_datasets(label_datasets).shuffle(seed=42)

train_dataset = oversample_dataset(train_dataset)
print("After oversampling:", Counter(train_dataset['label']))

Filter (num_proc=8): 100%|██████████| 29664/29664 [03:34<00:00, 138.22 examples/s]
Filter (num_proc=8): 100%|██████████| 29664/29664 [03:33<00:00, 139.23 examples/s]
Filter (num_proc=8): 100%|██████████| 29664/29664 [03:34<00:00, 138.49 examples/s]
Filter (num_proc=8): 100%|██████████| 29664/29664 [03:34<00:00, 138.06 examples/s]
Filter (num_proc=8): 100%|██████████| 29664/29664 [03:35<00:00, 137.73 examples/s]
Filter (num_proc=8): 100%|██████████| 29664/29664 [03:34<00:00, 138.38 examples/s]
Filter (num_proc=8): 100%|██████████| 29664/29664 [03:28<00:00, 142.23 examples/s]


After oversampling: Counter({6: 7426, 2: 7426, 0: 7426, 1: 7426, 4: 7426, 5: 7426, 3: 7426})


In [10]:
# --------------------------
# 5. Define Training Arguments for Robust Fine-Tuning
# --------------------------
training_args = TrainingArguments(
    output_dir="./finetuned_vit_model",    # Directory to save checkpoints and the final model
    eval_strategy="epoch",           # Evaluate at the end of each epoch
    save_strategy="epoch",                 # Save checkpoint at each epoch
    learning_rate=4e-5,                    # A conservative learning rate for fine-tuning
    per_device_train_batch_size=8,         # Adjust based on your CPU memory limits
    per_device_eval_batch_size=8,
    num_train_epochs=5,                    # Fine-tune for a few epochs (adjust as needed)
    load_best_model_at_end=True,           # Automatically load the best model when training finishes
    metric_for_best_model="accuracy",      # Monitor accuracy for best model selection
    logging_dir="./logs",                  # Directory for TensorBoard logs
)

In [11]:
# --------------------------
# 6. Define a Compute Metrics Function for Evaluation
# --------------------------
def compute_metrics(eval_pred):
    logits, labels = eval_pred
    predictions = np.argmax(logits, axis=-1)
    accuracy = (predictions == labels).mean()
    return {"accuracy": accuracy}

# --------------------------
# Confusion Matrix and Per-Class Accuracy Tracking
# --------------------------

# Define a compute_metrics function with confusion matrix logging
def compute_metrics_with_confusion(eval_pred):
    logits, labels = eval_pred
    preds = np.argmax(logits, axis=-1)

    # Log classification report (per-class precision/recall/f1)
    print("\nClassification Report:")
    print(classification_report(labels, preds, target_names=[id2label[i] for i in sorted(id2label.keys())]))

    # Confusion Matrix
    cm = confusion_matrix(labels, preds)
    plt.figure(figsize=(8, 6))
    sns.heatmap(cm, annot=True, fmt="d", xticklabels=[id2label[i] for i in sorted(id2label)], yticklabels=[id2label[i] for i in sorted(id2label)])
    plt.xlabel("Predicted")
    plt.ylabel("True")
    plt.title("Confusion Matrix")
    plt.tight_layout()
    plt.savefig("confusion_matrix_epoch.png")  # Saves confusion matrix image
    plt.close()

    # Optionally: Return overall accuracy
    accuracy = (preds == labels).mean()
    return {"accuracy": accuracy}


# 3. Ensure id2label is defined before training
id2label = {
    0: "Angry", 1: "Disgust", 2: "Fear", 3: "Happy",
    4: "Sad", 5: "Surprise", 6: "Neutral"
}


In [13]:
# --------------------------
# 7. Trainer with Class-Weighted Loss
# --------------------------

# Compute class weights from training set
label_freqs = Counter(train_dataset['label'])
total = sum(label_freqs.values())
class_weights = torch.tensor([total / label_freqs[i] for i in range(len(label_freqs))], dtype=torch.float).to("cuda" if torch.cuda.is_available() else "cpu")

# Define custom Trainer to inject class weights
class WeightedTrainer(Trainer):
    def compute_loss(self, model, inputs, return_outputs=False, num_items_in_batch=None):
        labels = inputs.pop("labels")
        outputs = model(**inputs)
        logits = outputs.logits
        loss = F.cross_entropy(logits, labels, weight=class_weights)
        return (loss, outputs) if return_outputs else loss

# trainer initialization
trainer = WeightedTrainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,
    compute_metrics=compute_metrics_with_confusion,
)

# Fine-tune model
trainer.train()

Epoch,Training Loss,Validation Loss,Accuracy
1,0.2019,0.378631,0.881354
2,0.0725,0.506438,0.886073
3,0.0305,0.573338,0.888095
4,0.0039,0.622112,0.889174
5,0.0012,0.606022,0.892814



Classification Report:
              precision    recall  f1-score   support

       Angry       0.85      0.89      0.87      1012
     Disgust       0.94      0.99      0.96       148
        Fear       0.77      0.85      0.81      1039
       Happy       0.96      0.95      0.95      1801
         Sad       0.89      0.75      0.82      1280
    Surprise       0.93      0.94      0.93       895
     Neutral       0.85      0.88      0.87      1242

    accuracy                           0.88      7417
   macro avg       0.88      0.89      0.89      7417
weighted avg       0.88      0.88      0.88      7417


Classification Report:
              precision    recall  f1-score   support

       Angry       0.83      0.89      0.86      1012
     Disgust       0.96      1.00      0.98       148
        Fear       0.85      0.79      0.82      1039
       Happy       0.96      0.95      0.96      1801
         Sad       0.85      0.81      0.83      1280
    Surprise       0.92      0

TrainOutput(global_step=16245, training_loss=0.07345656096733472, metrics={'train_runtime': 21936.3656, 'train_samples_per_second': 11.848, 'train_steps_per_second': 0.741, 'total_flos': 2.014184560523692e+19, 'train_loss': 0.07345656096733472, 'epoch': 5.0})

In [14]:
# --------------------------
# 8. Save Final Independent Model
# --------------------------
torch.save(model.state_dict(), '/home/ubuntu/MLexpressionsStorage/final_model_V5.pth')
model.save_pretrained("/home/ubuntu/MLexpressionsStorage/vit_final_independent_V5")
processor.save_pretrained("/home/ubuntu/MLexpressionsStorage/vit_final_independent_V5")

['/home/ubuntu/MLexpressionsStorage/vit_final_independent_V5/preprocessor_config.json']

In [15]:
# --------------------------
# 9. Inference Utilities
# --------------------------

# Load model + processor once
model_path = "/home/ubuntu/MLexpressionsStorage/vit_final_independent_V5"
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

model = AutoModelForImageClassification.from_pretrained(model_path).to(device).eval()
processor = AutoImageProcessor.from_pretrained(model_path)
id2label = model.config.id2label

# Single image prediction (unbatched)
def predict_label(image_path, threshold=0.85):
    image = Image.open(image_path).convert("RGB")
    inputs = processor(image, return_tensors="pt").to(device)
    with torch.no_grad():
        logits = model(**inputs).logits
        probs = F.softmax(logits, dim=-1)
        conf, pred_idx = torch.max(probs, dim=-1)
    return (id2label[pred_idx.item()], conf.item()) if conf.item() >= threshold else ("REVIEW", conf.item())

# Batched prediction (for large folders)
def batch_predict(image_folder, batch_size=64, threshold=0.85):
    all_preds = []
    image_paths = [p for p in Path(IMAGE_DIR).rglob("*") if p.suffix.lower() in [".jpg", ".jpeg", ".png", ".tif", ".tiff"]]

    for i in tqdm(range(0, len(image_paths), batch_size), desc="Running inference in batches"):
        batch_paths = image_paths[i:i + batch_size]
        images = []
        valid_paths = []

        for path in batch_paths:
            try:
                img = Image.open(path).convert("RGB")
                images.append(img)
                valid_paths.append(str(path))
            except Exception as e:
                print(f"Error reading {path}: {e}")
                continue

        if not images:
            continue

        inputs = processor(images=images, return_tensors="pt").to(device)
        with torch.no_grad():
            logits = model(**inputs).logits
            probs = torch.nn.functional.softmax(logits, dim=-1)
            confs, preds = torch.max(probs, dim=-1)

        for pred, conf, path in zip(preds.tolist(), confs.tolist(), valid_paths):
            if conf >= threshold:
                all_preds.append(id2label[pred])
            else:
                all_preds.append("REVIEW")  # Flag uncertain cases

    return all_preds

# Distribution plot
def plot_distribution(predictions, output_path):
    label_counts = Counter(predictions)
    labels = sorted(label_counts.keys())
    counts = [label_counts[label] for label in labels]

    plt.figure(figsize=(10, 5))
    plt.bar(labels, counts)
    plt.title("Predicted Expression Distribution")
    plt.xlabel("Expression")
    plt.ylabel("Count")
    plt.xticks(rotation=45)
    plt.tight_layout()
    plt.savefig(output_path)
    plt.close()

In [16]:
# --------------------------
# 10. Entry Point for Inference
# --------------------------
if __name__ == "__main__" and RUN_INFERENCE:
    from datetime import datetime

    OUTPUT_PATH = (
        "/home/ubuntu/MLexpressionsStorage/distribution_plot_"
        + datetime.now().strftime("%Y%m%d_%H%M%S")
        + ".png"
    )

    predictions = batch_predict(IMAGE_DIR)
    plot_distribution(predictions, OUTPUT_PATH)
    print(f"Distribution plot saved to: {OUTPUT_PATH}")

Running inference in batches: 100%|██████████| 580/580 [03:34<00:00,  2.70it/s]


Distribution plot saved to: /home/ubuntu/MLexpressionsStorage/distribution_plot_20250326_035318.png


In [None]:
# Example usage (uncomment to test):
# label = predict_label("/path/to/image.jpg")
# print("Predicted Label:", label)

In [17]:
# #THIS DID NOT WORK - NEED TO MANUALLY SHUT DOWN!
# # OPTIONAL: Final message/log
# print("✅ Training, evaluation, and saving complete. Preparing to shut down the instance...")

# # Trigger full VM shutdown
# os.system("sudo shutdown -h now")


✅ Training, evaluation, and saving complete. Preparing to shut down the instance...


0