In [2]:
from google.colab import drive
drive.mount('/content/drive', force_remount=True)

import os, glob
from PIL import Image
import torch, torch.nn as nn, torch.optim as optim
from torch.utils.data import DataLoader, Dataset, random_split
from torchvision import transforms, models
from torchvision.datasets import ImageFolder
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix, classification_report, ConfusionMatrixDisplay
from scipy.stats import zscore
import wandb

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Device:", device)

num_epochs    = 10
batch_size    = 32
learning_rate = 1e-4
emo_dir       = '/content/drive/MyDrive/smai_a2/dataset/emotions'  # ← update as needed

wandb.login()

train_tfms = transforms.Compose([
    transforms.Resize((224,224)),
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(10),
    transforms.ColorJitter(brightness=0.2, contrast=0.2),
    transforms.ToTensor(),
    transforms.Normalize([0.485,0.456,0.406],[0.229,0.224,0.225]),
])
val_tfms = transforms.Compose([
    transforms.Resize((224,224)),
    transforms.ToTensor(),
    transforms.Normalize([0.485,0.456,0.406],[0.229,0.224,0.225]),
])

full_emo_ds = ImageFolder(emo_dir, transform=train_tfms)
n = len(full_emo_ds)
n_train = int(0.7*n)
n_val   = int(0.15*n)
n_test  = n - n_train - n_val
train_ds, val_ds, test_ds = random_split(full_emo_ds, [n_train, n_val, n_test])

train_loader = DataLoader(train_ds, batch_size=batch_size, shuffle=True,  num_workers=2)
val_loader   = DataLoader(val_ds,   batch_size=batch_size, shuffle=False, num_workers=2)
test_loader  = DataLoader(test_ds,  batch_size=batch_size, shuffle=False, num_workers=2)

print("Classes:", full_emo_ds.classes)
print("Sizes ▶ Train:", len(train_ds), "Val:", len(val_ds), "Test:", len(test_ds))

def train_epoch(model, loader, opt, crit):
    model.train(); total_loss=0; correct=0; total=0
    for imgs, lbls in loader:
        imgs, lbls = imgs.to(device), lbls.to(device)
        opt.zero_grad()
        out = model(imgs)
        loss = crit(out, lbls)
        loss.backward(); opt.step()
        total_loss += loss.item()*imgs.size(0)
        preds = out.argmax(1)
        correct += (preds==lbls).sum().item()
        total += lbls.size(0)
    return total_loss/total, correct/total

def eval_epoch(model, loader, crit):
    model.eval(); total_loss=0; correct=0; total=0
    with torch.no_grad():
        for imgs, lbls in loader:
            imgs, lbls = imgs.to(device), lbls.to(device)
            out = model(imgs)
            loss = crit(out, lbls)
            total_loss += loss.item()*imgs.size(0)
            preds = out.argmax(1)
            correct += (preds==lbls).sum().item()
            total += lbls.size(0)
    return total_loss/total, correct/total



Mounted at /content/drive
Device: cuda


[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.


<IPython.core.display.Javascript object>

[34m[1mwandb[0m: Logging into wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)
[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize
wandb: Paste an API key from your profile and hit enter:

 ··········


[34m[1mwandb[0m: No netrc file found, creating one.
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc
[34m[1mwandb[0m: Currently logged in as: [33mlakshaybaijal[0m ([33mlakshaybaijal-iiit-hyderabad[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


Classes: ['angry', 'happy', 'sad']
Sizes ▶ Train: 624 Val: 133 Test: 135


In [3]:
wandb.init(
  project="smai-assignment2",
  name="emo_vgg16",
  config={
      "model": "VGG16-finetune",
      "epochs": num_epochs,
      "batch_size": batch_size,
      "lr": learning_rate
  }
)
cfg = wandb.config

model_vgg = models.vgg16(pretrained=True)
for p in model_vgg.features.parameters(): p.requires_grad=False
model_vgg.classifier[6] = nn.Linear(model_vgg.classifier[6].in_features, len(full_emo_ds.classes))
model_vgg = model_vgg.to(device)

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model_vgg.classifier[6].parameters(), lr=cfg.lr)

wandb.watch(model_vgg, log="all", log_freq=10)

for ep in range(cfg.epochs):
    tl, ta = train_epoch(model_vgg, train_loader, optimizer, criterion)
    vl, va = eval_epoch (model_vgg, val_loader,   criterion)
    print(f"[VGG Lakshay] Epoch {ep+1}/{cfg.epochs} ▶ Train: {ta:.4f}, Val: {va:.4f}")
    wandb.log({
      "epoch": ep+1,
      "train/loss": tl, "train/acc": ta,
      "val/loss": vl,   "val/acc": va
    })

y_true=y_pred=[]
y_true, y_pred = [], []
model_vgg.eval()
with torch.no_grad():
    for imgs, lbls in test_loader:
        imgs = imgs.to(device)
        out  = model_vgg(imgs)
        preds = out.argmax(1).cpu().numpy()
        y_pred.extend(preds); y_true.extend(lbls.numpy())

wandb.log({
  "confusion_matrix": wandb.plot.confusion_matrix(
      probs=None, y_true=y_true, preds=y_pred, class_names=full_emo_ds.classes
  )
})
print("\nClassification Report:\n", classification_report(y_true, y_pred, target_names=full_emo_ds.classes))

wandb.finish()


Downloading: "https://download.pytorch.org/models/vgg16-397923af.pth" to /root/.cache/torch/hub/checkpoints/vgg16-397923af.pth
100%|██████████| 528M/528M [00:03<00:00, 160MB/s]


[VGG Lakshay] Epoch 1/10 ▶ Train: 0.3221, Val: 0.3383
[VGG Lakshay] Epoch 2/10 ▶ Train: 0.3478, Val: 0.3534
[VGG Lakshay] Epoch 3/10 ▶ Train: 0.3670, Val: 0.3985
[VGG Lakshay] Epoch 4/10 ▶ Train: 0.3958, Val: 0.4511
[VGG Lakshay] Epoch 5/10 ▶ Train: 0.4135, Val: 0.5038
[VGG Lakshay] Epoch 6/10 ▶ Train: 0.4439, Val: 0.4662
[VGG Lakshay] Epoch 7/10 ▶ Train: 0.4824, Val: 0.5263
[VGG Lakshay] Epoch 8/10 ▶ Train: 0.4647, Val: 0.5489
[VGG Lakshay] Epoch 9/10 ▶ Train: 0.5529, Val: 0.5564
[VGG Lakshay] Epoch 10/10 ▶ Train: 0.5321, Val: 0.6391

Classification Report:
               precision    recall  f1-score   support

       angry       0.91      0.56      0.69        52
       happy       0.64      0.53      0.58        43
         sad       0.52      0.88      0.65        40

    accuracy                           0.64       135
   macro avg       0.69      0.66      0.64       135
weighted avg       0.71      0.64      0.65       135



0,1
epoch,▁▂▃▃▄▅▆▆▇█
train/acc,▁▂▂▃▄▅▆▅█▇
train/loss,█▇▇▆▅▄▃▃▁▁
val/acc,▁▁▂▄▅▄▅▆▆█
val/loss,██▇▆▄▅▅▃▃▁

0,1
epoch,10.0
train/acc,0.53205
train/loss,0.98548
val/acc,0.6391
val/loss,0.96341


In [4]:
wandb.init(
  project="smai-assignment2",
  name="emo_resnet18_scratch",
  config={
      "model": "ResNet18-scratch",
      "epochs": num_epochs,
      "batch_size": batch_size,
      "lr": learning_rate
  }
)
cfg = wandb.config

model_rs = models.resnet18(pretrained=False)
model_rs.fc = nn.Linear(model_rs.fc.in_features, len(full_emo_ds.classes))
model_rs = model_rs.to(device)

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model_rs.parameters(), lr=cfg.lr)

wandb.watch(model_rs, log="all", log_freq=10)


for ep in range(cfg.epochs):
    tl, ta = train_epoch(model_rs, train_loader, optimizer, criterion)
    vl, va = eval_epoch (model_rs, val_loader,   criterion)
    print(f"[ResNet18 not pretrained Lakshay] Epoch {ep+1}/{cfg.epochs} ▶ Train: {ta:.4f}, Val: {va:.4f}")
    wandb.log({
      "epoch": ep+1,
      "train/loss": tl, "train/acc": ta,
      "val/loss": vl,   "val/acc": va
    })

y_true, y_pred = [], []
model_rs.eval()
with torch.no_grad():
    for imgs, lbls in test_loader:
        imgs = imgs.to(device)
        out  = model_rs(imgs)
        preds = out.argmax(1).cpu().numpy()
        y_pred.extend(preds); y_true.extend(lbls.numpy())

wandb.log({
  "confusion_matrix": wandb.plot.confusion_matrix(
      probs=None, y_true=y_true, preds=y_pred, class_names=full_emo_ds.classes
  )
})
print("\nClassification Report:\n", classification_report(y_true, y_pred, target_names=full_emo_ds.classes))

wandb.finish()




[ResNet18 not pretrained Lakshay] Epoch 1/10 ▶ Train: 0.3862, Val: 0.3233
[ResNet18 not pretrained Lakshay] Epoch 2/10 ▶ Train: 0.5369, Val: 0.4887
[ResNet18 not pretrained Lakshay] Epoch 3/10 ▶ Train: 0.6218, Val: 0.3684
[ResNet18 not pretrained Lakshay] Epoch 4/10 ▶ Train: 0.7324, Val: 0.6391
[ResNet18 not pretrained Lakshay] Epoch 5/10 ▶ Train: 0.7500, Val: 0.7368
[ResNet18 not pretrained Lakshay] Epoch 6/10 ▶ Train: 0.8189, Val: 0.7970
[ResNet18 not pretrained Lakshay] Epoch 7/10 ▶ Train: 0.8926, Val: 0.7820
[ResNet18 not pretrained Lakshay] Epoch 8/10 ▶ Train: 0.8462, Val: 0.8421
[ResNet18 not pretrained Lakshay] Epoch 9/10 ▶ Train: 0.8814, Val: 0.7820
[ResNet18 not pretrained Lakshay] Epoch 10/10 ▶ Train: 0.9295, Val: 0.8346

Classification Report:
               precision    recall  f1-score   support

       angry       0.90      0.73      0.81        52
       happy       0.97      0.74      0.84        43
         sad       0.63      0.95      0.76        40

    accuracy    

0,1
epoch,▁▂▃▃▄▅▆▆▇█
train/acc,▁▃▄▅▆▇█▇▇█
train/loss,█▆▅▄▃▃▂▂▂▁
val/acc,▁▃▂▅▇▇▇█▇█
val/loss,█▅▇▄▃▁▂▁▂▁

0,1
epoch,10.0
train/acc,0.92949
train/loss,0.21172
val/acc,0.83459
val/loss,0.42278


In [5]:
# ▶ ResNet18 Pretrained
wandb.init(
  project="smai-assignment2",
  name="emo_resnet18_pretrained",
  config={
      "model": "ResNet18-pretrained",
      "epochs": num_epochs,
      "batch_size": batch_size,
      "lr": learning_rate
  }
)
cfg = wandb.config

# Model
model_rp = models.resnet18(pretrained=True)
model_rp.fc = nn.Linear(model_rp.fc.in_features, len(full_emo_ds.classes))
model_rp = model_rp.to(device)

# Opt & Loss
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model_rp.parameters(), lr=cfg.lr)

# Watch
wandb.watch(model_rp, log="all", log_freq=10)

# Train
for ep in range(cfg.epochs):
    tl, ta = train_epoch(model_rp, train_loader, optimizer, criterion)
    vl, va = eval_epoch (model_rp, val_loader,   criterion)
    print(f"[ResNet18 Pretrained Lakshay] Epoch {ep+1}/{cfg.epochs} ▶ Train: {ta:.4f}, Val: {va:.4f}")
    wandb.log({
      "epoch": ep+1,
      "train/loss": tl, "train/acc": ta,
      "val/loss": vl,   "val/acc": va
    })

# Confusion Matrix on Test
y_true, y_pred = [], []
model_rp.eval()
with torch.no_grad():
    for imgs, lbls in test_loader:
        imgs = imgs.to(device)
        out  = model_rp(imgs)
        preds = out.argmax(1).cpu().numpy()
        y_pred.extend(preds); y_true.extend(lbls.numpy())

wandb.log({
  "confusion_matrix": wandb.plot.confusion_matrix(
      probs=None, y_true=y_true, preds=y_pred, class_names=full_emo_ds.classes
  )
})
print("\nClassification Report:\n", classification_report(y_true, y_pred, target_names=full_emo_ds.classes))

wandb.finish()


Downloading: "https://download.pytorch.org/models/resnet18-f37072fd.pth" to /root/.cache/torch/hub/checkpoints/resnet18-f37072fd.pth
100%|██████████| 44.7M/44.7M [00:00<00:00, 171MB/s]


[ResNet18 Pretrained Lakshay] Epoch 1/10 ▶ Train: 0.7003, Val: 0.9023
[ResNet18 Pretrained Lakshay] Epoch 2/10 ▶ Train: 0.9712, Val: 0.9699
[ResNet18 Pretrained Lakshay] Epoch 3/10 ▶ Train: 0.9888, Val: 0.9699
[ResNet18 Pretrained Lakshay] Epoch 4/10 ▶ Train: 0.9888, Val: 0.9850
[ResNet18 Pretrained Lakshay] Epoch 5/10 ▶ Train: 0.9888, Val: 0.9774
[ResNet18 Pretrained Lakshay] Epoch 6/10 ▶ Train: 0.9936, Val: 0.9925
[ResNet18 Pretrained Lakshay] Epoch 7/10 ▶ Train: 0.9920, Val: 1.0000
[ResNet18 Pretrained Lakshay] Epoch 8/10 ▶ Train: 1.0000, Val: 0.9925
[ResNet18 Pretrained Lakshay] Epoch 9/10 ▶ Train: 1.0000, Val: 0.9850
[ResNet18 Pretrained Lakshay] Epoch 10/10 ▶ Train: 0.9984, Val: 1.0000

Classification Report:
               precision    recall  f1-score   support

       angry       1.00      1.00      1.00        52
       happy       1.00      1.00      1.00        43
         sad       1.00      1.00      1.00        40

    accuracy                           1.00       135
  

0,1
epoch,▁▂▃▃▄▅▆▆▇█
train/acc,▁▇████████
train/loss,█▂▁▁▁▁▁▁▁▁
val/acc,▁▆▆▇▆▇█▇▇█
val/loss,█▃▄▂▂▂▁▂▁▁

0,1
epoch,10.0
train/acc,0.9984
train/loss,0.00614
val/acc,1.0
val/loss,0.00673


In [6]:
# ─────────────────────────────────────────────────────────────────────────────
# Cell X: Build fresh “clean” emotion models (no W&B hooks)
# ─────────────────────────────────────────────────────────────────────────────
import torch.nn as nn
from torchvision import models

# 1) Fresh VGG16 for 3-class emotions
vgg_emo_clean = models.vgg16(pretrained=True)
vgg_emo_clean.classifier[6] = nn.Linear(
    vgg_emo_clean.classifier[6].in_features,
    len(full_emo_ds.classes)
)
vgg_emo_clean.load_state_dict(model_vgg.state_dict())
vgg_emo_clean = vgg_emo_clean.to(device)

# 2) Fresh ResNet18 (from scratch)
rs_emo_clean = models.resnet18(pretrained=False)
rs_emo_clean.fc = nn.Linear(rs_emo_clean.fc.in_features,
                            len(full_emo_ds.classes))
rs_emo_clean.load_state_dict(model_rs.state_dict())
rs_emo_clean = rs_emo_clean.to(device)

# 3) Fresh ResNet18 (pretrained)
rp_emo_clean = models.resnet18(pretrained=True)
rp_emo_clean.fc = nn.Linear(rp_emo_clean.fc.in_features,
                            len(full_emo_ds.classes))
rp_emo_clean.load_state_dict(model_rp.state_dict())
rp_emo_clean = rp_emo_clean.to(device)

print("Clean emotion models ready:",
      vgg_emo_clean, rs_emo_clean, rp_emo_clean)




Clean emotion models ready: VGG(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace=True)
    (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU(inplace=True)
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (6): ReLU(inplace=True)
    (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (8): ReLU(inplace=True)
    (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (13): ReLU(inplace=True)
    (14): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (15): ReLU(inplace=True)
    (16): MaxPool2d(kernel_size=2, str

In [7]:
# ─────────────────────────────────────────────────────────────────────────────
# Cell Y: Generate the 3 Emotion-Recognition videos
# ─────────────────────────────────────────────────────────────────────────────
import glob, cv2
from PIL import Image

def make_emotion_video(
    model, class_names, img_paths, output_path,
    transform, device, fps=1, frame_size=(224,224)
):
    model.eval()
    vw = cv2.VideoWriter(
        output_path,
        cv2.VideoWriter_fourcc(*'mp4v'),
        fps, frame_size
    )
    for path in img_paths:
        img_pil = Image.open(path).convert('RGB')
        inp     = transform(img_pil).unsqueeze(0).to(device)
        with torch.no_grad():
            pred = model(inp).argmax(1).item()
        lbl   = class_names[pred]
        frame = cv2.resize(cv2.imread(path), frame_size)
        cv2.putText(frame, lbl, (10,frame_size[1]-10),
                    cv2.FONT_HERSHEY_SIMPLEX, 1.2,
                    (0,255,0), 2, cv2.LINE_AA)
        vw.write(frame)
    vw.release()
    print(f"✅ Saved video: {output_path}")

# Gather file-paths for your emotion test split:
emo_test_paths = [ full_emo_ds.samples[i][0] for i in test_ds.indices ]

# 1) VGG16 emotion
make_emotion_video(
    vgg_emo_clean,
    full_emo_ds.classes,
    emo_test_paths,
    'vgg_emotion_recognition.mp4',
    val_tfms,
    device
)

# 2) ResNet18 pretrained emotion
make_emotion_video(
    rp_emo_clean,
    full_emo_ds.classes,
    emo_test_paths,
    'resnet18_pretrained_emotion.mp4',
    val_tfms,
    device
)

# 3) ResNet18 scratch emotion
make_emotion_video(
    rs_emo_clean,
    full_emo_ds.classes,
    emo_test_paths,
    'resnet18_scratch_emotion.mp4',
    val_tfms,
    device
)

# (Optional) Download the videos
from google.colab import files
files.download('vgg_emotion_recognition.mp4')
files.download('resnet18_pretrained_emotion.mp4')
files.download('resnet18_scratch_emotion.mp4')


✅ Saved video: vgg_emotion_recognition.mp4
✅ Saved video: resnet18_pretrained_emotion.mp4
✅ Saved video: resnet18_scratch_emotion.mp4


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>