In [1]:
import os, random, shutil
from pathlib import Path

random.seed(10)

# Relabeling and Splitting for 3 Categories

In [2]:
src = Path("/kaggle/input/autistic-children-emotions-dr-fatma-m-talaat/Autistic Children Emotions - Dr. Fatma M. Talaat/Train")
dst = Path("/kaggle/working/train_3zone")
dst.mkdir(exist_ok=True)

map_to_class = {
    "joy": "Positive",
    "anger": "NegativeActive",
    "fear": "NegativeActive",
    "surprise": "NegativeActive",
    "sadness": "NegativePassive",
    "Natural": "Positive"
}

def list_images(path):
    types = {".jpg",".jpeg"}
    return [f for f in path.rglob("*") if f.suffix.lower() in types]

items = []
for old_emotion in map_to_class:
    target = map_to_class[old_emotion]
    if target is None:
        continue
    for f in list_images(src):
        items.append((f, target))

In [3]:
from sklearn.model_selection import train_test_split

paths, labels = zip(*items)

train_paths, val_paths, train_labels, val_labels = train_test_split(
    paths, labels, test_size=0.15, random_state=10, stratify=labels
)

splits = {
    "train": (train_paths, train_labels),
    "val": (val_paths, val_labels),
}

def place_links(split_name, paths, labels, use_symlinks=True):
    for p, y in zip(paths, labels):
        out_dir = dst/split_name/y
        out_dir.mkdir(parents=True, exist_ok=True)
        out = out_dir/p.name
        if out.exists(): 
            continue
        if use_symlinks:
            try:
                os.symlink(os.path.abspath(p), out)
            except FileExistsError:
                pass
            except OSError: # If symlink doesn't work
                shutil.copy2(p, out)
        else:
            shutil.copy2(p, out)


for split_name,(ps,ls) in splits.items():
    place_links(split_name, ps, ls, use_symlinks=True)

for split in ["train","val"]:
    print(split, {c: len(list((dst/split/c).glob("*"))) for c in ["Positive","NegativeActive","NegativePassive"]})

train {'Positive': 634, 'NegativeActive': 641, 'NegativePassive': 560}
val {'Positive': 203, 'NegativeActive': 273, 'NegativePassive': 110}


# Data Loading

In [4]:
import torch
from torch.utils.data import DataLoader, WeightedRandomSampler
from torchvision import datasets, transforms
from collections import Counter

IMG_SIZE = 224
BATCH_SIZE = 32
NUM_WORKERS = 4

In [5]:
train_tfms = transforms.Compose([
    transforms.Resize((256,256)),
    transforms.RandomResizedCrop(IMG_SIZE, scale=(0.8, 1.0)),
    transforms.RandomHorizontalFlip(),
    transforms.ColorJitter(brightness=0.15, contrast=0.15),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485,0.456,0.406], std=[0.229,0.224,0.225]),
])

eval_tfms = transforms.Compose([
    transforms.Resize((256,256)),
    transforms.CenterCrop(IMG_SIZE),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485,0.456,0.406], std=[0.229,0.224,0.225]),
])

train_ds = datasets.ImageFolder(dst/"train", transform=train_tfms)
val_ds   = datasets.ImageFolder(dst/"val", transform=eval_tfms)

counts = Counter([y for _,y in train_ds.samples])
idx_to_class = {v:k for k,v in train_ds.class_to_idx.items()}
num_samples = len(train_ds)
class_weights = torch.tensor([num_samples / counts[c] for c in range(len(idx_to_class))], dtype=torch.float)

In [6]:
# Random Sampling
sample_weights = [class_weights[y] for _, y in train_ds.samples]
sampler = WeightedRandomSampler(weights=sample_weights, num_samples=len(sample_weights), replacement=True)

train_dl = DataLoader(train_ds, batch_size=BATCH_SIZE, sampler=sampler, num_workers=NUM_WORKERS, pin_memory=True)
val_dl   = DataLoader(val_ds, batch_size=BATCH_SIZE, shuffle=False, num_workers=NUM_WORKERS, pin_memory=True)

# Loss

In [8]:
import torch.nn as nn
import torch.nn.functional as F

class FocalLoss(nn.Module):
    def __init__(self, alpha=None, gamma=2.0, reduction="mean"):
        super().__init__()
        self.alpha = alpha
        self.gamma = gamma
        self.reduction = reduction

    def forward(self, logits, targets):
        log_probs = F.log_softmax(logits, dim=1)
        probs = log_probs.exp()
        pt = probs.gather(1, targets.unsqueeze(1)).squeeze(1)
        logpt = log_probs.gather(1, targets.unsqueeze(1)).squeeze(1)
        loss = -((1 - pt) ** self.gamma) * logpt
        if self.alpha is not None:
            at = self.alpha.to(logits.device).gather(0, targets)
            loss = at * loss
        if self.reduction == "mean":
            return loss.mean()
        elif self.reduction == "sum":
            return loss.sum()
        else:
            return loss

# CrossEntropy

In [13]:
from torch.utils.data import DataLoader, WeightedRandomSampler
from collections import Counter
import torch, torch.nn as nn

# Balanced sampler
counts = Counter([y for _,y in train_ds.samples])
class_sample_count = torch.tensor([counts[i] for i in range(len(train_ds.classes))], dtype=torch.float)
weights = 1.0 / class_sample_count
sample_weights = torch.tensor([weights[y] for _,y in train_ds.samples])
sampler = WeightedRandomSampler(weights=sample_weights, num_samples=len(sample_weights), replacement=True)

train_dl = DataLoader(train_ds, batch_size=32, sampler=sampler, num_workers=4, pin_memory=True)
val_dl   = DataLoader(val_ds,   batch_size=32, shuffle=False, num_workers=4, pin_memory=True)

criterion = nn.CrossEntropyLoss()

# ResNet-50

In [14]:
import torchvision.models as models

num_classes = len(train_ds.classes)
model = models.resnet50(weights=models.ResNet50_Weights.IMAGENET1K_V2)
model.fc = nn.Linear(model.fc.in_features, num_classes)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = model.to(device)

criterion = FocalLoss(alpha=class_weights / class_weights.mean(), gamma=2.0)  # normalize alpha
optimizer = torch.optim.AdamW(model.parameters(), lr=3e-4, weight_decay=1e-4)
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=10)
scaler = torch.cuda.amp.GradScaler(enabled=torch.cuda.is_available())

  scaler = torch.cuda.amp.GradScaler(enabled=torch.cuda.is_available())


# Training

In [10]:
import numpy as np
from sklearn.metrics import f1_score, classification_report, confusion_matrix
import time
import copy
import torch

def run_epoch(dl, train=True):
    if train:
        model.train()
    else:
        model.eval()
    total, correct, losses = 0, 0, []
    all_preds, all_targets = [], []
    for xb, yb in dl:
        xb, yb = xb.to(device), yb.to(device)
        optimizer.zero_grad(set_to_none=True)
        with torch.set_grad_enabled(train):
            with torch.cuda.amp.autocast(enabled=torch.cuda.is_available()):
                logits = model(xb)
                loss = criterion(logits, yb)
            if train:
                scaler.scale(loss).backward()
                scaler.step(optimizer)
                scaler.update()
        probs = torch.softmax(logits, dim=1)
        preds = probs.argmax(dim=1)
        total += yb.size(0)
        correct += (preds == yb).sum().item()
        losses.append(loss.item())
        all_preds.extend(preds.detach().cpu().numpy())
        all_targets.extend(yb.detach().cpu().numpy())
    acc = correct/total
    f1_macro = f1_score(all_targets, all_preds, average="macro")
    return np.mean(losses), acc, f1_macro, np.array(all_targets), np.array(all_preds)

best_wts = copy.deepcopy(model.state_dict())
best_f1 = 0.0
patience, patience_ctr = 5, 0

for epoch in range(30):
    t0 = time.time()
    train_loss, train_acc, train_f1, _, _ = run_epoch(train_dl, train=True)
    val_loss, val_acc, val_f1, y_true, y_pred = run_epoch(val_dl, train=False)
    scheduler.step()

    if val_f1 > best_f1:
        best_f1 = val_f1
        best_wts = copy.deepcopy(model.state_dict())
        torch.save({"model": best_wts, "classes": train_ds.classes}, "best_3zone.pt")
        patience_ctr = 0
    else:
        patience_ctr += 1

    print(f"Epoch {epoch+1:02d} | "
          f"train loss {train_loss:.3f} acc {train_acc:.3f} f1 {train_f1:.3f} | "
          f"val loss {val_loss:.3f} acc {val_acc:.3f} f1 {val_f1:.3f} | "
          f"{time.time()-t0:.1f}s")

    if patience_ctr >= patience:
        print("Early stopping.")
        break

model.load_state_dict(best_wts)

  with torch.cuda.amp.autocast(enabled=torch.cuda.is_available()):
  with torch.cuda.amp.autocast(enabled=torch.cuda.is_available()):


Epoch 01 | train loss 0.496 acc 0.326 f1 0.324 | val loss 0.500 acc 0.217 f1 0.211 | 12.7s


  with torch.cuda.amp.autocast(enabled=torch.cuda.is_available()):
  with torch.cuda.amp.autocast(enabled=torch.cuda.is_available()):


Epoch 02 | train loss 0.489 acc 0.347 f1 0.292 | val loss 0.486 acc 0.230 f1 0.224 | 11.1s


  with torch.cuda.amp.autocast(enabled=torch.cuda.is_available()):
  with torch.cuda.amp.autocast(enabled=torch.cuda.is_available()):


Epoch 03 | train loss 0.486 acc 0.362 f1 0.318 | val loss 0.501 acc 0.261 f1 0.218 | 10.8s


  with torch.cuda.amp.autocast(enabled=torch.cuda.is_available()):
  with torch.cuda.amp.autocast(enabled=torch.cuda.is_available()):


Epoch 04 | train loss 0.492 acc 0.346 f1 0.303 | val loss 0.489 acc 0.244 f1 0.233 | 11.2s


  with torch.cuda.amp.autocast(enabled=torch.cuda.is_available()):
  with torch.cuda.amp.autocast(enabled=torch.cuda.is_available()):


Epoch 05 | train loss 0.490 acc 0.341 f1 0.269 | val loss 0.494 acc 0.195 f1 0.165 | 10.9s


  with torch.cuda.amp.autocast(enabled=torch.cuda.is_available()):
  with torch.cuda.amp.autocast(enabled=torch.cuda.is_available()):


Epoch 06 | train loss 0.488 acc 0.339 f1 0.284 | val loss 0.492 acc 0.251 f1 0.247 | 11.1s


  with torch.cuda.amp.autocast(enabled=torch.cuda.is_available()):
  with torch.cuda.amp.autocast(enabled=torch.cuda.is_available()):


Epoch 07 | train loss 0.489 acc 0.348 f1 0.306 | val loss 0.494 acc 0.203 f1 0.202 | 10.9s


  with torch.cuda.amp.autocast(enabled=torch.cuda.is_available()):
  with torch.cuda.amp.autocast(enabled=torch.cuda.is_available()):


Epoch 08 | train loss 0.487 acc 0.347 f1 0.287 | val loss 0.497 acc 0.203 f1 0.185 | 10.9s


  with torch.cuda.amp.autocast(enabled=torch.cuda.is_available()):
  with torch.cuda.amp.autocast(enabled=torch.cuda.is_available()):


Epoch 09 | train loss 0.487 acc 0.356 f1 0.287 | val loss 0.494 acc 0.198 f1 0.185 | 11.0s


  with torch.cuda.amp.autocast(enabled=torch.cuda.is_available()):
  with torch.cuda.amp.autocast(enabled=torch.cuda.is_available()):


Epoch 10 | train loss 0.487 acc 0.361 f1 0.294 | val loss 0.495 acc 0.193 f1 0.179 | 10.9s


  with torch.cuda.amp.autocast(enabled=torch.cuda.is_available()):
  with torch.cuda.amp.autocast(enabled=torch.cuda.is_available()):


Epoch 11 | train loss 0.485 acc 0.368 f1 0.297 | val loss 0.495 acc 0.198 f1 0.184 | 10.9s
Early stopping.


<All keys matched successfully>

# CrossEntropy Train Loop

In [15]:
import torchvision.models as models
import torch.optim as optim

num_classes = len(train_ds.classes)
model = models.resnet50(weights=models.ResNet50_Weights.IMAGENET1K_V2)

# freeze backbone first
for p in model.parameters():
    p.requires_grad_(False)

model.fc = nn.Linear(model.fc.in_features, num_classes)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = model.to(device)

# Stage 1: train head only
opt = optim.AdamW(model.fc.parameters(), lr=1e-3, weight_decay=1e-4)
scheduler = optim.lr_scheduler.CosineAnnealingLR(opt, T_max=5)

def run_epoch(dl, train=True):
    model.train(mode=train)
    tot, correct, losses = 0,0,[]
    all_preds, all_targets = [], []
    for xb, yb in dl:
        xb, yb = xb.to(device), yb.to(device)
        opt.zero_grad(set_to_none=True)
        with torch.amp.autocast('cuda', enabled=torch.cuda.is_available()):
            logits = model(xb)
            loss = criterion(logits, yb)
        if train:
            loss.backward()
            opt.step()
        preds = logits.argmax(1)
        tot += yb.size(0)
        correct += (preds==yb).sum().item()
        losses.append(loss.item())
    return sum(losses)/len(losses), correct/tot

best_state = model.state_dict()
best_val = 0.0

for epoch in range(5):
    tr_loss, tr_acc = run_epoch(train_dl, True)
    va_loss, va_acc = run_epoch(val_dl, False)
    scheduler.step()
    if va_acc > best_val:
        best_val = va_acc; best_state = {k:v.cpu() for k,v in model.state_dict().items()}
    print(f"[Stage1] {epoch+1} train {tr_loss:.3f}/{tr_acc:.3f}  val {va_loss:.3f}/{va_acc:.3f}")

model.load_state_dict(best_state)

[Stage1] 1 train 0.501/0.337  val 0.496/0.307
[Stage1] 2 train 0.502/0.321  val 0.512/0.232
[Stage1] 3 train 0.499/0.327  val 0.500/0.295
[Stage1] 4 train 0.496/0.343  val 0.522/0.217
[Stage1] 5 train 0.493/0.332  val 0.505/0.285


<All keys matched successfully>

In [16]:
# Stage 2: unfreeze backbone, use discriminative LRs
for p in model.parameters():
    p.requires_grad_(True)

backbone_params = [p for n,p in model.named_parameters() if not n.startswith('fc.')]
head_params = model.fc.parameters()

opt = optim.AdamW([
    {"params": backbone_params, "lr": 1e-4},
    {"params": head_params,     "lr": 7e-4},
], weight_decay=2e-4)

scheduler = optim.lr_scheduler.CosineAnnealingLR(opt, T_max=15)
patience, best_val, wait = 5, 0.0, 0

import copy
best_model = copy.deepcopy(model.state_dict())

for epoch in range(20):
    tr_loss, tr_acc = run_epoch(train_dl, True)
    va_loss, va_acc = run_epoch(val_dl, False)
    scheduler.step()

    improved = va_acc > best_val + 1e-3
    if improved:
        best_val, wait = va_acc, 0
        best_model = copy.deepcopy(model.state_dict())
    else:
        wait += 1

    print(f"[Stage2] {epoch+1} train {tr_loss:.3f}/{tr_acc:.3f}  val {va_loss:.3f}/{va_acc:.3f}")
    if wait >= patience:
        print("Early stop"); break

model.load_state_dict(best_model)

[Stage2] 1 train 0.493/0.347  val 0.495/0.304
[Stage2] 2 train 0.491/0.349  val 0.497/0.234
[Stage2] 3 train 0.487/0.371  val 0.515/0.191
[Stage2] 4 train 0.487/0.344  val 0.491/0.253
[Stage2] 5 train 0.487/0.343  val 0.493/0.225
[Stage2] 6 train 0.488/0.349  val 0.499/0.196
Early stop


<All keys matched successfully>

# Eval on Test Data

In [None]:
_, test_acc, test_f1, y_true, y_pred = run_epoch(test_dl, train=False)
print("Test Acc:", round(test_acc,3), " Test Macro-F1:", round(test_f1,3))
print("\nPer-class report:\n")
print(classification_report(y_true, y_pred, target_names=train_ds.classes, digits=3))

import matplotlib.pyplot as plt
import numpy as np
cm = confusion_matrix(y_true, y_pred, labels=list(range(len(train_ds.classes))))
fig, ax = plt.subplots(figsize=(4,4))
im = ax.imshow(cm, interpolation='nearest')
ax.set_xticks(range(len(train_ds.classes)))
ax.set_yticks(range(len(train_ds.classes)))
ax.set_xticklabels(train_ds.classes, rotation=45, ha='right')
ax.set_yticklabels(train_ds.classes)
ax.set_xlabel("Predicted"); ax.set_ylabel("True"); ax.set_title("Confusion Matrix")
for i in range(cm.shape[0]):
    for j in range(cm.shape[1]):
        ax.text(j, i, cm[i, j], ha="center", va="center")
plt.tight_layout()
plt.show()

# For Inference

In [None]:
from PIL import Image
import torch

checkpoint = torch.load("best_3zone.pt", map_location="cpu")
classes = checkpoint["classes"]
model.load_state_dict(checkpoint["model"])
model.eval().to(device)

def predict_image(path):
    img = Image.open(path).convert("RGB")
    x = eval_tfms(img).unsqueeze(0).to(device)
    with torch.no_grad():
        p = torch.softmax(model(x), dim=1)[0].cpu().numpy()
    return dict(zip(classes, p.tolist()))