FER model

In [23]:
import os
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, random_split
from torchvision import transforms
from torchvision.datasets import ImageFolder
from torchvision.models import resnet18, ResNet18_Weights
from sklearn.metrics import accuracy_score, precision_recall_fscore_support, confusion_matrix

In [24]:
DATA_ROOT = "FER"
TRAIN_DIR = os.path.join(DATA_ROOT, "train")
assert os.path.exists(TRAIN_DIR), f"Training directory not found: {TRAIN_DIR}"

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Using device:", device)

EPOCHS = 15
BATCH_SIZE = 64
LR = 1e-3
NUM_CLASSES = 7

Using device: cuda


In [25]:
train_transform = transforms.Compose([
    transforms.Grayscale(num_output_channels=3),     # convert 1-channel â†’ 3-channel
    transforms.RandomHorizontalFlip(),                                   
    transforms.Resize((224, 224)),                    # ResNet input size
    transforms.ToTensor(),
    transforms.Normalize(
        mean=[0.485, 0.456, 0.406],                   # ImageNet stats
        std=[0.229, 0.224, 0.225]
    )
])

eval_transform = transforms.Compose([
    transforms.Grayscale(num_output_channels=3),
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(
        mean=[0.485, 0.456, 0.406],
        std=[0.229, 0.224, 0.225]
    )
])

In [26]:
full_dataset = ImageFolder(TRAIN_DIR, transform=train_transform)

train_len = int(0.8 * len(full_dataset))
val_len = len(full_dataset) - train_len

train_dataset, val_dataset = random_split(
    full_dataset, [train_len, val_len]
)

val_dataset.dataset.transform = eval_transform

train_loader = DataLoader(
    train_dataset,
    batch_size=BATCH_SIZE,
    shuffle=True,
    num_workers=2,
    pin_memory=True
)

val_loader = DataLoader(
    val_dataset,
    batch_size=BATCH_SIZE,
    shuffle=False,
    num_workers=2,
    pin_memory=True
)

print("Train samples:", len(train_dataset))
print("Val samples  :", len(val_dataset))

Train samples: 22967
Val samples  : 5742


In [27]:
def create_model(num_classes):

    model = resnet18(weights=ResNet18_Weights.DEFAULT)
    """ONLY FC TRAINING
    # FREEZE ENTIRE BACKBONE
    for param in model.parameters():
        param.requires_grad = False"""

    for param in model.parameters():
        param.requires_grad = True
    """FOR PARTIAL TRAINING (ONLY OF LAYER 4)
    # Unfreeze Only the last ResNet block (layer4)
    for param in model.layer4.parameters():
        param.requires_grad = True"""

    model.fc = nn.Linear(model.fc.in_features, num_classes)
    
    return model

model = create_model(NUM_CLASSES)
model = model.to(device)

print("Trainable parameters:")
for name, p in model.named_parameters():
    if p.requires_grad:
        print(name)

Trainable parameters:
conv1.weight
bn1.weight
bn1.bias
layer1.0.conv1.weight
layer1.0.bn1.weight
layer1.0.bn1.bias
layer1.0.conv2.weight
layer1.0.bn2.weight
layer1.0.bn2.bias
layer1.1.conv1.weight
layer1.1.bn1.weight
layer1.1.bn1.bias
layer1.1.conv2.weight
layer1.1.bn2.weight
layer1.1.bn2.bias
layer2.0.conv1.weight
layer2.0.bn1.weight
layer2.0.bn1.bias
layer2.0.conv2.weight
layer2.0.bn2.weight
layer2.0.bn2.bias
layer2.0.downsample.0.weight
layer2.0.downsample.1.weight
layer2.0.downsample.1.bias
layer2.1.conv1.weight
layer2.1.bn1.weight
layer2.1.bn1.bias
layer2.1.conv2.weight
layer2.1.bn2.weight
layer2.1.bn2.bias
layer3.0.conv1.weight
layer3.0.bn1.weight
layer3.0.bn1.bias
layer3.0.conv2.weight
layer3.0.bn2.weight
layer3.0.bn2.bias
layer3.0.downsample.0.weight
layer3.0.downsample.1.weight
layer3.0.downsample.1.bias
layer3.1.conv1.weight
layer3.1.bn1.weight
layer3.1.bn1.bias
layer3.1.conv2.weight
layer3.1.bn2.weight
layer3.1.bn2.bias
layer4.0.conv1.weight
layer4.0.bn1.weight
layer4.0.bn1.

In [28]:
optimizer = torch.optim.Adam(
    model.parameters(),
    lr=1e-4
)
loss_fn = nn.CrossEntropyLoss()

In [29]:
def train_one_epoch(model, loader, optimizer, loss_fn):
    model.train()
    total_loss = 0.0

    for images, labels in loader:
        images = images.to(device)
        labels = labels.to(device)

        logits = model(images)
        loss = loss_fn(logits, labels)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        total_loss += loss.item() * images.size(0)

    return total_loss / len(loader.dataset)

In [30]:
def evaluate(model, loader):
    model.eval()
    y_true, y_pred = [], []

    with torch.no_grad():
        for images, labels in loader:
            images = images.to(device)
            labels = labels.to(device)

            preds = model(images).argmax(dim=1)

            y_true.extend(labels.cpu().numpy())
            y_pred.extend(preds.cpu().numpy())

    acc = accuracy_score(y_true, y_pred)
    p, r, f1, _ = precision_recall_fscore_support(
        y_true, y_pred, average="macro"
    )
    cm = confusion_matrix(y_true, y_pred)

    return acc, p, r, f1, cm


In [31]:
for epoch in range(EPOCHS):
    train_loss = train_one_epoch(model, train_loader, optimizer, loss_fn)
    val_acc, val_p, val_r, val_f1, _ = evaluate(model, val_loader)

    print(
        f"Epoch {epoch+1}/{EPOCHS} | "
        f"Train Loss: {train_loss:.4f} | "
        f"Val Acc: {val_acc:.4f} | "
        f"Val F1: {val_f1:.4f}"
    )

Epoch 1/15 | Train Loss: 1.1606 | Val Acc: 0.6146 | Val F1: 0.5522
Epoch 2/15 | Train Loss: 0.7820 | Val Acc: 0.6402 | Val F1: 0.5917
Epoch 3/15 | Train Loss: 0.4615 | Val Acc: 0.6426 | Val F1: 0.6073
Epoch 4/15 | Train Loss: 0.1876 | Val Acc: 0.6289 | Val F1: 0.6067
Epoch 5/15 | Train Loss: 0.0723 | Val Acc: 0.6479 | Val F1: 0.6237
Epoch 6/15 | Train Loss: 0.0421 | Val Acc: 0.6493 | Val F1: 0.6316
Epoch 7/15 | Train Loss: 0.0373 | Val Acc: 0.6304 | Val F1: 0.6111
Epoch 8/15 | Train Loss: 0.0908 | Val Acc: 0.6203 | Val F1: 0.5800
Epoch 9/15 | Train Loss: 0.1329 | Val Acc: 0.6529 | Val F1: 0.6306
Epoch 10/15 | Train Loss: 0.0556 | Val Acc: 0.6374 | Val F1: 0.6209
Epoch 11/15 | Train Loss: 0.0282 | Val Acc: 0.6458 | Val F1: 0.6264
Epoch 12/15 | Train Loss: 0.0196 | Val Acc: 0.6607 | Val F1: 0.6444
Epoch 13/15 | Train Loss: 0.0146 | Val Acc: 0.6526 | Val F1: 0.6379
Epoch 14/15 | Train Loss: 0.0338 | Val Acc: 0.6142 | Val F1: 0.5973
Epoch 15/15 | Train Loss: 0.1611 | Val Acc: 0.6392 | Val 

In [32]:
val_acc, val_p, val_r, val_f1, val_cm = evaluate(model, val_loader)

print("\nFER2013 (Frozen Backbone) Results")
print("Accuracy :", val_acc)
print("Precision:", val_p)
print("Recall   :", val_r)
print("F1-score :", val_f1)
print("Confusion Matrix:\n", val_cm)


FER2013 (Frozen Backbone) Results
Accuracy : 0.6391501219087427
Precision: 0.6284307035426985
Recall   : 0.6222899748405688
F1-score : 0.6225062441760414
Confusion Matrix:
 [[ 461    5   82   43   84   89   35]
 [  14   44    5    0    3    7    3]
 [ 102    7  356   35   78  136  106]
 [  48    1   27 1180  119   28   42]
 [  77    1   60   48  678  137   24]
 [ 123    3  144   29  188  470   26]
 [  20    2   31   34   15   11  481]]


on training only the fc layer  
  
Epoch 1/15 | Train Loss: 1.6381 | Val Acc: 0.4110 | Val F1: 0.3223  
Epoch 15/15 | Train Loss: 1.4044 | Val Acc: 0.4424 | Val F1: 0.3805  

  
FER2013 (Frozen Backbone) Results  
Accuracy : 0.44235458028561475  
Precision: 0.3928199665163275  
Recall   : 0.39119934995031885  
F1-score : 0.3805418303351867  
Confusion Matrix:  
 [[192   3  57 189 110 152  75]  
 [  9  15   4  21   8  18   8]  
 [ 83  13 134 156 115 174 163]  
 [ 61  10  36 997 138 122  86]  
 [ 60   7  42 223 395 143  76]  
 [ 97  10  64 237 159 351  68]  
 [ 30   1  45  68  37  24 456]]  

on partial training (layer 4 of resnet with fc layer)  
  
Epoch 1/15 | Train Loss: 1.2469 | Val Acc: 0.5817 | Val F1: 0.4948  
Epoch 15/15 | Train Loss: 0.0111 | Val Acc: 0.6109 | Val F1: 0.5852  
  
FER2013 (Frozen Backbone) Results  
Accuracy : 0.610936955764542  
Precision: 0.6136706991647694  
Recall   : 0.573162193188752  
F1-score : 0.5852134488354735  
Confusion Matrix:  
 [[ 410    9   62   65  126  114   30]  
 [  16   36    6    8    6   11    2]  
 [ 120    3  338   43  101  138   94]  
 [  45    0   15 1165  102   59   34]  
 [  66    0   43   97  596  167   29]  
 [  97    2   93   60  189  485   28]  
 [  30    0   39   25   36   24  478]]  

on full - fine tuning  
  
Epoch 1/15 | Train Loss: 1.1606 | Val Acc: 0.6146 | Val F1: 0.5522  
Epoch 15/15 | Train Loss: 0.1611 | Val Acc: 0.6392 | Val F1: 0.6225  
  
    
FER2013 (Frozen Backbone) Results  
Accuracy : 0.6391501219087427  
Precision: 0.6284307035426985  
Recall   : 0.6222899748405688  
F1-score : 0.6225062441760414  
Confusion Matrix:  
 [[ 461    5   82   43   84   89   35]  
 [  14   44    5    0    3    7    3]  
 [ 102    7  356   35   78  136  106]  
 [  48    1   27 1180  119   28   42]  
 [  77    1   60   48  678  137   24]  
 [ 123    3  144   29  188  470   26]  
 [  20    2   31   34   15   11  481]]  