Dataset link:
FER2013 (Kaggle): https://www.kaggle.com/datasets/msambare/fer2013

In [None]:
import os
import torch
import torch.nn as nn
from torchvision import datasets, transforms, models
from torch.utils.data import DataLoader

In [None]:
# 1. Dataset locations (Kaggle input directory layout)
base_path = '/kaggle/input/fer2013'
# Both splits live in sibling sub-folders directly under the dataset root.
train_dir, test_dir = (
    os.path.join(base_path, split) for split in ('train', 'test')
)

In [None]:
# 2. Image preprocessing pipelines
# Channel statistics and input size shared by the train and test pipelines.
# The mean/std values are the ImageNet statistics, matching the
# IMAGENET1K_V1 weights this notebook loads for the ResNet18 backbone.
_IMAGENET_MEAN = [0.485, 0.456, 0.406]
_IMAGENET_STD = [0.229, 0.224, 0.225]
_INPUT_SIZE = (224, 224)


def _build_transform(augmentations=()):
    """Compose resize -> optional augmentations -> tensor -> normalisation."""
    return transforms.Compose([
        transforms.Resize(_INPUT_SIZE),
        *augmentations,
        transforms.ToTensor(),
        transforms.Normalize(mean=_IMAGENET_MEAN, std=_IMAGENET_STD),
    ])


# Training adds random flips and small rotations; evaluation stays deterministic.
train_transform = _build_transform([
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(10),
])

test_transform = _build_transform()

In [None]:
# 3. Datasets and batch loaders
# Each split is read with ImageFolder and paired with its own transform.
train_dataset = datasets.ImageFolder(train_dir, transform=train_transform)
test_dataset = datasets.ImageFolder(test_dir, transform=test_transform)

# Settings common to both loaders; only the training loader is shuffled.
_loader_options = {"batch_size": 64, "num_workers": 2}
train_loader = DataLoader(train_dataset, shuffle=True, **_loader_options)
test_loader = DataLoader(test_dataset, shuffle=False, **_loader_options)

print(f"Detected Classes: {train_dataset.classes}")

In [None]:
# 4. Model definition (ResNet18 backbone)
# Prefer the GPU when one is visible; otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Start from the IMAGENET1K_V1 weights for transfer learning.
model = models.resnet18(weights=models.ResNet18_Weights.IMAGENET1K_V1)

# Replace the final fully connected layer with a fresh classification head.
# The head is sized from the classes found in the training folder rather than
# a hard-coded 7, so it always agrees with the labels the loader produces.
num_ftrs = model.fc.in_features
num_classes = len(train_dataset.classes)
model.fc = nn.Linear(num_ftrs, num_classes)
model = model.to(device)

In [None]:
# 5. Loss function and optimizer
# Adam updates every model parameter (backbone and head) at lr = 1e-4.
optimizer = torch.optim.Adam(params=model.parameters(), lr=1e-4)
criterion = nn.CrossEntropyLoss()

In [None]:
# 6. Training Loop
def train_epoch(model, loader, optimizer, criterion, device=None):
    """Run one optimisation pass over ``loader`` and report loss and accuracy.

    Args:
        model: Network to train; it is switched to training mode.
        loader: Iterable yielding ``(images, labels)`` batches.
        optimizer: Optimizer whose ``zero_grad``/``step`` run once per batch.
        criterion: Callable ``criterion(outputs, labels)`` returning a scalar
            loss. The epoch loss assumes this scalar is the mean over the
            batch (the default reduction of ``nn.CrossEntropyLoss``).
        device: Device each batch is moved to. When omitted, the device of
            the model's first parameter is used; if the model has no
            parameters the batches are left where they are.

    Returns:
        Tuple ``(mean_loss, accuracy_percent)``, both averaged over samples.

    Raises:
        ValueError: If ``loader`` yields no samples.
    """
    if device is None:
        # Follow the model instead of relying on a module-level global.
        first_param = next(model.parameters(), None)
        device = first_param.device if first_param is not None else None

    model.train()
    loss_sum = 0.0
    correct = 0
    total = 0

    for images, labels in loader:
        if device is not None:
            images, labels = images.to(device), labels.to(device)

        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        batch_size = labels.size(0)
        # Weight each batch mean by its size so a smaller final batch does
        # not count as much as a full one.
        loss_sum += loss.item() * batch_size
        total += batch_size
        correct += (outputs.argmax(dim=1) == labels).sum().item()

    if total == 0:
        raise ValueError("train_epoch received a loader with no samples")

    return loss_sum / total, 100.0 * correct / total

In [None]:
def validate_epoch(model, loader, criterion, device=None):
    """Evaluate ``model`` on ``loader`` without updating any weights.

    Args:
        model: Network to evaluate; it is switched to evaluation mode.
        loader: Iterable yielding ``(images, labels)`` batches.
        criterion: Callable ``criterion(outputs, labels)`` returning a scalar
            loss. The reported loss assumes this scalar is the mean over the
            batch (the default reduction of ``nn.CrossEntropyLoss``).
        device: Device each batch is moved to. When omitted, the device of
            the model's first parameter is used; if the model has no
            parameters the batches are left where they are.

    Returns:
        Tuple ``(mean_loss, accuracy_percent)``, both averaged over samples.

    Raises:
        ValueError: If ``loader`` yields no samples.
    """
    if device is None:
        # Follow the model instead of relying on a module-level global.
        first_param = next(model.parameters(), None)
        device = first_param.device if first_param is not None else None

    model.eval()
    loss_sum = 0.0
    correct = 0
    total = 0

    # Gradients are not needed for evaluation; skipping them saves memory.
    with torch.no_grad():
        for images, labels in loader:
            if device is not None:
                images, labels = images.to(device), labels.to(device)

            outputs = model(images)
            loss = criterion(outputs, labels)

            batch_size = labels.size(0)
            # Weight each batch mean by its size so a smaller final batch
            # does not count as much as a full one.
            loss_sum += loss.item() * batch_size
            total += batch_size
            correct += (outputs.argmax(dim=1) == labels).sum().item()

    if total == 0:
        raise ValueError("validate_epoch received a loader with no samples")

    return loss_sum / total, 100.0 * correct / total

In [None]:
# 7. Train for 10 epochs, evaluating after each one and keeping the best weights
best_acc = 0.0

for epoch in range(1, 11):
    # One optimisation pass over the training data.
    train_loss, train_acc = train_epoch(model, train_loader, optimizer, criterion)

    # NOTE(review): evaluation runs on test_loader, so the checkpoint below is
    # selected on the test split and the reported best accuracy is optimistic.
    # Consider holding out part of the training data for model selection.
    val_loss, val_acc = validate_epoch(model, test_loader, criterion)

    report = (
        f"Epoch {epoch}:",
        f"  Train -> Loss: {train_loss:.4f}, Acc: {train_acc:.2f}%",
        f"  Valid -> Loss: {val_loss:.4f}, Acc: {val_acc:.2f}%",
    )
    for line in report:
        print(line)

    # Checkpoint only when the evaluation accuracy strictly improves.
    improved = val_acc > best_acc
    if not improved:
        continue

    best_acc = val_acc
    torch.save(model.state_dict(), 'best_fer2013_backbone.pth')
    print(f"  *** New Best Model Saved (Accuracy: {val_acc:.2f}%) ***")