<a href="https://colab.research.google.com/github/Mateus929/Facial-Expression-Recognition-Challenge/blob/main/notebooks/nets.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Setup

## Data Preparation: Mounting Google Drive

As a first step, this notebook demonstrates how to mount your Google Drive to access files stored there.

In [None]:
# Colab-only step: mount Google Drive at /content/drive so files stored there can be read.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


## Data Loading and Initial Setup

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import torch
import torch.nn as nn

# Notebook-wide plotting defaults.
# `sns.set` is only an alias of `sns.set_theme`, which is the preferred entry point.
sns.set_theme(style="whitegrid")
plt.rcParams["figure.figsize"] = (10, 6)

In [None]:
# Location of the training CSV on the mounted Google Drive.
# Edit this path if your copy of the data lives somewhere else.
TRAIN_CSV_PATH = '/content/drive/MyDrive/Colab Notebooks/Facial Expression Recognition Challenge/data/train.csv'

try:
    df = pd.read_csv(TRAIN_CSV_PATH)
except FileNotFoundError:
    # Later cells use `df`, so print the likely cause and stop here instead of
    # carrying on with `df` undefined. Any other error propagates with its traceback.
    print("Error: The training data file was not found. Please check the file path and name.")
    raise

print("Training data loaded successfully!")
display(df.head())

Training data loaded successfully!


Unnamed: 0,emotion,pixels
0,0,70 80 82 72 58 58 60 63 54 58 60 48 89 115 121...
1,0,151 150 147 155 148 133 111 140 170 174 182 15...
2,2,231 212 156 164 174 138 161 173 182 200 106 38...
3,4,24 32 36 30 32 23 19 20 30 41 21 22 32 34 21 1...
4,6,4 0 0 0 0 0 0 0 0 0 0 0 3 15 23 28 48 50 58 84...


## Data Splitting

In [None]:
import torch
from torch.utils.data import Dataset
import numpy as np
from torchvision import transforms

class FERDataset(Dataset):
    """Dataset over a DataFrame that has an 'emotion' column and a 'pixels' column.

    Each 'pixels' entry is a whitespace-separated string of integers that is
    parsed into a 48x48 ``uint8`` array.
    """

    def __init__(self, dataframe, transform=None):
        """Keep the frame and the optional callable applied to each 48x48 array."""
        self.transform = transform
        self.data = dataframe

    def __len__(self):
        """Return the number of rows in the wrapped frame."""
        return len(self.data)

    def __getitem__(self, idx):
        """Return ``(image, label)`` for the row at position ``idx``.

        With a transform, ``image`` is whatever the transform returns for the
        raw ``uint8`` array. Without one, it is a float32 tensor of shape
        ``(1, 48, 48)`` scaled by 1/255.
        """
        raw = self.data.iloc[idx]['pixels']
        label = int(self.data.iloc[idx]['emotion'])

        values = list(map(int, raw.split()))
        pixels = np.array(values, dtype=np.uint8).reshape(48, 48)

        if not self.transform:
            # No transform supplied: add the channel axis and scale to [0, 1].
            return torch.tensor(pixels, dtype=torch.float32).unsqueeze(0) / 255.0, label
        return self.transform(pixels), label

In [None]:
from sklearn.model_selection import train_test_split
from torch.utils.data import DataLoader

# Hold out 20% of the rows for validation; the split is stratified on the
# emotion label and uses a fixed random_state.
train_df, val_df = train_test_split(
    df,
    test_size=0.2,
    stratify=df['emotion'],
    random_state=42,
)

# Baseline preprocessing without augmentation: tensor conversion followed by
# normalisation with mean 0.5 and std 0.5.
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.5], std=[0.5]),
])

train_dataset = FERDataset(train_df, transform=transform)
val_dataset = FERDataset(val_df, transform=transform)

# Only the training batches are shuffled.
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=64, shuffle=False)

print(f"Training DataLoader created with {len(train_loader)} batches.")
print(f"Validation DataLoader created with {len(val_loader)} batches.")

Training DataLoader created with 359 batches.
Validation DataLoader created with 90 batches.


## Data Augmentation

In [None]:
from PIL import Image
import torchvision.transforms as T

class ToPILImageAndAugment:
    """Callable that converts an array to a mode-'L' PIL image and runs a pipeline on it."""

    def __init__(self, augmentations):
        """Store the callable that will be applied to the PIL image."""
        self.augmentations = augmentations

    def __call__(self, x):
        """Build a mode-'L' PIL image from ``x`` and return the pipeline's output for it."""
        pil_image = Image.fromarray(x, mode='L')
        transformed = self.augmentations(pil_image)
        return transformed

# Training-time augmentation pipeline; it receives PIL images from ToPILImageAndAugment.
train_augmentations = T.Compose([
    # `scale` is the fraction of the source image area that is cropped before resizing,
    # so its upper bound is capped at 1.0: a crop larger than the image cannot be sampled.
    T.RandomResizedCrop(48, scale=(0.9, 1.0), ratio=(0.9, 1.1)),
    T.RandomRotation(20),
    T.RandomHorizontalFlip(),
    T.RandomApply([T.GaussianBlur(kernel_size=3)], p=0.1),  # slight blur noise
    T.RandomApply([T.RandomAffine(degrees=0, translate=(0.1, 0.1))], p=0.2),
    T.ToTensor(),
    T.Normalize((0.5,), (0.5,))
])

train_dataset_t = FERDataset(train_df, transform=ToPILImageAndAugment(train_augmentations))
train_loader_t = DataLoader(train_dataset_t, batch_size=64, shuffle=True, num_workers=2)

# Validation pipeline: no random operations, same tensor conversion and normalisation as training.
valid_transforms = T.Compose([
    T.Resize(48),           # resize to 48x48 (same as training crop size)
    T.CenterCrop(48),
    T.ToTensor(),
    T.Normalize((0.5,), (0.5,))
])

val_dataset_t = FERDataset(val_df, transform=ToPILImageAndAugment(valid_transforms))
val_loader_t = DataLoader(val_dataset_t, batch_size=64, shuffle=False, num_workers=2)

## Wandb

In [None]:
!pip install wandb
import wandb
wandb.login()



<IPython.core.display.Javascript object>

[34m[1mwandb[0m: Logging into wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)
[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize
wandb: Paste an API key from your profile and hit enter:

 ··········


[34m[1mwandb[0m: No netrc file found, creating one.
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc
[34m[1mwandb[0m: Currently logged in as: [33mzhorzholianimate[0m ([33mzhorzholianimate-free-university-of-tbilisi-[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


True

## Train Function

In [None]:
def _run_epoch(model, criterion, loader, device, optimizer=None):
    """Run one pass over ``loader`` and return ``(mean_loss, accuracy)``.

    The model is put in training mode and updated after every batch when
    ``optimizer`` is given; otherwise it is put in eval mode and only scored.
    Gradient tracking is left to the caller.
    """
    training = optimizer is not None
    model.train(training)

    loss_sum, correct, seen = 0.0, 0, 0
    for images, labels in loader:
        images, labels = images.to(device), labels.to(device)

        outputs = model(images)
        loss = criterion(outputs, labels)

        if training:
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

        batch_size = labels.size(0)
        # Assumes `criterion` returns a per-batch mean: weighting by the batch size
        # turns the running sum into a per-sample mean over the epoch.
        loss_sum += loss.item() * batch_size
        correct += (outputs.argmax(1) == labels).sum().item()
        seen += batch_size

    if seen == 0:
        # Nothing was iterated; report NaN instead of dividing by zero.
        return float("nan"), float("nan")
    # Divide by the samples actually processed so the figures stay correct when
    # the loader drops the last incomplete batch.
    return loss_sum / seen, correct / seen


def train_model(model, criterion, optimizer, device, train_loader, val_loader,
                num_epochs=10, scheduler=None, project_name="fer-cnn", group_name="", run_name=None):
    """Train ``model`` for ``num_epochs`` epochs, validating and logging to W&B after each one.

    Args:
        model: Network to train; batches are moved to ``device`` before the forward pass.
        criterion: Loss callable invoked as ``criterion(outputs, labels)``.
        optimizer: Optimizer stepped once per training batch.
        device: Device that images and labels are moved to.
        train_loader: Iterable of ``(images, labels)`` batches with a ``batch_size`` attribute.
        val_loader: Iterable of ``(images, labels)`` batches used for validation.
        num_epochs: Number of passes over ``train_loader``.
        scheduler: Optional LR scheduler stepped once per epoch; a
            ``ReduceLROnPlateau`` instance is stepped with the validation loss.
        project_name: W&B project name.
        group_name: W&B group name.
        run_name: W&B run name.

    Returns:
        None. Metrics are printed and sent to W&B.

    Raises:
        Whatever the training loop raises. The W&B run is closed with a
        non-zero exit code first, so it is not left open.
    """
    import wandb

    wandb.init(project=project_name, name=run_name, group=group_name)
    try:
        wandb.config.update({
            "epochs": num_epochs,
            "batch_size": train_loader.batch_size,
            "learning_rate": optimizer.param_groups[0]['lr'],
            "architecture": str(model),
        })
        wandb.watch(model, criterion, log="all", log_freq=100)

        for epoch in range(num_epochs):
            train_loss, train_acc = _run_epoch(model, criterion, train_loader, device, optimizer)

            # Validation
            with torch.no_grad():
                val_loss, val_acc = _run_epoch(model, criterion, val_loader, device)

            print(f"Epoch {epoch+1}/{num_epochs} | "
                  f"Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.4f} | "
                  f"Val Loss: {val_loss:.4f}, Val Acc: {val_acc:.4f}")

            # The learning rate is logged before the scheduler step, i.e. the
            # value that was used during this epoch.
            wandb.log({
                "epoch": epoch + 1,
                "train_loss": train_loss,
                "train_accuracy": train_acc,
                "val_loss": val_loss,
                "val_accuracy": val_acc,
                "learning_rate": optimizer.param_groups[0]['lr']
            })

            if scheduler is not None:
                if isinstance(scheduler, torch.optim.lr_scheduler.ReduceLROnPlateau):
                    scheduler.step(val_loss)
                else:
                    scheduler.step()
    except BaseException:
        # Also covers KeyboardInterrupt: close the run as failed, then re-raise.
        wandb.finish(exit_code=1)
        raise
    else:
        wandb.finish()

# SqueezeNet

We will be using the SqueezeNet architecture on our dataset. SqueezeNet is a convolutional neural network architecture designed for efficient deep learning, introduced in the paper ["SqueezeNet: AlexNet-level accuracy with 50x fewer parameters and <0.5MB model size"](https://arxiv.org/abs/1602.07360).

The architecture we will use is essentially the same as the PyTorch implementation found [here](https://github.com/pytorch/vision/blob/6db1569c89094cf23f3bc41f79275c45e9fcb3f3/torchvision/models/squeezenet.py#L37), but it has been modified to handle grayscale images as input.

In [None]:
import sys

import torch
import torch.nn as nn
import torch.optim as optim

# NOTE(review): the local `models` package presumably lives in the project folder on
# Drive (path taken from the previously commented-out lines) - adjust if it is elsewhere.
# Adding the folder here lets the import below work on a fresh kernel.
PROJECT_DIR = "/content/drive/MyDrive/Colab Notebooks/Facial Expression Recognition Challenge"
if PROJECT_DIR not in sys.path:
    sys.path.append(PROJECT_DIR)

from models.squeezenet import squeezenet1_1


device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = squeezenet1_1(pretrained=False, num_classes=7)
model = model.to(device)

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)
# Stepped with the validation loss by train_model (mode='min', patience=2).
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', patience=2)

train_model(model, criterion, optimizer, device, train_loader_t, val_loader_t,
            num_epochs=20, scheduler=scheduler, group_name='SqueezeNet', run_name="squeezenet_1x48x48_03")



Epoch 1/20 | Train Loss: 1.8484, Train Acc: 0.2468 | Val Loss: 1.8298, Val Acc: 0.2513
Epoch 2/20 | Train Loss: 1.8268, Train Acc: 0.2532 | Val Loss: 1.8193, Val Acc: 0.2579
Epoch 3/20 | Train Loss: 1.7597, Train Acc: 0.2884 | Val Loss: 1.6698, Val Acc: 0.3238
Epoch 4/20 | Train Loss: 1.6821, Train Acc: 0.3227 | Val Loss: 1.6351, Val Acc: 0.3551
Epoch 5/20 | Train Loss: 1.6349, Train Acc: 0.3522 | Val Loss: 1.6321, Val Acc: 0.3483
Epoch 6/20 | Train Loss: 1.6026, Train Acc: 0.3650 | Val Loss: 1.5202, Val Acc: 0.3990
Epoch 7/20 | Train Loss: 1.5692, Train Acc: 0.3831 | Val Loss: 1.5311, Val Acc: 0.4049
Epoch 8/20 | Train Loss: 1.5463, Train Acc: 0.3972 | Val Loss: 1.5212, Val Acc: 0.4117
Epoch 9/20 | Train Loss: 1.5212, Train Acc: 0.4066 | Val Loss: 1.4414, Val Acc: 0.4357
Epoch 10/20 | Train Loss: 1.5030, Train Acc: 0.4180 | Val Loss: 1.4205, Val Acc: 0.4485
Epoch 11/20 | Train Loss: 1.4781, Train Acc: 0.4269 | Val Loss: 1.4107, Val Acc: 0.4572
Epoch 12/20 | Train Loss: 1.4632, Train A

0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
learning_rate,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
train_accuracy,▁▁▂▃▄▅▅▆▆▆▇▇▇▇▇█████
train_loss,██▇▆▅▄▄▄▃▃▂▂▂▂▂▂▁▁▁▁
val_accuracy,▁▁▃▄▄▅▅▆▆▇▇▇▇▇▇█████
val_loss,██▆▅▅▄▄▄▃▂▂▂▂▂▂▂▁▁▁▁

0,1
epoch,20.0
learning_rate,0.001
train_accuracy,0.47337
train_loss,1.3782
val_accuracy,0.49617
val_loss,1.31731


# ResNet

The simpler models trained so far do not appear to be sufficient for this task. Simply adding more layers to a plain neural network often makes performance worse, not better. This isn't just due to overfitting — it is an optimization issue: deeper networks are harder to train because gradients can vanish or explode, and useful signals can get lost. To address this, we will use the ResNet architecture, introduced in the paper ["Deep Residual Learning for Image Recognition"](https://arxiv.org/abs/1512.03385), which adds residual (skip) connections: each block's input is added back to its output, so the block only has to learn a correction to that input. These connections mitigate the training difficulties of deep networks and improve performance.

In [None]:
import torch
import torch.nn as nn
from torchvision.models import resnet18, ResNet18_Weights

def get_resnet18_for_grayscale(num_classes, pretrained=True):
    """Build a torchvision ResNet-18 with a 1-channel first conv and a ``num_classes``-way head.

    Args:
        num_classes: Output size of the replacement fully connected layer.
        pretrained: When true, load ``ResNet18_Weights.DEFAULT`` and initialise
            the new first conv with the original kernels summed over their
            input-channel axis; otherwise the network starts without loaded weights.

    Returns:
        The modified ResNet-18 module.
    """
    backbone = resnet18(weights=ResNet18_Weights.DEFAULT if pretrained else None)

    # Replacement first conv: same geometry as the original one, but a single input channel.
    rgb_stem = backbone.conv1
    gray_stem = nn.Conv2d(
        1,
        rgb_stem.out_channels,
        kernel_size=rgb_stem.kernel_size,
        stride=rgb_stem.stride,
        padding=rgb_stem.padding,
        bias=rgb_stem.bias is not None,
    )
    if pretrained:
        with torch.no_grad():
            # Collapse the input-channel axis of the loaded kernels into one channel.
            gray_stem.weight = nn.Parameter(rgb_stem.weight.sum(dim=1, keepdim=True))
    backbone.conv1 = gray_stem

    # New classification head sized for this task.
    backbone.fc = nn.Linear(backbone.fc.in_features, num_classes)
    return backbone


First, I will train the pre-trained model on the original (non-augmented) data, and then on the augmented data.

In [None]:
from torch import optim
import torch.nn as nn

# Run "resnet18-gray": pretrained ResNet-18 trained on the non-augmented loaders.
num_classes = 7
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

model = get_resnet18_for_grayscale(num_classes)
model = model.to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=1e-4)
# Multiply the learning rate by 0.1 every 5 epochs.
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.1)

train_model(
    model, criterion, optimizer, device, train_loader, val_loader,
    num_epochs=10,
    scheduler=scheduler,
    project_name="fer-cnn",
    group_name="resnet-gray",
    run_name="resnet18-gray",
)

Downloading: "https://download.pytorch.org/models/resnet18-f37072fd.pth" to /root/.cache/torch/hub/checkpoints/resnet18-f37072fd.pth
100%|██████████| 44.7M/44.7M [00:00<00:00, 136MB/s]


Epoch 1/10 | Train Loss: 1.4775, Train Acc: 0.4418 | Val Loss: 1.2416, Val Acc: 0.5270
Epoch 2/10 | Train Loss: 1.0068, Train Acc: 0.6295 | Val Loss: 1.1985, Val Acc: 0.5632
Epoch 3/10 | Train Loss: 0.6564, Train Acc: 0.7650 | Val Loss: 1.2750, Val Acc: 0.5730
Epoch 4/10 | Train Loss: 0.3576, Train Acc: 0.8818 | Val Loss: 1.4644, Val Acc: 0.5709
Epoch 5/10 | Train Loss: 0.2132, Train Acc: 0.9335 | Val Loss: 1.6660, Val Acc: 0.5834
Epoch 6/10 | Train Loss: 0.0998, Train Acc: 0.9754 | Val Loss: 1.5679, Val Acc: 0.5925
Epoch 7/10 | Train Loss: 0.0529, Train Acc: 0.9910 | Val Loss: 1.5824, Val Acc: 0.5970
Epoch 8/10 | Train Loss: 0.0374, Train Acc: 0.9943 | Val Loss: 1.6135, Val Acc: 0.5967
Epoch 9/10 | Train Loss: 0.0301, Train Acc: 0.9955 | Val Loss: 1.6446, Val Acc: 0.5975
Epoch 10/10 | Train Loss: 0.0252, Train Acc: 0.9962 | Val Loss: 1.6572, Val Acc: 0.6021


0,1
epoch,▁▂▃▃▄▅▆▆▇█
learning_rate,█████▁▁▁▁▁
train_accuracy,▁▃▅▇▇█████
train_loss,█▆▄▃▂▁▁▁▁▁
val_accuracy,▁▄▅▅▆▇█▇██
val_loss,▂▁▂▅█▇▇▇██

0,1
epoch,10.0
learning_rate,1e-05
train_accuracy,0.99621
train_loss,0.02519
val_accuracy,0.60206
val_loss,1.65715


In [None]:
# Run "resnet18-gray-01": pretrained ResNet-18 trained on the augmented loader.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
num_classes = 7

model = get_resnet18_for_grayscale(num_classes=num_classes, pretrained=True).to(device)

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=1e-4)
# Multiply the learning rate by 0.1 every 5 epochs.
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.1)

train_model(
    model=model,
    criterion=criterion,
    optimizer=optimizer,
    device=device,
    train_loader=train_loader_t,
    # Use the validation loader that belongs to the same PIL-based pipeline as
    # train_loader_t, matching the other augmented runs in this notebook.
    val_loader=val_loader_t,
    num_epochs=10,
    scheduler=scheduler,
    project_name="fer-cnn",
    group_name="resnet-gray",
    run_name="resnet18-gray-01"
)

Epoch 1/10 | Train Loss: 1.6011, Train Acc: 0.3816 | Val Loss: 1.4090, Val Acc: 0.4765
Epoch 2/10 | Train Loss: 1.3444, Train Acc: 0.4867 | Val Loss: 1.2665, Val Acc: 0.5174
Epoch 3/10 | Train Loss: 1.2392, Train Acc: 0.5295 | Val Loss: 1.1739, Val Acc: 0.5566
Epoch 4/10 | Train Loss: 1.1680, Train Acc: 0.5557 | Val Loss: 1.1306, Val Acc: 0.5735
Epoch 5/10 | Train Loss: 1.1229, Train Acc: 0.5774 | Val Loss: 1.1012, Val Acc: 0.5817
Epoch 6/10 | Train Loss: 1.0370, Train Acc: 0.6085 | Val Loss: 1.0680, Val Acc: 0.5909
Epoch 7/10 | Train Loss: 1.0103, Train Acc: 0.6169 | Val Loss: 1.0654, Val Acc: 0.5947
Epoch 8/10 | Train Loss: 0.9900, Train Acc: 0.6262 | Val Loss: 1.0698, Val Acc: 0.5920
Epoch 9/10 | Train Loss: 0.9790, Train Acc: 0.6330 | Val Loss: 1.0608, Val Acc: 0.6005
Epoch 10/10 | Train Loss: 0.9630, Train Acc: 0.6380 | Val Loss: 1.0645, Val Acc: 0.5968


0,1
epoch,▁▂▃▃▄▅▆▆▇█
learning_rate,█████▁▁▁▁▁
train_accuracy,▁▄▅▆▆▇▇███
train_loss,█▅▄▃▃▂▂▁▁▁
val_accuracy,▁▃▆▆▇▇████
val_loss,█▅▃▂▂▁▁▁▁▁

0,1
epoch,10.0
learning_rate,1e-05
train_accuracy,0.638
train_loss,0.96296
val_accuracy,0.59683
val_loss,1.06446


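Now the same two experiments without pretrained weights, i.e. training ResNet-18 from scratch: first on the original data, then on the augmented data.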

In [None]:
# Run "resnet18-gray-03": ResNet-18 without pretrained weights on the non-augmented loaders.
num_classes = 7
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

model = get_resnet18_for_grayscale(num_classes=num_classes, pretrained=False)
model = model.to(device)

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=1e-4)
# Multiply the learning rate by 0.1 every 5 epochs.
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.1)

train_model(
    model, criterion, optimizer, device, train_loader, val_loader,
    num_epochs=10,
    scheduler=scheduler,
    project_name="fer-cnn",
    group_name="resnet-gray",
    run_name="resnet18-gray-03",
)

Epoch 1/10 | Train Loss: 1.6143, Train Acc: 0.3654 | Val Loss: 1.5182, Val Acc: 0.4040
Epoch 2/10 | Train Loss: 1.2925, Train Acc: 0.5148 | Val Loss: 1.4901, Val Acc: 0.4411
Epoch 3/10 | Train Loss: 0.9384, Train Acc: 0.6569 | Val Loss: 1.5551, Val Acc: 0.4657
Epoch 4/10 | Train Loss: 0.5454, Train Acc: 0.8098 | Val Loss: 1.8553, Val Acc: 0.4601
Epoch 5/10 | Train Loss: 0.2866, Train Acc: 0.9040 | Val Loss: 2.2236, Val Acc: 0.4556
Epoch 6/10 | Train Loss: 0.1061, Train Acc: 0.9729 | Val Loss: 2.1647, Val Acc: 0.4648
Epoch 7/10 | Train Loss: 0.0483, Train Acc: 0.9925 | Val Loss: 2.1912, Val Acc: 0.4657
Epoch 8/10 | Train Loss: 0.0337, Train Acc: 0.9953 | Val Loss: 2.2585, Val Acc: 0.4687
Epoch 9/10 | Train Loss: 0.0265, Train Acc: 0.9959 | Val Loss: 2.2933, Val Acc: 0.4683
Epoch 10/10 | Train Loss: 0.0234, Train Acc: 0.9960 | Val Loss: 2.3453, Val Acc: 0.4646


0,1
epoch,▁▂▃▃▄▅▆▆▇█
learning_rate,█████▁▁▁▁▁
train_accuracy,▁▃▄▆▇█████
train_loss,█▇▅▃▂▁▁▁▁▁
val_accuracy,▁▅█▇▇█████
val_loss,▁▁▂▄▇▇▇▇██

0,1
epoch,10.0
learning_rate,1e-05
train_accuracy,0.99595
train_loss,0.02342
val_accuracy,0.46465
val_loss,2.34529


In [None]:
# Run "resnet18-gray-04": ResNet-18 without pretrained weights on the augmented
# training loader, validated with the matching PIL-based validation loader.
num_classes = 7
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

model = get_resnet18_for_grayscale(num_classes=num_classes, pretrained=False)
model = model.to(device)

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=1e-4)
# Multiply the learning rate by 0.1 every 5 epochs.
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.1)

train_model(
    model, criterion, optimizer, device, train_loader_t, val_loader_t,
    num_epochs=10,
    scheduler=scheduler,
    project_name="fer-cnn",
    group_name="resnet-gray",
    run_name="resnet18-gray-04",
)

Epoch 1/10 | Train Loss: 1.7207, Train Acc: 0.3086 | Val Loss: 1.5863, Val Acc: 0.3772
Epoch 2/10 | Train Loss: 1.5967, Train Acc: 0.3728 | Val Loss: 1.5043, Val Acc: 0.4235
Epoch 3/10 | Train Loss: 1.5303, Train Acc: 0.4080 | Val Loss: 1.4266, Val Acc: 0.4559
Epoch 4/10 | Train Loss: 1.4751, Train Acc: 0.4284 | Val Loss: 1.3878, Val Acc: 0.4687
Epoch 5/10 | Train Loss: 1.4298, Train Acc: 0.4480 | Val Loss: 1.3320, Val Acc: 0.4915
Epoch 6/10 | Train Loss: 1.3719, Train Acc: 0.4731 | Val Loss: 1.3052, Val Acc: 0.5063
Epoch 7/10 | Train Loss: 1.3555, Train Acc: 0.4785 | Val Loss: 1.2928, Val Acc: 0.5115
Epoch 8/10 | Train Loss: 1.3469, Train Acc: 0.4836 | Val Loss: 1.2871, Val Acc: 0.5153
Epoch 9/10 | Train Loss: 1.3375, Train Acc: 0.4890 | Val Loss: 1.2838, Val Acc: 0.5167
Epoch 10/10 | Train Loss: 1.3206, Train Acc: 0.4927 | Val Loss: 1.2788, Val Acc: 0.5193


0,1
epoch,▁▂▃▃▄▅▆▆▇█
learning_rate,█████▁▁▁▁▁
train_accuracy,▁▃▅▆▆▇▇███
train_loss,█▆▅▄▃▂▂▁▁▁
val_accuracy,▁▃▅▆▇▇████
val_loss,█▆▄▃▂▂▁▁▁▁

0,1
epoch,10.0
learning_rate,1e-05
train_accuracy,0.49266
train_loss,1.32059
val_accuracy,0.51933
val_loss,1.27875
