<a href="https://colab.research.google.com/github/Arvind-K1/EmoNeXt-Facial-Emotion-Detection/blob/main/EmoNeXt_New.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [22]:
import kagglehub

path = kagglehub.dataset_download("msambare/fer2013")

print("Path to dataset files:", path)

Path to dataset files: /root/.cache/kagglehub/datasets/msambare/fer2013/versions/1


In [23]:
train_dir = "/root/.cache/kagglehub/datasets/msambare/fer2013/versions/1/train" #passing the path with training images
test_dir = "/root/.cache/kagglehub/datasets/msambare/fer2013/versions/1/test"

In [24]:
import torch
import torch.nn as nn
import torchvision.transforms as transforms
from torchvision.datasets import ImageFolder
from torch.utils.data import DataLoader
from torch.optim import AdamW
from torch.optim.lr_scheduler import StepLR
from tqdm import tqdm

import os
import torch.nn.functional as F
from timm.models.layers import trunc_normal_, DropPath
from timm.models.registry import register_model

from torch.optim.lr_scheduler import CosineAnnealingLR


In [25]:
class LayerNorm(nn.Module):
    r""" LayerNorm that supports two data formats: channels_last (default) or channels_first. """
    def __init__(self, normalized_shape, eps=1e-6, data_format="channels_last"):
        super().__init__()
        self.weight = nn.Parameter(torch.ones(normalized_shape))
        self.bias = nn.Parameter(torch.zeros(normalized_shape))
        self.eps = eps
        self.data_format = data_format
        if self.data_format not in ["channels_last", "channels_first"]:
            raise NotImplementedError
        self.normalized_shape = (normalized_shape,)

    def forward(self, x):
        if self.data_format == "channels_last":
            return F.layer_norm(x, self.normalized_shape, self.weight, self.bias, self.eps)
        elif self.data_format == "channels_first":
            u = x.mean(1, keepdim=True)
            s = (x - u).pow(2).mean(1, keepdim=True)
            x = (x - u) / torch.sqrt(s + self.eps)
            x = self.weight[:, None, None] * x + self.bias[:, None, None]
            return x

class Block(nn.Module):
    def __init__(self, dim, drop_path=0., layer_scale_init_value=1e-6):
        super().__init__()
        self.dwconv = nn.Conv2d(dim, dim, kernel_size=7, padding=3, groups=dim)
        self.norm = LayerNorm(dim, eps=1e-6)
        self.pwconv1 = nn.Linear(dim, 4 * dim)
        self.act = nn.GELU()
        self.pwconv2 = nn.Linear(4 * dim, dim)
        self.gamma = nn.Parameter(layer_scale_init_value * torch.ones((dim)), requires_grad=True) if layer_scale_init_value > 0 else None
        self.drop_path = DropPath(drop_path) if drop_path > 0. else nn.Identity()

    def forward(self, x):
        input = x
        x = self.dwconv(x)
        x = x.permute(0, 2, 3, 1)
        x = self.norm(x)
        x = self.pwconv1(x)
        x = self.act(x)
        x = self.pwconv2(x)
        if self.gamma is not None:
            x = self.gamma * x
        x = x.permute(0, 3, 1, 2)
        x = input + self.drop_path(x)
        return x

In [26]:
class SpatialTransformer(nn.Module):
    def __init__(self):
        super(SpatialTransformer, self).__init__()
        self.localization = nn.Sequential(
            nn.Conv2d(1, 8, kernel_size=7),
            nn.ReLU(True),
            nn.MaxPool2d(2, stride=2),
            nn.Conv2d(8, 10, kernel_size=5),
            nn.ReLU(True),
            nn.MaxPool2d(2, stride=2)
        )

        # Calculate the input size dynamically
        # Assuming an input image size of 224x224
        dummy_input = torch.randn(1, 1, 224, 224)  # (batch_size, channels, height, width)
        dummy_output = self.localization(dummy_input)
        fc_input_size = dummy_output.shape[1] * dummy_output.shape[2] * dummy_output.shape[3]

        self.fc_loc = nn.Sequential(
            nn.Linear(fc_input_size, 32), # Change input size here
            nn.ReLU(True),
            nn.Linear(32, 3 * 2)
        )

        self.fc_loc[2].weight.data.zero_()
        self.fc_loc[2].bias.data.copy_(torch.tensor([1, 0, 0, 0, 1, 0], dtype=torch.float))

    def forward(self, x):
        xs = self.localization(x)
        # Reshape dynamically
        xs = xs.view(xs.size(0), -1)
        theta = self.fc_loc(xs)
        theta = theta.view(-1, 2, 3)
        grid = nn.functional.affine_grid(theta, x.size())
        x = nn.functional.grid_sample(x, grid)
        return x

In [27]:

# Squeeze-and-Excitation Block
class SEBlock(nn.Module):
    def __init__(self, channel, reduction=16):
        super(SEBlock, self).__init__()
        self.avg_pool = nn.AdaptiveAvgPool2d(1)
        self.fc = nn.Sequential(
            nn.Linear(channel, channel // reduction, bias=False),
            nn.ReLU(inplace=True),
            nn.Linear(channel // reduction, channel, bias=False),
            nn.Sigmoid()
        )

    def forward(self, x):
        b, c, _, _ = x.size()
        y = self.avg_pool(x).view(b, c)
        y = self.fc(y).view(b, c, 1, 1)
        return x * y.expand_as(x)

In [28]:


# EmoNeXt Architecture
class EmoNeXt(nn.Module):
    def __init__(self, in_chans=1, num_classes=7,
                 depths=[3, 3, 9, 3], dims=[64, 128, 256, 512],
                 drop_path_rate=0.1, layer_scale_init_value=1e-6, head_init_scale=1.):
        super().__init__()

        self.stn = SpatialTransformer()
        self.downsample_layers = nn.ModuleList()
        stem = nn.Sequential(
            nn.Conv2d(in_chans, dims[0], kernel_size=4, stride=4),
            LayerNorm(dims[0], eps=1e-6, data_format="channels_first")
        )
        self.downsample_layers.append(stem)
        for i in range(3):
            downsample_layer = nn.Sequential(
                LayerNorm(dims[i], eps=1e-6, data_format="channels_first"),
                nn.Conv2d(dims[i], dims[i+1], kernel_size=2, stride=2),
            )
            self.downsample_layers.append(downsample_layer)

        self.stages = nn.ModuleList()
        dp_rates = [x.item() for x in torch.linspace(0, drop_path_rate, sum(depths))]
        cur = 0
        for i in range(4):
            stage = nn.Sequential(
                *[Block(dim=dims[i], drop_path=dp_rates[cur + j],
                        layer_scale_init_value=layer_scale_init_value) for j in range(depths[i])],
                SEBlock(dims[i])
            )
            self.stages.append(stage)
            cur += depths[i]

        self.norm = nn.LayerNorm(dims[-1], eps=1e-6)
        self.head = nn.Linear(dims[-1], num_classes)

        self.apply(self._init_weights)
        self.head.weight.data.mul_(head_init_scale)
        self.head.bias.data.mul_(head_init_scale)

    def _init_weights(self, m):
        if isinstance(m, (nn.Conv2d, nn.Linear)):
            trunc_normal_(m.weight, std=.02)
            if m.bias is not None:  # Check if bias exists before initializing
                nn.init.constant_(m.bias, 0)

    def forward_features(self, x):
        x = self.stn(x)
        for i in range(4):
            x = self.downsample_layers[i](x)
            x = self.stages[i](x)
        return self.norm(x.mean([-2, -1]))

    def forward(self, x):
        x = self.forward_features(x)
        x = self.head(x)
        return x


In [29]:

# Training and Evaluation Pipeline
def train_and_evaluate(model, train_loader, test_loader, criterion, optimizer, scheduler, num_epochs, device):
    best_test_accuracy = 0
    for epoch in range(num_epochs):
        # Training Phase
        model.train()
        running_loss = 0.0
        train_correct = 0
        train_total = 0

        for images, labels in tqdm(train_loader, desc=f"Epoch {epoch+1}/{num_epochs} [Training]"):
            images, labels = images.to(device), labels.to(device)

            optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            running_loss += loss.item()
            _, predicted = outputs.max(1)
            train_total += labels.size(0)
            train_correct += predicted.eq(labels).sum().item()

        scheduler.step()
        train_loss = running_loss / len(train_loader)
        train_accuracy = 100. * train_correct / train_total

        # Evaluation Phase
        model.eval()
        test_correct = 0
        test_total = 0

        with torch.no_grad():
            for images, labels in tqdm(test_loader, desc=f"Epoch {epoch+1}/{num_epochs} [Evaluation]"):
                images, labels = images.to(device), labels.to(device)
                outputs = model(images)
                _, predicted = outputs.max(1)
                test_total += labels.size(0)
                test_correct += predicted.eq(labels).sum().item()

        test_accuracy = 100. * test_correct / test_total
        if test_accuracy > best_test_accuracy:
            best_test_accuracy = test_accuracy

        print(f"Epoch {epoch+1}/{num_epochs}: Train Loss: {train_loss:.4f}, Train Acc: {train_accuracy:.2f}%, Test Acc: {test_accuracy:.2f}%")

    print(f"Best Test Accuracy: {best_test_accuracy:.2f}%")

In [35]:

# Hyperparameters
batch_size = 64
learning_rate = 1e-4
num_epochs = 10
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [36]:


# Transformations
transform = transforms.Compose([
    transforms.Grayscale(),
    transforms.Resize((224, 224)),  # Resize for pretrained weights
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.5], std=[0.5])
])


In [37]:

# Datasets and Dataloaders
train_dataset = ImageFolder(train_dir, transform=transform)
test_dataset = ImageFolder(test_dir, transform=transform)

train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)


In [38]:

# Model, Loss, Optimizer, and Scheduler
model = EmoNeXt(in_chans=1, num_classes=7)
model.to(device)

criterion = nn.CrossEntropyLoss()
optimizer = AdamW(model.parameters(), lr=learning_rate, weight_decay=1e-4)
scheduler = CosineAnnealingLR(optimizer, T_max=num_epochs)

In [39]:

# Train and Evaluate
train_and_evaluate(model, train_loader, test_loader, criterion, optimizer, scheduler, num_epochs, device)


Epoch 1/10 [Training]: 100%|██████████| 449/449 [04:14<00:00,  1.77it/s]
Epoch 1/10 [Evaluation]: 100%|██████████| 113/113 [00:23<00:00,  4.81it/s]


Epoch 1/10: Train Loss: 1.8251, Train Acc: 23.99%, Test Acc: 24.69%


Epoch 2/10 [Training]: 100%|██████████| 449/449 [04:12<00:00,  1.78it/s]
Epoch 2/10 [Evaluation]: 100%|██████████| 113/113 [00:23<00:00,  4.88it/s]


Epoch 2/10: Train Loss: 1.8054, Train Acc: 25.09%, Test Acc: 24.63%


Epoch 3/10 [Training]: 100%|██████████| 449/449 [04:13<00:00,  1.77it/s]
Epoch 3/10 [Evaluation]: 100%|██████████| 113/113 [00:23<00:00,  4.82it/s]


Epoch 3/10: Train Loss: 1.7888, Train Acc: 25.25%, Test Acc: 26.76%


Epoch 4/10 [Training]: 100%|██████████| 449/449 [04:13<00:00,  1.77it/s]
Epoch 4/10 [Evaluation]: 100%|██████████| 113/113 [00:23<00:00,  4.85it/s]


Epoch 4/10: Train Loss: 1.7506, Train Acc: 27.48%, Test Acc: 28.91%


Epoch 5/10 [Training]: 100%|██████████| 449/449 [04:13<00:00,  1.77it/s]
Epoch 5/10 [Evaluation]: 100%|██████████| 113/113 [00:23<00:00,  4.82it/s]


Epoch 5/10: Train Loss: 1.7220, Train Acc: 29.16%, Test Acc: 29.52%


Epoch 6/10 [Training]: 100%|██████████| 449/449 [04:13<00:00,  1.77it/s]
Epoch 6/10 [Evaluation]: 100%|██████████| 113/113 [00:23<00:00,  4.85it/s]


Epoch 6/10: Train Loss: 1.6975, Train Acc: 30.96%, Test Acc: 32.08%


Epoch 7/10 [Training]: 100%|██████████| 449/449 [04:13<00:00,  1.77it/s]
Epoch 7/10 [Evaluation]: 100%|██████████| 113/113 [00:23<00:00,  4.82it/s]


Epoch 7/10: Train Loss: 1.6693, Train Acc: 33.13%, Test Acc: 33.38%


Epoch 8/10 [Training]: 100%|██████████| 449/449 [04:13<00:00,  1.77it/s]
Epoch 8/10 [Evaluation]: 100%|██████████| 113/113 [00:23<00:00,  4.79it/s]


Epoch 8/10: Train Loss: 1.6500, Train Acc: 33.96%, Test Acc: 34.19%


Epoch 9/10 [Training]: 100%|██████████| 449/449 [04:13<00:00,  1.77it/s]
Epoch 9/10 [Evaluation]: 100%|██████████| 113/113 [00:23<00:00,  4.83it/s]


Epoch 9/10: Train Loss: 1.6376, Train Acc: 34.63%, Test Acc: 35.05%


Epoch 10/10 [Training]: 100%|██████████| 449/449 [04:13<00:00,  1.77it/s]
Epoch 10/10 [Evaluation]: 100%|██████████| 113/113 [00:23<00:00,  4.83it/s]

Epoch 10/10: Train Loss: 1.6321, Train Acc: 35.34%, Test Acc: 35.32%
Best Test Accuracy: 35.32%



