# 🧠 Model 3: CNN with Global Average Pooling
Improved architecture for stability and better learning.

In [1]:

import torch
import torch.nn as nn
import torch.nn.functional as F
from torchvision import transforms, datasets
from torch.utils.data import DataLoader, random_split
from sklearn.metrics import accuracy_score
import matplotlib.pyplot as plt
import numpy as np
from PIL import Image

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Using device:", device)


Using device: cuda


In [2]:

# Dataset paths and transforms
train_dir = 'Data/train'
test_dir = 'Data/test'

train_transform = transforms.Compose([
    transforms.Grayscale(num_output_channels=3),
    transforms.Resize((128, 128)),
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(5),
    transforms.ToTensor()
])

test_transform = transforms.Compose([
    transforms.Grayscale(num_output_channels=3),
    transforms.Resize((128, 128)),
    transforms.ToTensor()
])

# Load datasets
full_dataset = datasets.ImageFolder(root=train_dir, transform=train_transform)
val_size = int(0.2 * len(full_dataset))
train_size = len(full_dataset) - val_size
train_dataset, val_dataset = random_split(full_dataset, [train_size, val_size])
val_dataset.dataset.transform = test_transform
test_dataset = datasets.ImageFolder(root=test_dir, transform=test_transform)

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

class_names = full_dataset.classes


In [3]:

class SpatialAttention(nn.Module):
    def __init__(self):
        super(SpatialAttention, self).__init__()
        self.conv = nn.Conv2d(2, 1, kernel_size=7, padding=3)

    def forward(self, x):
        avg_out = torch.mean(x, dim=1, keepdim=True)
        max_out, _ = torch.max(x, dim=1, keepdim=True)
        x_cat = torch.cat([avg_out, max_out], dim=1)
        attention = torch.sigmoid(self.conv(x_cat))
        return x * attention


In [4]:

class SlimAttentionCNN(nn.Module):
    def __init__(self):
        super(SlimAttentionCNN, self).__init__()
        self.conv1 = nn.Sequential(
            nn.Conv2d(3, 32, kernel_size=3, padding=1),
            nn.BatchNorm2d(32),
            nn.ReLU(),
            nn.MaxPool2d(2, 2)
        )
        self.conv2 = nn.Sequential(
            nn.Conv2d(32, 64, kernel_size=3, padding=1),
            nn.BatchNorm2d(64),
            nn.ReLU(),
            nn.MaxPool2d(2, 2)
        )
        self.conv3 = nn.Sequential(
            nn.Conv2d(64, 128, kernel_size=3, padding=1),
            nn.BatchNorm2d(128),
            nn.ReLU(),
            nn.MaxPool2d(2, 2)
        )
        self.attention = SpatialAttention()
        self.global_pool = nn.AdaptiveAvgPool2d((1, 1))
        self.dropout = nn.Dropout(0.4)
        self.fc = nn.Linear(128, 7)

    def forward(self, x):
        x = self.conv1(x)
        x = self.conv2(x)
        x = self.conv3(x)
        x = self.attention(x)
        x = self.global_pool(x)
        x = torch.flatten(x, 1)
        x = self.dropout(x)
        return self.fc(x)

model = SlimAttentionCNN().to(device)
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.5)


In [5]:

# Training loop
num_epochs = 25
train_losses = []
val_accuracies = []

for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    for images, labels in train_loader:
        images, labels = images.to(device), labels.to(device)

        outputs = model(images)
        loss = criterion(outputs, labels)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        running_loss += loss.item()

    scheduler.step()
    epoch_loss = running_loss / len(train_loader)
    train_losses.append(epoch_loss)

    # Validation
    model.eval()
    all_preds, all_labels = [], []
    with torch.no_grad():
        for images, labels in val_loader:
            images = images.to(device)
            outputs = model(images)
            _, predicted = torch.max(outputs, 1)
            all_preds.extend(predicted.cpu().numpy())
            all_labels.extend(labels.numpy())

    val_acc = accuracy_score(all_labels, all_preds)
    val_accuracies.append(val_acc)
    print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {epoch_loss:.4f}, Val Accuracy: {val_acc:.4f}")


Epoch [1/25], Loss: 1.7829, Val Accuracy: 0.2639
Epoch [2/25], Loss: 1.7341, Val Accuracy: 0.2881
Epoch [3/25], Loss: 1.6914, Val Accuracy: 0.3026
Epoch [4/25], Loss: 1.6585, Val Accuracy: 0.3235
Epoch [5/25], Loss: 1.6311, Val Accuracy: 0.2979
Epoch [6/25], Loss: 1.5964, Val Accuracy: 0.3418
Epoch [7/25], Loss: 1.5753, Val Accuracy: 0.3888
Epoch [8/25], Loss: 1.5542, Val Accuracy: 0.3862
Epoch [9/25], Loss: 1.5399, Val Accuracy: 0.3862
Epoch [10/25], Loss: 1.5193, Val Accuracy: 0.4210
Epoch [11/25], Loss: 1.4948, Val Accuracy: 0.4304
Epoch [12/25], Loss: 1.4857, Val Accuracy: 0.4348
Epoch [13/25], Loss: 1.4766, Val Accuracy: 0.4450
Epoch [14/25], Loss: 1.4660, Val Accuracy: 0.4396
Epoch [15/25], Loss: 1.4562, Val Accuracy: 0.4452
Epoch [16/25], Loss: 1.4435, Val Accuracy: 0.4515
Epoch [17/25], Loss: 1.4389, Val Accuracy: 0.4546
Epoch [18/25], Loss: 1.4342, Val Accuracy: 0.4569
Epoch [19/25], Loss: 1.4280, Val Accuracy: 0.4511
Epoch [20/25], Loss: 1.4247, Val Accuracy: 0.4609
Epoch [21

In [6]:

# Test set evaluation
model.eval()
test_preds, test_labels = [], []
with torch.no_grad():
    for images, labels in test_loader:
        images = images.to(device)
        outputs = model(images)
        _, predicted = torch.max(outputs, 1)
        test_preds.extend(predicted.cpu().numpy())
        test_labels.extend(labels.numpy())

test_acc = accuracy_score(test_labels, test_preds)
print(f"Test Accuracy: {test_acc:.4f}")


Test Accuracy: 0.4538


In [12]:

# Predict on custom image
test_image_path = 'hmy_image3.jpg'

image = Image.open(test_image_path).convert('L').convert('RGB')
image = test_transform(image).unsqueeze(0).to(device)

model.eval()
with torch.no_grad():
    output = model(image)
    probs = torch.softmax(output, dim=1)
    _, predicted = torch.max(probs, 1)
    predicted_class = class_names[predicted.item()]
    confidence = probs[0][predicted.item()].item()

print(f"Predicted Emotion: {predicted_class} ({confidence*100:.2f}% confidence)")


Predicted Emotion: happy (29.81% confidence)
