In [1]:
from google.colab import drive
drive.mount('/content/drive')



Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [2]:
!pip install wandb torch torchvision pandas numpy matplotlib seaborn
!pip install kaggle




In [3]:
#kaggle.json file
from google.colab import files
files.upload()


Saving kaggle.json to kaggle (2).json


{'kaggle (2).json': b'{"username":"tamaritoradze","key":"32312dbeeecea092539f236f63a7460a"}'}

In [4]:

!mkdir -p ~/.kaggle
!cp kaggle.json ~/.kaggle/
!chmod 600 ~/.kaggle/kaggle.json



In [5]:
!kaggle config view



Configuration values from /root/.kaggle
- username: tamaritoradze
- path: None
- proxy: None
- competition: None


In [6]:
!kaggle datasets download -d msambare/fer2013
!unzip fer2013.zip
!ls -la



[1;30;43mStreaming output truncated to the last 5000 lines.[0m
  inflating: train/sad/Training_65505359.jpg  
  inflating: train/sad/Training_65508578.jpg  
  inflating: train/sad/Training_65516023.jpg  
  inflating: train/sad/Training_65524027.jpg  
  inflating: train/sad/Training_65526454.jpg  
  inflating: train/sad/Training_65531175.jpg  
  inflating: train/sad/Training_65552921.jpg  
  inflating: train/sad/Training_65552938.jpg  
  inflating: train/sad/Training_65563105.jpg  
  inflating: train/sad/Training_6558295.jpg  
  inflating: train/sad/Training_65586139.jpg  
  inflating: train/sad/Training_65591252.jpg  
  inflating: train/sad/Training_65626778.jpg  
  inflating: train/sad/Training_65701260.jpg  
  inflating: train/sad/Training_65707197.jpg  
  inflating: train/sad/Training_65720898.jpg  
  inflating: train/sad/Training_6573454.jpg  
  inflating: train/sad/Training_65749670.jpg  
  inflating: train/sad/Training_65756896.jpg  
  inflating: train/sad/Training_65792953.jpg

In [7]:
import wandb
wandb.login()

[34m[1mwandb[0m: Currently logged in as: [33mttora21[0m ([33mttora21-free-university-of-tbilisi-[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


True

In [8]:
import os

# List contents of the train and test directories
print("Train directory structure:")
print(os.listdir("train"))

print("\nTest directory structure:")
print(os.listdir("test"))


Train directory structure:
['sad', 'neutral', 'happy', 'fear', 'angry', 'disgust', 'surprise']

Test directory structure:
['sad', 'neutral', 'happy', 'fear', 'angry', 'disgust', 'surprise']


In [9]:
import os
import pandas as pd
from sklearn.model_selection import train_test_split

def load_image_paths_and_labels(base_dir):
    data = []
    for label in os.listdir(base_dir):
        label_dir = os.path.join(base_dir, label)
        if os.path.isdir(label_dir):
            for img_file in os.listdir(label_dir):
                img_path = os.path.join(label_dir, img_file)
                data.append((img_path, label))
    return pd.DataFrame(data, columns=["image_path", "emotion"])

# Load full training and test sets
train_full_df = load_image_paths_and_labels("train")
test_df = load_image_paths_and_labels("test")

# Split a validation set from the train set (e.g., 10% for validation)
train_df, val_df = train_test_split(train_full_df, test_size=0.1, stratify=train_full_df["emotion"], random_state=42)

# Check the results
print("Train set:", train_df.shape)
print("Validation set:", val_df.shape)
print("Test set:", test_df.shape)


Train set: (25838, 2)
Validation set: (2871, 2)
Test set: (7178, 2)


In [10]:
from torch.utils.data import Dataset, DataLoader
import torch

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [11]:
print("Using device:", device)

Using device: cuda


In [12]:
print(train_df.columns)


Index(['image_path', 'emotion'], dtype='object')


In [13]:
print(train_df['emotion'].unique())

['sad' 'fear' 'angry' 'neutral' 'happy' 'surprise' 'disgust']


In [14]:
import numpy as np
import pandas as pd
import torch
from torch.utils.data import Dataset, DataLoader
from PIL import Image
import os

def enhance_dataset_distribution(df, desired_samples):
    """
    Enhances dataset by sampling from underrepresented classes to reach desired_samples.
    Adds a flag 'needs_augmentation' to indicate which samples were duplicated
    and should be augmented later.
    """
    dataset_parts = [df]
    df['needs_augmentation'] = False

    for emotion_type, group in df.groupby('emotion'):
        current_samples = len(group)
        if current_samples < desired_samples:
            additional = group.sample(n=desired_samples-current_samples, replace=True).copy()
            additional['needs_augmentation'] = True
            dataset_parts.append(additional)

    return pd.concat(dataset_parts, ignore_index=True)


class EmotionImageDataset(Dataset):
    def __init__(self, dataframe, image_size=(48, 48)):
        self.image_paths = dataframe['image_path'].values
        self.emotion_labels = dataframe['emotion'].values
        if 'needs_augmentation' in dataframe.columns:
            self.needs_augmentation = dataframe['needs_augmentation'].values
        else:
            self.needs_augmentation = np.zeros(len(dataframe), dtype=bool)

        self.image_size = image_size
        self.emotion_map = {
            'sad': 0, 'fear': 1, 'angry': 2, 'neutral': 3,
            'happy': 4, 'surprise': 5, 'disgust': 6
        }

    def __len__(self):
        return len(self.emotion_labels)

    def modify_brightness(self, img):
        """Applies random brightness adjustment to a PIL Image."""
        brightness_change = np.random.uniform(0.9, 1.1)
        modified = img.point(lambda p: p * brightness_change)
        return modified

    def __getitem__(self, idx):
        img_path = self.image_paths[idx]
        if not os.path.exists(img_path):
            if not isinstance(img_path, str):
                 raise TypeError(f"Expected image path as string, but got type: {type(img_path)}. Ensure DataFrame 'image_path' column contains file paths.")
            else:
                raise FileNotFoundError(f"Image not found at path: {img_path}")


        img = Image.open(img_path).convert('L')
        img = img.resize(self.image_size)

        if self.needs_augmentation[idx]:
            img = self.modify_brightness(img)

        img_array = np.array(img, dtype=np.float32) / 255.0
        img_tensor = torch.from_numpy(img_array).unsqueeze(0)

        label = self.emotion_map[self.emotion_labels[idx]]
        return img_tensor, label

def create_data_loaders(train_df, val_df, test_df, batch_size=256):
    max_samples = train_df['emotion'].value_counts().max()

    balanced_train = enhance_dataset_distribution(train_df.copy(), max_samples) # Use a copy to avoid modifying the original train_df
    train_dataset = EmotionImageDataset(balanced_train)
    val_dataset = EmotionImageDataset(val_df)
    test_dataset = EmotionImageDataset(test_df)

    train_loader = DataLoader(
        train_dataset,
        batch_size=batch_size,
        shuffle=True,
        num_workers=2
    )

    val_loader = DataLoader(
        val_dataset,
        batch_size=batch_size,
        shuffle=False,
        num_workers=2
    )

    test_loader = DataLoader(
        test_dataset,
        batch_size=batch_size,
        shuffle=False,
        num_workers=2
    )

    return train_loader, val_loader, test_loader

In [15]:
import torch
from tqdm import tqdm
import wandb
from sklearn.metrics import confusion_matrix, f1_score
import matplotlib.pyplot as plt
import numpy as np

def execute_training_pipeline(
    model,
    training_dataloader,
    validation_dataloader,
    loss_function,
    optimizer,
    device,
    training_epochs=5,
    emotion_labels=None
):
    """
    Training loop with W&B logging and visualizations.
    Returns trained model and W&B run object.
    """
    # Initialize W&B
    run = wandb.init(
        project="Emotion_Recognition",
        config={
            "epochs": training_epochs,
            "batch_size": training_dataloader.batch_size,
            "optimizer_type": optimizer.__class__.__name__,
            "learning_rate": optimizer.param_groups[0]["lr"],
            "loss_function": loss_function.__class__.__name__,
        }
    )

    wandb.watch(model, log="all", log_freq=100)
    model.to(device)

    training_metrics = {
        'loss': [], 'accuracy': [],
        'val_loss': [], 'val_accuracy': []
    }

    def process_batch(batch_data, batch_labels, is_training=True):
        batch_data, batch_labels = batch_data.to(device), batch_labels.to(device)

        if is_training:
            optimizer.zero_grad()

        predictions = model(batch_data)
        batch_loss = loss_function(predictions, batch_labels)

        if is_training:
            batch_loss.backward()
            optimizer.step()

        predicted_labels = predictions.argmax(dim=1)
        correct_predictions = predicted_labels.eq(batch_labels).sum().item()

        return batch_loss.item() * batch_data.size(0), correct_predictions, predicted_labels

    def create_confusion_matrix_figure(matrix, epoch):
        fig, ax = plt.subplots(figsize=(8, 8))
        im = ax.imshow(matrix, cmap='YlOrRd')
        fig.colorbar(im, ax=ax)

        ax.set_xticks(np.arange(len(emotion_labels)))
        ax.set_yticks(np.arange(len(emotion_labels)))
        ax.set_xticklabels(emotion_labels, rotation=45, ha='right')
        ax.set_yticklabels(emotion_labels)

        for i in range(len(emotion_labels)):
            for j in range(len(emotion_labels)):
                ax.text(j, i, matrix[i, j], ha='center', va='center', color='black')

        ax.set_title(f'Confusion Matrix - Epoch {epoch}')
        ax.set_xlabel('Predicted')
        ax.set_ylabel('Actual')
        fig.tight_layout()
        return fig

    for epoch in range(1, training_epochs + 1):
        model.train()
        epoch_stats = {'loss': 0, 'correct': 0, 'total': 0}

        for data, labels in tqdm(training_dataloader, desc=f"Training Epoch {epoch}"):
            loss, correct, _ = process_batch(data, labels)
            epoch_stats['loss'] += loss
            epoch_stats['correct'] += correct
            epoch_stats['total'] += labels.size(0)

        epoch_loss = epoch_stats['loss'] / epoch_stats['total']
        epoch_accuracy = epoch_stats['correct'] / epoch_stats['total']

        model.eval()
        val_stats = {'loss': 0, 'correct': 0, 'total': 0}
        all_preds, all_targets = [], []

        with torch.no_grad():
            for data, labels in tqdm(validation_dataloader, desc=f"Validation Epoch {epoch}"):
                loss, correct, preds = process_batch(data, labels, is_training=False)
                val_stats['loss'] += loss
                val_stats['correct'] += correct
                val_stats['total'] += labels.size(0)
                all_preds.extend(preds.cpu().numpy())
                all_targets.extend(labels.cpu().numpy())

        val_loss = val_stats['loss'] / val_stats['total']
        val_accuracy = val_stats['correct'] / val_stats['total']
        conf_matrix = confusion_matrix(all_targets, all_preds)
        f1_scores = f1_score(all_targets, all_preds, average=None)

        metrics = {
            'training_loss': epoch_loss,
            'training_accuracy': epoch_accuracy,
            'validation_loss': val_loss,
            'validation_accuracy': val_accuracy,
        }

        if emotion_labels:
            for i, label in enumerate(emotion_labels):
                metrics[f"f1_score_{label}"] = f1_scores[i]

        wandb.log(metrics, step=epoch)

        fig = create_confusion_matrix_figure(conf_matrix, epoch)
        wandb.log({"confusion_matrix": wandb.Image(fig)}, step=epoch)
        plt.close(fig)

        training_metrics['loss'].append(epoch_loss)
        training_metrics['accuracy'].append(epoch_accuracy)
        training_metrics['val_loss'].append(val_loss)
        training_metrics['val_accuracy'].append(val_accuracy)

        print(f"Epoch {epoch}/{training_epochs}")
        print(f"Train Loss: {epoch_loss:.4f}, Accuracy: {epoch_accuracy:.4f}")
        print(f"Val   Loss: {val_loss:.4f}, Accuracy: {val_accuracy:.4f}")
        print("-" * 50)

    epochs = range(1, training_epochs + 1)

    loss_fig, ax1 = plt.subplots()
    ax1.plot(epochs, training_metrics['loss'], label='Training Loss')
    ax1.plot(epochs, training_metrics['val_loss'], label='Validation Loss')
    ax1.set_title('Loss Over Epochs')
    ax1.set_xlabel('Epoch')
    ax1.set_ylabel('Loss')
    ax1.legend()
    wandb.log({"loss_progression": wandb.Image(loss_fig)})
    plt.close(loss_fig)

    acc_fig, ax2 = plt.subplots()
    ax2.plot(epochs, training_metrics['accuracy'], label='Training Accuracy')
    ax2.plot(epochs, training_metrics['val_accuracy'], label='Validation Accuracy')
    ax2.set_title('Accuracy Over Epochs')
    ax2.set_xlabel('Epoch')
    ax2.set_ylabel('Accuracy')
    ax2.legend()
    wandb.log({"accuracy_progression": wandb.Image(acc_fig)})
    plt.close(acc_fig)

    return model, run


In [16]:
import torch.nn as nn

class SimpleCNN(nn.Module):
    def __init__(self):
        super(SimpleCNN, self).__init__()

        self.layer1 = nn.Conv2d(1, 32, kernel_size=3, padding=1)
        self.layer2 = nn.Conv2d(32, 64, kernel_size=3, padding=1)
        self.layer3 = nn.Conv2d(64, 128, kernel_size=3, padding=1)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
        self.activation = nn.ReLU()

        self.flatten_layer = nn.Flatten()
        self.fc1 = nn.Linear(128 * 6 * 6, 128)
        self.fc2 = nn.Linear(128, 7)

    def forward(self, input_tensor):
        x = self.layer1(input_tensor)
        x = self.pool(x)
        x = self.activation(x)

        x = self.layer2(x)
        x = self.pool(x)
        x = self.activation(x)

        x = self.layer3(x)
        x = self.pool(x)
        x = self.activation(x)

        x = self.flatten_layer(x)
        x = self.fc1(x)
        x = self.fc2(x)

        return x

**მოდელი**

In [17]:
import torch
from torch import nn
import torch.optim

model = SimpleCNN()

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
loss_function = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

emotion_labels = ["Sad", "Fear", "Angry", "Neutral", "Happy", "Surprise", "Disgust"]


train_dl, val_dl, test_dl = create_data_loaders(train_df, val_df, test_df, batch_size=256)

trained_model, wandb_run = execute_training_pipeline(
    model=model,
    training_dataloader=train_dl,
    validation_dataloader=val_dl,
    loss_function=loss_function,
    optimizer=optimizer,
    device=device,
    training_epochs=10,
    emotion_labels=emotion_labels
)

wandb.finish()

print("Your W&B run:", wandb_run.get_url())

Training Epoch 1: 100%|██████████| 178/178 [00:12<00:00, 13.98it/s]
Validation Epoch 1: 100%|██████████| 12/12 [00:00<00:00, 17.21it/s]


Epoch 1/10
Train Loss: 1.7142, Accuracy: 0.3228
Val   Loss: 1.5399, Accuracy: 0.4114
--------------------------------------------------


Training Epoch 2: 100%|██████████| 178/178 [00:12<00:00, 14.19it/s]
Validation Epoch 2: 100%|██████████| 12/12 [00:00<00:00, 18.08it/s]


Epoch 2/10
Train Loss: 1.3072, Accuracy: 0.5093
Val   Loss: 1.4007, Accuracy: 0.4741
--------------------------------------------------


Training Epoch 3: 100%|██████████| 178/178 [00:12<00:00, 14.40it/s]
Validation Epoch 3: 100%|██████████| 12/12 [00:00<00:00, 18.06it/s]


Epoch 3/10
Train Loss: 1.0768, Accuracy: 0.5948
Val   Loss: 1.3365, Accuracy: 0.4953
--------------------------------------------------


Training Epoch 4: 100%|██████████| 178/178 [00:14<00:00, 12.64it/s]
Validation Epoch 4: 100%|██████████| 12/12 [00:00<00:00, 18.30it/s]


Epoch 4/10
Train Loss: 0.9377, Accuracy: 0.6483
Val   Loss: 1.3369, Accuracy: 0.5162
--------------------------------------------------


Training Epoch 5: 100%|██████████| 178/178 [00:11<00:00, 15.13it/s]
Validation Epoch 5: 100%|██████████| 12/12 [00:00<00:00, 12.20it/s]


Epoch 5/10
Train Loss: 0.8462, Accuracy: 0.6884
Val   Loss: 1.3755, Accuracy: 0.5152
--------------------------------------------------


Training Epoch 6: 100%|██████████| 178/178 [00:10<00:00, 16.24it/s]
Validation Epoch 6: 100%|██████████| 12/12 [00:01<00:00, 11.80it/s]


Epoch 6/10
Train Loss: 0.7670, Accuracy: 0.7209
Val   Loss: 1.3980, Accuracy: 0.5204
--------------------------------------------------


Training Epoch 7: 100%|██████████| 178/178 [00:11<00:00, 15.88it/s]
Validation Epoch 7: 100%|██████████| 12/12 [00:00<00:00, 16.76it/s]


Epoch 7/10
Train Loss: 0.6977, Accuracy: 0.7512
Val   Loss: 1.4318, Accuracy: 0.5197
--------------------------------------------------


Training Epoch 8: 100%|██████████| 178/178 [00:12<00:00, 14.77it/s]
Validation Epoch 8: 100%|██████████| 12/12 [00:00<00:00, 16.29it/s]


Epoch 8/10
Train Loss: 0.6369, Accuracy: 0.7748
Val   Loss: 1.4641, Accuracy: 0.5259
--------------------------------------------------


Training Epoch 9: 100%|██████████| 178/178 [00:12<00:00, 14.42it/s]
Validation Epoch 9: 100%|██████████| 12/12 [00:00<00:00, 16.98it/s]


Epoch 9/10
Train Loss: 0.5742, Accuracy: 0.7990
Val   Loss: 1.5781, Accuracy: 0.5141
--------------------------------------------------


Training Epoch 10: 100%|██████████| 178/178 [00:12<00:00, 13.93it/s]
Validation Epoch 10: 100%|██████████| 12/12 [00:00<00:00, 17.02it/s]


Epoch 10/10
Train Loss: 0.5300, Accuracy: 0.8174
Val   Loss: 1.6300, Accuracy: 0.5242
--------------------------------------------------


0,1
f1_score_Angry,▁▄▂█▇▇▆▇▆▆
f1_score_Disgust,▁▂▅▆▇███▇▇
f1_score_Fear,▁▄▆▆▆▇▆▇██
f1_score_Happy,▁▅▇███████
f1_score_Neutral,▁▃▃▆▆▇█▅▅▅
f1_score_Sad,▄▅▇▄▆█▅▇▁▇
f1_score_Surprise,▁▅▆▆▇▇█▇▅▇
training_accuracy,▁▄▅▆▆▇▇▇██
training_loss,█▆▄▃▃▂▂▂▁▁
validation_accuracy,▁▅▆▇▇███▇█

0,1
f1_score_Angry,0.39781
f1_score_Disgust,0.47059
f1_score_Fear,0.38375
f1_score_Happy,0.73958
f1_score_Neutral,0.4501
f1_score_Sad,0.41558
f1_score_Surprise,0.67114
training_accuracy,0.81739
training_loss,0.52997
validation_accuracy,0.52421




Your W&B run: https://wandb.ai/ttora21-free-university-of-tbilisi-/Emotion_Recognition/runs/dhn8f49y


**Enhancement**

In [18]:
class EnhancedNeuralNet(nn.Module):
    def __init__(self, dropout_rate=0.5):
        super().__init__()
        self.feature_layers = nn.ModuleList([
            self._create_conv_block(1, 32),
            self._create_conv_block(32, 64),
            self._create_conv_block(64, 128),
            self._create_conv_block(128, 256)
        ])

        self.processing_layers = nn.ModuleList([
            self._create_conv_block(256, 512, pool=False),
            self._create_conv_block(512, 512, pool=False)
        ])

        self.classifier = nn.Sequential(
            nn.Flatten(),
            self._create_fc_block(512 * 3 * 3, 512, dropout_rate),
            self._create_fc_block(512, 256, dropout_rate),
            self._create_fc_block(256, 128, dropout_rate),
            self._create_fc_block(128, 64, dropout_rate),
            nn.Linear(64, 7)
        )

    def _create_conv_block(self, in_channels, out_channels, pool=True):
        layers = [
            nn.Conv2d(in_channels, out_channels, 3, padding=1, bias=False),
            nn.BatchNorm2d(out_channels),
            nn.ReLU(inplace=True),
            nn.Dropout2d(0.5)
        ]
        if pool:
            layers.insert(3, nn.MaxPool2d(2, 2))
        return nn.Sequential(*layers)

    def _create_fc_block(self, in_features, out_features, dropout_rate):
        return nn.Sequential(
            nn.Linear(in_features, out_features, bias=False),
            nn.BatchNorm1d(out_features),
            nn.ReLU(inplace=True),
            nn.Dropout(dropout_rate)
        )

    def forward(self, x):
        for layer in self.feature_layers:
            x = layer(x)

        for layer in self.processing_layers:
            x = layer(x)

        x = self.classifier(x)
        return x

**AdvancedNetwork**

In [19]:
class CustomBlock(nn.Module):
    multiplier = 1
    def __init__(self, input_ch, output_ch, stride=1, drop_prob=0.0):
        super().__init__()

        self.main_path = nn.Sequential(
            nn.Conv2d(input_ch, output_ch, 3, stride, 1, bias=False),
            nn.BatchNorm2d(output_ch),
            nn.ReLU(inplace=True),
            nn.Conv2d(output_ch, output_ch, 3, 1, 1, bias=False),
            nn.BatchNorm2d(output_ch)
        )

        self.skip_path = nn.Identity()
        if stride != 1 or input_ch != output_ch:
            self.skip_path = nn.Sequential(
                nn.Conv2d(input_ch, output_ch, 1, stride, bias=False),
                nn.BatchNorm2d(output_ch)
            )

        self.activation = nn.ReLU(inplace=True)
        self.dropout = nn.Dropout2d(drop_prob) if drop_prob > 0 else nn.Identity()

    def forward(self, x):
        main = self.main_path(x)
        skip = self.skip_path(x)
        return self.dropout(self.activation(main + skip))

class AdvancedNetwork(nn.Module):
    def __init__(self, drop_prob=0.3):
        super().__init__()

        self.input_block = nn.Sequential(
            nn.Conv2d(1, 32, 3, 1, 1, bias=False),
            nn.BatchNorm2d(32),
            nn.ReLU(inplace=True)
        )

        self.blocks = nn.ModuleList([
            CustomBlock(32, 32, 1, drop_prob),
            CustomBlock(32, 64, 2, drop_prob),
            CustomBlock(64, 128, 2, drop_prob),
            CustomBlock(128, 256, 2, drop_prob)
        ])

        self.pool = nn.AdaptiveAvgPool2d((1, 1))

        self.classifier = nn.Sequential(
            nn.Flatten(),
            self._make_fc_block(256 * CustomBlock.multiplier, 512, drop_prob),
            self._make_fc_block(512, 256, drop_prob),
            self._make_fc_block(256, 128, drop_prob),
            nn.Linear(128, 7)
        )

    def _make_fc_block(self, in_features, out_features, drop_prob):
        return nn.Sequential(
            nn.Linear(in_features, out_features, bias=False),
            nn.BatchNorm1d(out_features),
            nn.ReLU(inplace=True),
            nn.Dropout(drop_prob)
        )

    def forward(self, x):
        x = self.input_block(x)
        for block in self.blocks:
            x = block(x)
        x = self.pool(x)
        x = self.classifier(x)
        return x

In [20]:
import torch
from torch import nn
import wandb
from sklearn.metrics import confusion_matrix
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

enhanced_model = EnhancedNeuralNet().to(device)
advanced_model = AdvancedNetwork().to(device)

batch_size = 256
learning_rate = 0.001
num_epochs = 40
emotion_labels = ["Sad", "Fear", "Angry", "Neutral", "Happy", "Surprise", "Disgust"]

criterion = nn.CrossEntropyLoss()
optimizer_enhanced = torch.optim.Adam(enhanced_model.parameters(), lr=learning_rate)
optimizer_advanced = torch.optim.Adam(advanced_model.parameters(), lr=learning_rate)

def train_epoch(model, dataloader, criterion, optimizer, device):
    model.train()
    running_loss = 0.0
    correct = 0
    total = 0

    for inputs, labels in dataloader:
        inputs, labels = inputs.to(device), labels.to(device)

        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)

        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

    epoch_loss = running_loss / len(dataloader)
    epoch_acc = 100 * correct / total
    return epoch_loss, epoch_acc

def validate(model, dataloader, criterion, device):
    model.eval()
    running_loss = 0.0
    correct = 0
    total = 0
    all_preds = []
    all_labels = []

    with torch.no_grad():
        for inputs, labels in dataloader:
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = model(inputs)
            loss = criterion(outputs, labels)

            running_loss += loss.item()
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

            all_preds.extend(predicted.cpu().numpy())
            all_labels.extend(labels.cpu().numpy())

    val_loss = running_loss / len(dataloader)
    val_acc = 100 * correct / total
    return val_loss, val_acc, all_preds, all_labels

def plot_confusion_matrix(true_labels, predictions, class_names):
    cm = confusion_matrix(true_labels, predictions)
    plt.figure(figsize=(10, 8))
    sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',
                xticklabels=class_names,
                yticklabels=class_names)
    plt.title('Confusion Matrix')
    plt.ylabel('True Label')
    plt.xlabel('Predicted Label')
    plt.tight_layout()
    return plt

def train_and_evaluate(model, train_dl, val_dl, criterion, optimizer, device,
                      num_epochs, model_name, emotion_labels):
    wandb.init(project="emotion_recognition", name=model_name)

    best_val_acc = 0.0

    for epoch in range(num_epochs):
        train_loss, train_acc = train_epoch(model, train_dl, criterion, optimizer, device)

        val_loss, val_acc, val_preds, val_labels = validate(model, val_dl, criterion, device)

        wandb.log({
            "epoch": epoch,
            "train_loss": train_loss,
            "train_acc": train_acc,
            "val_loss": val_loss,
            "val_acc": val_acc
        })

        if val_acc > best_val_acc:
            best_val_acc = val_acc
            torch.save(model.state_dict(), f'best_{model_name}.pth')

        print(f'Epoch {epoch+1}/{num_epochs}:')
        print(f'Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.2f}%')
        print(f'Val Loss: {val_loss:.4f}, Val Acc: {val_acc:.2f}%')

        if (epoch + 1) % 5 == 0:
            plt = plot_confusion_matrix(val_labels, val_preds, emotion_labels)
            wandb.log({f"confusion_matrix_epoch_{epoch+1}": wandb.Image(plt)})
            plt.close()

    return best_val_acc

train_dl, val_dl, test_dl = create_data_loaders(train_df, val_df, test_df, batch_size=batch_size)

print("Training Enhanced Model...")
enhanced_acc = train_and_evaluate(
    enhanced_model, train_dl, val_dl, criterion, optimizer_enhanced,
    device, num_epochs, "enhanced_model", emotion_labels
)
wandb.finish()

print("\nTraining Advanced Model...")
advanced_acc = train_and_evaluate(
    advanced_model, train_dl, val_dl, criterion, optimizer_advanced,
    device, num_epochs, "advanced_model", emotion_labels
)
wandb.finish()

print("\nFinal Results:")
print(f"Enhanced Model Best Validation Accuracy: {enhanced_acc:.2f}%")
print(f"Advanced Model Best Validation Accuracy: {advanced_acc:.2f}%")

def test_best_model(model, model_name, test_dl, criterion, device, emotion_labels):
    model.load_state_dict(torch.load(f'best_{model_name}.pth'))
    test_loss, test_acc, test_preds, test_labels = validate(model, test_dl, criterion, device)

    print(f"\n{model_name} Test Results:")
    print(f"Test Accuracy: {test_acc:.2f}%")
    print(f"Test Loss: {test_loss:.4f}")

    plt = plot_confusion_matrix(test_labels, test_preds, emotion_labels)
    plt.savefig(f'{model_name}_final_confusion_matrix.png')
    plt.close()

test_best_model(enhanced_model, "enhanced_model", test_dl, criterion, device, emotion_labels)
test_best_model(advanced_model, "advanced_model", test_dl, criterion, device, emotion_labels)

Training Enhanced Model...


Epoch 1/40:
Train Loss: 1.9762, Train Acc: 15.28%
Val Loss: 1.9297, Val Acc: 17.83%
Epoch 2/40:
Train Loss: 1.9408, Train Acc: 16.69%
Val Loss: 1.9293, Val Acc: 18.29%
Epoch 3/40:
Train Loss: 1.9339, Train Acc: 17.47%
Val Loss: 1.9086, Val Acc: 20.10%
Epoch 4/40:
Train Loss: 1.9276, Train Acc: 18.14%
Val Loss: 1.9099, Val Acc: 21.66%
Epoch 5/40:
Train Loss: 1.9237, Train Acc: 18.58%
Val Loss: 1.8995, Val Acc: 22.19%
Epoch 6/40:
Train Loss: 1.9197, Train Acc: 19.03%
Val Loss: 1.8910, Val Acc: 22.33%
Epoch 7/40:
Train Loss: 1.8859, Train Acc: 20.76%
Val Loss: 1.8019, Val Acc: 25.57%
Epoch 8/40:
Train Loss: 1.8200, Train Acc: 23.68%
Val Loss: 1.7916, Val Acc: 21.66%
Epoch 9/40:
Train Loss: 1.7669, Train Acc: 27.05%
Val Loss: 1.7627, Val Acc: 21.94%
Epoch 10/40:
Train Loss: 1.7065, Train Acc: 31.13%
Val Loss: 1.7377, Val Acc: 26.05%
Epoch 11/40:
Train Loss: 1.6507, Train Acc: 34.20%
Val Loss: 1.6607, Val Acc: 32.29%
Epoch 12/40:
Train Loss: 1.6009, Train Acc: 36.99%
Val Loss: 1.5895, Val A

0,1
epoch,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
train_acc,▁▁▁▁▂▂▂▂▃▄▄▅▅▆▆▆▆▇▇▇▇▇▇▇▇▇▇▇▇███████████
train_loss,██████▇▇▆▆▅▅▄▄▄▃▃▃▃▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁
val_acc,▁▁▁▂▂▂▃▂▂▃▄▅▅▆▆▆▇▇▇▇▇▇▇▇▇██▇████████████
val_loss,██████▇▇▆▆▅▄▄▃▃▃▂▂▂▂▂▂▂▂▂▂▁▂▁▁▁▁▁▁▁▁▁▁▁▁

0,1
epoch,39.0
train_acc,55.66654
train_loss,1.14552
val_acc,51.96796
val_loss,1.2753



Training Advanced Model...


Epoch 1/40:
Train Loss: 1.9408, Train Acc: 17.95%
Val Loss: 2.0065, Val Acc: 18.56%
Epoch 2/40:
Train Loss: 1.8872, Train Acc: 21.50%
Val Loss: 1.8146, Val Acc: 28.53%
Epoch 3/40:
Train Loss: 1.7347, Train Acc: 30.70%
Val Loss: 1.6721, Val Acc: 31.28%
Epoch 4/40:
Train Loss: 1.5566, Train Acc: 39.68%
Val Loss: 1.4255, Val Acc: 44.55%
Epoch 5/40:
Train Loss: 1.3771, Train Acc: 46.98%
Val Loss: 1.3778, Val Acc: 46.08%
Epoch 6/40:
Train Loss: 1.2568, Train Acc: 51.26%
Val Loss: 1.3271, Val Acc: 48.21%
Epoch 7/40:
Train Loss: 1.1676, Train Acc: 54.79%
Val Loss: 1.3065, Val Acc: 50.75%
Epoch 8/40:
Train Loss: 1.1079, Train Acc: 57.14%
Val Loss: 1.2886, Val Acc: 51.48%
Epoch 9/40:
Train Loss: 1.0582, Train Acc: 58.98%
Val Loss: 1.2317, Val Acc: 53.74%
Epoch 10/40:
Train Loss: 1.0194, Train Acc: 60.69%
Val Loss: 1.2002, Val Acc: 54.44%
Epoch 11/40:
Train Loss: 0.9738, Train Acc: 62.29%
Val Loss: 1.2060, Val Acc: 54.27%
Epoch 12/40:
Train Loss: 0.9341, Train Acc: 64.12%
Val Loss: 1.1787, Val A

0,1
epoch,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
train_acc,▁▁▂▃▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇▇▇▇█████████████
train_loss,██▇▆▆▅▅▅▄▄▄▄▄▄▃▃▃▃▃▃▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁
val_acc,▁▃▃▅▆▆▇▇▇▇▇▇██▇█████████████████████████
val_loss,█▆▅▃▃▂▂▂▁▁▁▁▁▁▁▁▁▁▂▂▂▂▃▃▃▃▃▄▄▄▄▅▅▅▅▅▆▅▆▆

0,1
epoch,39.0
train_acc,92.86924
train_loss,0.21418
val_acc,59.07349
val_loss,1.78556



Final Results:
Enhanced Model Best Validation Accuracy: 52.14%
Advanced Model Best Validation Accuracy: 59.07%

enhanced_model Test Results:
Test Accuracy: 51.81%
Test Loss: 1.2384

advanced_model Test Results:
Test Accuracy: 58.51%
Test Loss: 1.7401


In [21]:
import numpy as np
import pandas as pd
from PIL import Image
from torch.utils.data import Dataset, DataLoader

def targeted_oversampling(df, target_count):
    """
    Upsample each emotion class:
      – if emotion in ['angry', 'disgust', 'fear', 'neutral'], new_target = original_count * 2
      – otherwise, new_target = target_count
    Adds 'needs_augmentation' flag for augmentation during data loading
    """
    double_classes = {'angry', 'disgust', 'fear', 'neutral'}

    parts = [df.copy()]
    parts[0]['needs_augmentation'] = False

    for emo, grp in df.groupby('emotion'):
        n_orig = len(grp)
        if emo in double_classes:
            new_target = n_orig * 2
        else:
            new_target = target_count

        if new_target > n_orig:
            n_extra = new_target - n_orig
            extra = grp.sample(n=n_extra, replace=True).copy()
            extra['needs_augmentation'] = True
            parts.append(extra)

    return pd.concat(parts, ignore_index=True)

class FERDataset(Dataset):
    def __init__(self, dataframe, image_size=(48, 48)):
        self.image_paths = dataframe['image_path'].values
        self.emotion_labels = dataframe['emotion'].values
        self.needs_augmentation = dataframe['needs_augmentation'].values if 'needs_augmentation' in dataframe.columns else np.zeros(len(dataframe))
        self.image_size = image_size

    def __len__(self):
        return len(self.emotion_labels)

    def augment_image(self, image):
        """Apply random intensity shift to image"""
        shift = np.random.randint(-10, 11)
        img_array = np.array(image)
        img_array = np.clip(img_array.astype(int) + shift, 0, 255).astype(np.uint8)
        return Image.fromarray(img_array)

    def __getitem__(self, idx):
        # Load image
        img = Image.open(self.image_paths[idx]).convert('L')  # Convert to grayscale
        img = img.resize(self.image_size)

        # Apply augmentation if needed
        if self.needs_augmentation[idx]:
            img = self.augment_image(img)

        # Convert to tensor
        img_array = np.array(img, dtype=np.float32) / 255.0
        img_tensor = torch.from_numpy(img_array).unsqueeze(0)  # Add channel dimension

        return img_tensor, self.emotion_labels[idx]

max_count = train_df['emotion'].value_counts().max()
balanced_train = targeted_oversampling(train_df, target_count=max_count)

train_ds = FERDataset(balanced_train)
val_ds = FERDataset(val_df)
test_ds = FERDataset(test_df)

batch_size = 256
train_dl = DataLoader(train_ds, batch_size=batch_size, shuffle=True, num_workers=2)
val_dl = DataLoader(val_ds, batch_size=batch_size, shuffle=False, num_workers=2)
test_dl = DataLoader(test_ds, batch_size=batch_size, shuffle=False, num_workers=2)

# Class names
class_names = ["Angry", "Disgust", "Fear", "Happy", "Sad", "Surprise", "Neutral"]

# Check distribution
balanced_train['emotion'].value_counts()

Unnamed: 0_level_0,count
emotion,Unnamed: 1_level_1
neutral,8938
fear,7374
angry,7192
sad,6493
happy,6493
surprise,6493
disgust,784


In [23]:
import torch
from torch import nn
import wandb
from sklearn.metrics import confusion_matrix
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

enhanced_model = EnhancedNeuralNet().to(device)
advanced_model = AdvancedNetwork().to(device)

batch_size = 256
learning_rate = 0.001
num_epochs = 40
emotion_labels = ["Sad", "Fear", "Angry", "Neutral", "Happy", "Surprise", "Disgust"]

criterion = nn.CrossEntropyLoss()
optimizer_enhanced = torch.optim.Adam(enhanced_model.parameters(), lr=learning_rate)
optimizer_advanced = torch.optim.Adam(advanced_model.parameters(), lr=learning_rate)

def train_epoch(model, dataloader, criterion, optimizer, device):
    model.train()
    running_loss = 0.0
    correct = 0
    total = 0

    for inputs, labels in dataloader:
        inputs, labels = inputs.to(device), labels.to(device)

        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)

        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

    epoch_loss = running_loss / len(dataloader)
    epoch_acc = 100 * correct / total
    return epoch_loss, epoch_acc

def validate(model, dataloader, criterion, device):
    model.eval()
    running_loss = 0.0
    correct = 0
    total = 0
    all_preds = []
    all_labels = []

    with torch.no_grad():
        for inputs, labels in dataloader:
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = model(inputs)
            loss = criterion(outputs, labels)

            running_loss += loss.item()
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

            all_preds.extend(predicted.cpu().numpy())
            all_labels.extend(labels.cpu().numpy())

    val_loss = running_loss / len(dataloader)
    val_acc = 100 * correct / total
    return val_loss, val_acc, all_preds, all_labels

def plot_confusion_matrix(true_labels, predictions, class_names):
    cm = confusion_matrix(true_labels, predictions)
    plt.figure(figsize=(10, 8))
    sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',
                xticklabels=class_names,
                yticklabels=class_names)
    plt.title('Confusion Matrix')
    plt.ylabel('True Label')
    plt.xlabel('Predicted Label')
    plt.tight_layout()
    return plt

def train_and_evaluate(model, train_dl, val_dl, criterion, optimizer, device,
                      num_epochs, model_name, emotion_labels):
    wandb.init(project="emotion_recognition", name=model_name)

    best_val_acc = 0.0

    for epoch in range(num_epochs):
        train_loss, train_acc = train_epoch(model, train_dl, criterion, optimizer, device)

        val_loss, val_acc, val_preds, val_labels = validate(model, val_dl, criterion, device)

        wandb.log({
            "epoch": epoch,
            "train_loss": train_loss,
            "train_acc": train_acc,
            "val_loss": val_loss,
            "val_acc": val_acc
        })

        if val_acc > best_val_acc:
            best_val_acc = val_acc
            torch.save(model.state_dict(), f'best_{model_name}.pth')

        print(f'Epoch {epoch+1}/{num_epochs}:')
        print(f'Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.2f}%')
        print(f'Val Loss: {val_loss:.4f}, Val Acc: {val_acc:.2f}%')

        if (epoch + 1) % 5 == 0:
            plt = plot_confusion_matrix(val_labels, val_preds, emotion_labels)
            wandb.log({f"confusion_matrix_epoch_{epoch+1}": wandb.Image(plt)})
            plt.close()

    return best_val_acc

train_dl, val_dl, test_dl = create_data_loaders(train_df, val_df, test_df, batch_size=batch_size)

print("Training Enhanced Model...")
enhanced_acc = train_and_evaluate(
    enhanced_model, train_dl, val_dl, criterion, optimizer_enhanced,
    device, num_epochs, "enhanced_model", emotion_labels
)
wandb.finish()

print("\nTraining Advanced Model...")
advanced_acc = train_and_evaluate(
    advanced_model, train_dl, val_dl, criterion, optimizer_advanced,
    device, num_epochs, "advanced_model", emotion_labels
)
wandb.finish()

print("\nFinal Results:")
print(f"Enhanced Model Best Validation Accuracy: {enhanced_acc:.2f}%")
print(f"Advanced Model Best Validation Accuracy: {advanced_acc:.2f}%")

def test_best_model(model, model_name, test_dl, criterion, device, emotion_labels):
    model.load_state_dict(torch.load(f'best_{model_name}.pth'))
    test_loss, test_acc, test_preds, test_labels = validate(model, test_dl, criterion, device)

    print(f"\n{model_name} Test Results:")
    print(f"Test Accuracy: {test_acc:.2f}%")
    print(f"Test Loss: {test_loss:.4f}")

    plt = plot_confusion_matrix(test_labels, test_preds, emotion_labels)
    plt.savefig(f'{model_name}_final_confusion_matrix.png')
    plt.close()

test_best_model(enhanced_model, "enhanced_model", test_dl, criterion, device, emotion_labels)
test_best_model(advanced_model, "advanced_model", test_dl, criterion, device, emotion_labels)

Training Enhanced Model...


0,1
epoch,▁▂▂▃▃▄▄▅▅▆▆▇▇█
train_acc,▁▁▂▃▅▅▆▆▇▇▇███
train_loss,██▇▆▅▄▃▃▂▂▂▂▁▁
val_acc,▁▁▁▄▅▅▆▇▇▇▇███
val_loss,██▇▅▃▃▂▂▂▁▁▁▁▁

0,1
epoch,13.0
train_acc,67.47926
train_loss,0.85993
val_acc,59.21282
val_loss,1.15746


Epoch 1/40:
Train Loss: 1.9800, Train Acc: 15.13%
Val Loss: 1.9171, Val Acc: 17.80%
Epoch 2/40:
Train Loss: 1.9427, Train Acc: 16.31%
Val Loss: 1.9147, Val Acc: 18.22%
Epoch 3/40:
Train Loss: 1.9344, Train Acc: 17.44%
Val Loss: 1.9162, Val Acc: 19.33%
Epoch 4/40:
Train Loss: 1.9283, Train Acc: 18.01%
Val Loss: 1.9062, Val Acc: 20.06%
Epoch 5/40:
Train Loss: 1.9245, Train Acc: 18.45%
Val Loss: 1.9056, Val Acc: 19.85%
Epoch 6/40:
Train Loss: 1.9218, Train Acc: 18.70%
Val Loss: 1.9052, Val Acc: 22.88%
Epoch 7/40:
Train Loss: 1.9144, Train Acc: 19.26%
Val Loss: 1.8748, Val Acc: 22.95%
Epoch 8/40:
Train Loss: 1.8572, Train Acc: 21.86%
Val Loss: 1.7982, Val Acc: 23.86%
Epoch 9/40:
Train Loss: 1.8005, Train Acc: 24.57%
Val Loss: 1.7873, Val Acc: 16.27%
Epoch 10/40:
Train Loss: 1.7499, Train Acc: 28.33%
Val Loss: 1.7875, Val Acc: 21.56%
Epoch 11/40:
Train Loss: 1.7023, Train Acc: 31.04%
Val Loss: 1.7301, Val Acc: 25.95%
Epoch 12/40:
Train Loss: 1.6532, Train Acc: 33.66%
Val Loss: 1.7557, Val A

0,1
epoch,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
train_acc,▁▁▁▁▂▂▂▂▃▃▄▄▅▅▆▆▆▆▇▇▇▇▇▇▇▇▇▇▇███████████
train_loss,██████▇▇▆▆▆▅▅▄▄▄▃▃▃▃▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁
val_acc,▁▁▂▂▂▂▂▃▁▂▃▄▅▅▆▆▇▇▇▇▇▇▇▇▇███████████████
val_loss,███████▇▇▇▆▆▅▄▄▄▃▃▃▂▂▂▂▂▂▁▂▂▁▂▁▁▁▁▁▁▁▁▁▁

0,1
epoch,39.0
train_acc,55.53013
train_loss,1.14387
val_acc,50.74887
val_loss,1.27618



Training Advanced Model...


Epoch 1/40:
Train Loss: 1.9470, Train Acc: 17.74%
Val Loss: 1.9011, Val Acc: 21.21%
Epoch 2/40:
Train Loss: 1.8945, Train Acc: 21.23%
Val Loss: 1.8781, Val Acc: 16.27%
Epoch 3/40:
Train Loss: 1.7639, Train Acc: 29.01%
Val Loss: 1.7154, Val Acc: 31.07%
Epoch 4/40:
Train Loss: 1.5704, Train Acc: 39.13%
Val Loss: 1.5972, Val Acc: 38.17%
Epoch 5/40:
Train Loss: 1.3946, Train Acc: 46.37%
Val Loss: 1.5822, Val Acc: 40.54%
Epoch 6/40:
Train Loss: 1.2778, Train Acc: 50.63%
Val Loss: 1.4192, Val Acc: 46.88%
Epoch 7/40:
Train Loss: 1.1899, Train Acc: 53.94%
Val Loss: 1.3531, Val Acc: 48.59%
Epoch 8/40:
Train Loss: 1.1215, Train Acc: 56.49%
Val Loss: 1.3158, Val Acc: 49.74%
Epoch 9/40:
Train Loss: 1.0735, Train Acc: 58.56%
Val Loss: 1.2200, Val Acc: 53.54%
Epoch 10/40:
Train Loss: 1.0191, Train Acc: 60.58%
Val Loss: 1.1930, Val Acc: 56.08%
Epoch 11/40:
Train Loss: 0.9832, Train Acc: 62.40%
Val Loss: 1.2062, Val Acc: 55.42%
Epoch 12/40:
Train Loss: 0.9438, Train Acc: 64.12%
Val Loss: 1.2051, Val A

0,1
epoch,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
train_acc,▁▁▂▃▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇▇▇▇▇████████████
train_loss,██▇▆▆▅▅▅▄▄▄▄▄▄▃▃▃▃▃▃▃▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁
val_acc,▂▁▃▄▅▆▆▆▇▇▇▇█▇█▇████████████████████████
val_loss,██▆▅▅▃▃▂▁▁▁▁▁▁▁▂▁▁▁▁▂▂▃▃▃▃▃▃▃▄▄▅▅▅▅▇▆▆▆▆

0,1
epoch,39.0
train_acc,92.60082
train_loss,0.21945
val_acc,58.48137
val_loss,1.72488



Final Results:
Enhanced Model Best Validation Accuracy: 50.75%
Advanced Model Best Validation Accuracy: 60.15%

enhanced_model Test Results:
Test Accuracy: 52.08%
Test Loss: 1.2408

advanced_model Test Results:
Test Accuracy: 59.18%
Test Loss: 1.2179


In [24]:
import torch
from torch import nn

class SqueezeExcitation(nn.Module):
    def __init__(self, channels, reduction=16):
        super(SqueezeExcitation, self).__init__()
        self.fc1 = nn.Linear(channels, channels // reduction, bias=False)
        self.fc2 = nn.Linear(channels // reduction, channels, bias=False)

    def forward(self, x):
        b, c, _, _ = x.size()
        y = x.mean(dim=[2, 3])  # Global Average Pooling
        y = self.fc1(y).relu()
        y = self.fc2(y).sigmoid()
        return x * y.view(b, c, 1, 1)

class EmotionRecognitionNet(nn.Module):
    def __init__(self, dropout_rate=0.5):
        super().__init__()
        self.feature_layers = nn.ModuleList([
            self._create_conv_block(1, 32),
            self._create_conv_block(32, 64),
            self._create_conv_block(64, 128),
            self._create_conv_block(128, 256)
        ])

        self.processing_layers = nn.ModuleList([
            self._create_conv_block(256, 512, pool=False),
            self._create_conv_block(512, 512, pool=False)
        ])

        self.se_block = SqueezeExcitation(512)

        self.classifier = nn.Sequential(
            nn.Flatten(),
            self._create_fc_block(512 * 3 * 3, 512, dropout_rate),
            self._create_fc_block(512, 256, dropout_rate),
            self._create_fc_block(256, 128, dropout_rate),
            self._create_fc_block(128, 64, dropout_rate),
            nn.Linear(64, 7)
        )

    def _create_conv_block(self, in_channels, out_channels, pool=True):
        layers = [
            nn.Conv2d(in_channels, out_channels, 3, padding=1, bias=False),
            nn.BatchNorm2d(out_channels),
            nn.ReLU(inplace=True),
            nn.Dropout2d(0.5)
        ]
        if pool:
            layers.append(nn.MaxPool2d(2, 2))
        return nn.Sequential(*layers)

    def _create_fc_block(self, in_features, out_features, dropout_rate):
        return nn.Sequential(
            nn.Linear(in_features, out_features, bias=False),
            nn.BatchNorm1d(out_features),
            nn.ReLU(inplace=True),
            nn.Dropout(dropout_rate)
        )

    def forward(self, x):
        for layer in self.feature_layers:
            x = layer(x)

        for layer in self.processing_layers:
            x = layer(x)

        x = self.se_block(x)

        x = self.classifier(x)
        return x

In [25]:
import torch
from torch import nn
import wandb
from sklearn.metrics import confusion_matrix
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

emotion_model = EmotionRecognitionNet().to(device)

batch_size = 256
learning_rate = 0.001
num_epochs = 40
emotion_labels = ["Sad", "Fear", "Angry", "Neutral", "Happy", "Surprise", "Disgust"]

criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(emotion_model.parameters(), lr=learning_rate)

def train_epoch(model, dataloader, criterion, optimizer, device):
    model.train()
    running_loss = 0.0
    correct = 0
    total = 0

    for inputs, labels in dataloader:
        inputs, labels = inputs.to(device), labels.to(device)

        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)

        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

    epoch_loss = running_loss / len(dataloader)
    epoch_acc = 100 * correct / total
    return epoch_loss, epoch_acc

def validate(model, dataloader, criterion, device):
    model.eval()
    running_loss = 0.0
    correct = 0
    total = 0
    all_preds = []
    all_labels = []

    with torch.no_grad():
        for inputs, labels in dataloader:
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = model(inputs)
            loss = criterion(outputs, labels)

            running_loss += loss.item()
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

            all_preds.extend(predicted.cpu().numpy())
            all_labels.extend(labels.cpu().numpy())

    val_loss = running_loss / len(dataloader)
    val_acc = 100 * correct / total
    return val_loss, val_acc, all_preds, all_labels

def plot_confusion_matrix(true_labels, predictions, class_names):
    cm = confusion_matrix(true_labels, predictions)
    plt.figure(figsize=(10, 8))
    sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',
                xticklabels=class_names,
                yticklabels=class_names)
    plt.title('Confusion Matrix')
    plt.ylabel('True Label')
    plt.xlabel('Predicted Label')
    plt.tight_layout()
    return plt

def train_and_evaluate(model, train_dl, val_dl, criterion, optimizer, device,
                      num_epochs, model_name, emotion_labels):
    wandb.init(project="emotion_recognition", name=model_name)

    best_val_acc = 0.0

    for epoch in range(num_epochs):
        train_loss, train_acc = train_epoch(model, train_dl, criterion, optimizer, device)

        val_loss, val_acc, val_preds, val_labels = validate(model, val_dl, criterion, device)

        wandb.log({
            "epoch": epoch,
            "train_loss": train_loss,
            "train_acc": train_acc,
            "val_loss": val_loss,
            "val_acc": val_acc
        })

        if val_acc > best_val_acc:
            best_val_acc = val_acc
            torch.save(model.state_dict(), f'best_{model_name}.pth')

        print(f'Epoch {epoch+1}/{num_epochs}:')
        print(f'Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.2f}%')
        print(f'Val Loss: {val_loss:.4f}, Val Acc: {val_acc:.2f}%')

        if (epoch + 1) % 5 == 0:
            plt = plot_confusion_matrix(val_labels, val_preds, emotion_labels)
            wandb.log({f"confusion_matrix_epoch_{epoch+1}": wandb.Image(plt)})
            plt.close()

    return best_val_acc

train_dl, val_dl, test_dl = create_data_loaders(train_df, val_df, test_df, batch_size=batch_size)

print("Training Emotion Recognition Model...")
best_accuracy = train_and_evaluate(
    emotion_model, train_dl, val_dl, criterion, optimizer,
    device, num_epochs, "emotion_recognition_model", emotion_labels
)
wandb.finish()

def test_best_model(model, model_name, test_dl, criterion, device, emotion_labels):
    model.load_state_dict(torch.load(f'best_{model_name}.pth'))
    test_loss, test_acc, test_preds, test_labels = validate(model, test_dl, criterion, device)

    print(f"\n{model_name} Test Results:")
    print(f"Test Accuracy: {test_acc:.2f}%")
    print(f"Test Loss: {test_loss:.4f}")

    plt = plot_confusion_matrix(test_labels, test_preds, emotion_labels)
    plt.savefig(f'{model_name}_final_confusion_matrix.png')
    plt.close()

test_best_model(emotion_model, "emotion_recognition_model", test_dl, criterion, device, emotion_labels)

Training Emotion Recognition Model...


Epoch 1/40:
Train Loss: 1.9780, Train Acc: 15.11%
Val Loss: 1.9383, Val Acc: 17.07%
Epoch 2/40:
Train Loss: 1.9411, Train Acc: 16.09%
Val Loss: 1.9206, Val Acc: 18.15%
Epoch 3/40:
Train Loss: 1.9308, Train Acc: 17.56%
Val Loss: 1.9121, Val Acc: 18.95%
Epoch 4/40:
Train Loss: 1.9288, Train Acc: 17.99%
Val Loss: 1.9082, Val Acc: 19.40%
Epoch 5/40:
Train Loss: 1.9227, Train Acc: 18.73%
Val Loss: 1.8943, Val Acc: 22.81%
Epoch 6/40:
Train Loss: 1.9147, Train Acc: 19.24%
Val Loss: 1.8733, Val Acc: 23.75%
Epoch 7/40:
Train Loss: 1.8558, Train Acc: 22.42%
Val Loss: 1.8129, Val Acc: 14.42%
Epoch 8/40:
Train Loss: 1.7710, Train Acc: 27.88%
Val Loss: 1.7838, Val Acc: 21.84%
Epoch 9/40:
Train Loss: 1.7029, Train Acc: 31.64%
Val Loss: 1.7203, Val Acc: 25.71%
Epoch 10/40:
Train Loss: 1.6578, Train Acc: 33.93%
Val Loss: 1.7251, Val Acc: 26.85%
Epoch 11/40:
Train Loss: 1.6097, Train Acc: 36.43%
Val Loss: 1.6502, Val Acc: 34.38%
Epoch 12/40:
Train Loss: 1.5665, Train Acc: 38.65%
Val Loss: 1.6437, Val A

0,1
epoch,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
train_acc,▁▁▁▁▂▂▂▃▄▄▅▅▆▆▆▆▇▇▇▇▇▇▇▇▇▇▇▇████████████
train_loss,█████▇▇▆▆▅▅▅▄▄▃▃▃▃▃▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁
val_acc,▁▂▂▂▃▃▁▂▃▃▅▅▆▆▆▇▇▇▇▇▇▇▇▇████████████████
val_loss,█████▇▇▆▆▆▅▅▄▄▃▃▂▂▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁

0,1
epoch,39.0
train_acc,55.82055
train_loss,1.13865
val_acc,51.20167
val_loss,1.2926



emotion_recognition_model Test Results:
Test Accuracy: 52.16%
Test Loss: 1.2349


In [26]:
import pandas as pd
from sklearn.metrics import classification_report, confusion_matrix

def evaluate_on_test_set(model, model_name, test_dl, criterion, device, emotion_labels, save_csv=False):
    model.load_state_dict(torch.load(f'best_{model_name}.pth'))
    model.eval()

    all_preds = []
    all_targets = []

    with torch.no_grad():
        for inputs, labels in test_dl:
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = model(inputs)
            preds = outputs.argmax(dim=1)

            all_preds.extend(preds.cpu().numpy())
            all_targets.extend(labels.cpu().numpy())

    print(f"\nEvaluation Report for {model_name}")
    print(classification_report(all_targets, all_preds, target_names=emotion_labels))

    cm = confusion_matrix(all_targets, all_preds)
    plt.figure(figsize=(8, 6))
    sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',
                xticklabels=emotion_labels, yticklabels=emotion_labels)
    plt.title(f'{model_name} - Confusion Matrix')
    plt.xlabel('Predicted')
    plt.ylabel('True')
    plt.tight_layout()
    plt.savefig(f'{model_name}_test_confusion_matrix.png')
    plt.close()

    if save_csv:
        test_df_copy = test_df.copy().reset_index(drop=True)
        test_df_copy["predicted_label"] = [emotion_labels[p] for p in all_preds]
        test_df_copy.to_csv(f"{model_name}_test_predictions.csv", index=False)
        print(f"Predictions saved to {model_name}_test_predictions.csv")

evaluate_on_test_set(enhanced_model, "enhanced_model", test_dl, criterion, device, emotion_labels, save_csv=True)
evaluate_on_test_set(advanced_model, "advanced_model", test_dl, criterion, device, emotion_labels, save_csv=True)



Evaluation Report for enhanced_model
              precision    recall  f1-score   support

         Sad       0.36      0.40      0.38      1247
        Fear       0.32      0.06      0.11      1024
       Angry       0.41      0.29      0.34       958
     Neutral       0.44      0.64      0.52      1233
       Happy       0.76      0.77      0.76      1774
    Surprise       0.64      0.80      0.71       831
     Disgust       0.32      0.68      0.43       111

    accuracy                           0.52      7178
   macro avg       0.46      0.52      0.46      7178
weighted avg       0.50      0.52      0.49      7178

Predictions saved to enhanced_model_test_predictions.csv

Evaluation Report for advanced_model
              precision    recall  f1-score   support

         Sad       0.43      0.52      0.47      1247
        Fear       0.45      0.32      0.38      1024
       Angry       0.50      0.48      0.49       958
     Neutral       0.52      0.61      0.56      1233