In [None]:
# FOR WINDOWS (your env must be called project-venv; if you choose another name add it in .gitignore)
import subprocess

# Set the execution policy
subprocess.run(["Set-ExecutionPolicy", "RemoteSigned", "-Scope", "Process"], shell=True)

# Activate the virtual environment
subprocess.run(["cd", ".\\project-venv\\"], shell=True)
subprocess.run([".\\Scripts\\Activate.ps1"], shell=True)

# Install requirements
subprocess.run(["pip", "install", "-r", "../requirements.txt"], shell=True)
subprocess.run(["pip", "install", "-r", "../emotions_requirements.txt"], shell=True)

# **Emotion Recognition task**

**import packages**

In [1]:
import cv2
import os
import random
import numpy as np
from scipy.ndimage import gaussian_filter, map_coordinates
import torch
import torch.nn as nn
from sklearn.cluster import DBSCAN
from torch.utils.data import random_split, ConcatDataset
from torchvision.datasets import ImageFolder
from torchvision import transforms
from torch.utils.data import DataLoader
from sklearn.model_selection import train_test_split
from torch.utils.data.sampler import SubsetRandomSampler
from torch.optim import SGD
from torch.optim.lr_scheduler import ReduceLROnPlateau
from sklearn.metrics import precision_recall_fscore_support, accuracy_score
from tqdm import tqdm
import dlib
from PIL import Image
import wandb

**wandb login**

In [None]:
wandb.login(key='d29d51017f4231b5149d36ad242526b374c9c60a')

# **Paper implementation 1**
https://ieeexplore.ieee.org/abstract/document/9659697?casa_token=zDD7lwwOig8AAAAA:KcIHhupXAXgiaB_C7A0uNDB7ehrsWNyovQdgDu9LmnwToOGU6akB_gjWTy7JCf4UdKK03Is

https://github.com/serengil/deepface

### **Dataset Augmenting**

#### **Objective:**
The purpose of this Python script is to perform data augmentation on a dataset of images corresponding to different emotions. Data augmentation is a technique commonly used in computer vision tasks to increase the diversity of the training dataset, thereby enhancing the robustness and generalization capabilities of machine learning models.

#### **Dataset Structure:**
The original dataset is organized into folders, each representing a specific emotion (e.g., angry, disgust, fear, happy, neutral, sad, surprise). Each emotion folder contains a collection of images in formats such as JPEG and PNG.

#### **Transformation Techniques:**
The script employs various image transformation techniques to augment the dataset. These techniques include:

1. **Horizontal Flip:** Flips the image horizontally.
2. **Vertical Flip:** Flips the image vertically.
3. **Zoom:** Randomly zooms into or out of the image.
4. **Translation:** Shifts the image horizontally and vertically.
5. **Contrast and Brightness Adjustment:** Randomly adjusts the contrast and brightness of the image.
6. **Elastic Transformation:** Applies a non-linear elastic deformation to the image.

#### **Implementation:**
The script uses the OpenCV library for image processing. For each emotion category, it iterates through the images, applies the defined transformations using the `apply_transformations` function, and saves the augmented images in a new folder named with the emotion followed by "_augmented."

The `elastic_transform` function generates an elastic transformation by displacing pixels based on random elastic deformations. This adds a degree of distortion to the images, contributing to further variability.

#### **Output:**
The augmented images are saved in the corresponding emotion's augmented folder, with filenames indicating the applied augmentation technique. For example, an image originally named "example_image.jpg" may result in augmented images like "example_image_aug_0.jpg," "example_image_aug_1.jpg," etc.

In [18]:
emotions = ['angry', 'disgust', 'fear', 'happy', 'neutral', 'sad', 'surprise']

# cycle through emotions
for emotion in emotions:
    # path of the folder containing the images
    folder_path = fr"C:\Users\marco\OneDrive\Documenti\CV_project\ComputerVisionProject\Data\emotions_images\{emotion}"
    output_folder_path = fr"C:\Users\marco\OneDrive\Documenti\CV_project\ComputerVisionProject\Data\emotions_images\{emotion}_augmented"


    if not os.path.exists(output_folder_path):
        os.makedirs(output_folder_path)

    # list of images in the folder
    image_files = [f for f in os.listdir(folder_path) if f.endswith(('.jpg', '.jpeg', '.png'))]

    # define transformations inside the apply_transformations function
    def apply_transformations(image):
        # horizontal_flip
        flipped_horizontal = cv2.flip(image, 1)

        # vertical flip
        flipped_vertical = cv2.flip(image, 0)

        # Zoom
        zoom_factor = random.uniform(0.8, 1.2)
        height, width = image.shape[:2]
        zoomed_image = cv2.resize(image, (int(width * zoom_factor), int(height * zoom_factor)))

        # translation
        tx = random.randint(-10, 10)
        ty = random.randint(-10, 10)
        translation_matrix = np.float32([[1, 0, tx], [0, 1, ty]])
        translated_image = cv2.warpAffine(image, translation_matrix, (width, height))

        # contrast and brightness control
        alpha = random.uniform(0.8, 1.2)
        beta = random.randint(-35, 35)
        adjusted_image = cv2.convertScaleAbs(image, alpha=alpha, beta=beta)

        # elastic transformation
        elastic_image = elastic_transform(image, alpha=random.randint(6, 14), sigma=random.uniform(1.1, 2.0))

        return [image, translated_image, flipped_horizontal, zoomed_image, adjusted_image, elastic_image] #forse togliere flipped vertical

    def elastic_transform(image, alpha, sigma):
        random_state = np.random.RandomState(None)
        shape = image.shape
        dx = gaussian_filter((random_state.rand(*shape) * 2 - 1), sigma, mode="constant", cval=0) * alpha
        dy = gaussian_filter((random_state.rand(*shape) * 2 - 1), sigma, mode="constant", cval=0) * alpha
        dz = np.zeros_like(dx)

        x, y, z = np.meshgrid(np.arange(shape[1]), np.arange(shape[0]), np.arange(shape[2]))
        indices = np.reshape(y + dy, (-1, 1)), np.reshape(x + dx, (-1, 1)), np.reshape(z + dz, (-1, 1))

        distorted_image = map_coordinates(image, indices, order=1, mode='reflect')
        distorted_image = distorted_image.reshape(image.shape)

        return distorted_image

    # apply data augmentation
    for image_file in image_files:
        image_path = os.path.join(folder_path, image_file)
        image = cv2.imread(image_path)

        augmented_images = apply_transformations(image)

        # save new images
        base_name = os.path.splitext(image_file)[0]
        for i, augmented_image in enumerate(augmented_images):
            output_file_path = os.path.join(output_folder_path, f"{base_name}_aug_{i}.jpg")
            cv2.imwrite(output_file_path, augmented_image)

### **CNN Architecture: EmotionCNN**

#### **Overview:**
The `EmotionCNN` model is designed for emotion classification using a Convolutional Neural Network (CNN). Below is an overview of its architecture:

#### **Architecture Layers:**

1. **Input Layer:**
   - Input channels: 1 (grayscale images)
   - Input size: Variable (depends on the input image size)

2. **Convolutional Layer 1:**
   - Convolutional operation: 64 filters of size 5x5
   - Activation function: ReLU
   - Max pooling: 5x5 kernel with stride 2

3. **Convolutional Layers 2a and 2b:**
   - Convolutional operation: 64 filters of size 3x3
   - Activation functions: ReLU for both layers
   - Average pooling: 3x3 kernel with stride 2

4. **Convolutional Layers 3a and 3b:**
   - Convolutional operation: 128 filters of size 3x3
   - Activation functions: ReLU for both layers
   - Average pooling: 3x3 kernel with stride 2

5. **Fully Connected (FC) Layer 1:**
   - Units: 1024
   - Activation function: ReLU
   - Dropout: 20% dropout rate

6. **Fully Connected (FC) Layer 2:**
   - Units: 1024
   - Activation function: ReLU
   - Dropout: 20% dropout rate

7. **Output Layer:**
   - Units: Variable (depends on the number of emotion classes)
   - Activation function: Softmax

#### **Additional Information:**
- The architecture is designed for grayscale images (1 channel).
- The network utilizes ReLU activation functions after convolutional and fully connected layers.
- Max pooling and average pooling layers are employed for down-sampling.
- Dropout layers (20% dropout rate) are included for regularization.
- The final layer uses the softmax activation for multiclass classification.

In [2]:
class EmotionCNN(nn.Module):
    def __init__(self, num_classes=7):
        super(EmotionCNN, self).__init__()
        
        self.conv1 = nn.Conv2d(1, 64, kernel_size=5, stride=1, padding=0)
        self.relu1 = nn.ReLU()
        self.maxpool1 = nn.MaxPool2d(kernel_size=5, stride=2)
        
        self.conv2a = nn.Conv2d(64, 64, kernel_size=3, stride=1, padding=1)
        self.relu2a = nn.ReLU()
        self.conv2b = nn.Conv2d(64, 64, kernel_size=3, stride=1, padding=1)
        self.relu2b = nn.ReLU()
        self.avgpool2 = nn.AvgPool2d(kernel_size=3, stride=2)
        
        self.conv3a = nn.Conv2d(64, 128, kernel_size=3, stride=1, padding=1)
        self.relu3a = nn.ReLU()
        self.conv3b = nn.Conv2d(128, 128, kernel_size=3, stride=1, padding=1)
        self.relu3b = nn.ReLU()
        self.avgpool3 = nn.AvgPool2d(kernel_size=3, stride=2)
        
        # verify the output size of conv2 and conv3
        self.dummy_input = torch.randn(1, 1, 48, 48)
        self.dummy_output_size = self._get_conv_output_size(self.dummy_input)
        
        # update fc1 units based on feature map size
        self.fc1 = nn.Linear(self.dummy_output_size, 1024)
        self.relu_fc1 = nn.ReLU()
        self.dropout1 = nn.Dropout(0.2)
        
        self.fc2 = nn.Linear(1024, 1024)
        self.relu_fc2 = nn.ReLU()
        self.dropout2 = nn.Dropout(0.2)
        
        self.fc3 = nn.Linear(1024, num_classes)
        self.softmax = nn.Softmax(dim=1)

    def _get_conv_output_size(self, input_tensor):
        x = self.maxpool1(self.relu1(self.conv1(input_tensor)))
        x = self.relu2a(self.conv2a(x))
        x = self.relu2b(self.conv2b(x))
        x = self.avgpool2(x)
        x = self.relu3a(self.conv3a(x))
        x = self.relu3b(self.conv3b(x))
        x = self.avgpool3(x)
        return x.view(x.size(0), -1).size(1)

    def forward(self, x):
        x = self.maxpool1(self.relu1(self.conv1(x)))
        x = self.relu2a(self.conv2a(x))
        x = self.relu2b(self.conv2b(x))
        x = self.avgpool2(x)
        x = self.relu3a(self.conv3a(x))
        x = self.relu3b(self.conv3b(x))
        x = self.avgpool3(x)
        x = x.view(x.size(0), -1)
        x = self.dropout1(self.relu_fc1(self.fc1(x)))
        x = self.dropout2(self.relu_fc2(self.fc2(x)))
        x = self.softmax(self.fc3(x))
        return x

### **Hyperparameters**

The parameters that can be modified before performing training are the number of instances per label, the batch size and the number of epochs

In [3]:
number_instances_over_under_sampling_ = 30000
batch_size_ = 58
epochs_ = 50

### **Delete Outliers with DBSCAN**

#### **Objective:**
The Python script is designed to identify and remove outliers from a collection of images using the Density-Based Spatial Clustering of Applications with Noise (DBSCAN) algorithm. The primary goal is to filter out images with unusual pixel statistics, specifically focusing on standard deviation.

#### **Functions:**

1. **`calculate_pixel_std(image_path):`**
   - **Input:** Path to an image file.
   - **Output:** Returns the standard deviation of pixel values in the grayscale image.

2. **`remove_outliers_dbscan(folder_path, eps, min_samples):`**
   - **Input:**
     - `folder_path`: Path to the folder containing images.
     - `eps`: DBSCAN search radius parameter.
     - `min_samples`: Minimum number of samples required for a cluster in DBSCAN.
   - **Action:**
     - Calculates the pixel standard deviation for each image in the folder.
     - Applies DBSCAN to cluster images based on pixel standard deviation.
     - Removes outliers (images with label -1) by deleting them from the folder.
   - **Output:** Prints the number of outliers removed.

#### **Usage:**

1. Set the DBSCAN configuration parameters:
   - `dbscan_eps`: DBSCAN search radius.
   - `dbscan_min_samples`: Minimum number of samples required for a cluster.

2. Provide the path to the folder containing emotion-specific image folders (`emotions_folder_path`).

3. Iterate through each emotion folder:
   - If the emotion is 'disgust,' use different DBSCAN parameters.
   - Apply `remove_outliers_dbscan` to delete outliers in each emotion folder.

4. The script prints the number of outliers removed for each emotion.

In [None]:
def calculate_pixel_std(image_path):
    image = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
    return np.std(image)

def remove_outliers_dbscan(folder_path, eps, min_samples):
    images = []
    for filename in os.listdir(folder_path):
        image_path = os.path.join(folder_path, filename)
        pixel_std = calculate_pixel_std(image_path)
        images.append([pixel_std])

    images = np.array(images)

    # dbscan to identify outliers
    dbscan = DBSCAN(eps=eps, min_samples=min_samples)
    labels = dbscan.fit_predict(images)

    # remove outliers
    counter = 0
    for i, (label, image) in enumerate(zip(labels, os.listdir(folder_path))):
        if label == -1:  
            image_path = os.path.join(folder_path, image)
            os.remove(image_path)
            counter += 1
    print(counter)

# DBSCAN configuration
dbscan_eps = 0.4  # search radius
dbscan_min_samples = 15  # minimum number of samples required for a cluster

emotions_folder_path = r"C:\Users\marco\OneDrive\Documenti\CV_project\ComputerVisionProject\Data\emotions_aug_images"


for emotion in os.listdir(emotions_folder_path):
    emotion_folder_path = os.path.join(emotions_folder_path, emotion)
    print(emotion)
    
    if emotion == 'disgust':
        tmp_folder_path = emotion_folder_path 
        remove_outliers_dbscan(tmp_folder_path, 0.5, 10)
    else:
        remove_outliers_dbscan(emotion_folder_path, dbscan_eps, dbscan_min_samples)


Print the path and the number of the outliers per class

In [3]:
def calculate_pixel_std(image_path):
    image = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
    return np.std(image)

def get_outliers_dbscan(folder_path, eps, min_samples):
    images = []
    image_paths = []

    for filename in os.listdir(folder_path):
        image_path = os.path.join(folder_path, filename)
        pixel_std = calculate_pixel_std(image_path)
        images.append([pixel_std])
        image_paths.append(image_path)

    images = np.array(images)

    # dbscan to identify outliers
    dbscan = DBSCAN(eps=eps, min_samples=min_samples)
    labels = dbscan.fit_predict(images)

    # collect outlier paths and count for each label
    outlier_paths_by_label = {}
    for label, image_path in zip(labels, image_paths):
        if label == -1:
            if label not in outlier_paths_by_label:
                outlier_paths_by_label[label] = []
            outlier_paths_by_label[label].append(image_path)

    return outlier_paths_by_label

# Configurazione DBSCAN
dbscan_eps = 0.5  # Raggio di ricerca
dbscan_min_samples = 10  # Numero minimo di campioni in un cluster

emotions_folder_path = r"C:\Users\marco\OneDrive\Documenti\CV_project\ComputerVisionProject\Data\emotions_aug_images"

for emotion in os.listdir(emotions_folder_path):
    emotion_folder_path = os.path.join(emotions_folder_path, emotion)
    print(emotion)

    if emotion == 'disgust':
        tmp_folder_path = emotion_folder_path 
        outliers = get_outliers_dbscan(tmp_folder_path, 0.5, 10)
    else:
        outliers = get_outliers_dbscan(emotion_folder_path, dbscan_eps, dbscan_min_samples)

    # Stampa i percorsi degli outliers per ogni label
    for label, outlier_paths in outliers.items():
        print(f"Label {label}: {len(outlier_paths)} outliers")
        for path in outlier_paths:
            print(path)


angry
Label -1: 44 outliers
C:\Users\marco\OneDrive\Documenti\CV_project\ComputerVisionProject\Data\emotions_aug_images\angry\PrivateTest_24139016_aug_1.jpg
C:\Users\marco\OneDrive\Documenti\CV_project\ComputerVisionProject\Data\emotions_aug_images\angry\PrivateTest_27212219_aug_1.jpg
C:\Users\marco\OneDrive\Documenti\CV_project\ComputerVisionProject\Data\emotions_aug_images\angry\PrivateTest_33469617_aug_5.jpg
C:\Users\marco\OneDrive\Documenti\CV_project\ComputerVisionProject\Data\emotions_aug_images\angry\PrivateTest_51610150_aug_1.jpg
C:\Users\marco\OneDrive\Documenti\CV_project\ComputerVisionProject\Data\emotions_aug_images\angry\PrivateTest_66705645_aug_1.jpg
C:\Users\marco\OneDrive\Documenti\CV_project\ComputerVisionProject\Data\emotions_aug_images\angry\PrivateTest_66705645_aug_5.jpg
C:\Users\marco\OneDrive\Documenti\CV_project\ComputerVisionProject\Data\emotions_aug_images\angry\PrivateTest_67702983_aug_1.jpg
C:\Users\marco\OneDrive\Documenti\CV_project\ComputerVisionProject\Da

### **Over and Under sampling: Class balance**
We apply the transformations on the image dataset: grayscale, resize, conversion to tensor and normalization . Weighted random samplers handle class imbalances.

In [4]:
# transformation definition
transform = transforms.Compose([
    transforms.Grayscale(num_output_channels=1),
    transforms.Resize((48, 48)),
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,))
])

dataset_root = r'C:\Users\marco\OneDrive\Documenti\CV_project\ComputerVisionProject\Data\emotions_aug_images'

# create an instance of ImageFolder with the transformations
dataset = ImageFolder(root=dataset_root, transform=transform)

# seed = 42
torch.manual_seed(42)

# extract the labels and the indices of the dataset
labels = [label for _, label in dataset.imgs]

# convert the list into a tensor
labels = torch.tensor(labels)

# calculate the number of instances for each class
counts = torch.bincount(labels)

# calculate the weights for each class
weights = 1.0 / counts.float()

# create a weight vector for each index in the dataset
sample_weights = weights[labels]

# set the number of samples for the train set and the test set
train_size = number_instances_over_under_sampling_ * 7 * 0.8
val_size = number_instances_over_under_sampling_ * 7 * 0.1
test_size = number_instances_over_under_sampling_ * 7 * 0.1

# crea un sampler per il train set and one for the test set
train_sampler = torch.utils.data.WeightedRandomSampler(sample_weights, int(train_size))
val_sampler = torch.utils.data.WeightedRandomSampler(sample_weights, int(val_size))
test_sampler = torch.utils.data.WeightedRandomSampler(sample_weights, int(test_size))

# create a dataloader for the train set and the test set with the corresponding samplers
train_loader = DataLoader(dataset, batch_size=batch_size_, sampler=train_sampler, num_workers=4)
val_loader = DataLoader(dataset, batch_size=batch_size_, sampler=val_sampler, num_workers=4)
test_loader = DataLoader(dataset, batch_size=batch_size_, sampler=test_sampler, num_workers=4)


verifying if for each label there are number_instances_over_under_sampling instances

In [13]:
train_indices = list(train_loader.sampler)
train_counts = torch.bincount(torch.tensor(labels)[train_indices])
print("number of instances for class in the train set:", train_counts)

val_indices = list(val_loader.sampler)
val_counts = torch.bincount(torch.tensor(labels)[val_indices])
print("number of instances for class in the validation set:", val_counts)

test_indices = list(test_loader.sampler)
test_counts = torch.bincount(torch.tensor(labels)[test_indices])
print("number of instances for class in the test set:", test_counts)

number of instances for class in the train set: tensor([16164, 15870, 16006, 15886, 15980, 15859, 16235])
number of instances for class in the validation set: tensor([2017, 2013, 1980, 2019, 1985, 1996, 1990])
number of instances for class in the test set: tensor([2029, 2039, 1972, 1939, 2043, 2019, 1959])


  train_counts = torch.bincount(torch.tensor(labels)[train_indices])
  val_counts = torch.bincount(torch.tensor(labels)[val_indices])
  test_counts = torch.bincount(torch.tensor(labels)[test_indices])


In [14]:
len(train_loader), len(val_loader), len(test_loader)

(3500, 438, 438)

At the beginning these are the number of instances for each class

In [15]:
# obtain the classes (labels)
classes = dataset.classes

# count the instances for each class
instances_per_class = {cls: 0 for cls in classes}

for _, label in dataset.imgs:
    instances_per_class[classes[label]] += 1

# print("Number of instances per class:")
for cls, count in instances_per_class.items():
    print(f"Class {cls}: {count} istances")

Class angry_augmented: 29680 istances
Class disgust_augmented: 3216 istances
Class fear_augmented: 30675 istances
Class happy_augmented: 53867 istances
Class neutral_augmented: 37132 istances
Class sad_augmented: 36418 istances
Class surprise_augmented: 23981 istances


### **Emotion Recognition Training and Evaluation**

This code performs emotion recognition using a Convolutional Neural Network (CNN). Key components include:

- **Emotion Label Mapping**: Emotions are mapped to numerical values for training and evaluation.
- **Model Definition and Setup**: A CNN model, along with the criterion, optimizer, and scheduler, is defined.
- **Metrics Calculation**: The `calculate_metrics_per_class` function computes accuracy, precision, recall, F1 score, and support for each class.
- **Training Function**: The `train_epoch` function trains the model for one epoch, logging loss, accuracy, and metrics per class.
- **Evaluation Function**: The `evaluate` function assesses the model on the validation set, calculating loss, accuracy, and metrics per class.
- **Test Function**: The `test` function evaluates the model on the test set, providing loss, accuracy, and metrics per class.
- **Training and Evaluation Process**: The `train_and_evaluate` function orchestrates the entire process. It initializes WandB, sets up data transformations, creates weighted random samplers for handling class imbalances, and conducts training and validation. The best model is saved, and evaluation metrics are logged.
- **Hyperparameter Sweep Configuration**: The code supports hyperparameter optimization using WandB sweeps. Parameters include the number of instances for over/under-sampling, batch size, and epochs.

#### Training Process

The training process involves iterating through epochs, training on the specified dataset, and evaluating on validation data. Early stopping is implemented to prevent overfitting.

#### Logging and Visualization

WandB is integrated for experiment tracking. Metrics such as loss, accuracy, and metrics per class are logged during training and evaluation.

In [17]:
your_label_mapping = {0: 'Angry', 1: 'Disgust', 2: 'Fear', 3: 'Happy', 4: 'Neutral', 5: 'Sad', 6: 'Surprise'}

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# definition of the model, criterion, optimizer and scheduler
net = EmotionCNN().to(device)
criterion = nn.CrossEntropyLoss()
optimizer = SGD(net.parameters(), lr=0.01, momentum=0.9, nesterov=True, weight_decay=0.0001)
scheduler = ReduceLROnPlateau(optimizer, mode='max', factor=0.75, patience=5, verbose=True)

def calculate_metrics_per_class(true_labels, predicted_labels, label_mapping):
    unique_labels = list(label_mapping.keys())
    precision, recall, f1, support = precision_recall_fscore_support(true_labels, predicted_labels, labels=unique_labels)
    accuracy = accuracy_score(true_labels, predicted_labels)
    
    metrics_per_class = {}
    for i, idx in enumerate(unique_labels):
        metrics_per_class[idx] = {
            'precision': precision[i],
            'recall': recall[i],
            'f1': f1[i],
            'support': support[i]
        }

    return accuracy, metrics_per_class

# function for training
def train_epoch(model, train_loader, criterion, optimizer, device, label_mapping):
    model.train()
    running_loss = 0.0
    true_labels = []
    predicted_labels = []

    for inputs, labels in tqdm(train_loader, desc='Training', leave=False):
        inputs, labels = inputs.to(device), labels.to(device)
        
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()

        _, preds = torch.max(outputs, 1)
        true_labels.extend(labels.cpu().numpy())
        predicted_labels.extend(preds.cpu().numpy())

    average_loss = running_loss / len(train_loader)
    accuracy, metrics_per_class = calculate_metrics_per_class(true_labels, predicted_labels, label_mapping)

    return average_loss, accuracy, metrics_per_class

# function for evaluation
def evaluate(model, val_loader, criterion, device, label_mapping):
    model.eval()
    running_loss = 0.0
    true_labels = []
    predicted_labels = []

    with torch.no_grad():
        for inputs, labels in tqdm(val_loader, desc='Validation', leave=False):
            inputs, labels = inputs.to(device), labels.to(device)

            outputs = model(inputs)
            loss = criterion(outputs, labels)
            running_loss += loss.item()

            _, preds = torch.max(outputs, 1)
            true_labels.extend(labels.cpu().numpy())
            predicted_labels.extend(preds.cpu().numpy())

    average_loss = running_loss / len(val_loader)
    accuracy, metrics_per_class = calculate_metrics_per_class(true_labels, predicted_labels, label_mapping)

    return average_loss, accuracy, metrics_per_class

# Settings
num_epochs = epochs_
early_stopping_patience = 3  # numbers of epochs with no improvement after which training will be stopped (early stopping)
best_accuracy = 0.0
best_epoch = 0
no_improvement_count = 0

# Training cycle
for epoch in range(num_epochs):
    # Training
    train_loss, train_accuracy, train_metrics_per_class = train_epoch(net, train_loader, criterion, optimizer, device, your_label_mapping)

    # Validation
    val_loss, val_accuracy, val_metrics_per_class = evaluate(net, val_loader, criterion, device, your_label_mapping)

    # Scheduler step based on validation accuracy
    scheduler.step(val_accuracy)

    # Saving the model if the current accuracy is better than the best
    if val_accuracy > best_accuracy:
        best_accuracy = val_accuracy
        best_epoch = epoch
        torch.save(net.state_dict(), r'C:\Users\marco\OneDrive\Documenti\CV_project\ComputerVisionProject\Models\paper1_models\best_model.pth')
        no_improvement_count = 0
    else:
        no_improvement_count += 1

    # Print epoch statistics
    print(f'Epoch {epoch + 1}/{num_epochs} => '
          f'Train Loss: {train_loss:.4f}, Train Accuracy: {train_accuracy:.4f}')

    # Print metrics per class
    for idx, label in your_label_mapping.items():
        print(f'{label}: Train Precision: {train_metrics_per_class[idx]["precision"]:.4f}, Train Recall: {train_metrics_per_class[idx]["recall"]:.4f}, Train F1: {train_metrics_per_class[idx]["f1"]:.4f}, Train Support: {train_metrics_per_class[idx]["support"]}')

    print(f'Validation Loss: {val_loss:.4f}, Validation Accuracy: {val_accuracy:.4f}')
    
    for idx, label in your_label_mapping.items():
        print(f'{label}: Validation Precision: {val_metrics_per_class[idx]["precision"]:.4f}, Validation Recall: {val_metrics_per_class[idx]["recall"]:.4f}, Validation F1: {val_metrics_per_class[idx]["f1"]:.4f}, Validation Support: {val_metrics_per_class[idx]["support"]}')

    if no_improvement_count >= early_stopping_patience:
        print(f'Early stopping at epoch {epoch + 1} as there is no improvement in validation accuracy for {early_stopping_patience} consecutive epochs.')
        break

print(f'Best model achieved at epoch {best_epoch + 1} with accuracy {best_accuracy:.4f}')


Training:   0%|          | 0/3500 [00:00<?, ?it/s]

  _warn_prf(average, modifier, msg_start, len(result))


Epoch 1/15 => Train Loss: 1.9459, Train Accuracy: 0.1420
Angry: Train Precision: 0.1567, Train Recall: 0.0013, Train F1: 0.0027, Train Support: 15711
Disgust: Train Precision: 0.1458, Train Recall: 0.1059, Train F1: 0.1227, Train Support: 16118
Fear: Train Precision: 0.1425, Train Recall: 0.0939, Train F1: 0.1132, Train Support: 16068
Happy: Train Precision: 0.1417, Train Recall: 0.1087, Train F1: 0.1230, Train Support: 16028
Neutral: Train Precision: 0.1418, Train Recall: 0.6468, Train F1: 0.2326, Train Support: 16138
Sad: Train Precision: 0.1274, Train Recall: 0.0281, Train F1: 0.0460, Train Support: 15952
Surprise: Train Precision: 0.2635, Train Recall: 0.0024, Train F1: 0.0048, Train Support: 15985
Validation Loss: 1.9457, Validation Accuracy: 0.1688
Angry: Validation Precision: 0.0000, Validation Recall: 0.0000, Validation F1: 0.0000, Validation Support: 2003
Disgust: Validation Precision: 0.1645, Validation Recall: 0.1707, Validation F1: 0.1675, Validation Support: 1980
Fear: Val

                                                             

Epoch 2/15 => Train Loss: 1.9188, Train Accuracy: 0.2017
Angry: Train Precision: 0.1950, Train Recall: 0.1197, Train F1: 0.1484, Train Support: 15929
Disgust: Train Precision: 0.1736, Train Recall: 0.1485, Train F1: 0.1601, Train Support: 15924
Fear: Train Precision: 0.1451, Train Recall: 0.1201, Train F1: 0.1314, Train Support: 16058
Happy: Train Precision: 0.2225, Train Recall: 0.0647, Train F1: 0.1003, Train Support: 15850
Neutral: Train Precision: 0.1861, Train Recall: 0.1319, Train F1: 0.1544, Train Support: 16027
Sad: Train Precision: 0.1897, Train Recall: 0.2882, Train F1: 0.2288, Train Support: 16097
Surprise: Train Precision: 0.2468, Train Recall: 0.5341, Train F1: 0.3376, Train Support: 16115
Validation Loss: 1.8791, Validation Accuracy: 0.2562
Angry: Validation Precision: 0.3750, Validation Recall: 0.0015, Validation F1: 0.0030, Validation Support: 1975
Disgust: Validation Precision: 0.2505, Validation Recall: 0.2746, Validation F1: 0.2620, Validation Support: 1919
Fear: Val

                                                             

Epoch 3/15 => Train Loss: 1.8389, Train Accuracy: 0.3052
Angry: Train Precision: 0.2293, Train Recall: 0.1548, Train F1: 0.1848, Train Support: 15895
Disgust: Train Precision: 0.3283, Train Recall: 0.4308, Train F1: 0.3726, Train Support: 16232
Fear: Train Precision: 0.1714, Train Recall: 0.0354, Train F1: 0.0587, Train Support: 15762
Happy: Train Precision: 0.3243, Train Recall: 0.4243, Train F1: 0.3676, Train Support: 16056
Neutral: Train Precision: 0.2549, Train Recall: 0.2622, Train F1: 0.2585, Train Support: 16064
Sad: Train Precision: 0.2268, Train Recall: 0.2193, Train F1: 0.2230, Train Support: 16094
Surprise: Train Precision: 0.4071, Train Recall: 0.6050, Train F1: 0.4867, Train Support: 15897
Validation Loss: 1.7902, Validation Accuracy: 0.3614
Angry: Validation Precision: 0.2616, Validation Recall: 0.2103, Validation F1: 0.2332, Validation Support: 1978
Disgust: Validation Precision: 0.4702, Validation Recall: 0.5153, Validation F1: 0.4917, Validation Support: 2053
Fear: Val

                                                             

Epoch 4/15 => Train Loss: 1.7769, Train Accuracy: 0.3736
Angry: Train Precision: 0.2833, Train Recall: 0.1827, Train F1: 0.2221, Train Support: 15989
Disgust: Train Precision: 0.4272, Train Recall: 0.5673, Train F1: 0.4874, Train Support: 16133
Fear: Train Precision: 0.2142, Train Recall: 0.0607, Train F1: 0.0946, Train Support: 16103
Happy: Train Precision: 0.4249, Train Recall: 0.5195, Train F1: 0.4675, Train Support: 15895
Neutral: Train Precision: 0.3149, Train Recall: 0.3918, Train F1: 0.3492, Train Support: 15861
Sad: Train Precision: 0.2555, Train Recall: 0.2465, Train F1: 0.2509, Train Support: 15867
Surprise: Train Precision: 0.4903, Train Recall: 0.6444, Train F1: 0.5569, Train Support: 16152
Validation Loss: 1.7598, Validation Accuracy: 0.3937
Angry: Validation Precision: 0.2871, Validation Recall: 0.3041, Validation F1: 0.2953, Validation Support: 1993
Disgust: Validation Precision: 0.4807, Validation Recall: 0.5772, Validation F1: 0.5245, Validation Support: 2027
Fear: Val

                                                             

Epoch 5/15 => Train Loss: 1.7310, Train Accuracy: 0.4226
Angry: Train Precision: 0.3374, Train Recall: 0.2533, Train F1: 0.2894, Train Support: 16061
Disgust: Train Precision: 0.5076, Train Recall: 0.5953, Train F1: 0.5479, Train Support: 16005
Fear: Train Precision: 0.2498, Train Recall: 0.0663, Train F1: 0.1048, Train Support: 15849
Happy: Train Precision: 0.4830, Train Recall: 0.6139, Train F1: 0.5407, Train Support: 16132
Neutral: Train Precision: 0.3531, Train Recall: 0.4481, Train F1: 0.3950, Train Support: 16055
Sad: Train Precision: 0.2902, Train Recall: 0.2838, Train F1: 0.2870, Train Support: 15869
Surprise: Train Precision: 0.5389, Train Recall: 0.6912, Train F1: 0.6056, Train Support: 16029
Validation Loss: 1.7261, Validation Accuracy: 0.4256
Angry: Validation Precision: 0.4328, Validation Recall: 0.1883, Validation F1: 0.2624, Validation Support: 2002
Disgust: Validation Precision: 0.4790, Validation Recall: 0.6549, Validation F1: 0.5533, Validation Support: 2005
Fear: Val

                                                             

Epoch 6/15 => Train Loss: 1.6961, Train Accuracy: 0.4604
Angry: Train Precision: 0.3728, Train Recall: 0.3102, Train F1: 0.3387, Train Support: 15995
Disgust: Train Precision: 0.5681, Train Recall: 0.6318, Train F1: 0.5982, Train Support: 16029
Fear: Train Precision: 0.3195, Train Recall: 0.1087, Train F1: 0.1622, Train Support: 15921
Happy: Train Precision: 0.5400, Train Recall: 0.6344, Train F1: 0.5834, Train Support: 16091
Neutral: Train Precision: 0.3794, Train Recall: 0.4683, Train F1: 0.4192, Train Support: 15966
Sad: Train Precision: 0.3281, Train Recall: 0.3584, Train F1: 0.3426, Train Support: 16132
Surprise: Train Precision: 0.5869, Train Recall: 0.7106, Train F1: 0.6429, Train Support: 15866
Validation Loss: 1.7015, Validation Accuracy: 0.4565
Angry: Validation Precision: 0.4024, Validation Recall: 0.3555, Validation F1: 0.3775, Validation Support: 1983
Disgust: Validation Precision: 0.6164, Validation Recall: 0.6431, Validation F1: 0.6295, Validation Support: 2026
Fear: Val

                                                             

Epoch 7/15 => Train Loss: 1.6679, Train Accuracy: 0.4885
Angry: Train Precision: 0.4054, Train Recall: 0.3555, Train F1: 0.3788, Train Support: 15938
Disgust: Train Precision: 0.6185, Train Recall: 0.6825, Train F1: 0.6489, Train Support: 15892
Fear: Train Precision: 0.3372, Train Recall: 0.1417, Train F1: 0.1996, Train Support: 15937
Happy: Train Precision: 0.5781, Train Recall: 0.6525, Train F1: 0.6131, Train Support: 16179
Neutral: Train Precision: 0.4009, Train Recall: 0.4898, Train F1: 0.4409, Train Support: 16115
Sad: Train Precision: 0.3526, Train Recall: 0.3798, Train F1: 0.3657, Train Support: 16064
Surprise: Train Precision: 0.6147, Train Recall: 0.7178, Train F1: 0.6623, Train Support: 15875
Validation Loss: 1.6442, Validation Accuracy: 0.5146
Angry: Validation Precision: 0.4457, Validation Recall: 0.3763, Validation F1: 0.4081, Validation Support: 1961
Disgust: Validation Precision: 0.7029, Validation Recall: 0.7094, Validation F1: 0.7061, Validation Support: 2034
Fear: Val

                                                             

Epoch 8/15 => Train Loss: 1.6424, Train Accuracy: 0.5165
Angry: Train Precision: 0.4434, Train Recall: 0.3811, Train F1: 0.4099, Train Support: 15974
Disgust: Train Precision: 0.6659, Train Recall: 0.7160, Train F1: 0.6901, Train Support: 16004
Fear: Train Precision: 0.3769, Train Recall: 0.1745, Train F1: 0.2386, Train Support: 15977
Happy: Train Precision: 0.5992, Train Recall: 0.6889, Train F1: 0.6409, Train Support: 16102
Neutral: Train Precision: 0.4206, Train Recall: 0.5077, Train F1: 0.4601, Train Support: 15934
Sad: Train Precision: 0.3737, Train Recall: 0.4107, Train F1: 0.3913, Train Support: 15993
Surprise: Train Precision: 0.6410, Train Recall: 0.7343, Train F1: 0.6845, Train Support: 16016
Validation Loss: 1.6301, Validation Accuracy: 0.5293
Angry: Validation Precision: 0.4929, Validation Recall: 0.3563, Validation F1: 0.4136, Validation Support: 1962
Disgust: Validation Precision: 0.7196, Validation Recall: 0.7752, Validation F1: 0.7464, Validation Support: 1953
Fear: Val

                                                             

Epoch 9/15 => Train Loss: 1.6200, Train Accuracy: 0.5392
Angry: Train Precision: 0.4707, Train Recall: 0.4256, Train F1: 0.4470, Train Support: 16061
Disgust: Train Precision: 0.6995, Train Recall: 0.7558, Train F1: 0.7266, Train Support: 15967
Fear: Train Precision: 0.4014, Train Recall: 0.2084, Train F1: 0.2744, Train Support: 16110
Happy: Train Precision: 0.6313, Train Recall: 0.6928, Train F1: 0.6606, Train Support: 15926
Neutral: Train Precision: 0.4425, Train Recall: 0.5220, Train F1: 0.4789, Train Support: 15929
Sad: Train Precision: 0.3942, Train Recall: 0.4382, Train F1: 0.4151, Train Support: 16080
Surprise: Train Precision: 0.6623, Train Recall: 0.7366, Train F1: 0.6975, Train Support: 15927
Validation Loss: 1.6002, Validation Accuracy: 0.5608
Angry: Validation Precision: 0.5063, Validation Recall: 0.4400, Validation F1: 0.4708, Validation Support: 2007
Disgust: Validation Precision: 0.7877, Validation Recall: 0.7566, Validation F1: 0.7719, Validation Support: 2001
Fear: Val

                                                             

KeyboardInterrupt: 

**Test the final model with the test set of the same dataset**

In [6]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

def calculate_metrics_per_class(true_labels, predicted_labels, label_mapping):
    unique_labels = list(label_mapping.keys())
    precision, recall, f1, support = precision_recall_fscore_support(true_labels, predicted_labels, labels=unique_labels)
    accuracy = accuracy_score(true_labels, predicted_labels)
    
    metrics_per_class = {}
    for i, idx in enumerate(unique_labels):
        metrics_per_class[idx] = {
            'precision': precision[i],
            'recall': recall[i],
            'f1': f1[i],
            'support': support[i]
        }

    return accuracy, metrics_per_class

# function for testing
def test(model, test_loader, criterion, device, label_mapping):
    model.eval()
    running_loss = 0.0
    true_labels = []
    predicted_labels = []

    with torch.no_grad():
        for inputs, labels in tqdm(test_loader, desc='Testing', leave=False):
            inputs, labels = inputs.to(device), labels.to(device)

            outputs = model(inputs)
            loss = criterion(outputs, labels)
            running_loss += loss.item()

            _, preds = torch.max(outputs, 1)
            true_labels.extend(labels.cpu().numpy())
            predicted_labels.extend(preds.cpu().numpy())

    average_loss = running_loss / len(test_loader)
    accuracy, metrics_per_class = calculate_metrics_per_class(true_labels, predicted_labels, label_mapping)

    return average_loss, accuracy, metrics_per_class

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# load the model
best_model = EmotionCNN()
best_model.load_state_dict(torch.load(r'C:\Users\marco\OneDrive\Documenti\CV_project\ComputerVisionProject\Models\paper1_models\best_model_paper_1_20_epochs_bs_48_30k.pth', map_location=device))
best_model.to(device)
criterion = nn.CrossEntropyLoss()
your_label_mapping = {0: 'Angry', 1: 'Disgust', 2: 'Fear', 3: 'Happy', 4: 'Neutral', 5: 'Sad', 6: 'Surprise'}

test_loss, test_accuracy, test_metrics_per_class = test(best_model, test_loader, criterion, device, your_label_mapping)

# print the metrics per class
print(f'Test Loss: {test_loss:.4f}, Test Accuracy: {test_accuracy:.4f}')

for idx, label in your_label_mapping.items():
    print(f'{label}: Test Precision: {test_metrics_per_class[idx]["precision"]:.4f}, Test Recall: {test_metrics_per_class[idx]["recall"]:.4f}, Test F1: {test_metrics_per_class[idx]["f1"]:.4f}, Test Support: {test_metrics_per_class[idx]["support"]}')


                                                          

Test Loss: 1.3903, Test Accuracy: 0.7750
Angry: Test Precision: 0.7497, Test Recall: 0.7361, Test F1: 0.7429, Test Support: 2967
Disgust: Test Precision: 0.9443, Test Recall: 0.8830, Test F1: 0.9126, Test Support: 2975
Fear: Test Precision: 0.7215, Test Recall: 0.6304, Test F1: 0.6729, Test Support: 2922
Happy: Test Precision: 0.7768, Test Recall: 0.8618, Test F1: 0.8171, Test Support: 3082
Neutral: Test Precision: 0.7173, Test Recall: 0.7299, Test F1: 0.7235, Test Support: 3017
Sad: Test Precision: 0.6742, Test Recall: 0.7450, Test F1: 0.7078, Test Support: 3039
Surprise: Test Precision: 0.8606, Test Recall: 0.8339, Test F1: 0.8470, Test Support: 2998




**Test the final model with a different dataset**

In [7]:
# transformation definition
transform = transforms.Compose([
    transforms.Grayscale(num_output_channels=1),
    transforms.Resize((48, 48)),
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,))
])

dataset_root = r'C:\Users\marco\OneDrive\Documenti\CV_project\ComputerVisionProject\Data\test_images_emotion'

# create an instance of ImageFolder with the transformations
dataset = ImageFolder(root=dataset_root, transform=transform)

# seed = 42
torch.manual_seed(42)

# extract the labels and the indices of the dataset
labels = [label for _, label in dataset.imgs]

# convert the list into a tensor
labels = torch.tensor(labels)

# calculate the number of instances for each class
counts = torch.bincount(labels)

# calculate the weights for each class
weights = 1.0 / counts.float()

# create a weight vector for each index in the dataset
sample_weights = weights[labels]

# set the number of samples for the train set and the test set
train_size = (number_instances_over_under_sampling_/10) * 7 * 0.1
val_size = (number_instances_over_under_sampling_/10) * 7 * 0.1
test_size = (number_instances_over_under_sampling_/10) * 7 * 0.8

# crea un sampler per il train set and one for the test set
train_sampler = torch.utils.data.WeightedRandomSampler(sample_weights, int(train_size))
val_sampler = torch.utils.data.WeightedRandomSampler(sample_weights, int(val_size))
test_sampler = torch.utils.data.WeightedRandomSampler(sample_weights, int(test_size))

# create a dataloader for the train set and the test set with the corresponding samplers
train_loader = DataLoader(dataset, batch_size=batch_size_, sampler=train_sampler, num_workers=4)
val_loader = DataLoader(dataset, batch_size=batch_size_, sampler=val_sampler, num_workers=4)
test_loader = DataLoader(dataset, batch_size=batch_size_, sampler=test_sampler, num_workers=4)

In [8]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

def calculate_metrics_per_class(true_labels, predicted_labels, label_mapping):
    unique_labels = list(label_mapping.keys())
    precision, recall, f1, support = precision_recall_fscore_support(true_labels, predicted_labels, labels=unique_labels)
    accuracy = accuracy_score(true_labels, predicted_labels)
    
    metrics_per_class = {}
    for i, idx in enumerate(unique_labels):
        metrics_per_class[idx] = {
            'precision': precision[i],
            'recall': recall[i],
            'f1': f1[i],
            'support': support[i]
        }

    return accuracy, metrics_per_class

# function for the test
def test(model, test_loader, criterion, device, label_mapping):
    model.eval()
    running_loss = 0.0
    true_labels = []
    predicted_labels = []

    with torch.no_grad():
        for inputs, labels in tqdm(test_loader, desc='Testing', leave=False):
            inputs, labels = inputs.to(device), labels.to(device)

            outputs = model(inputs)
            loss = criterion(outputs, labels)
            running_loss += loss.item()

            _, preds = torch.max(outputs, 1)
            true_labels.extend(labels.cpu().numpy())
            predicted_labels.extend(preds.cpu().numpy())

    average_loss = running_loss / len(test_loader)
    accuracy, metrics_per_class = calculate_metrics_per_class(true_labels, predicted_labels, label_mapping)

    return average_loss, accuracy, metrics_per_class

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# load the model 
best_model = EmotionCNN()
best_model.load_state_dict(torch.load(r"C:\Users\marco\OneDrive\Documenti\CV_project\ComputerVisionProject\Models\paper1_models\best_model_paper_1_20_epochs_bs_48_30k.pth", map_location=device))
best_model.to(device)
criterion = nn.CrossEntropyLoss()
your_label_mapping = {0: 'Angry', 1: 'Disgust', 2: 'Fear', 3: 'Happy', 4: 'Neutral', 5: 'Sad', 6: 'Surprise'}

test_loss, test_accuracy, test_metrics_per_class = test(best_model, test_loader, criterion, device, your_label_mapping)

# print the loss and the accuracy of the model
print(f'Test Loss: {test_loss:.4f}, Test Accuracy: {test_accuracy:.4f}')

for idx, label in your_label_mapping.items():
    print(f'{label}: Test Precision: {test_metrics_per_class[idx]["precision"]:.4f}, Test Recall: {test_metrics_per_class[idx]["recall"]:.4f}, Test F1: {test_metrics_per_class[idx]["f1"]:.4f}, Test Support: {test_metrics_per_class[idx]["support"]}')


Testing:   0%|          | 0/290 [00:00<?, ?it/s]

                                                          

Test Loss: 1.7549, Test Accuracy: 0.4045
Angry: Test Precision: 0.3897, Test Recall: 0.4119, Test F1: 0.4005, Test Support: 2367
Disgust: Test Precision: 0.5617, Test Recall: 0.0959, Test F1: 0.1638, Test Support: 2420
Fear: Test Precision: 0.4656, Test Recall: 0.2522, Test F1: 0.3272, Test Support: 2391
Happy: Test Precision: 0.3737, Test Recall: 0.9099, Test F1: 0.5299, Test Support: 2497
Neutral: Test Precision: 0.3302, Test Recall: 0.4018, Test F1: 0.3625, Test Support: 2275
Sad: Test Precision: 0.3552, Test Recall: 0.3682, Test F1: 0.3616, Test Support: 2428
Surprise: Test Precision: 0.7382, Test Recall: 0.3737, Test F1: 0.4962, Test Support: 2422




### **Live emotion detection**

In [None]:
num_classes = 7
your_label_mapping = {0: 'Angry', 1: 'Disgust', 2: 'Fear', 3: 'Happy', 4: 'Neutral', 5: 'Sad', 6: 'Surprise'}
model = EmotionCNN(num_classes)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.load_state_dict(torch.load(r"C:\Users\marco\OneDrive\Documenti\CV_project\ComputerVisionProject\Models\paper1_models\best_model_paper_1_20_epochs_bs_48_30k.pth", map_location=device))
model.to(device)
model.eval() 

# initialize the face detector
detector = dlib.get_frontal_face_detector()

# initialize the camera
cap = cv2.VideoCapture(0)

# apply the transformations to the face image
transform = transforms.Compose([
    transforms.Grayscale(num_output_channels=1),
    transforms.Resize((48, 48)),
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,))
])

while True:
    # read a frame from the camera
    ret, frame = cap.read()

    # faces detection
    faces = detector(frame)

    # if there is at least one face detected, process the image
    if len(faces) > 0:
        # take only the first face
        face = faces[0]
        
        # cut the face from the frame
        x, y, w, h = face.left(), face.top(), face.width(), face.height()
        face_image = frame[y:y+h, x:x+w]

        # check if the face image is not empty
        if not face_image.size == 0:
            # apply the transformations to the face image
            pil_image = Image.fromarray(cv2.cvtColor(face_image, cv2.COLOR_BGR2RGB))
            input_image = transform(pil_image).unsqueeze(0)  # Aggiunge una dimensione di batch
            input_image = input_image.to(device)

            # model prediction
            with torch.no_grad():
                output = model(input_image)

            # get the label predicted by the model
            _, predicted = torch.max(output, 1)
            predicted_emotion = your_label_mapping[predicted.item()]

            print(f'Predicted Emotion: {predicted_emotion}')

    # show the frame with the face rectangle added
    cv2.imshow("Face Detection", frame)

    # wait for 2 seconds (time in milliseconds)
    cv2.waitKey(1000)

    # if q is pressed, terminate the loop
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

# release the capture
cap.release()
cv2.destroyAllWindows()