In [None]:
# FOR WINDOWS (your env must be called project-venv; if you choose another name add it in .gitignore)
import subprocess

# Set the execution policy
subprocess.run(["Set-ExecutionPolicy", "RemoteSigned", "-Scope", "Process"], shell=True)

# Activate the virtual environment
subprocess.run(["cd", ".\\project-venv\\"], shell=True)
subprocess.run([".\\Scripts\\Activate.ps1"], shell=True)

# Install requirements
subprocess.run(["pip", "install", "-r", "../requirements.txt"], shell=True)
subprocess.run(["pip", "install", "-r", "../emotions_requirements.txt"], shell=True)

In [None]:
import gdown


########## dlib_face_recognition_resnet_model_v1.dat ################

# URL del file di Google Drive
url_1 = 'https://drive.google.com/uc?id=1tXD6dha1ZD4fceLWsGlI89t8HeHlkJYC' 

# Percorso in cui si desidera salvare il file scaricato
output_1 = '../Models/dlib_face_recognition_resnet_model_v1.dat'

gdown.download(url_1, output_1, quiet=False)



########## shape_predictor_68_face_landmarks.dat ###################

# URL del file di Google Drive
url_2 = 'https://drive.google.com/uc?id=1dvIeJtWhObCgSYJt8WKnjIlHhw5Y9ioN'

# Percorso in cui si desidera salvare il file scaricato
output_2 = '../Models/shape_predictor_68_face_landmarks.dat'

gdown.download(url_2, output_2, quiet=False)

# Emotion Recognition task

**import packages**

In [None]:
import cv2
import os
import random
import numpy as np
from scipy.ndimage import gaussian_filter, map_coordinates
import torch
import torch.nn as nn
from sklearn.cluster import DBSCAN
from torch.utils.data import random_split, ConcatDataset
from torchvision.datasets import ImageFolder
from torchvision import transforms
from torch.utils.data import DataLoader
from sklearn.model_selection import train_test_split
from torch.utils.data.sampler import SubsetRandomSampler
from torch.optim import SGD
from torch.optim.lr_scheduler import ReduceLROnPlateau
from sklearn.metrics import precision_recall_fscore_support, accuracy_score
from tqdm import tqdm
import dlib
from PIL import Image
import wandb

**wandb login**

In [None]:
wandb.login(key='d29d51017f4231b5149d36ad242526b374c9c60a')

## Paper implementation 1
https://ieeexplore.ieee.org/abstract/document/9659697?casa_token=zDD7lwwOig8AAAAA:KcIHhupXAXgiaB_C7A0uNDB7ehrsWNyovQdgDu9LmnwToOGU6akB_gjWTy7JCf4UdKK03Is

**Dataset augmenting**

In [18]:
emotions = ['angry', 'disgust', 'fear', 'happy', 'neutral', 'sad', 'surprise']

# cycle through emotions
for emotion in emotions:
    # path of the folder containing the images
    folder_path = fr"C:\Users\marco\OneDrive\Documenti\CV_project\ComputerVisionProject\Data\emotions_images\{emotion}"
    output_folder_path = fr"C:\Users\marco\OneDrive\Documenti\CV_project\ComputerVisionProject\Data\emotions_images\{emotion}_augmented"


    if not os.path.exists(output_folder_path):
        os.makedirs(output_folder_path)

    # list of images in the folder
    image_files = [f for f in os.listdir(folder_path) if f.endswith(('.jpg', '.jpeg', '.png'))]

    # define transformations inside the apply_transformations function
    def apply_transformations(image):
        # horizontal_flip
        flipped_horizontal = cv2.flip(image, 1)

        # vertical flip
        flipped_vertical = cv2.flip(image, 0)

        # Zoom
        zoom_factor = random.uniform(0.8, 1.2)
        height, width = image.shape[:2]
        zoomed_image = cv2.resize(image, (int(width * zoom_factor), int(height * zoom_factor)))

        # translation
        tx = random.randint(-10, 10)
        ty = random.randint(-10, 10)
        translation_matrix = np.float32([[1, 0, tx], [0, 1, ty]])
        translated_image = cv2.warpAffine(image, translation_matrix, (width, height))

        # contrast and brightness control
        alpha = random.uniform(0.8, 1.2)
        beta = random.randint(-35, 35)
        adjusted_image = cv2.convertScaleAbs(image, alpha=alpha, beta=beta)

        # elastic transformation
        elastic_image = elastic_transform(image, alpha=random.randint(6, 14), sigma=random.uniform(1.1, 2.0))

        return [image, translated_image, flipped_horizontal, zoomed_image, adjusted_image, elastic_image] #forse togliere flipped vertical

    def elastic_transform(image, alpha, sigma):
        random_state = np.random.RandomState(None)
        shape = image.shape
        dx = gaussian_filter((random_state.rand(*shape) * 2 - 1), sigma, mode="constant", cval=0) * alpha
        dy = gaussian_filter((random_state.rand(*shape) * 2 - 1), sigma, mode="constant", cval=0) * alpha
        dz = np.zeros_like(dx)

        x, y, z = np.meshgrid(np.arange(shape[1]), np.arange(shape[0]), np.arange(shape[2]))
        indices = np.reshape(y + dy, (-1, 1)), np.reshape(x + dx, (-1, 1)), np.reshape(z + dz, (-1, 1))

        distorted_image = map_coordinates(image, indices, order=1, mode='reflect')
        distorted_image = distorted_image.reshape(image.shape)

        return distorted_image

    # apply data augmentation
    for image_file in image_files:
        image_path = os.path.join(folder_path, image_file)
        image = cv2.imread(image_path)

        augmented_images = apply_transformations(image)

        # save new images
        base_name = os.path.splitext(image_file)[0]
        for i, augmented_image in enumerate(augmented_images):
            output_file_path = os.path.join(output_folder_path, f"{base_name}_aug_{i}.jpg")
            cv2.imwrite(output_file_path, augmented_image)

**CNN architecture**

In [1]:
class EmotionCNN(nn.Module):
    def __init__(self, num_classes=7):
        super(EmotionCNN, self).__init__()
        
        self.conv1 = nn.Conv2d(1, 64, kernel_size=5, stride=1, padding=0)
        self.relu1 = nn.ReLU()
        self.maxpool1 = nn.MaxPool2d(kernel_size=5, stride=2)
        
        self.conv2a = nn.Conv2d(64, 64, kernel_size=3, stride=1, padding=1)
        self.relu2a = nn.ReLU()
        self.conv2b = nn.Conv2d(64, 64, kernel_size=3, stride=1, padding=1)
        self.relu2b = nn.ReLU()
        self.avgpool2 = nn.AvgPool2d(kernel_size=3, stride=2)
        
        self.conv3a = nn.Conv2d(64, 128, kernel_size=3, stride=1, padding=1)
        self.relu3a = nn.ReLU()
        self.conv3b = nn.Conv2d(128, 128, kernel_size=3, stride=1, padding=1)
        self.relu3b = nn.ReLU()
        self.avgpool3 = nn.AvgPool2d(kernel_size=3, stride=2)
        
        # verify the output size of conv2 and conv3
        self.dummy_input = torch.randn(1, 1, 48, 48)
        self.dummy_output_size = self._get_conv_output_size(self.dummy_input)
        
        # update fc1 units based on feature map size
        self.fc1 = nn.Linear(self.dummy_output_size, 1024)
        self.relu_fc1 = nn.ReLU()
        self.dropout1 = nn.Dropout(0.2)
        
        self.fc2 = nn.Linear(1024, 1024)
        self.relu_fc2 = nn.ReLU()
        self.dropout2 = nn.Dropout(0.2)
        
        self.fc3 = nn.Linear(1024, num_classes)
        self.softmax = nn.Softmax(dim=1)

    def _get_conv_output_size(self, input_tensor):
        x = self.maxpool1(self.relu1(self.conv1(input_tensor)))
        x = self.relu2a(self.conv2a(x))
        x = self.relu2b(self.conv2b(x))
        x = self.avgpool2(x)
        x = self.relu3a(self.conv3a(x))
        x = self.relu3b(self.conv3b(x))
        x = self.avgpool3(x)
        return x.view(x.size(0), -1).size(1)

    def forward(self, x):
        x = self.maxpool1(self.relu1(self.conv1(x)))
        x = self.relu2a(self.conv2a(x))
        x = self.relu2b(self.conv2b(x))
        x = self.avgpool2(x)
        x = self.relu3a(self.conv3a(x))
        x = self.relu3b(self.conv3b(x))
        x = self.avgpool3(x)
        x = x.view(x.size(0), -1)
        x = self.dropout1(self.relu_fc1(self.fc1(x)))
        x = self.dropout2(self.relu_fc2(self.fc2(x)))
        x = self.softmax(self.fc3(x))
        return x

**hyperparameters**

In [2]:
number_instances_over_under_sampling_ = 30000
batch_size_ = 48
epochs_ = 20

**delete outliers with dbscan**

In [None]:
def calculate_pixel_std(image_path):
    image = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
    return np.std(image)

def remove_outliers_dbscan(folder_path, eps, min_samples):
    images = []
    for filename in os.listdir(folder_path):
        image_path = os.path.join(folder_path, filename)
        pixel_std = calculate_pixel_std(image_path)
        images.append([pixel_std])

    images = np.array(images)

    # dbscan to identify outliers
    dbscan = DBSCAN(eps=eps, min_samples=min_samples)
    labels = dbscan.fit_predict(images)

    # remove outliers
    counter = 0
    for i, (label, image) in enumerate(zip(labels, os.listdir(folder_path))):
        if label == -1:  
            image_path = os.path.join(folder_path, image)
            os.remove(image_path)
            counter += 1
    print(counter)

# DBSCAN configuration
dbscan_eps = 0.4  # search radius
dbscan_min_samples = 15  # minimum number of samples required for a cluster

emotions_folder_path = r"C:\Users\marco\OneDrive\Documenti\CV_project\ComputerVisionProject\Data\emotions_aug_images"


for emotion in os.listdir(emotions_folder_path):
    emotion_folder_path = os.path.join(emotions_folder_path, emotion)
    print(emotion)
    
    if emotion == 'disgust':
        tmp_folder_path = emotion_folder_path 
        remove_outliers_dbscan(tmp_folder_path, 0.5, 10)
    else:
        remove_outliers_dbscan(emotion_folder_path, dbscan_eps, dbscan_min_samples)


Print the number of the outliers per class

In [3]:
def calculate_pixel_std(image_path):
    image = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
    return np.std(image)

def get_outliers_dbscan(folder_path, eps, min_samples):
    images = []
    image_paths = []

    for filename in os.listdir(folder_path):
        image_path = os.path.join(folder_path, filename)
        pixel_std = calculate_pixel_std(image_path)
        images.append([pixel_std])
        image_paths.append(image_path)

    images = np.array(images)

    # dbscan to identify outliers
    dbscan = DBSCAN(eps=eps, min_samples=min_samples)
    labels = dbscan.fit_predict(images)

    # collect outlier paths and count for each label
    outlier_paths_by_label = {}
    for label, image_path in zip(labels, image_paths):
        if label == -1:
            if label not in outlier_paths_by_label:
                outlier_paths_by_label[label] = []
            outlier_paths_by_label[label].append(image_path)

    return outlier_paths_by_label

# Configurazione DBSCAN
dbscan_eps = 0.5  # Raggio di ricerca
dbscan_min_samples = 10  # Numero minimo di campioni in un cluster

emotions_folder_path = r"C:\Users\marco\OneDrive\Documenti\CV_project\ComputerVisionProject\Data\emotions_aug_images"

for emotion in os.listdir(emotions_folder_path):
    emotion_folder_path = os.path.join(emotions_folder_path, emotion)
    print(emotion)

    if emotion == 'disgust':
        tmp_folder_path = emotion_folder_path 
        outliers = get_outliers_dbscan(tmp_folder_path, 0.5, 10)
    else:
        outliers = get_outliers_dbscan(emotion_folder_path, dbscan_eps, dbscan_min_samples)

    # Stampa i percorsi degli outliers per ogni label
    for label, outlier_paths in outliers.items():
        print(f"Label {label}: {len(outlier_paths)} outliers")
        for path in outlier_paths:
            print(path)


angry
Label -1: 44 outliers
C:\Users\marco\OneDrive\Documenti\CV_project\ComputerVisionProject\Data\emotions_aug_images\angry\PrivateTest_24139016_aug_1.jpg
C:\Users\marco\OneDrive\Documenti\CV_project\ComputerVisionProject\Data\emotions_aug_images\angry\PrivateTest_27212219_aug_1.jpg
C:\Users\marco\OneDrive\Documenti\CV_project\ComputerVisionProject\Data\emotions_aug_images\angry\PrivateTest_33469617_aug_5.jpg
C:\Users\marco\OneDrive\Documenti\CV_project\ComputerVisionProject\Data\emotions_aug_images\angry\PrivateTest_51610150_aug_1.jpg
C:\Users\marco\OneDrive\Documenti\CV_project\ComputerVisionProject\Data\emotions_aug_images\angry\PrivateTest_66705645_aug_1.jpg
C:\Users\marco\OneDrive\Documenti\CV_project\ComputerVisionProject\Data\emotions_aug_images\angry\PrivateTest_66705645_aug_5.jpg
C:\Users\marco\OneDrive\Documenti\CV_project\ComputerVisionProject\Data\emotions_aug_images\angry\PrivateTest_67702983_aug_1.jpg
C:\Users\marco\OneDrive\Documenti\CV_project\ComputerVisionProject\Da

**with over and under sampling**

**1-** Using this method, a random selection of number_instances_over_under_sampling instances is made for each class. ########################################################################

In [14]:
# transformation definition
transform = transforms.Compose([
    transforms.Grayscale(num_output_channels=1),
    transforms.Resize((48, 48)),
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,))
])

dataset_root = r'C:\Users\marco\OneDrive\Documenti\CV_project\ComputerVisionProject\Data\emotions_aug_images'

# create an instance of ImageFolder with the transformations
dataset = ImageFolder(root=dataset_root, transform=transform)

# seed = 42
torch.manual_seed(42)

# extract the labels and the indices of the dataset
labels = [label for _, label in dataset.imgs]

# convert the list into a tensor
labels = torch.tensor(labels)

# calculate the number of instances for each class
counts = torch.bincount(labels)

# calculate the weights for each class
weights = 1.0 / counts.float()

# create a weight vector for each index in the dataset
sample_weights = weights[labels]

# set the number of samples for the train set and the test set
train_size = number_instances_over_under_sampling_ * 7 * 0.8
val_size = number_instances_over_under_sampling_ * 7 * 0.1
test_size = number_instances_over_under_sampling_ * 7 * 0.1

# crea un sampler per il train set and one for the test set
train_sampler = torch.utils.data.WeightedRandomSampler(sample_weights, int(train_size))
val_sampler = torch.utils.data.WeightedRandomSampler(sample_weights, int(val_size))
test_sampler = torch.utils.data.WeightedRandomSampler(sample_weights, int(test_size))

# create a dataloader for the train set and the test set with the corresponding samplers
train_loader = DataLoader(dataset, batch_size=batch_size_, sampler=train_sampler, num_workers=4)
val_loader = DataLoader(dataset, batch_size=batch_size_, sampler=val_sampler, num_workers=4)
test_loader = DataLoader(dataset, batch_size=batch_size_, sampler=test_sampler, num_workers=4)


verifying if for each label there are number_instances_over_under_sampling instances

In [4]:
train_indices = list(train_loader.sampler)
train_counts = torch.bincount(torch.tensor(labels)[train_indices])
print("number of instances for class in the train set:", train_counts)

val_indices = list(val_loader.sampler)
val_counts = torch.bincount(torch.tensor(labels)[val_indices])
print("number of instances for class in the validation set:", val_counts)

test_indices = list(test_loader.sampler)
test_counts = torch.bincount(torch.tensor(labels)[test_indices])
print("number of instances for class in the test set:", test_counts)

number of instances for class in the train set: tensor([24215, 23930, 23907, 23856, 23931, 23907, 24254])
number of instances for class in the validation set: tensor([3052, 3019, 3029, 3027, 2921, 3047, 2905])
number of instances for class in the test set: tensor([2935, 2953, 2998, 3093, 3000, 2991, 3030])


  train_counts = torch.bincount(torch.tensor(labels)[train_indices])
  val_counts = torch.bincount(torch.tensor(labels)[val_indices])
  test_counts = torch.bincount(torch.tensor(labels)[test_indices])


In [5]:
len(train_loader), len(val_loader), len(test_loader)

(3500, 438, 438)

At the beginning these are the number of instances for each class

In [6]:
# obtain the classes (labels)
classes = dataset.classes

# count the instances for each class
instances_per_class = {cls: 0 for cls in classes}

for _, label in dataset.imgs:
    instances_per_class[classes[label]] += 1

# print("Number of instances per class:")
for cls, count in instances_per_class.items():
    print(f"Class {cls}: {count} istances")

Class angry: 34671 istances
Class disgust: 3829 istances
Class fear: 35847 istances
Class happy: 62923 istances
Class neutral: 43386 istances
Class sad: 42539 istances
Class surprise: 28014 istances


**model training**

In [71]:
your_label_mapping = {0: 'Angry', 1: 'Disgust', 2: 'Fear', 3: 'Happy', 4: 'Neutral', 5: 'Sad', 6: 'Surprise'}

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# definition of the model, criterion, optimizer and scheduler
net = EmotionCNN().to(device)
criterion = nn.CrossEntropyLoss()
optimizer = SGD(net.parameters(), lr=0.01, momentum=0.9, nesterov=True, weight_decay=0.0001)
scheduler = ReduceLROnPlateau(optimizer, mode='max', factor=0.75, patience=5, verbose=True)

def calculate_metrics_per_class(true_labels, predicted_labels, label_mapping):
    unique_labels = list(label_mapping.keys())
    precision, recall, f1, support = precision_recall_fscore_support(true_labels, predicted_labels, labels=unique_labels)
    accuracy = accuracy_score(true_labels, predicted_labels)
    
    metrics_per_class = {}
    for i, idx in enumerate(unique_labels):
        metrics_per_class[idx] = {
            'precision': precision[i],
            'recall': recall[i],
            'f1': f1[i],
            'support': support[i]
        }

    return accuracy, metrics_per_class

# function for training
def train_epoch(model, train_loader, criterion, optimizer, device, label_mapping):
    model.train()
    running_loss = 0.0
    true_labels = []
    predicted_labels = []

    for inputs, labels in tqdm(train_loader, desc='Training', leave=False):
        inputs, labels = inputs.to(device), labels.to(device)
        
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()

        _, preds = torch.max(outputs, 1)
        true_labels.extend(labels.cpu().numpy())
        predicted_labels.extend(preds.cpu().numpy())

    average_loss = running_loss / len(train_loader)
    accuracy, metrics_per_class = calculate_metrics_per_class(true_labels, predicted_labels, label_mapping)

    return average_loss, accuracy, metrics_per_class

# function for evaluation
def evaluate(model, val_loader, criterion, device, label_mapping):
    model.eval()
    running_loss = 0.0
    true_labels = []
    predicted_labels = []

    with torch.no_grad():
        for inputs, labels in tqdm(val_loader, desc='Validation', leave=False):
            inputs, labels = inputs.to(device), labels.to(device)

            outputs = model(inputs)
            loss = criterion(outputs, labels)
            running_loss += loss.item()

            _, preds = torch.max(outputs, 1)
            true_labels.extend(labels.cpu().numpy())
            predicted_labels.extend(preds.cpu().numpy())

    average_loss = running_loss / len(val_loader)
    accuracy, metrics_per_class = calculate_metrics_per_class(true_labels, predicted_labels, label_mapping)

    return average_loss, accuracy, metrics_per_class

# Settings
num_epochs = epochs_
early_stopping_patience = 3  # numbers of epochs with no improvement after which training will be stopped (early stopping)
best_accuracy = 0.0
best_epoch = 0
no_improvement_count = 0

# Training cycle
for epoch in range(num_epochs):
    # Training
    train_loss, train_accuracy, train_metrics_per_class = train_epoch(net, train_loader, criterion, optimizer, device, your_label_mapping)

    # Validation
    val_loss, val_accuracy, val_metrics_per_class = evaluate(net, val_loader, criterion, device, your_label_mapping)

    # Scheduler step based on validation accuracy
    scheduler.step(val_accuracy)

    # Saving the model if the current accuracy is better than the best
    if val_accuracy > best_accuracy:
        best_accuracy = val_accuracy
        best_epoch = epoch
        torch.save(net.state_dict(), r'C:\Users\marco\OneDrive\Documenti\CV_project\ComputerVisionProject\Models\paper1_models\best_model.pth')
        no_improvement_count = 0
    else:
        no_improvement_count += 1

    # Print epoch statistics
    print(f'Epoch {epoch + 1}/{num_epochs} => '
          f'Train Loss: {train_loss:.4f}, Train Accuracy: {train_accuracy:.4f}')

    # Print metrics per class
    for idx, label in your_label_mapping.items():
        print(f'{label}: Train Precision: {train_metrics_per_class[idx]["precision"]:.4f}, Train Recall: {train_metrics_per_class[idx]["recall"]:.4f}, Train F1: {train_metrics_per_class[idx]["f1"]:.4f}, Train Support: {train_metrics_per_class[idx]["support"]}')

    print(f'Validation Loss: {val_loss:.4f}, Validation Accuracy: {val_accuracy:.4f}')
    
    for idx, label in your_label_mapping.items():
        print(f'{label}: Validation Precision: {val_metrics_per_class[idx]["precision"]:.4f}, Validation Recall: {val_metrics_per_class[idx]["recall"]:.4f}, Validation F1: {val_metrics_per_class[idx]["f1"]:.4f}, Validation Support: {val_metrics_per_class[idx]["support"]}')

    if no_improvement_count >= early_stopping_patience:
        print(f'Early stopping at epoch {epoch + 1} as there is no improvement in validation accuracy for {early_stopping_patience} consecutive epochs.')
        break

print(f'Best model achieved at epoch {best_epoch + 1} with accuracy {best_accuracy:.4f}')


  _warn_prf(average, modifier, msg_start, len(result))


Epoch 1/10 => Train Loss: 1.9459, Train Accuracy: 0.1480
Angry: Train Precision: 0.1479, Train Recall: 0.1217, Train F1: 0.1336, Train Support: 16033
Disgust: Train Precision: 0.1392, Train Recall: 0.1083, Train F1: 0.1218, Train Support: 16049
Fear: Train Precision: 0.1250, Train Recall: 0.0001, Train F1: 0.0001, Train Support: 15950
Happy: Train Precision: 0.1438, Train Recall: 0.2303, Train F1: 0.1770, Train Support: 16080
Neutral: Train Precision: 0.1478, Train Recall: 0.0707, Train F1: 0.0956, Train Support: 15890
Sad: Train Precision: 0.1440, Train Recall: 0.2054, Train F1: 0.1693, Train Support: 15916
Surprise: Train Precision: 0.1584, Train Recall: 0.2980, Train F1: 0.2069, Train Support: 16082
Validation Loss: 1.9457, Validation Accuracy: 0.1679
Angry: Validation Precision: 0.0000, Validation Recall: 0.0000, Validation F1: 0.0000, Validation Support: 1941
Disgust: Validation Precision: 0.0000, Validation Recall: 0.0000, Validation F1: 0.0000, Validation Support: 1989
Fear: Val

  _warn_prf(average, modifier, msg_start, len(result))


Epoch 2/10 => Train Loss: 1.9389, Train Accuracy: 0.1712
Angry: Train Precision: 0.1532, Train Recall: 0.4038, Train F1: 0.2221, Train Support: 16162
Disgust: Train Precision: 0.1734, Train Recall: 0.0051, Train F1: 0.0099, Train Support: 15956
Fear: Train Precision: 0.0727, Train Recall: 0.0002, Train F1: 0.0005, Train Support: 16023
Happy: Train Precision: 0.1430, Train Recall: 0.0943, Train F1: 0.1137, Train Support: 15971
Neutral: Train Precision: 0.1745, Train Recall: 0.0593, Train F1: 0.0885, Train Support: 16008
Sad: Train Precision: 0.1734, Train Recall: 0.2551, Train F1: 0.2065, Train Support: 16047
Surprise: Train Precision: 0.2052, Train Recall: 0.3798, Train F1: 0.2664, Train Support: 15833
Validation Loss: 1.9242, Validation Accuracy: 0.1991
Angry: Validation Precision: 0.2056, Validation Recall: 0.1438, Validation F1: 0.1692, Validation Support: 1989
Disgust: Validation Precision: 0.1846, Validation Recall: 0.0061, Validation F1: 0.0118, Validation Support: 1971
Fear: Val

                                                             

Epoch 3/10 => Train Loss: 1.9033, Train Accuracy: 0.2240
Angry: Train Precision: 0.1939, Train Recall: 0.2202, Train F1: 0.2063, Train Support: 15932
Disgust: Train Precision: 0.2226, Train Recall: 0.1919, Train F1: 0.2061, Train Support: 16082
Fear: Train Precision: 0.1583, Train Recall: 0.0130, Train F1: 0.0240, Train Support: 16011
Happy: Train Precision: 0.2174, Train Recall: 0.2830, Train F1: 0.2459, Train Support: 16002
Neutral: Train Precision: 0.1823, Train Recall: 0.1238, Train F1: 0.1474, Train Support: 15878
Sad: Train Precision: 0.1909, Train Recall: 0.2283, Train F1: 0.2079, Train Support: 16082
Surprise: Train Precision: 0.2913, Train Recall: 0.5071, Train F1: 0.3700, Train Support: 16013
Validation Loss: 1.8859, Validation Accuracy: 0.2509
Angry: Validation Precision: 0.2387, Validation Recall: 0.1595, Validation F1: 0.1912, Validation Support: 2050
Disgust: Validation Precision: 0.2477, Validation Recall: 0.3495, Validation F1: 0.2899, Validation Support: 1960
Fear: Val

                                                             

Epoch 4/10 => Train Loss: 1.8649, Train Accuracy: 0.2731
Angry: Train Precision: 0.2171, Train Recall: 0.2066, Train F1: 0.2117, Train Support: 16030
Disgust: Train Precision: 0.2902, Train Recall: 0.3569, Train F1: 0.3201, Train Support: 15898
Fear: Train Precision: 0.1843, Train Recall: 0.0919, Train F1: 0.1226, Train Support: 16171
Happy: Train Precision: 0.2832, Train Recall: 0.4807, Train F1: 0.3564, Train Support: 16059
Neutral: Train Precision: 0.1847, Train Recall: 0.0584, Train F1: 0.0888, Train Support: 15854
Sad: Train Precision: 0.2078, Train Recall: 0.2256, Train F1: 0.2164, Train Support: 16002
Surprise: Train Precision: 0.4030, Train Recall: 0.4914, Train F1: 0.4428, Train Support: 15986
Validation Loss: 1.8457, Validation Accuracy: 0.2966
Angry: Validation Precision: 0.2582, Validation Recall: 0.1829, Validation F1: 0.2142, Validation Support: 1968
Disgust: Validation Precision: 0.3941, Validation Recall: 0.2639, Validation F1: 0.3161, Validation Support: 2008
Fear: Val

                                                             

Epoch 5/10 => Train Loss: 1.8276, Train Accuracy: 0.3166
Angry: Train Precision: 0.2363, Train Recall: 0.1839, Train F1: 0.2068, Train Support: 16093
Disgust: Train Precision: 0.3650, Train Recall: 0.4224, Train F1: 0.3916, Train Support: 15887
Fear: Train Precision: 0.1984, Train Recall: 0.0868, Train F1: 0.1207, Train Support: 15965
Happy: Train Precision: 0.3630, Train Recall: 0.4960, Train F1: 0.4192, Train Support: 15943
Neutral: Train Precision: 0.2414, Train Recall: 0.1730, Train F1: 0.2015, Train Support: 15907
Sad: Train Precision: 0.2404, Train Recall: 0.2934, Train F1: 0.2643, Train Support: 16196
Surprise: Train Precision: 0.4251, Train Recall: 0.5620, Train F1: 0.4841, Train Support: 16009
Validation Loss: 1.8078, Validation Accuracy: 0.3407
Angry: Validation Precision: 0.2563, Validation Recall: 0.1929, Validation F1: 0.2201, Validation Support: 1991
Disgust: Validation Precision: 0.3470, Validation Recall: 0.5716, Validation F1: 0.4318, Validation Support: 2003
Fear: Val

                                                             

Epoch 6/10 => Train Loss: 1.7911, Train Accuracy: 0.3580
Angry: Train Precision: 0.2731, Train Recall: 0.2015, Train F1: 0.2319, Train Support: 15838
Disgust: Train Precision: 0.4436, Train Recall: 0.4749, Train F1: 0.4587, Train Support: 16108
Fear: Train Precision: 0.2222, Train Recall: 0.0925, Train F1: 0.1306, Train Support: 15954
Happy: Train Precision: 0.4011, Train Recall: 0.5477, Train F1: 0.4631, Train Support: 15988
Neutral: Train Precision: 0.2815, Train Recall: 0.2811, Train F1: 0.2813, Train Support: 15918
Sad: Train Precision: 0.2630, Train Recall: 0.2840, Train F1: 0.2731, Train Support: 16011
Surprise: Train Precision: 0.4671, Train Recall: 0.6181, Train F1: 0.5321, Train Support: 16183
Validation Loss: 1.7813, Validation Accuracy: 0.3708
Angry: Validation Precision: 0.2717, Validation Recall: 0.3434, Validation F1: 0.3034, Validation Support: 1989
Disgust: Validation Precision: 0.5825, Validation Recall: 0.4239, Validation F1: 0.4907, Validation Support: 2024
Fear: Val

                                                             

Epoch 7/10 => Train Loss: 1.7614, Train Accuracy: 0.3901
Angry: Train Precision: 0.2936, Train Recall: 0.2518, Train F1: 0.2711, Train Support: 15955
Disgust: Train Precision: 0.4969, Train Recall: 0.5199, Train F1: 0.5082, Train Support: 15868
Fear: Train Precision: 0.2481, Train Recall: 0.1061, Train F1: 0.1486, Train Support: 16195
Happy: Train Precision: 0.4458, Train Recall: 0.5745, Train F1: 0.5021, Train Support: 15923
Neutral: Train Precision: 0.3278, Train Recall: 0.3641, Train F1: 0.3450, Train Support: 16264
Sad: Train Precision: 0.2787, Train Recall: 0.2803, Train F1: 0.2795, Train Support: 16045
Surprise: Train Precision: 0.5052, Train Recall: 0.6439, Train F1: 0.5662, Train Support: 15750
Validation Loss: 1.7432, Validation Accuracy: 0.4108
Angry: Validation Precision: 0.2973, Validation Recall: 0.3183, Validation F1: 0.3074, Validation Support: 1989
Disgust: Validation Precision: 0.5329, Validation Recall: 0.5937, Validation F1: 0.5617, Validation Support: 1979
Fear: Val

                                                             

Epoch 8/10 => Train Loss: 1.7350, Train Accuracy: 0.4187
Angry: Train Precision: 0.3278, Train Recall: 0.2919, Train F1: 0.3088, Train Support: 16047
Disgust: Train Precision: 0.5426, Train Recall: 0.5397, Train F1: 0.5411, Train Support: 16065
Fear: Train Precision: 0.2800, Train Recall: 0.1208, Train F1: 0.1687, Train Support: 16140
Happy: Train Precision: 0.4862, Train Recall: 0.5953, Train F1: 0.5352, Train Support: 15941
Neutral: Train Precision: 0.3491, Train Recall: 0.4015, Train F1: 0.3735, Train Support: 15802
Sad: Train Precision: 0.3016, Train Recall: 0.3196, Train F1: 0.3103, Train Support: 16132
Surprise: Train Precision: 0.5307, Train Recall: 0.6682, Train F1: 0.5916, Train Support: 15873
Validation Loss: 1.7246, Validation Accuracy: 0.4286
Angry: Validation Precision: 0.3365, Validation Recall: 0.3261, Validation F1: 0.3312, Validation Support: 2064
Disgust: Validation Precision: 0.4939, Validation Recall: 0.6162, Validation F1: 0.5483, Validation Support: 1907
Fear: Val

                                                             

Epoch 9/10 => Train Loss: 1.7079, Train Accuracy: 0.4480
Angry: Train Precision: 0.3625, Train Recall: 0.3296, Train F1: 0.3453, Train Support: 16082
Disgust: Train Precision: 0.5960, Train Recall: 0.5735, Train F1: 0.5845, Train Support: 15968
Fear: Train Precision: 0.2918, Train Recall: 0.1423, Train F1: 0.1913, Train Support: 16005
Happy: Train Precision: 0.5205, Train Recall: 0.6281, Train F1: 0.5692, Train Support: 16039
Neutral: Train Precision: 0.3743, Train Recall: 0.4378, Train F1: 0.4035, Train Support: 16020
Sad: Train Precision: 0.3209, Train Recall: 0.3293, Train F1: 0.3251, Train Support: 15972
Surprise: Train Precision: 0.5622, Train Recall: 0.6968, Train F1: 0.6223, Train Support: 15914
Validation Loss: 1.6891, Validation Accuracy: 0.4684
Angry: Validation Precision: 0.3504, Validation Recall: 0.4328, Validation F1: 0.3872, Validation Support: 1948
Disgust: Validation Precision: 0.6357, Validation Recall: 0.5891, Validation F1: 0.6115, Validation Support: 2020
Fear: Val

                                                             

Epoch 10/10 => Train Loss: 1.6869, Train Accuracy: 0.4691
Angry: Train Precision: 0.3784, Train Recall: 0.3699, Train F1: 0.3741, Train Support: 16027
Disgust: Train Precision: 0.6422, Train Recall: 0.5979, Train F1: 0.6193, Train Support: 16085
Fear: Train Precision: 0.3303, Train Recall: 0.1574, Train F1: 0.2132, Train Support: 16005
Happy: Train Precision: 0.5504, Train Recall: 0.6310, Train F1: 0.5879, Train Support: 16001
Neutral: Train Precision: 0.3900, Train Recall: 0.4649, Train F1: 0.4242, Train Support: 16036
Sad: Train Precision: 0.3351, Train Recall: 0.3547, Train F1: 0.3446, Train Support: 15854
Surprise: Train Precision: 0.5795, Train Recall: 0.7062, Train F1: 0.6366, Train Support: 15992
Validation Loss: 1.6660, Validation Accuracy: 0.4919
Angry: Validation Precision: 0.3778, Validation Recall: 0.4078, Validation F1: 0.3922, Validation Support: 2001
Disgust: Validation Precision: 0.7236, Validation Recall: 0.6090, Validation F1: 0.6613, Validation Support: 2033
Fear: Va



In [15]:
def calculate_metrics_per_class(true_labels, predicted_labels, label_mapping):
    unique_labels = list(label_mapping.keys())
    precision, recall, f1, support = precision_recall_fscore_support(true_labels, predicted_labels, labels=unique_labels)
    accuracy = accuracy_score(true_labels, predicted_labels)
    
    metrics_per_class = {}
    for i, idx in enumerate(unique_labels):
        metrics_per_class[idx] = {
            'precision': precision[i],
            'recall': recall[i],
            'f1': f1[i],
            'support': support[i]
        }

    return accuracy, metrics_per_class

# Funzione per il test
def test(model, test_loader, criterion, device, label_mapping):
    model.eval()
    running_loss = 0.0
    true_labels = []
    predicted_labels = []

    with torch.no_grad():
        for inputs, labels in tqdm(test_loader, desc='Testing', leave=False):
            inputs, labels = inputs.to(device), labels.to(device)

            outputs = model(inputs)
            loss = criterion(outputs, labels)
            running_loss += loss.item()

            _, preds = torch.max(outputs, 1)
            true_labels.extend(labels.cpu().numpy())
            predicted_labels.extend(preds.cpu().numpy())

    average_loss = running_loss / len(test_loader)
    accuracy, metrics_per_class = calculate_metrics_per_class(true_labels, predicted_labels, label_mapping)

    return average_loss, accuracy, metrics_per_class

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Carica il modello con i pesi migliori
best_model = EmotionCNN()
best_model.load_state_dict(torch.load(r'C:\Users\marco\OneDrive\Documenti\CV_project\ComputerVisionProject\Models\paper1_models\best_model_paper_1_20_epochs.pth', map_location=torch.device('cpu')))
best_model.to(device)
criterion = nn.CrossEntropyLoss()
your_label_mapping = {0: 'Angry', 1: 'Disgust', 2: 'Fear', 3: 'Happy', 4: 'Neutral', 5: 'Sad', 6: 'Surprise'}


# Test
test_loss, test_accuracy, test_metrics_per_class = test(best_model, test_loader, criterion, device, your_label_mapping)

# Print metrics per class per il test set
print(f'Test Loss: {test_loss:.4f}, Test Accuracy: {test_accuracy:.4f}')

for idx, label in your_label_mapping.items():
    print(f'{label}: Test Precision: {test_metrics_per_class[idx]["precision"]:.4f}, Test Recall: {test_metrics_per_class[idx]["recall"]:.4f}, Test F1: {test_metrics_per_class[idx]["f1"]:.4f}, Test Support: {test_metrics_per_class[idx]["support"]}')


                                                          

Test Loss: 1.5174, Test Accuracy: 0.6449
Angry: Test Precision: 0.6377, Test Recall: 0.5531, Test F1: 0.5924, Test Support: 2985
Disgust: Test Precision: 0.8761, Test Recall: 0.8458, Test F1: 0.8607, Test Support: 3035
Fear: Test Precision: 0.6128, Test Recall: 0.3440, Test F1: 0.4407, Test Support: 2968
Happy: Test Precision: 0.7352, Test Recall: 0.7319, Test F1: 0.7335, Test Support: 3133
Neutral: Test Precision: 0.5036, Test Recall: 0.6099, Test F1: 0.5517, Test Support: 2848
Sad: Test Precision: 0.5069, Test Recall: 0.5822, Test F1: 0.5419, Test Support: 3040
Surprise: Test Precision: 0.6668, Test Recall: 0.8372, Test F1: 0.7424, Test Support: 2991




Test with a different dataset

In [18]:
# transformation definition
transform = transforms.Compose([
    transforms.Grayscale(num_output_channels=1),
    transforms.Resize((48, 48)),
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,))
])

dataset_root = r'C:\Users\marco\OneDrive\Documenti\CV_project\ComputerVisionProject\Data\test_images_emotion'

# create an instance of ImageFolder with the transformations
dataset = ImageFolder(root=dataset_root, transform=transform)

# seed = 42
torch.manual_seed(42)

# extract the labels and the indices of the dataset
labels = [label for _, label in dataset.imgs]

# convert the list into a tensor
labels = torch.tensor(labels)

# calculate the number of instances for each class
counts = torch.bincount(labels)

# calculate the weights for each class
weights = 1.0 / counts.float()

# create a weight vector for each index in the dataset
sample_weights = weights[labels]

# set the number of samples for the train set and the test set
train_size = (number_instances_over_under_sampling_/10) * 7 * 0.1
val_size = (number_instances_over_under_sampling_/10) * 7 * 0.1
test_size = (number_instances_over_under_sampling_/10) * 7 * 0.8

# crea un sampler per il train set and one for the test set
train_sampler = torch.utils.data.WeightedRandomSampler(sample_weights, int(train_size))
val_sampler = torch.utils.data.WeightedRandomSampler(sample_weights, int(val_size))
test_sampler = torch.utils.data.WeightedRandomSampler(sample_weights, int(test_size))

# create a dataloader for the train set and the test set with the corresponding samplers
train_loader = DataLoader(dataset, batch_size=batch_size_, sampler=train_sampler, num_workers=4)
val_loader = DataLoader(dataset, batch_size=batch_size_, sampler=val_sampler, num_workers=4)
test_loader = DataLoader(dataset, batch_size=batch_size_, sampler=test_sampler, num_workers=4)

In [30]:
def calculate_metrics_per_class(true_labels, predicted_labels, label_mapping):
    unique_labels = list(label_mapping.keys())
    precision, recall, f1, support = precision_recall_fscore_support(true_labels, predicted_labels, labels=unique_labels)
    accuracy = accuracy_score(true_labels, predicted_labels)
    
    metrics_per_class = {}
    for i, idx in enumerate(unique_labels):
        metrics_per_class[idx] = {
            'precision': precision[i],
            'recall': recall[i],
            'f1': f1[i],
            'support': support[i]
        }

    return accuracy, metrics_per_class

# Funzione per il test
def test(model, test_loader, criterion, device, label_mapping):
    model.eval()
    running_loss = 0.0
    true_labels = []
    predicted_labels = []

    with torch.no_grad():
        for inputs, labels in tqdm(test_loader, desc='Testing', leave=False):
            inputs, labels = inputs.to(device), labels.to(device)

            outputs = model(inputs)
            loss = criterion(outputs, labels)
            running_loss += loss.item()

            _, preds = torch.max(outputs, 1)
            true_labels.extend(labels.cpu().numpy())
            predicted_labels.extend(preds.cpu().numpy())

    average_loss = running_loss / len(test_loader)
    accuracy, metrics_per_class = calculate_metrics_per_class(true_labels, predicted_labels, label_mapping)

    return average_loss, accuracy, metrics_per_class

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Carica il modello con i pesi migliori
best_model = EmotionCNN()
best_model.load_state_dict(torch.load(r'C:\Users\marco\OneDrive\Documenti\CV_project\ComputerVisionProject\Models\paper1_models\best_model_paper_1_20_epochs_bs_48_30k.pth', map_location=torch.device('cpu')))
best_model.to(device)
criterion = nn.CrossEntropyLoss()
your_label_mapping = {0: 'Angry', 1: 'Disgust', 2: 'Fear', 3: 'Happy', 4: 'Neutral', 5: 'Sad', 6: 'Surprise'}


# Test
test_loss, test_accuracy, test_metrics_per_class = test(best_model, test_loader, criterion, device, your_label_mapping)

# Print metrics per class per il test set
print(f'Test Loss: {test_loss:.4f}, Test Accuracy: {test_accuracy:.4f}')

for idx, label in your_label_mapping.items():
    print(f'{label}: Test Precision: {test_metrics_per_class[idx]["precision"]:.4f}, Test Recall: {test_metrics_per_class[idx]["recall"]:.4f}, Test F1: {test_metrics_per_class[idx]["f1"]:.4f}, Test Support: {test_metrics_per_class[idx]["support"]}')


Testing:   0%|          | 0/350 [00:00<?, ?it/s]

                                                          

Test Loss: 1.7224, Test Accuracy: 0.4370
Angry: Test Precision: 0.4473, Test Recall: 0.4866, Test F1: 0.4661, Test Support: 2396
Disgust: Test Precision: 0.5868, Test Recall: 0.1765, Test F1: 0.2714, Test Support: 2413
Fear: Test Precision: 0.4612, Test Recall: 0.2049, Test F1: 0.2838, Test Support: 2435
Happy: Test Precision: 0.4209, Test Recall: 0.9003, Test F1: 0.5736, Test Support: 2396
Neutral: Test Precision: 0.3623, Test Recall: 0.5517, Test F1: 0.4373, Test Support: 2369
Sad: Test Precision: 0.3717, Test Recall: 0.2944, Test F1: 0.3286, Test Support: 2357
Surprise: Test Precision: 0.6118, Test Recall: 0.4486, Test F1: 0.5177, Test Support: 2434




**Live emotion detection**

In [None]:
num_classes = 7
your_label_mapping = {0: 'Angry', 1: 'Disgust', 2: 'Fear', 3: 'Happy', 4: 'Neutral', 5: 'Sad', 6: 'Surprise'}
model = EmotionCNN(num_classes)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.load_state_dict(torch.load(r"C:\Users\marco\OneDrive\Documenti\CV_project\ComputerVisionProject\Models\paper1_models\best_model_paper_1_20_epochs_bs_48_30k.pth", map_location=device))
model.to(device)
model.eval() 

# initialize the face detector
detector = dlib.get_frontal_face_detector()

# initialize the camera
cap = cv2.VideoCapture(0)

# apply the transformations to the face image
transform = transforms.Compose([
    transforms.Grayscale(num_output_channels=1),
    transforms.Resize((48, 48)),
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,))
])

while True:
    # read a frame from the camera
    ret, frame = cap.read()

    # faces detection
    faces = detector(frame)

    # if there is at least one face detected, process the image
    if len(faces) > 0:
        # take only the first face
        face = faces[0]
        
        # cut the face from the frame
        x, y, w, h = face.left(), face.top(), face.width(), face.height()
        face_image = frame[y:y+h, x:x+w]

        # check if the face image is not empty
        if not face_image.size == 0:
            # apply the transformations to the face image
            pil_image = Image.fromarray(cv2.cvtColor(face_image, cv2.COLOR_BGR2RGB))
            input_image = transform(pil_image).unsqueeze(0)  # Aggiunge una dimensione di batch
            input_image = input_image.to(device)

            # model prediction
            with torch.no_grad():
                output = model(input_image)

            # get the label predicted by the model
            _, predicted = torch.max(output, 1)
            predicted_emotion = your_label_mapping[predicted.item()]

            print(f'Predicted Emotion: {predicted_emotion}')

    # show the frame with the face rectangle added
    cv2.imshow("Face Detection", frame)

    # wait for 2 seconds (time in milliseconds)
    cv2.waitKey(1000)

    # if q is pressed, terminate the loop
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

# release the capture
cap.release()
cv2.destroyAllWindows()