In [None]:
# FOR WINDOWS (your env must be called project-venv; if you choose another name add it in .gitignore)
import subprocess

# Set the execution policy
subprocess.run(["Set-ExecutionPolicy", "RemoteSigned", "-Scope", "Process"], shell=True)

# Activate the virtual environment
subprocess.run(["cd", ".\\project-venv\\"], shell=True)
subprocess.run([".\\Scripts\\Activate.ps1"], shell=True)

# Install requirements
subprocess.run(["pip", "install", "-r", "../requirements.txt"], shell=True)
subprocess.run(["pip", "install", "-r", "../emotions_requirements.txt"], shell=True)

In [None]:
import gdown


########## dlib_face_recognition_resnet_model_v1.dat ################

# URL del file di Google Drive
url_1 = 'https://drive.google.com/uc?id=1tXD6dha1ZD4fceLWsGlI89t8HeHlkJYC' 

# Percorso in cui si desidera salvare il file scaricato
output_1 = '../Models/dlib_face_recognition_resnet_model_v1.dat'

gdown.download(url_1, output_1, quiet=False)



########## shape_predictor_68_face_landmarks.dat ###################

# URL del file di Google Drive
url_2 = 'https://drive.google.com/uc?id=1dvIeJtWhObCgSYJt8WKnjIlHhw5Y9ioN'

# Percorso in cui si desidera salvare il file scaricato
output_2 = '../Models/shape_predictor_68_face_landmarks.dat'

gdown.download(url_2, output_2, quiet=False)

# Emotion Recognition task

## Paper implementation 1
https://ieeexplore.ieee.org/abstract/document/9659697?casa_token=zDD7lwwOig8AAAAA:KcIHhupXAXgiaB_C7A0uNDB7ehrsWNyovQdgDu9LmnwToOGU6akB_gjWTy7JCf4UdKK03Is

**CNN architecture**

In [1]:
import torch
import torch.nn as nn

class EmotionCNN(nn.Module):
    def __init__(self, num_classes=7):
        super(EmotionCNN, self).__init__()
        
        self.conv1 = nn.Conv2d(1, 64, kernel_size=5, stride=1, padding=0)
        self.relu1 = nn.ReLU()
        self.maxpool1 = nn.MaxPool2d(kernel_size=5, stride=2)
        
        self.conv2a = nn.Conv2d(64, 64, kernel_size=3, stride=1, padding=1)
        self.relu2a = nn.ReLU()
        self.conv2b = nn.Conv2d(64, 64, kernel_size=3, stride=1, padding=1)
        self.relu2b = nn.ReLU()
        self.avgpool2 = nn.AvgPool2d(kernel_size=3, stride=2)
        
        self.conv3a = nn.Conv2d(64, 128, kernel_size=3, stride=1, padding=1)
        self.relu3a = nn.ReLU()
        self.conv3b = nn.Conv2d(128, 128, kernel_size=3, stride=1, padding=1)
        self.relu3b = nn.ReLU()
        self.avgpool3 = nn.AvgPool2d(kernel_size=3, stride=2)
        
        # verify the output size of conv2 and conv3
        self.dummy_input = torch.randn(1, 1, 48, 48)
        self.dummy_output_size = self._get_conv_output_size(self.dummy_input)
        
        # update fc1 units based on feature map size
        self.fc1 = nn.Linear(self.dummy_output_size, 1024)
        self.relu_fc1 = nn.ReLU()
        self.dropout1 = nn.Dropout(0.2)
        
        self.fc2 = nn.Linear(1024, 1024)
        self.relu_fc2 = nn.ReLU()
        self.dropout2 = nn.Dropout(0.2)
        
        self.fc3 = nn.Linear(1024, num_classes)
        self.softmax = nn.Softmax(dim=1)

    def _get_conv_output_size(self, input_tensor):
        x = self.maxpool1(self.relu1(self.conv1(input_tensor)))
        x = self.relu2a(self.conv2a(x))
        x = self.relu2b(self.conv2b(x))
        x = self.avgpool2(x)
        x = self.relu3a(self.conv3a(x))
        x = self.relu3b(self.conv3b(x))
        x = self.avgpool3(x)
        return x.view(x.size(0), -1).size(1)

    def forward(self, x):
        x = self.maxpool1(self.relu1(self.conv1(x)))
        x = self.relu2a(self.conv2a(x))
        x = self.relu2b(self.conv2b(x))
        x = self.avgpool2(x)
        x = self.relu3a(self.conv3a(x))
        x = self.relu3b(self.conv3b(x))
        x = self.avgpool3(x)
        x = x.view(x.size(0), -1)
        x = self.dropout1(self.relu_fc1(self.fc1(x)))
        x = self.dropout2(self.relu_fc2(self.fc2(x)))
        x = self.softmax(self.fc3(x))
        return x

**hyperparameters**

In [4]:
delete_outliers_k_means = False
delete_outliers_dbscan = False
no_delete_outliers = False

number_instances_over_under_sampling = 5000 # fatto
over_under_sampling = False
no_over_under_sampling = False

batch_size_ = 32
epochs_ = 30

**delete outliers with k-means**

In [None]:
import os
import cv2
import numpy as np

def calculate_pixel_std(image_path):
    image = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
    return np.std(image)

def remove_outliers_in_folder(folder_path, threshold):
    counter = 0
    for filename in os.listdir(folder_path):
        image_path = os.path.join(folder_path, filename)
        pixel_std = calculate_pixel_std(image_path)
        
        if pixel_std > threshold:
            counter += 1
            os.remove(image_path)
    print(counter)

# method based on k-means clustering
def calculate_threshold_kmeans(values, k=1):
    from sklearn.cluster import KMeans
    kmeans = KMeans(n_clusters=k, random_state=0).fit(values.reshape(-1, 1))
    centroids = kmeans.cluster_centers_.flatten()
    threshold = np.mean(centroids)
    return threshold

emotions_folder_path = r"C:\Users\marco\OneDrive\Documenti\CV_project\ComputerVisionProject\Data\emotions_images"
pixel_stds = []
for emotion in os.listdir(emotions_folder_path):
    emotion_folder_path = os.path.join(emotions_folder_path, emotion)
    for filename in os.listdir(emotion_folder_path):
        image_path = os.path.join(emotion_folder_path, filename)
        pixel_std = calculate_pixel_std(image_path)
        pixel_stds.append(pixel_std)

pixel_stds = np.array(pixel_stds)

outlier_threshold = calculate_threshold_kmeans(pixel_stds)
for emotion in os.listdir(emotions_folder_path):
    print(emotion)
    emotion_folder_path = os.path.join(emotions_folder_path, emotion)
    remove_outliers_in_folder(emotion_folder_path, outlier_threshold)

**delete outliers with dbscan**

In [6]:
import os
import cv2
import numpy as np
from sklearn.cluster import DBSCAN

def calculate_pixel_std(image_path):
    image = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
    return np.std(image)

def remove_outliers_dbscan(folder_path, eps, min_samples):
    images = []
    for filename in os.listdir(folder_path):
        image_path = os.path.join(folder_path, filename)
        pixel_std = calculate_pixel_std(image_path)
        images.append([pixel_std])

    images = np.array(images)

    # Utilizza DBSCAN per identificare gli outlier
    dbscan = DBSCAN(eps=eps, min_samples=min_samples)
    labels = dbscan.fit_predict(images)

    # Rimuovi gli outlier
    counter = 0
    for i, (label, image) in enumerate(zip(labels, os.listdir(folder_path))): # usa zip e una variabile diversa per l'immagine
        if label == -1:  # -1 rappresenta gli outlier
            image_path = os.path.join(folder_path, image) # usa la variabile image invece di i
            os.remove(image_path)
            counter += 1
    print(counter)

# Configurazione DBSCAN
dbscan_eps = 0.2  # Raggio di ricerca
dbscan_min_samples = 15  # Numero minimo di campioni in un cluster

emotions_folder_path = r"C:\Users\marco\OneDrive\Documenti\CV_project\ComputerVisionProject\Data\emotions_images"


for emotion in os.listdir(emotions_folder_path):
    emotion_folder_path = os.path.join(emotions_folder_path, emotion)
    print(emotion)
    
    if emotion == 'disgust':
        tmp_folder_path = emotion_folder_path  # Usa lo stesso per la classe "disgust"
        remove_outliers_dbscan(tmp_folder_path, 0.5, 10)
    else:
        remove_outliers_dbscan(emotion_folder_path, dbscan_eps, dbscan_min_samples)


391
80
300
239
325
334
289


**with over and under sampling**

**1-** Using this method, a random selection of number_instances_over_under_sampling instances is made for each class. ########################################################################

In [5]:
from torch.utils.data import random_split, ConcatDataset
from torchvision.datasets import ImageFolder
from torchvision import transforms
from torch.utils.data import DataLoader
import numpy as np
from torch.utils.data import random_split
from torchvision.datasets import ImageFolder
from torchvision import transforms
from torch.utils.data import DataLoader
from sklearn.model_selection import train_test_split


# transformation definition
transform = transforms.Compose([
    transforms.Grayscale(num_output_channels=1),
    transforms.Resize((48, 48)),
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,))
])

dataset_root = r'C:\Users\marco\OneDrive\Documenti\CV_project\ComputerVisionProject\Data\emotions_images'

# create an instance of ImageFolder with the transformations
dataset = ImageFolder(root=dataset_root, transform=transform)

# seed = 42
torch.manual_seed(42)

# extract the labels and the indices of the dataset
labels = [label for _, label in dataset.imgs]

# convert the list into a tensor
labels = torch.tensor(labels)

# calculate the number of instances for each class
counts = torch.bincount(labels)

# calculate the weights for each class
weights = 1.0 / counts.float()

# create a weight vector for each index in the dataset
sample_weights = weights[labels]

# set the number of samples for the train set and the test set
train_size = number_instances_over_under_sampling * 7 * 0.8 
test_size = number_instances_over_under_sampling * 7 * 0.2 

# crea un sampler per il train set and one for the test set
train_sampler = torch.utils.data.WeightedRandomSampler(sample_weights, int(train_size))
test_sampler = torch.utils.data.WeightedRandomSampler(sample_weights, int(test_size))

# create a dataloader for the train set and the test set with the corresponding samplers
batch_size = 32
train_loader = DataLoader(dataset, batch_size=batch_size, sampler=train_sampler, num_workers=4)
test_loader = DataLoader(dataset, batch_size=batch_size, sampler=test_sampler, num_workers=4)


verifying if for each label there are number_instances_over_under_sampling instances

In [6]:
import torch

train_indices = list(train_loader.sampler)
train_counts = torch.bincount(torch.tensor(labels)[train_indices])
print("number of instances for class in the train set:", train_counts)

test_indices = list(test_loader.sampler)
test_counts = torch.bincount(torch.tensor(labels)[test_indices])
print("number of instances for class in the test set:", test_counts)

number of instances for class in the train set: tensor([1593, 1582, 1606, 1537, 1622, 1635, 1625])
number of instances for class in the test set: tensor([398, 386, 391, 408, 398, 411, 408])


  train_counts = torch.bincount(torch.tensor(labels)[train_indices])
  test_counts = torch.bincount(torch.tensor(labels)[test_indices])


**2-** Questo metodo elimina istanze se superano 5000 e le crea usando smote se sono meno di 5000

verifying if for each label there are number_instances_over_under_sampling instances

In [4]:
len(train_loader), len(test_loader)

(730, 663)

**without over and under sampling**

In [2]:
from torch.utils.data import random_split
from torchvision.datasets import ImageFolder
from torchvision import transforms
from torch.utils.data import DataLoader
from sklearn.model_selection import train_test_split

transform = transforms.Compose([
    transforms.Grayscale(num_output_channels=1),
    transforms.Resize((48, 48)),
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,))
])


dataset_root = r'C:\Users\marco\OneDrive\Documenti\CV_project\ComputerVisionProject\Data\emotions_images'


dataset = ImageFolder(root=dataset_root, transform=transform)

torch.manual_seed(42)

labels = [label for _, label in dataset.imgs]
indices = list(range(len(dataset)))

train_indices, test_indices = train_test_split(indices, test_size=0.2, shuffle=True, random_state=42)

train_dataset = torch.utils.data.Subset(dataset, train_indices)
test_dataset = torch.utils.data.Subset(dataset, test_indices)

batch_size = 32
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=4)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False, num_workers=4)

In [7]:
len(train_loader), len(test_loader)

(350, 88)

At the beginning these are the number of instances for each class

In [8]:
# Ottieni le classi (etichette)
classes = dataset.classes

# Conta le istanze per ogni classe
instances_per_class = {cls: 0 for cls in classes}

for _, label in dataset.imgs:
    instances_per_class[classes[label]] += 1

# Stampa il numero di istanze per ogni classe
for cls, count in instances_per_class.items():
    print(f"Class {cls}: {count} istances")

Class angry: 4953 istances
Class disgust: 547 istances
Class fear: 5121 istances
Class happy: 8989 istances
Class neutral: 6198 istances
Class sad: 6077 istances
Class surprise: 4002 istances


**model training**

In [None]:
from sklearn.metrics import precision_score, recall_score, accuracy_score

your_label_mapping = {0: 'Angry', 1: 'Disgust', 2: 'Fear', 3: 'Happy', 4: 'Neutral', 5: 'Sad', 6: 'Surprise'}

num_classes = 7
model = EmotionCNN(num_classes)

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model.to(device)

criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

num_epochs = 30
for epoch in range(num_epochs):
    for images, labels in train_loader:

        outputs = model(images)
        loss = criterion(outputs, labels)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    with torch.no_grad():
        total_correct = 0
        total_samples = 0
        all_predicted = []
        all_labels = []
        
        for images, labels in test_loader:
            outputs = model(images)
            _, predicted = torch.max(outputs, 1)
            all_predicted.extend(predicted.cpu().numpy())
            all_labels.extend(labels.cpu().numpy())
            total_samples += labels.size(0)
            total_correct += (predicted == labels).sum().item()

        accuracy = accuracy_score(all_labels, all_predicted)
        print(f'Epoch [{epoch + 1}/{num_epochs}], Loss: {loss.item()}, Test Accuracy: {accuracy}')

        precision = precision_score(all_labels, all_predicted, average=None)
        recall = recall_score(all_labels, all_predicted, average=None)

        for i, emotion in enumerate(your_label_mapping.values()):
            print(f'Class: {emotion}, Precision: {precision[i]}, Recall: {recall[i]}')

        print(f'Total Accuracy: {accuracy}')


**save and use the model**

In [14]:
torch.save(model.state_dict(), r"C:\Users\marco\OneDrive\Documenti\CV_project\ComputerVisionProject\Models\model2.pth")

**image emotion detection**

In [21]:
model.load_state_dict(torch.load(r"C:\Users\marco\OneDrive\Documenti\CV_project\ComputerVisionProject\Models\model1.pth", map_location=device))
model.to(device)
model.eval()

from PIL import Image


image_path = r"C:\Users\marco\OneDrive\Documenti\CV_project\ComputerVisionProject\Data\emotions_images\happy\Training_99183228.jpg"  # Sostituisci con il percorso effettivo dell'immagine
image = Image.open(image_path)


transform = transforms.Compose([
    transforms.Grayscale(num_output_channels=1),
    transforms.Resize((48, 48)),
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,))
])


input_image = transform(image).unsqueeze(0)
input_image = input_image.to(device)


with torch.no_grad():
    output = model(input_image)

_, predicted = torch.max(output, 1)
predicted_emotion = your_label_mapping[predicted.item()]

print(f'Predicted Emotion: {predicted_emotion}')

Predicted Emotion: Happy


**Live emotion detection**

In [None]:
import cv2
import dlib
import torch
from torchvision import transforms
from PIL import Image

num_classes = 7
your_label_mapping = {0: 'Angry', 1: 'Disgust', 2: 'Fear', 3: 'Happy', 4: 'Neutral', 5: 'Sad', 6: 'Surprise'}
model = EmotionCNN(num_classes)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.load_state_dict(torch.load(r"C:\Users\marco\OneDrive\Documenti\CV_project\ComputerVisionProject\Models\paper1_models\model2.pth", map_location=device))
model.to(device)
model.eval() 

# initialize the face detector
detector = dlib.get_frontal_face_detector()

# initialize the camera
cap = cv2.VideoCapture(0)

# apply the transformations to the face image
transform = transforms.Compose([
    transforms.Grayscale(num_output_channels=1),
    transforms.Resize((48, 48)),
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,))
])

while True:
    # read a frame from the camera
    ret, frame = cap.read()

    # faces detection
    faces = detector(frame)

    # if there is at least one face detected, process the image
    if len(faces) > 0:
        # take only the first face
        face = faces[0]
        
        # cut the face from the frame
        x, y, w, h = face.left(), face.top(), face.width(), face.height()
        face_image = frame[y:y+h, x:x+w]

        # apply the transformations to the face image
        pil_image = Image.fromarray(cv2.cvtColor(face_image, cv2.COLOR_BGR2RGB))
        input_image = transform(pil_image).unsqueeze(0)  # Aggiunge una dimensione di batch
        input_image = input_image.to(device)

        # model prediction
        with torch.no_grad():
            output = model(input_image)

        # get the label predicted by the model
        _, predicted = torch.max(output, 1)
        predicted_emotion = your_label_mapping[predicted.item()]

        print(f'Predicted Emotion: {predicted_emotion}')

    # show the frame with the face rectangle added
    cv2.imshow("Face Detection", frame)

    # wait for 2 seconds (time in milliseconds)
    cv2.waitKey(2000)

    # if q is pressed, terminate the loop
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

# release the capture
cap.release()
cv2.destroyAllWindows()