In [2]:
!pip install --upgrade --no-cache-dir gdown

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting gdown
  Downloading gdown-4.7.1-py3-none-any.whl (15 kB)
Installing collected packages: gdown
  Attempting uninstall: gdown
    Found existing installation: gdown 4.6.6
    Uninstalling gdown-4.6.6:
      Successfully uninstalled gdown-4.6.6
Successfully installed gdown-4.7.1


In [6]:
!gdown https://drive.google.com/uc?id=10oRO0YPelxqjlD1cl8IgKiYRRoo1I5rQ

Downloading...
From (uriginal): https://drive.google.com/uc?id=10oRO0YPelxqjlD1cl8IgKiYRRoo1I5rQ
From (redirected): https://drive.google.com/uc?id=10oRO0YPelxqjlD1cl8IgKiYRRoo1I5rQ&confirm=t&uuid=da72a381-8d55-4938-bcde-a4a0dbaeec26
To: /content/test_set.tar
100% 52.2M/52.2M [00:00<00:00, 106MB/s]


In [None]:
!tar -xvf test_set.tar

In [8]:
data_path = "/content/val_set/"
data_annotations_path = "/content/val_set/annotations/"
data_images_path = "/content/val_set/images/"

In [15]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from torchvision import transforms
from PIL import Image
import numpy as np
import pandas as pd
import torchvision
from tqdm import tqdm
import os
import torch.nn.functional as F

# Data Preparation



### Loading dataset and transforming it into *DataFrames*



In [10]:
def dataset_tp_csv(data_annotations_path, data_images_path):
  data = []
  for i in range(3998):
      try:
          arrousal = np.load(data_annotations_path + '{index}_aro.npy'.format(index=i))
      except FileNotFoundError:
          arrousal = None
          
      try:
          expression = np.load(data_annotations_path + '{index}_exp.npy'.format(index=i))
      except FileNotFoundError:
          expression = None
          
      try:
          valence = np.load(data_annotations_path + '{index}_val.npy'.format(index=i))
      except FileNotFoundError:
          valence = None
          
      try:
          landmarks = np.load(data_annotations_path + '{index}_lnd.npy'.format(index=i))
      except FileNotFoundError:
          landmarks = None
          
      image_path = '{path}/{index}.jpg'.format(path=data_images_path, index=i)
      if os.path.isfile(image_path):
          data.append((i, expression, valence, arrousal, landmarks, image_path))
      else:
          print('Image file not found for index', i)
  return pd.DataFrame.from_records(data, columns=['image_index', 'expression', 'valence', 'arousal', 'landmarks', 'image_path'])

In [None]:
df_test = dataset_tp_csv(data_annotations_path, data_images_path)

### Transforming Dataframes into *Dataloaders*

In [12]:
# Define device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Define transformation pipeline
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
])

# Define dataset class
class FacialExpressionDataset(torch.utils.data.Dataset):
    def __init__(self, df, transform):
        self.df = df
        self.transform = transform
    
    def __getitem__(self, idx):
        img_path = self.df.loc[idx, 'image_path']
        img = Image.open(img_path).convert('RGB')
        img = self.transform(img)
        # landmarks = self.df.loc[idx, 'landmarks']
        # landmarks = torch.tensor(self.df.loc[idx, 'landmarks'], dtype=torch.float32)
        # expression = self.df.loc[idx, 'expression']
        expression_arr = int(self.df.loc[idx, 'expression'].item())
        expression = torch.tensor(expression_arr, dtype=torch.int32)
        # valence = self.df.loc[idx, 'valence']
        valence_arr = float(self.df.loc[idx, 'valence'].item())
        valence = torch.tensor(valence_arr, dtype=torch.float32)
        # arousal = self.df.loc[idx, 'arousal']
        arousal_arr = float(self.df.loc[idx, 'arousal'].item())
        arousal = torch.tensor(arousal_arr, dtype=torch.float32)
        # return img, landmarks, expression, valence, arousal
        return img, expression, valence, arousal
    
    def __len__(self):
        return len(self.df)

In [14]:
batch_size = 32

# Define the AffectNet dataset and dataloaders
test_dataset = FacialExpressionDataset(df_test, transform=transform)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

# FineTuning Models for Facial Expression recognition, and computing Valence and Arousal Task

### 1):EffecientNET

In [22]:
# Define the EfficientNet model
class EfficientNetModel(nn.Module):
    def __init__(self, num_classes=8):
        super(EfficientNetModel, self).__init__()
        self.num_classes = num_classes

        self.layer1 = torchvision.models.efficientnet_b0(pretrained=True).features
        self.layer2 = torchvision.models.efficientnet_b0(pretrained=True).avgpool

        self.classifier = nn.Sequential(
            nn.Dropout(p=0.2, inplace=False),
            nn.Linear(1280, 8),
        )

        self.arousal = nn.Sequential(
            nn.Dropout(p=0.2, inplace=False),
            nn.Linear(1280, 1),
        )

        self.valence = nn.Sequential(
            nn.Dropout(p=0.2, inplace=False),
            nn.Linear(1280, 1),
        )

    def forward(self, x):
        x = self.layer1(x)
        x = self.layer2(x)
        feature = torch.flatten(x, 1)
        logits = self.classifier(feature)

        valence = self.valence(feature)
        arousal = self.arousal(feature)

        return logits, valence, arousal

In [None]:
model_EfficientNet = EfficientNetModel()
model_EfficientNet.load_state_dict(torch.load("EfficientNet_model.pth"))
model_EfficientNet.to(device)
model_EfficientNet.eval()

In [72]:
# Define the loss functions
classification_loss = F.cross_entropy
regression_loss = F.mse_loss

In [59]:
# Evaluate the models on the test set
with torch.no_grad():
    test_loss_EfficientNet_classification = 0
    test_loss_EfficientNet_regression_valence = 0
    test_loss_EfficientNet_regression_arousal = 0
    num_samples = 0
    for images, expressions, valences, arousals in test_loader:
        images = images.to(device)
        expressions = expressions.to(device)
        expressions = expressions.type(torch.LongTensor).to(device)
        valences = valences.to(device)
        arousals = arousals.to(device)
        valences = valences.view(-1, 1)  # Reshape target tensor to (batch_size, 1)
        arousals = arousals.view(-1, 1)  # Reshape target tensor to (batch_size, 1)
        batch_size = images.size(0)
        num_samples += batch_size
        
        # Evaluate EfficientNet model
        output_expression, output_valence, output_arousal = model_EfficientNet(images)
        test_loss_EfficientNet_classification += classification_loss(output_expression, expressions).item() * batch_size
        test_loss_EfficientNet_regression_valence += regression_loss(output_valence, valences).item() * batch_size
        test_loss_EfficientNet_regression_arousal += regression_loss(output_arousal, arousals).item() * batch_size
        

    print(num_samples)
    # Compute the average test
    test_loss_EfficientNet_classification /= num_samples
    test_loss_EfficientNet_regression_valence /= num_samples
    test_loss_EfficientNet_regression_arousal /= num_samples

    # Print the test results
    print("EfficientNet model classification loss:", test_loss_EfficientNet_classification)
    print("EfficientNet model regression loss for valence:", test_loss_EfficientNet_regression_valence)
    print("EfficientNet model regression loss for arousal:", test_loss_EfficientNet_regression_arousal)

2918
EfficientNet model classification loss: 3.590598021205604
EfficientNet model regression loss for valence: 0.19078590933838635
EfficientNet model regression loss for arousal: 0.14018409192112719


In [67]:
from sklearn.metrics import accuracy_score, f1_score
from sklearn.metrics import classification_report

# Evaluate the models on the test set
with torch.no_grad():
    test_loss_EfficientNet_classification = 0
    num_samples = 0
    y_true = []
    y_pred = []
    for images, expressions, _, _ in test_loader:
        images = images.to(device)
        expressions = expressions.to(device)
        expressions = expressions.type(torch.LongTensor).to(device)
        batch_size = images.size(0)
        num_samples += batch_size
        
        # Evaluate EfficientNet model
        output_expression, _, _ = model_EfficientNet(images)
        test_loss_EfficientNet_classification += classification_loss(output_expression, expressions).item() * batch_size
        
        # Collect true and predicted labels
        y_true.extend(expressions.cpu().numpy())
        y_pred.extend(output_expression.argmax(dim=1).cpu().numpy())
        

    # Compute the average test loss
    test_loss_EfficientNet_classification /= num_samples
    
    # Compute classification metrics
    accuracy = accuracy_score(y_true, y_pred)
    f1 = f1_score(y_true, y_pred, average='weighted')

    print(classification_report(y_true, y_pred))
    
    # Print the test results
    print("EfficientNet model classification loss:", test_loss_EfficientNet_classification)
    print("Accuracy:", accuracy)
    print("F1-Score:", f1)


              precision    recall  f1-score   support

           0       0.31      0.70      0.43       355
           1       0.47      0.94      0.63       378
           2       0.52      0.52      0.52       364
           3       0.56      0.41      0.47       369
           4       0.74      0.37      0.50       361
           5       0.81      0.24      0.38       361
           6       0.52      0.57      0.55       375
           7       0.80      0.07      0.12       355

    accuracy                           0.48      2918
   macro avg       0.59      0.48      0.45      2918
weighted avg       0.59      0.48      0.45      2918

EfficientNet model classification loss: 3.590598021205604
Accuracy: 0.48252227553118576
F1-Score: 0.4522966400984679


In [62]:
!pip install krippendorff

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting krippendorff
  Downloading krippendorff-0.6.0-py3-none-any.whl (17 kB)
Installing collected packages: krippendorff
Successfully installed krippendorff-0.6.0


In [74]:
from sklearn.metrics import cohen_kappa_score, roc_auc_score, precision_recall_curve, auc
from sklearn.metrics import mean_squared_error
import krippendorff

# Evaluate the models on the test set
with torch.no_grad():
    num_samples = 0
    true_expressions, pred_expressions = [], []
    true_valences, pred_valences = [], []
    true_arousals, pred_arousals = [], []
    for images, expressions, valences, arousals in test_loader:
        images = images.to(device)
        expressions = expressions.to(device)
        expressions = expressions.type(torch.LongTensor).to(device)
        valences = valences.to(device)
        arousals = arousals.to(device)
        valences = valences.view(-1, 1)  # Reshape target tensor to (batch_size, 1)
        arousals = arousals.view(-1, 1)  # Reshape target tensor to (batch_size, 1)
        batch_size = images.size(0)
        num_samples += batch_size
        
        # Evaluate EfficientNet model
        output_expression, output_valence, output_arousal = model_EfficientNet(images)
        _, predicted_expression = torch.max(output_expression.data, 1)
        true_expressions.extend(expressions.cpu().numpy())
        pred_expressions.extend(predicted_expression.cpu().numpy())
        true_valences.extend(valences.cpu().numpy())
        pred_valences.extend(output_valence.cpu().numpy())
        true_arousals.extend(arousals.cpu().numpy())
        pred_arousals.extend(output_arousal.cpu().numpy())

    # Calculate metrics for classification task
    cohen_kappa = cohen_kappa_score(true_expressions, pred_expressions)
    alpha = krippendorff.alpha(reliability_data=[true_expressions, pred_expressions])
    # roc_auc = roc_auc_score(true_expressions, pred_expressions)
    # precision, recall, _ = precision_recall_curve(true_expressions, pred_expressions)
    # pr_auc = auc(recall, precision)

    # Print the test results
    print("Cohen's Kappa for classification:", cohen_kappa)
    print("Krippendorff's Alpha for classification:", alpha)
    # print("ROC AUC for classification:", roc_auc)
    # print("AUC-PR for classification:", pr_auc)

Cohen's Kappa for classification: 0.40812615236730454
Krippendorff's Alpha for classification: 0.22502347897785457


In [75]:
import numpy as np
from sklearn.metrics import mean_squared_error, r2_score, explained_variance_score, \
    mean_absolute_error, mean_squared_log_error
from scipy.stats import pearsonr, spearmanr

with torch.no_grad():
    test_loss_EfficientNet_classification = 0
    test_loss_EfficientNet_regression_valence = 0
    test_loss_EfficientNet_regression_arousal = 0
    num_samples = 0
    predictions_valence = []
    targets_valence = []
    predictions_arousal = []
    targets_arousal = []
    for images, expressions, valences, arousals in test_loader:
        images = images.to(device)
        expressions = expressions.to(device)
        expressions = expressions.type(torch.LongTensor).to(device)
        valences = valences.to(device)
        arousals = arousals.to(device)
        valences = valences.view(-1, 1)  # Reshape target tensor to (batch_size, 1)
        arousals = arousals.view(-1, 1)  # Reshape target tensor to (batch_size, 1)
        batch_size = images.size(0)
        num_samples += batch_size
        
        # Evaluate EfficientNet model
        output_expression, output_valence, output_arousal = model_EfficientNet(images)
        test_loss_EfficientNet_classification += classification_loss(output_expression, expressions).item() * batch_size
        test_loss_EfficientNet_regression_valence += regression_loss(output_valence, valences).item() * batch_size
        test_loss_EfficientNet_regression_arousal += regression_loss(output_arousal, arousals).item() * batch_size

        # Collect predictions and targets for regression evaluation
        predictions_valence.append(output_valence.cpu().numpy())
        targets_valence.append(valences.cpu().numpy())
        predictions_arousal.append(output_arousal.cpu().numpy())
        targets_arousal.append(arousals.cpu().numpy())

    # Combine predictions and targets for regression evaluation
    predictions_valence = np.concatenate(predictions_valence)
    targets_valence = np.concatenate(targets_valence)
    predictions_arousal = np.concatenate(predictions_arousal)
    targets_arousal = np.concatenate(targets_arousal)

    # Compute the average test
    test_loss_EfficientNet_classification /= num_samples
    test_loss_EfficientNet_regression_valence /= num_samples
    test_loss_EfficientNet_regression_arousal /= num_samples

    # Print the test results
    print("EfficientNet model classification loss:", test_loss_EfficientNet_classification)
    print("EfficientNet model regression loss for valence:", test_loss_EfficientNet_regression_valence)
    print("EfficientNet model regression loss for arousal:", test_loss_EfficientNet_regression_arousal)
    
    # Compute regression metrics
    print("RMSE for valence:", mean_squared_error(targets_valence, predictions_valence, squared=False))
    print("Correlation for valence:", pearsonr(targets_valence.ravel(), predictions_valence.ravel())[0])
    print("Sign Agreement Metric for valence:", np.mean(np.sign(targets_valence) == np.sign(predictions_valence)))
    print("Correlation Coefficient for valence:", spearmanr(targets_valence.ravel(), predictions_valence.ravel())[0])
    
    print("RMSE for arousal:", mean_squared_error(targets_arousal, predictions_arousal, squared=False))
    print("Correlation for arousal:", pearsonr(targets_arousal.ravel(), predictions_arousal.ravel())[0])
    print("Sign Agreement Metric for arousal:", np.mean(np.sign(targets_arousal) == np.sign(predictions_arousal)))
    print("Correlation Coefficient for arousal:", spearmanr(targets_arousal.ravel(), predictions_arousal.ravel())[0])

EfficientNet model classification loss: 3.590598021205604
EfficientNet model regression loss for valence: 0.19078590933838635
EfficientNet model regression loss for arousal: 0.14018409192112719
RMSE for valence: 0.43679047
Correlation for valence: 0.5994073229886933
Sign Agreement Metric for valence: 0.7200137080191912
Correlation Coefficient for valence: 0.5563749148804865
RMSE for arousal: 0.37441167
Correlation for arousal: 0.5151686677268523
Sign Agreement Metric for arousal: 0.7587388622344071
Correlation Coefficient for arousal: 0.49731038884621076


### 2): Mobile Net V3

In [43]:
# Define the EfficientNet model
class MobileNetModel(nn.Module):
    def __init__(self, num_classes=8):
        super(MobileNetModel, self).__init__()
        self.num_classes = num_classes

        self.layer1 = torchvision.models.mobilenet_v3_small(pretrained=True).features
        self.layer2 = torchvision.models.mobilenet_v3_small(pretrained=True).avgpool

        self.classifier = nn.Sequential(
            nn.Dropout(p=0.2, inplace=False),
            nn.Linear(576, 8),
        )

        self.arousal = nn.Sequential(
            nn.Dropout(p=0.2, inplace=False),
            nn.Linear(576, 1),
        )

        self.valence = nn.Sequential(
            nn.Dropout(p=0.2, inplace=False),
            nn.Linear(576, 1),
        )

    def forward(self, x):
        x = self.layer1(x)
        x = self.layer2(x)
        feature = torch.flatten(x, 1)
        logits = self.classifier(feature)
        valence = self.valence(feature)
        arousal = self.arousal(feature)

        return logits, valence, arousal

In [45]:
# Define the loss functions
classification_loss = F.cross_entropy
regression_loss = F.mse_loss

In [77]:
# Evaluate the models on the test set
with torch.no_grad():
    test_loss_MobileNet_classification = 0
    test_loss_MobileNet_regression_valence = 0
    test_loss_MobileNet_regression_arousal = 0
    num_samples = 0
    for images, expressions, valences, arousals in test_loader:
        images = images.to(device)
        expressions = expressions.to(device)
        expressions = expressions.type(torch.LongTensor).to(device)
        valences = valences.to(device)
        arousals = arousals.to(device)
        valences = valences.view(-1, 1)  # Reshape target tensor to (batch_size, 1)
        arousals = arousals.view(-1, 1)  # Reshape target tensor to (batch_size, 1)
        batch_size = images.size(0)
        num_samples += batch_size
        
        # Evaluate MobileNet model
        output_expression, output_valence, output_arousal = model_MobileNet(images)
        test_loss_MobileNet_classification += classification_loss(output_expression, expressions).item() * batch_size
        test_loss_MobileNet_regression_valence += regression_loss(output_valence, valences).item() * batch_size
        test_loss_MobileNet_regression_arousal += regression_loss(output_arousal, arousals).item() * batch_size

    # Compute the average test
    test_loss_MobileNet_classification /= num_samples
    test_loss_MobileNet_regression_valence /= num_samples
    test_loss_MobileNet_regression_arousal /= num_samples

    # Print the test results
    print("MobileNet model classification loss:", test_loss_MobileNet_classification)
    print("MobileNet model regression loss for valence:", test_loss_MobileNet_regression_valence)
    print("MobileNet model regression loss for arousal:", test_loss_MobileNet_regression_arousal)

MobileNet model classification loss: 2.703925766270401
MobileNet model regression loss for valence: 0.1820510380720749
MobileNet model regression loss for arousal: 0.14822316902030241


In [78]:
from sklearn.metrics import accuracy_score, f1_score
from sklearn.metrics import classification_report

# Evaluate the models on the test set
with torch.no_grad():
    test_loss_MobileNet_classification = 0
    num_samples = 0
    y_true = []
    y_pred = []
    for images, expressions, _, _ in test_loader:
        images = images.to(device)
        expressions = expressions.to(device)
        expressions = expressions.type(torch.LongTensor).to(device)
        batch_size = images.size(0)
        num_samples += batch_size
        
        # Evaluate MobileNet model
        output_expression, _, _ = model_MobileNet(images)
        test_loss_MobileNet_classification += classification_loss(output_expression, expressions).item() * batch_size
        
        # Collect true and predicted labels
        y_true.extend(expressions.cpu().numpy())
        y_pred.extend(output_expression.argmax(dim=1).cpu().numpy())
        

    # Compute the average test loss
    test_loss_MobileNet_classification /= num_samples
    
    # Compute classification metrics
    accuracy = accuracy_score(y_true, y_pred)
    f1 = f1_score(y_true, y_pred, average='weighted')

    print(classification_report(y_true, y_pred))
    
    # Print the test results
    print("MobileNet model classification loss:", test_loss_MobileNet_classification)
    print("Accuracy:", accuracy)
    print("F1-Score:", f1)


              precision    recall  f1-score   support

           0       0.30      0.72      0.43       355
           1       0.49      0.93      0.64       378
           2       0.49      0.54      0.51       364
           3       0.56      0.36      0.44       369
           4       0.67      0.38      0.48       361
           5       0.74      0.21      0.33       361
           6       0.46      0.48      0.47       375
           7       0.75      0.03      0.06       355

    accuracy                           0.46      2918
   macro avg       0.56      0.46      0.42      2918
weighted avg       0.56      0.46      0.42      2918

MobileNet model classification loss: 2.703925766270401
Accuracy: 0.4602467443454421
F1-Score: 0.4236166392793974


In [79]:
!pip install krippendorff

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [80]:
from sklearn.metrics import cohen_kappa_score, roc_auc_score, precision_recall_curve, auc
from sklearn.metrics import mean_squared_error
import krippendorff

# Evaluate the models on the test set
with torch.no_grad():
    num_samples = 0
    true_expressions, pred_expressions = [], []
    true_valences, pred_valences = [], []
    true_arousals, pred_arousals = [], []
    for images, expressions, valences, arousals in test_loader:
        images = images.to(device)
        expressions = expressions.to(device)
        expressions = expressions.type(torch.LongTensor).to(device)
        valences = valences.to(device)
        arousals = arousals.to(device)
        valences = valences.view(-1, 1)  # Reshape target tensor to (batch_size, 1)
        arousals = arousals.view(-1, 1)  # Reshape target tensor to (batch_size, 1)
        batch_size = images.size(0)
        num_samples += batch_size
        
        # Evaluate MobileNet model
        output_expression, output_valence, output_arousal = model_MobileNet(images)
        _, predicted_expression = torch.max(output_expression.data, 1)
        true_expressions.extend(expressions.cpu().numpy())
        pred_expressions.extend(predicted_expression.cpu().numpy())
        true_valences.extend(valences.cpu().numpy())
        pred_valences.extend(output_valence.cpu().numpy())
        true_arousals.extend(arousals.cpu().numpy())
        pred_arousals.extend(output_arousal.cpu().numpy())

    # Calculate metrics for classification task
    cohen_kappa = cohen_kappa_score(true_expressions, pred_expressions)
    alpha = krippendorff.alpha(reliability_data=[true_expressions, pred_expressions])
    # roc_auc = roc_auc_score(true_expressions, pred_expressions)
    # precision, recall, _ = precision_recall_curve(true_expressions, pred_expressions)
    # pr_auc = auc(recall, precision)

    # Print the test results
    print("Cohen's Kappa for classification:", cohen_kappa)
    print("Krippendorff's Alpha for classification:", alpha)
    # print("ROC AUC for classification:", roc_auc)
    # print("AUC-PR for classification:", pr_auc)

Cohen's Kappa for classification: 0.38274150153970454
Krippendorff's Alpha for classification: 0.15785437454744822


In [81]:
import numpy as np
from sklearn.metrics import mean_squared_error, r2_score, explained_variance_score, \
    mean_absolute_error, mean_squared_log_error
from scipy.stats import pearsonr, spearmanr

with torch.no_grad():
    test_loss_MobileNet_classification = 0
    test_loss_MobileNet_regression_valence = 0
    test_loss_MobileNet_regression_arousal = 0
    num_samples = 0
    predictions_valence = []
    targets_valence = []
    predictions_arousal = []
    targets_arousal = []
    for images, expressions, valences, arousals in test_loader:
        images = images.to(device)
        expressions = expressions.to(device)
        expressions = expressions.type(torch.LongTensor).to(device)
        valences = valences.to(device)
        arousals = arousals.to(device)
        valences = valences.view(-1, 1)  # Reshape target tensor to (batch_size, 1)
        arousals = arousals.view(-1, 1)  # Reshape target tensor to (batch_size, 1)
        batch_size = images.size(0)
        num_samples += batch_size
        
        # Evaluate MobileNet model
        output_expression, output_valence, output_arousal = model_MobileNet(images)
        test_loss_MobileNet_classification += classification_loss(output_expression, expressions).item() * batch_size
        test_loss_MobileNet_regression_valence += regression_loss(output_valence, valences).item() * batch_size
        test_loss_MobileNet_regression_arousal += regression_loss(output_arousal, arousals).item() * batch_size

        # Collect predictions and targets for regression evaluation
        predictions_valence.append(output_valence.cpu().numpy())
        targets_valence.append(valences.cpu().numpy())
        predictions_arousal.append(output_arousal.cpu().numpy())
        targets_arousal.append(arousals.cpu().numpy())

    # Combine predictions and targets for regression evaluation
    predictions_valence = np.concatenate(predictions_valence)
    targets_valence = np.concatenate(targets_valence)
    predictions_arousal = np.concatenate(predictions_arousal)
    targets_arousal = np.concatenate(targets_arousal)

    # Compute the average test
    test_loss_MobileNet_classification /= num_samples
    test_loss_MobileNet_regression_valence /= num_samples
    test_loss_MobileNet_regression_arousal /= num_samples

    # Print the test results
    print("MobileNet model classification loss:", test_loss_MobileNet_classification)
    print("MobileNet model regression loss for valence:", test_loss_MobileNet_regression_valence)
    print("MobileNet model regression loss for arousal:", test_loss_MobileNet_regression_arousal)
    
    # Compute regression metrics
    print("RMSE for valence:", mean_squared_error(targets_valence, predictions_valence, squared=False))
    print("Correlation for valence:", pearsonr(targets_valence.ravel(), predictions_valence.ravel())[0])
    print("Sign Agreement Metric for valence:", np.mean(np.sign(targets_valence) == np.sign(predictions_valence)))
    print("Correlation Coefficient for valence:", spearmanr(targets_valence.ravel(), predictions_valence.ravel())[0])
    
    print("RMSE for arousal:", mean_squared_error(targets_arousal, predictions_arousal, squared=False))
    print("Correlation for arousal:", pearsonr(targets_arousal.ravel(), predictions_arousal.ravel())[0])
    print("Sign Agreement Metric for arousal:", np.mean(np.sign(targets_arousal) == np.sign(predictions_arousal)))
    print("Correlation Coefficient for arousal:", spearmanr(targets_arousal.ravel(), predictions_arousal.ravel())[0])

MobileNet model classification loss: 2.703925766270401
MobileNet model regression loss for valence: 0.1820510380720749
MobileNet model regression loss for arousal: 0.14822316902030241
RMSE for valence: 0.4266744
Correlation for valence: 0.5883723132835436
Sign Agreement Metric for valence: 0.7388622344071282
Correlation Coefficient for valence: 0.536732794216031
RMSE for arousal: 0.3849976
Correlation for arousal: 0.4819521835987085
Sign Agreement Metric for arousal: 0.747429746401645
Correlation Coefficient for arousal: 0.47646464088177215
