# Model Cross-Evaluation

## Import the relevant libraries

In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torcheval.metrics import R2Score

from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error

import matplotlib.pyplot as plt
import pandas as pd
import numpy as np

import math

import sys
sys.path.insert(1, '../utils')
from paths import *

## Import the feed-forward neural network used for all feature sets

In [2]:
sys.path.insert(1, '../models')
from feedforward_nn import NeuralNetwork

Define the loss criterion (mean squared error) used during evaluation

In [3]:
# Mean-squared-error loss. test_model reads this module-level name to compute
# the overall test loss, whose square root is reported as the test RMSE.
criterion = nn.MSELoss()

## Define Model Testing Function

In [4]:
def test_model(trained_model, input_test_data, target_test_labels):
  """Evaluate a trained model on a test set and print its regression metrics.

  Prints the overall RMSE, the per-output RMSE (column 0 is treated as
  valence, column 1 as arousal), the per-output R^2, the per-output adjusted
  R^2 (when it is defined) and the overall R^2.

  Args:
    trained_model: Callable invoked as ``trained_model(inputs)``; its output is
      indexed as a 2-D tensor with at least two columns.
    input_test_data: Tensor of test features; cast to float32. ``shape[0]`` is
      used as the number of samples and ``shape[1]`` as the number of features.
    target_test_labels: Tensor of target values; cast to float32 and indexed
      with the same column layout as the predictions.

  Returns:
    A tuple ``(test_pred, rmse, adjusted_r2_score, r2_score)``.
    ``adjusted_r2_score`` is ``None`` when the adjusted R^2 is undefined
    (number of features >= number of samples - 1); ``r2_score`` is the
    overall (non-raw) R^2.
  """
  input_test_data = input_test_data.float()
  target_test_labels = target_test_labels.float()

  with torch.no_grad():
    test_pred = trained_model(input_test_data)
    # `criterion` is the module-level loss defined earlier in the file.
    test_loss = criterion(test_pred.float(), target_test_labels)

    # Separate the output into valence and arousal
    valence_pred = test_pred[:, 0]
    arousal_pred = test_pred[:, 1]

    valence_target = target_test_labels[:, 0]
    arousal_target = target_test_labels[:, 1]

    # Calculate RMSE for valence and arousal separately.
    # Arguments follow sklearn's (y_true, y_pred) order; MSE is symmetric, so
    # the value is the same either way.
    valence_rmse = math.sqrt(mean_squared_error(valence_target, valence_pred))
    arousal_rmse = math.sqrt(mean_squared_error(arousal_target, arousal_pred))

  rmse = math.sqrt(test_loss.item())
  print(f'Test RMSE: {round(rmse, 4)}')

  print(f'Valence RMSE: {round(valence_rmse, 4)}')
  print(f'Arousal RMSE: {round(arousal_rmse, 4)}')

  metric = R2Score(multioutput="raw_values")
  metric.update(test_pred, target_test_labels)
  r2_score = metric.compute()
  print(f'Test R^2 score: {r2_score}')

  num_of_test_samples = input_test_data.shape[0]
  num_of_test_features = input_test_data.shape[1]

  # Always bind the name so the return statement cannot raise
  # UnboundLocalError when the adjusted score is skipped.
  adjusted_r2_score = None

  # Adjusted R^2 divides by (n_samples - n_regressors - 1), so it is only
  # defined when the number of features is strictly below n_samples - 1.
  if num_of_test_features < num_of_test_samples - 1:
    metric = R2Score(multioutput="raw_values", num_regressors=num_of_test_features)
    metric.update(test_pred, target_test_labels)
    adjusted_r2_score = metric.compute()
    print(f'Test Adjusted R^2 score: {adjusted_r2_score}')

  metric = R2Score()
  metric.update(test_pred, target_test_labels)
  r2_score = metric.compute()
  print(f'Test R^2 score (overall): {r2_score}')
  return test_pred, rmse, adjusted_r2_score, r2_score

## Load static annotations for DEAM

In [5]:
# Read the DEAM static annotations and discard the song identifier column in
# one chained expression; the trailing bare name displays the frame.
df_deam_annotations = pd.read_csv(
  '../data/DEAM/processed/annotations/deam_static_annotations.csv'
).drop(columns='song_id')
df_deam_annotations

Unnamed: 0,valence_mean_mapped,arousal_mean_mapped
0,-0.475,-0.500
1,-0.375,-0.425
2,0.175,0.125
3,-0.150,0.075
4,0.200,0.350
...,...,...
1739,-0.275,0.225
1740,0.075,-0.275
1741,0.350,0.300
1742,-0.100,0.100


## Load static annotations for PMEmo

In [6]:
# Read the PMEmo static annotations and discard the song identifier column in
# one chained expression; the trailing bare name displays the frame.
df_pmemo_annotations = pd.read_csv(
  '../data/PMEmo/PMEmo2019/processed/annotations/pmemo_static_annotations.csv'
).drop(columns='song_id')
df_pmemo_annotations

Unnamed: 0,valence_mean_mapped,arousal_mean_mapped
0,0.150,-0.200
1,-0.425,-0.475
2,-0.600,-0.700
3,-0.300,0.025
4,0.450,0.400
...,...,...
762,0.525,0.725
763,0.125,0.750
764,0.325,0.425
765,0.550,0.750


## Define Function to prepare the annotations, features, and model

In [7]:
def prepare_annotations_features_model(path_to_feature_set, test_annotations_dataset, path_to_model):
  """Load a feature set, the matching annotations and a saved model for testing.

  Args:
    path_to_feature_set: Path to a CSV of features. Its first column and its
      'song_id' column are dropped before conversion to a tensor.
    test_annotations_dataset: Either 'pmemo' or 'deam'; selects which of the
      module-level annotation DataFrames is used as the test targets.
    path_to_model: Path to a saved state dict for ``NeuralNetwork``.

  Returns:
    A tuple ``(model, features_tensor, annotations_tensor)`` where ``model`` is
    in evaluation mode and both tensors have dtype float64.

  Raises:
    ValueError: If ``test_annotations_dataset`` is not 'pmemo' or 'deam'.
  """
  # get the correct annotations; reject unknown names instead of silently
  # falling back to DEAM, which would score against the wrong targets
  if test_annotations_dataset == 'pmemo':
    annotations = df_pmemo_annotations
  elif test_annotations_dataset == 'deam':
    annotations = df_deam_annotations
  else:
    raise ValueError(
      "test_annotations_dataset must be 'pmemo' or 'deam', "
      f"got {test_annotations_dataset!r}"
    )

  # load the feature set
  features = pd.read_csv(path_to_feature_set)

  # drop Unnamed:0 column
  features = features[features.columns[1:]]

  features = features.drop('song_id', axis=1)

  # create tensors for the test features and test annotations
  features_tensor = torch.tensor(features.values, dtype=torch.float64)
  annotations_tensor = torch.tensor(annotations.values, dtype=torch.float64)

  # set the seed
  seed = 42
  torch.manual_seed(seed)

  # load the model; map_location lets checkpoints saved from a GPU load on
  # CPU-only machines (the tensors created above live on the CPU as well)
  model = NeuralNetwork(features_tensor.shape[1])
  model.load_state_dict(torch.load(path_to_model, map_location='cpu'))
  model.eval()

  return model, features_tensor, annotations_tensor


## Inter-Dataset Model Evaluations

### Feature set used in model: <ins>DEAM Essentia Best Overall & openSMILE GeMAPS Normalised</ins>
### Feature set tested on: <ins>PMEmo Essentia Best Overall & openSMILE GeMAPS Normalised</ins>

In [8]:
# Cross-dataset check: load the checkpoint saved under the "deam_..." name
# (Essentia best overall + openSMILE GeMAPS, normalised) and pair it with the
# PMEmo feature file and the PMEmo annotations.
model, features_tensor, annotations_tensor = prepare_annotations_features_model(
  path_to_feature_set='../data/PMEmo/PMEmo2019/processed/features/integrated/normalised_essentia_best_overall_opensmile_gemaps_features.csv',
  test_annotations_dataset='pmemo',
  path_to_model='../models/deam_feedforward_nn_essentia_best_overall_opensmile_gemaps_normalised.pt'
)
# test_model prints the metrics and returns the predictions and summary scores.
test_pred, rmse, adjusted_r2_score, r2_score = test_model(model, features_tensor, annotations_tensor)

Test RMSE: 0.3015
Valence RMSE: 0.2838
Arousal RMSE: 0.3182
Test R^2 score: tensor([0.2314, 0.2581])
Test Adjusted R^2 score: tensor([0.1174, 0.1480])
Test R^2 score (overall): 0.2447582483291626


### Feature set used in model: <ins>DEAM Essentia Best Overall & openSMILE eGeMAPS Normalised</ins>
### Feature set tested on: <ins>PMEmo Essentia Best Overall & openSMILE eGeMAPS Normalised</ins>

In [9]:
# Cross-dataset check: load the checkpoint saved under the "deam_..." name
# (Essentia best overall + openSMILE eGeMAPS, normalised) and pair it with the
# PMEmo feature file and the PMEmo annotations.
model, features_tensor, annotations_tensor = prepare_annotations_features_model(
  path_to_feature_set='../data/PMEmo/PMEmo2019/processed/features/integrated/normalised_essentia_best_overall_opensmile_egemaps_features.csv',
  test_annotations_dataset='pmemo',
  path_to_model='../models/deam_feedforward_nn_essentia_best_overall_opensmile_egemaps_normalised.pt'
)
# test_model prints the metrics and returns the predictions and summary scores.
test_pred, rmse, adjusted_r2_score, r2_score = test_model(model, features_tensor, annotations_tensor)

Test RMSE: 0.2849
Valence RMSE: 0.2835
Arousal RMSE: 0.2863
Test R^2 score: tensor([0.2330, 0.3992])
Test Adjusted R^2 score: tensor([0.0835, 0.2820])
Test R^2 score (overall): 0.3161044418811798


### Feature set used in model: <ins>DEAM Essentia Best Overall Mean Normalised</ins>
### Feature set tested on: <ins>PMEmo Essentia Best Overall Mean Normalised</ins>

In [10]:
# Cross-dataset check: load the checkpoint saved under the "deam_..." name
# (Essentia best overall mean, normalised) and pair it with the PMEmo feature
# file and the PMEmo annotations.
model, features_tensor, annotations_tensor = prepare_annotations_features_model(
  path_to_feature_set='../data/PMEmo/PMEmo2019/processed/features/normalised_essentia_best_overall_features.csv',
  test_annotations_dataset='pmemo',
  path_to_model='../models/deam_feedforward_nn_essentia_best_overall_mean_normalised.pt'
)
# test_model prints the metrics and returns the predictions and summary scores.
test_pred, rmse, adjusted_r2_score, r2_score = test_model(model, features_tensor, annotations_tensor)

Test RMSE: 0.3414
Valence RMSE: 0.3262
Arousal RMSE: 0.3561
Test R^2 score: tensor([-0.0149,  0.0708])
Test Adjusted R^2 score: tensor([-0.0664,  0.0237])
Test R^2 score (overall): 0.027972787618637085


### Feature set used in model: <ins>DEAM Essentia Best Valence Mean Normalised</ins>
### Feature set tested on: <ins>PMEmo Essentia Best Valence Mean Normalised</ins>

In [11]:
# Cross-dataset check: load the checkpoint saved under the "deam_..." name
# (Essentia best valence mean, normalised) and pair it with the PMEmo feature
# file and the PMEmo annotations.
model, features_tensor, annotations_tensor = prepare_annotations_features_model(
  path_to_feature_set='../data/PMEmo/PMEmo2019/processed/features/normalised_essentia_best_valence_features.csv',
  test_annotations_dataset='pmemo',
  path_to_model='../models/deam_feedforward_nn_essentia_best_valence_mean_normalised.pt'
)
# test_model prints the metrics and returns the predictions and summary scores.
test_pred, rmse, adjusted_r2_score, r2_score = test_model(model, features_tensor, annotations_tensor)

Test RMSE: 0.2787
Valence RMSE: 0.2642
Arousal RMSE: 0.2924
Test R^2 score: tensor([0.3343, 0.3733])
Test Adjusted R^2 score: tensor([0.2715, 0.3142])
Test R^2 score (overall): 0.35377374291419983


### Feature set used in model: <ins>DEAM Essentia Best Arousal Mean Normalised</ins>
### Feature set tested on: <ins>PMEmo Essentia Best Arousal Mean Normalised</ins>

In [12]:
# Cross-dataset check: load the checkpoint saved under the "deam_..." name
# (Essentia best arousal mean, normalised) and pair it with the PMEmo feature
# file and the PMEmo annotations.
model, features_tensor, annotations_tensor = prepare_annotations_features_model(
  path_to_feature_set='../data/PMEmo/PMEmo2019/processed/features/normalised_essentia_best_arousal_features.csv',
  test_annotations_dataset='pmemo',
  path_to_model='../models/deam_feedforward_nn_essentia_best_arousal_mean_normalised.pt'
)
# test_model prints the metrics and returns the predictions and summary scores.
test_pred, rmse, adjusted_r2_score, r2_score = test_model(model, features_tensor, annotations_tensor)

Test RMSE: 0.289
Valence RMSE: 0.2848
Arousal RMSE: 0.2931
Test R^2 score: tensor([0.2260, 0.3704])
Test Adjusted R^2 score: tensor([-0.2833, -0.0438])
Test R^2 score (overall): 0.2982364296913147


### Feature set used in model: <ins>PMEmo Essentia Best Overall & openSMILE GeMAPS Standardised</ins>
### Feature set tested on: <ins>DEAM Essentia Best Overall & openSMILE GeMAPS Standardised</ins>

In [13]:
# Cross-dataset check in the other direction: load the checkpoint saved under
# the "pmemo_..." name (Essentia best overall + openSMILE GeMAPS, standardised)
# and pair it with the DEAM feature file and the DEAM annotations.
model, features_tensor, annotations_tensor = prepare_annotations_features_model(
  path_to_feature_set='../data/DEAM/processed/features/integrated/standardised_essentia_best_overall_opensmile_gemaps_features.csv',
  test_annotations_dataset='deam',
  path_to_model='../models/pmemo_feedforward_nn_essentia_best_overall_opensmile_gemaps_standardised.pt'
)
# test_model prints the metrics and returns the predictions and summary scores.
test_pred, rmse, adjusted_r2_score, r2_score = test_model(model, features_tensor, annotations_tensor)

Test RMSE: 0.3607
Valence RMSE: 0.3254
Arousal RMSE: 0.3928
Test R^2 score: tensor([-0.2302, -0.4862])
Test Adjusted R^2 score: tensor([-0.3043, -0.5757])
Test R^2 score (overall): -0.3581765294075012


### Feature set used in model: <ins>PMEmo Essentia Best Overall & openSMILE eGeMAPS Standardised</ins>
### Feature set tested on: <ins>DEAM Essentia Best Overall & openSMILE eGeMAPS Standardised</ins>

In [14]:
# Cross-dataset check in the other direction: load the checkpoint saved under
# the "pmemo_..." name (Essentia best overall + openSMILE eGeMAPS,
# standardised) and pair it with the DEAM feature file and the DEAM annotations.
model, features_tensor, annotations_tensor = prepare_annotations_features_model(
  path_to_feature_set='../data/DEAM/processed/features/integrated/standardised_essentia_best_overall_opensmile_egemaps_features.csv',
  test_annotations_dataset='deam',
  path_to_model='../models/pmemo_feedforward_nn_essentia_best_overall_opensmile_egemaps_standardised.pt'
)
# test_model prints the metrics and returns the predictions and summary scores.
test_pred, rmse, adjusted_r2_score, r2_score = test_model(model, features_tensor, annotations_tensor)

Test RMSE: 0.3747
Valence RMSE: 0.3291
Arousal RMSE: 0.4152
Test R^2 score: tensor([-0.2585, -0.6604])
Test Adjusted R^2 score: tensor([-0.3558, -0.7887])
Test R^2 score (overall): -0.45946526527404785


### Feature set used in model: <ins>PMEmo Essentia Best Overall Mean Standardised</ins>
### Feature set tested on: <ins>DEAM Essentia Best Overall Mean Standardised</ins>

In [15]:
# Cross-dataset check in the other direction: load the checkpoint saved under
# the "pmemo_..." name (Essentia best overall mean, standardised) and pair it
# with the DEAM feature file and the DEAM annotations.
model, features_tensor, annotations_tensor = prepare_annotations_features_model(
  path_to_feature_set='../data/DEAM/processed/features/standardised_essentia_best_overall_features.csv',
  test_annotations_dataset='deam',
  path_to_model='../models/pmemo_feedforward_nn_essentia_best_overall_mean_standardised.pt'
)
# test_model prints the metrics and returns the predictions and summary scores.
test_pred, rmse, adjusted_r2_score, r2_score = test_model(model, features_tensor, annotations_tensor)

Test RMSE: 0.3956
Valence RMSE: 0.3573
Arousal RMSE: 0.4305
Test R^2 score: tensor([-0.4832, -0.7845])
Test Adjusted R^2 score: tensor([-0.5153, -0.8232])
Test R^2 score (overall): -0.6338290572166443


### Feature set used in model: <ins>PMEmo Essentia Best Valence Mean Standardised</ins>
### Feature set tested on: <ins>DEAM Essentia Best Valence Mean Standardised</ins>

In [16]:
# Cross-dataset check in the other direction: load the checkpoint saved under
# the "pmemo_..." name (Essentia best valence mean, standardised) and pair it
# with the DEAM feature file and the DEAM annotations.
model, features_tensor, annotations_tensor = prepare_annotations_features_model(
  path_to_feature_set='../data/DEAM/processed/features/standardised_essentia_best_valence_features.csv',
  test_annotations_dataset='deam',
  path_to_model='../models/pmemo_feedforward_nn_essentia_best_valence_mean_standardised.pt'
)
# test_model prints the metrics and returns the predictions and summary scores.
test_pred, rmse, adjusted_r2_score, r2_score = test_model(model, features_tensor, annotations_tensor)

Test RMSE: 0.3601
Valence RMSE: 0.3299
Arousal RMSE: 0.3879
Test R^2 score: tensor([-0.2644, -0.4489])
Test Adjusted R^2 score: tensor([-0.3142, -0.5059])
Test R^2 score (overall): -0.3566470146179199


### Feature set used in model: <ins>PMEmo Essentia Best Arousal Mean Normalised</ins>
### Feature set tested on: <ins>DEAM Essentia Best Arousal Mean Normalised</ins>

In [17]:
# Cross-dataset check in the other direction: load the checkpoint saved under
# the "pmemo_..." name (Essentia best arousal mean, normalised) and pair it
# with the DEAM feature file and the DEAM annotations.
# NOTE(review): this cell uses the normalised files while the other
# PMEmo-to-DEAM cells use standardised ones; confirm this is intentional.
model, features_tensor, annotations_tensor = prepare_annotations_features_model(
  path_to_feature_set='../data/DEAM/processed/features/normalised_essentia_best_arousal_features.csv',
  test_annotations_dataset='deam',
  path_to_model='../models/pmemo_feedforward_nn_essentia_best_arousal_mean_normalised.pt'
)
# test_model prints the metrics and returns the predictions and summary scores.
test_pred, rmse, adjusted_r2_score, r2_score = test_model(model, features_tensor, annotations_tensor)

Test RMSE: 0.2963
Valence RMSE: 0.2626
Arousal RMSE: 0.3265
Test R^2 score: tensor([ 0.1991, -0.0268])
Test Adjusted R^2 score: tensor([ 0.0299, -0.2438])
Test R^2 score (overall): 0.08614605665206909
