In [1]:
# Import libraries
# transformers: Provides access to pre-trained models like CLAP.
from transformers import ClapModel, AutoProcessor

# librosa: Used for audio analysis and loading audio files.
import librosa

# os: Provides a way to interact with the operating system, like navigating directories.
import os

import torch
import pandas as pd
import numpy as np

In [2]:
# Hugging Face Hub identifier of the CLAP checkpoint that the loading cell passes to
# ClapModel.from_pretrained / AutoProcessor.from_pretrained.
# Alternative checkpoints are kept below; leave exactly one assignment uncommented.
#CLAPmodel = "laion/larger_clap_music"
#CLAPmodel = "laion/larger_clap_general"
#CLAPmodel = "laion/larger_clap_music_and_speech"
CLAPmodel = "laion/clap-htsat-unfused"

In [3]:
# Load CLAP model + processor
# Both objects are created from the same checkpoint identifier (CLAPmodel).
# `model` produces the audio embeddings; `processor` turns raw waveforms into model inputs.
model = ClapModel.from_pretrained(CLAPmodel)
processor = AutoProcessor.from_pretrained(CLAPmodel)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json: 0.00B [00:00, ?B/s]

pytorch_model.bin:   0%|          | 0.00/615M [00:00<?, ?B/s]

preprocessor_config.json:   0%|          | 0.00/541 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/614M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/384 [00:00<?, ?B/s]

vocab.json: 0.00B [00:00, ?B/s]

merges.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/280 [00:00<?, ?B/s]

In [4]:
# Sanity check: print the total number of scalar elements summed over all of the
# model's parameter tensors (a count, not the parameter values themselves).
print('total number of CLAP\'s parameters:', sum(p.numel() for p in model.parameters()))

total number of CLAP's parameters: 153492890


In [5]:
# Estimate how much memory the loaded model's tensors occupy.
# Each tensor contributes nelement() (number of elements) * element_size() (bytes per element).

# Learnable parameters
param_size = sum(tensor.nelement() * tensor.element_size() for tensor in model.parameters())

# Buffers are tensors that are not learnable parameters but still take up space
buffer_size = sum(tensor.nelement() * tensor.element_size() for tensor in model.buffers())

# Total = parameters + buffers, converted from bytes to MB (1 MB = 1024**2 bytes)
size_all_mb = (param_size + buffer_size) / 1024**2
print('CLAP model size: {:.3f}MB'.format(size_all_mb))

CLAP model size: 585.912MB


In [6]:
# Unpack the uploaded zip archives into folders so later cells can read the
# stimuli and data files from disk.
import zipfile

# Archives expected in the current working directory
zip_files = ["Exp1.zip", "Exp2.zip", "Analysis.zip"]

# Directory the archives are extracted into
extract_dir = "/content/" # You can change this if you want to extract elsewhere

# Create the extraction directory if it doesn't exist
os.makedirs(extract_dir, exist_ok=True)

# Extract each archive; a missing archive is reported and skipped rather than aborting the cell
for zip_file in zip_files:
    if not os.path.exists(zip_file):
        print(f"Error: {zip_file} not found.")
        continue
    with zipfile.ZipFile(zip_file, 'r') as zip_ref:
        zip_ref.extractall(extract_dir)
    print(f"Extracted {zip_file} to {extract_dir}")

Extracted Exp1.zip to /content/
Extracted Exp2.zip to /content/
Extracted Analysis.zip to /content/


# Process audio

In [7]:
stimuli_path = "/content/Exp1/Stimuli/"  #/content/Exp2/Stimuli/ if we are running experiment 2
audio_stimuli = []

# Collect the waveform of every .wav file in stimuli_path.
# Files are visited in sorted filename order, so the list order is deterministic.
for file in sorted(os.listdir(stimuli_path)):
    if not file.endswith(".wav"):
        continue
    wav_path = os.path.join(stimuli_path, file)
    # Load at 48,000 Hz: per the original author's note this is the rate CLAP was trained on,
    # and it is the same value later passed to the processor as sampling_rate.
    audio, sample_rate = librosa.load(wav_path, sr=48000)
    audio_stimuli.append(audio)

In [8]:
#Processes the audio waveforms in the audio_stimuli list
#The processor converts the waveforms into padded PyTorch tensors (return_tensors="pt");
#sampling_rate matches the 48000 Hz rate the audio was loaded at.
inputs = processor(audios=audio_stimuli, return_tensors="pt", padding=True, sampling_rate=48000)

#Feeds the tensors into the CLAP model to extract one audio embedding per clip.
#This is inference only, so autograd is disabled: no computation graph is built or kept
#in memory, and the result is a plain tensor that does not require gradients.
with torch.no_grad():
    audio_embeddings = model.get_audio_features(**inputs)

In [9]:
#Print the dimensions of the embedding tensor
#First dimension: one row per audio clip in audio_stimuli (59 in the recorded run)
#Second dimension: length of each audio embedding (512 in the recorded run)
print(audio_embeddings.shape)

torch.Size([59, 512])


## Load csv files and extract related columns

In [10]:
def GetData(path, parameter):
  """Collect a single rating column from every CSV file in a directory.

  CSV files are visited in sorted filename order. Each one is parsed with a
  separator that also swallows whitespace around the commas, and its column
  names are stripped. A file that lacks the requested column, or that raises
  while being read, is reported with print() and skipped.

  Args:
    path: Directory that contains the CSV files.
    parameter: Name of the column to extract from each file.

  Returns:
    A tuple (master_df, per_file_dfs): per_file_dfs is the list of
    single-column DataFrames (one per accepted file, in visiting order) and
    master_df is their row-wise concatenation with a fresh 0..n-1 index.

  Raises:
    ValueError: If no file contributed the requested column.
  """
  required_cols = [parameter]
  csv_names = [name for name in sorted(os.listdir(path)) if name.endswith(".csv")]

  response_dfs = []
  for file in csv_names:
    file_path = os.path.join(path, file)
    try:
      df = pd.read_csv(file_path, sep=r'\s*,\s*', engine='python')
      # Header cells may carry stray whitespace; normalise before the lookup
      df.columns = df.columns.str.strip()

      if not all(col in df.columns for col in required_cols):
        print(f"Skipping file '{file_path}': Missing required columns ({required_cols}). Found columns: {df.columns.tolist()}")
        continue

      # Keep only the requested column
      response_dfs.append(df[required_cols])
    except Exception as e:
      print(f"Error reading or processing file {file_path}: {e}")

  if not response_dfs:
    raise ValueError("No valid CSV files found or processed in path.")

  # Stack the per-file frames into one master frame of human responses
  master_human_responses_df = pd.concat(response_dfs, ignore_index=True)
  return master_human_responses_df, response_dfs

def GetMergeData(parameter, path='/content/Exp1/Data/aggregate_data.csv'):
  """Extract one column from an aggregate-ratings CSV file.

  Args:
    parameter: Name of the column to extract.
    path: CSV file to read. Defaults to the Experiment 1 aggregate file, so
      existing one-argument calls behave as before; pass another file (for
      example the Exp2 equivalent) to reuse the function on other data.

  Returns:
    A tuple (master_df, [master_df]). master_df is a single-column copy of
    the requested column; the one-element list mimics the per-file list
    returned by GetData so both can be passed to the same downstream code.

  Raises:
    ValueError: If `parameter` is not a column of the CSV file.
  """
  df = pd.read_csv(path)

  if parameter not in df.columns:
    raise ValueError(f"Parameter '{parameter}' not found in CSV columns.")

  master_human_responses_df = df[[parameter]].copy()
  response_dfs = [master_human_responses_df]  # mimic structure for compatibility

  return master_human_responses_df, response_dfs

In [11]:
def GetDimData(Type):
  """Load the four dimensional rating columns for one data folder.

  Reads the CSV files in /content/Exp1/Data/{Type}Dim through GetData, once
  per rating column ('positive', 'relaxed', 'awake', 'like'), prints the
  shape of each concatenated frame and displays the first five rows of all
  four side by side.

  Args:
    Type: Prefix of the folder name (the notebook calls this with "I" and "P").

  Returns:
    An 8-tuple laid out as (master_df, per_file_dfs) for 'positive', then
    'relaxed', then 'awake', then 'like'.

  Raises:
    ValueError: Propagated from GetData when a column cannot be loaded from any file.
  """
  data_dir = f'/content/Exp1/Data/{Type}Dim'
  parameters = ('positive', 'relaxed', 'awake', 'like')

  print(f"Data collection for {Type}Dim")
  # parameter name -> (master DataFrame, list of per-file DataFrames); all columns are
  # loaded before anything is reported, matching the original print order
  loaded = {name: GetData(data_dir, name) for name in parameters}

  for name, (master_df, _) in loaded.items():
    print(f"Master human responses {name.capitalize()} DataFrame shape: {master_df.shape}")

  # === Side-by-side preview ===
  print("\nFirst 5 responses for each dimension:")
  preview_dfs = [master_df.head(5).reset_index(drop=True) for master_df, _ in loaded.values()]
  combined_preview = pd.concat(preview_dfs, axis=1)
  combined_preview.columns = list(loaded)
  display(combined_preview)

  # Flatten to (master, per_file, master, per_file, ...) in parameter order
  return tuple(item for name in parameters for item in loaded[name])

def GetDiscData(Type):
  """Load the five discrete-emotion rating columns for one data folder.

  Reads the CSV files in /content/Exp1/Data/{Type}Disc through GetData, once
  per rating column ('happiness', 'sadness', 'anger', 'tenderness', 'fear'),
  prints the shape of each concatenated frame and displays the first five
  rows of all five side by side.

  Args:
    Type: Prefix of the folder name (the notebook calls this with "I" and "P").

  Returns:
    A 10-tuple laid out as (master_df, per_file_dfs) for 'happiness', then
    'sadness', 'anger', 'tenderness' and 'fear'.

  Raises:
    ValueError: Propagated from GetData when a column cannot be loaded from any file.
  """
  data_dir = f'/content/Exp1/Data/{Type}Disc'
  parameters = ('happiness', 'sadness', 'anger', 'tenderness', 'fear')

  print(f"Data collection for {Type}Disc")
  # parameter name -> (master DataFrame, list of per-file DataFrames); all columns are
  # loaded before anything is reported, matching the original print order
  loaded = {name: GetData(data_dir, name) for name in parameters}

  for name, (master_df, _) in loaded.items():
    print(f"Master human responses {name} DataFrame shape: {master_df.shape}")

  # === Side-by-side preview ===
  print("\nFirst 5 responses for each dimension:")
  preview_dfs = [master_df.head(5).reset_index(drop=True) for master_df, _ in loaded.values()]
  combined_preview = pd.concat(preview_dfs, axis=1)
  combined_preview.columns = list(loaded)
  display(combined_preview)

  # Flatten to (master, per_file, master, per_file, ...) in parameter order
  return tuple(item for name in parameters for item in loaded[name])

def GetMergedDimData():
  """Load the four dimensional rating columns via GetMergeData.

  Returns:
    An 8-tuple laid out as (master_df, [master_df]) for 'positive', then
    'relaxed', then 'awake', then 'like'.
  """
  print(f"Data collection for Dim")
  collected = []
  for parameter in ('positive', 'relaxed', 'awake', 'like'):
    # GetMergeData returns (master_df, [master_df]); append both, in order
    collected.extend(GetMergeData(parameter))
  return tuple(collected)

def GetMergedDiscData():
  """Load the five discrete-emotion rating columns via GetMergeData.

  Returns:
    A 10-tuple laid out as (master_df, [master_df]) for 'happiness', then
    'sadness', 'anger', 'tenderness' and 'fear'.
  """
  print(f"Data collection for Disc")
  collected = []
  for parameter in ('happiness', 'sadness', 'anger', 'tenderness', 'fear'):
    # GetMergeData returns (master_df, [master_df]); append both, in order
    collected.extend(GetMergeData(parameter))
  return tuple(collected)

In [12]:
# Load every response set used below. Each call returns, per rating column, the
# concatenated responses followed by the list of per-file frames.

# "I" folder, dimensional ratings
(Imhr_df_positive, IDim_positive_res_dfs,
 Imhr_df_relaxed, IDim_relaxed_res_dfs,
 Imhr_df_awake, IDim_awake_res_dfs,
 Imhr_df_like, IDim_like_res_dfs) = GetDimData("I")
print("\n" + "-"*80 + "\n")

# "P" folder, dimensional ratings
(Pmhr_df_positive, PDim_positive_res_dfs,
 Pmhr_df_relaxed, PDim_relaxed_res_dfs,
 Pmhr_df_awake, PDim_awake_res_dfs,
 Pmhr_df_like, PDim_like_res_dfs) = GetDimData("P")
print("\n" + "-"*80 + "\n")

# "I" folder, discrete-emotion ratings
(Imhr_df_happiness, IDisc_happiness_res_dfs,
 Imhr_df_sadness, IDisc_sadness_res_dfs,
 Imhr_df_anger, IDisc_anger_res_dfs,
 Imhr_df_tenderness, IDisc_tenderness_res_dfs,
 Imhr_df_fear, IDisc_fear_res_dfs) = GetDiscData("I")
print("\n" + "-"*80 + "\n")

# "P" folder, discrete-emotion ratings
(Pmhr_df_happiness, PDisc_happiness_res_dfs,
 Pmhr_df_sadness, PDisc_sadness_res_dfs,
 Pmhr_df_anger, PDisc_anger_res_dfs,
 Pmhr_df_tenderness, PDisc_tenderness_res_dfs,
 Pmhr_df_fear, PDisc_fear_res_dfs) = GetDiscData("P")
print("\n" + "-"*80 + "\n")

# Aggregate file, dimensional ratings
(mhr_df_positive, Dim_positive_res_dfs,
 mhr_df_relaxed, Dim_relaxed_res_dfs,
 mhr_df_awake, Dim_awake_res_dfs,
 mhr_df_like, Dim_like_res_dfs) = GetMergedDimData()
print("\n" + "-"*80 + "\n")

# Aggregate file, discrete-emotion ratings
(mhr_df_happiness, Disc_happiness_res_dfs,
 mhr_df_sadness, Disc_sadness_res_dfs,
 mhr_df_anger, Disc_anger_res_dfs,
 mhr_df_tenderness, Disc_tenderness_res_dfs,
 mhr_df_fear, Disc_fear_res_dfs) = GetMergedDiscData()

Data collection for IDim
Master human responses Positive DataFrame shape: (3835, 1)
Master human responses Relaxed DataFrame shape: (3835, 1)
Master human responses Awake DataFrame shape: (3835, 1)
Master human responses Like DataFrame shape: (3835, 1)

First 5 responses for each dimension:


Unnamed: 0,positive,relaxed,awake,like
0,3.68,3.78,4.42,3.41
1,5.88,5.98,3.89,5.54
2,6.53,5.59,6.59,6.17
3,6.26,5.71,6.88,6.18
4,2.8,2.62,5.15,1.87



--------------------------------------------------------------------------------

Data collection for PDim
Master human responses Positive DataFrame shape: (3953, 1)
Master human responses Relaxed DataFrame shape: (3953, 1)
Master human responses Awake DataFrame shape: (3953, 1)
Master human responses Like DataFrame shape: (3953, 1)

First 5 responses for each dimension:


Unnamed: 0,positive,relaxed,awake,like
0,2.23,9.0,2.37,1.68
1,6.56,5.64,4.12,7.52
2,5.11,6.23,4.04,6.72
3,7.14,7.67,2.95,7.41
4,1.85,1.77,2.98,1.4



--------------------------------------------------------------------------------

Data collection for IDisc
Master human responses happiness DataFrame shape: (3894, 1)
Master human responses sadness DataFrame shape: (3894, 1)
Master human responses anger DataFrame shape: (3894, 1)
Master human responses tenderness DataFrame shape: (3894, 1)
Master human responses fear DataFrame shape: (3894, 1)

First 5 responses for each dimension:


Unnamed: 0,happiness,sadness,anger,tenderness,fear
0,1.0,6.97,5.01,9.0,7.89
1,1.0,6.0,5.8,6.78,1.0
2,1.0,2.42,5.99,6.44,1.09
3,3.92,6.13,2.25,5.96,1.22
4,1.99,7.92,6.82,5.83,1.39



--------------------------------------------------------------------------------

Data collection for PDisc
Master human responses happiness DataFrame shape: (3835, 1)
Master human responses sadness DataFrame shape: (3835, 1)
Master human responses anger DataFrame shape: (3835, 1)
Master human responses tenderness DataFrame shape: (3835, 1)
Master human responses fear DataFrame shape: (3835, 1)

First 5 responses for each dimension:


Unnamed: 0,happiness,sadness,anger,tenderness,fear
0,1.32,1.3,1.27,1.25,3.18
1,1.0,1.44,1.76,1.0,1.24
2,1.0,5.87,1.23,1.28,1.56
3,1.92,3.36,4.82,1.2,2.32
4,1.23,2.31,7.1,1.61,2.86



--------------------------------------------------------------------------------

Data collection for Dim

--------------------------------------------------------------------------------

Data collection for Disc


# Prepare features X and targets y

In [13]:
from sklearn.model_selection import train_test_split

def Test_Train_Split(master_human_responses, response_dfs, parameter):
  """Pair the audio embeddings with human ratings and split into train/test sets.

  The feature matrix is the full block of rows from the global
  `audio_embeddings`, repeated once per entry of `response_dfs`. Rows are
  matched to ratings purely by position: every response file is assumed to
  list its ratings in the same order as the embeddings.

  NOTE(review): the split is a random row-level split, so the same clip
  embedding can land in both the training and the test set (paired with
  ratings from different response files). If the goal is to measure
  generalisation to unseen clips, a split grouped by stimulus would be needed.

  Args:
    master_human_responses: DataFrame holding all ratings; must contain `parameter`.
    response_dfs: List of per-file DataFrames; only its length is used.
    parameter: Name of the rating column to use as the target.

  Returns:
    (X_train, X_test, y_train, y_test) as NumPy arrays; the targets keep the
    (n, 1) column shape.

  Raises:
    ValueError: If the number of feature rows differs from the number of ratings.
  """
  num_participants = len(response_dfs)
  if master_human_responses.shape[0] % len(audio_stimuli) != 0:
      print("Warning: Total responses is not a perfect multiple of unique audio files. This might indicate inconsistent data or that not all participants rated all items, which could break implicit ordering.")

  # Convert the embeddings to NumPy once, then stack one full copy per response file
  # (np.tile with reps=(k, 1) repeats the whole block k times along the rows).
  embeddings = audio_embeddings.detach().cpu().numpy()
  X = np.tile(embeddings, (num_participants, 1))

  # Extract y from the concatenated DataFrame
  y = master_human_responses[[parameter]].values

  print(f"Shape of X (features) after implicit alignment: {X.shape}")
  print(f"Shape of y (labels) after implicit alignment: {y.shape}\n")

  # Sanity check: X and y must have the same number of rows
  if X.shape[0] != y.shape[0]:
      raise ValueError("Number of rows in X and y do not match after implicit alignment. This indicates an issue with the implicit ordering assumption or data loading.")

  # --- Split Data into Training and Testing Sets ---

  TEST_PERCENTAGE = 0.2  # Adjust as needed; the remainder is used for training

  X_train, X_test, y_train, y_test = train_test_split(
      X, y, test_size=TEST_PERCENTAGE, random_state=42
  )

  return X_train, X_test, y_train, y_test

In [14]:
def SplitDim(Type, mhr_df_positive, Dim_positive_res_dfs, mhr_df_relaxed, Dim_relaxed_res_dfs, mhr_df_awake, Dim_awake_res_dfs, mhr_df_like, Dim_like_res_dfs):
  """Run Test_Train_Split for the four dimensional ratings and check the results agree in shape.

  Args:
    Type: Label prefix used only in the printed heading.
    mhr_df_* / Dim_*_res_dfs: For each of 'positive', 'relaxed', 'awake' and
      'like', the concatenated responses and the list of per-file frames.

  Returns:
    A 16-tuple: (X_train, X_test, y_train, y_test) for 'positive', followed by
    the same four arrays for 'relaxed', 'awake' and 'like'.

  Raises:
    ValueError: If any of the four arrays differs in shape between parameters.
  """
  print(f"Data collection for {Type}Dim")
  positive_split = Test_Train_Split(mhr_df_positive, Dim_positive_res_dfs, 'positive')
  relaxed_split = Test_Train_Split(mhr_df_relaxed, Dim_relaxed_res_dfs, 'relaxed')
  awake_split = Test_Train_Split(mhr_df_awake, Dim_awake_res_dfs, 'awake')
  like_split = Test_Train_Split(mhr_df_like, Dim_like_res_dfs, 'like')

  # zip(*...) groups the X_train arrays together, then X_test, y_train, y_test;
  # within each group every parameter must have produced the same shape.
  for same_role_arrays in zip(positive_split, relaxed_split, awake_split, like_split):
    if len({array.shape for array in same_role_arrays}) != 1:
      raise ValueError("Training and testing sets do not match after implicit alignment. This indicates an issue with the implicit ordering assumption or data loading.")

  X_train_positive, X_test_positive, y_train_positive, y_test_positive = positive_split
  print(f"Training set size for each parameter: {X_train_positive.shape}, {y_train_positive.shape}")
  print(f"Testing set size for each parameter: {X_test_positive.shape}, {y_test_positive.shape}\n")

  return (*positive_split, *relaxed_split, *awake_split, *like_split)

def SplitDisc(Type, mhr_df_happiness, Disc_happiness_res_dfs, mhr_df_sadness, Disc_sadness_res_dfs, mhr_df_anger, Disc_anger_res_dfs, mhr_df_tenderness, Disc_tenderness_res_dfs, mhr_df_fear, Disc_fear_res_dfs):
  """Run Test_Train_Split for the five discrete-emotion ratings and check the results agree in shape.

  Args:
    Type: Label prefix used only in the printed heading.
    mhr_df_* / Disc_*_res_dfs: For each of 'happiness', 'sadness', 'anger',
      'tenderness' and 'fear', the concatenated responses and the list of
      per-file frames.

  Returns:
    A 20-tuple: (X_train, X_test, y_train, y_test) for 'happiness', followed
    by the same four arrays for 'sadness', 'anger', 'tenderness' and 'fear'.

  Raises:
    ValueError: If any of the four arrays differs in shape between parameters.
  """
  print(f"Data collection for {Type}Disc")
  happiness_split = Test_Train_Split(mhr_df_happiness, Disc_happiness_res_dfs, 'happiness')
  sadness_split = Test_Train_Split(mhr_df_sadness, Disc_sadness_res_dfs, 'sadness')
  anger_split = Test_Train_Split(mhr_df_anger, Disc_anger_res_dfs, 'anger')
  tenderness_split = Test_Train_Split(mhr_df_tenderness, Disc_tenderness_res_dfs, 'tenderness')
  fear_split = Test_Train_Split(mhr_df_fear, Disc_fear_res_dfs, 'fear')

  # zip(*...) groups the X_train arrays together, then X_test, y_train, y_test;
  # within each group every parameter must have produced the same shape.
  for same_role_arrays in zip(happiness_split, sadness_split, anger_split, tenderness_split, fear_split):
    if len({array.shape for array in same_role_arrays}) != 1:
      raise ValueError("Training and testing sets do not match after implicit alignment. This indicates an issue with the implicit ordering assumption or data loading.")

  X_train_happiness, X_test_happiness, y_train_happiness, y_test_happiness = happiness_split
  print(f"Training set size for each parameter: {X_train_happiness.shape}, {y_train_happiness.shape}")
  print(f"Testing set size for each parameter: {X_test_happiness.shape}, {y_test_happiness.shape}\n")

  return (*happiness_split, *sadness_split, *anger_split, *tenderness_split, *fear_split)

In [15]:
# Build train/test splits for every response set loaded earlier.
# Naming: the I*/P* prefixes mirror the "I" and "P" data folders; unprefixed names
# hold the splits built from the aggregate file.

# "I" folder, dimensional ratings
(IX_train_positive, IX_test_positive, Iy_train_positive, Iy_test_positive,
IX_train_relaxed, IX_test_relaxed, Iy_train_relaxed, Iy_test_relaxed,
IX_train_awake, IX_test_awake, Iy_train_awake, Iy_test_awake,
IX_train_like, IX_test_like, Iy_train_like, Iy_test_like) = SplitDim("I", Imhr_df_positive, IDim_positive_res_dfs, Imhr_df_relaxed, IDim_relaxed_res_dfs, Imhr_df_awake, IDim_awake_res_dfs, Imhr_df_like, IDim_like_res_dfs)

print("\n" + "-"*80 + "\n")

# "P" folder, dimensional ratings
(PX_train_positive, PX_test_positive, Py_train_positive, Py_test_positive,
PX_train_relaxed, PX_test_relaxed, Py_train_relaxed, Py_test_relaxed,
PX_train_awake, PX_test_awake, Py_train_awake, Py_test_awake,
PX_train_like, PX_test_like, Py_train_like, Py_test_like) = SplitDim("P", Pmhr_df_positive, PDim_positive_res_dfs, Pmhr_df_relaxed, PDim_relaxed_res_dfs, Pmhr_df_awake, PDim_awake_res_dfs, Pmhr_df_like, PDim_like_res_dfs)

print("\n" + "-"*80 + "\n")

# "I" folder, discrete-emotion ratings
(IX_train_happiness, IX_test_happiness, Iy_train_happiness, Iy_test_happiness,
IX_train_sadness, IX_test_sadness, Iy_train_sadness, Iy_test_sadness,
IX_train_anger, IX_test_anger, Iy_train_anger, Iy_test_anger,
IX_train_tenderness, IX_test_tenderness, Iy_train_tenderness, Iy_test_tenderness,
IX_train_fear, IX_test_fear, Iy_train_fear, Iy_test_fear) = SplitDisc("I", Imhr_df_happiness, IDisc_happiness_res_dfs, Imhr_df_sadness, IDisc_sadness_res_dfs, Imhr_df_anger, IDisc_anger_res_dfs, Imhr_df_tenderness, IDisc_tenderness_res_dfs, Imhr_df_fear, IDisc_fear_res_dfs)

print("\n" + "-"*80 + "\n")

# "P" folder, discrete-emotion ratings
(PX_train_happiness, PX_test_happiness, Py_train_happiness, Py_test_happiness,
PX_train_sadness, PX_test_sadness, Py_train_sadness, Py_test_sadness,
PX_train_anger, PX_test_anger, Py_train_anger, Py_test_anger,
PX_train_tenderness, PX_test_tenderness, Py_train_tenderness, Py_test_tenderness,
PX_train_fear, PX_test_fear, Py_train_fear, Py_test_fear) = SplitDisc("P", Pmhr_df_happiness, PDisc_happiness_res_dfs, Pmhr_df_sadness, PDisc_sadness_res_dfs, Pmhr_df_anger, PDisc_anger_res_dfs, Pmhr_df_tenderness, PDisc_tenderness_res_dfs, Pmhr_df_fear, PDisc_fear_res_dfs)

print("\n" + "-"*80 + "\n")

# Aggregate file, dimensional ratings (empty Type prefix, so the heading reads "Dim")
(X_train_positive, X_test_positive, y_train_positive, y_test_positive,
X_train_relaxed, X_test_relaxed, y_train_relaxed, y_test_relaxed,
X_train_awake, X_test_awake, y_train_awake, y_test_awake,
X_train_like, X_test_like, y_train_like, y_test_like) = SplitDim("", mhr_df_positive, Dim_positive_res_dfs, mhr_df_relaxed, Dim_relaxed_res_dfs, mhr_df_awake, Dim_awake_res_dfs, mhr_df_like, Dim_like_res_dfs)

print("\n" + "-"*80 + "\n")

# Aggregate file, discrete-emotion ratings (empty Type prefix, so the heading reads "Disc")
(X_train_happiness, X_test_happiness, y_train_happiness, y_test_happiness,
X_train_sadness, X_test_sadness, y_train_sadness, y_test_sadness,
X_train_anger, X_test_anger, y_train_anger, y_test_anger,
X_train_tenderness, X_test_tenderness, y_train_tenderness, y_test_tenderness,
X_train_fear, X_test_fear, y_train_fear, y_test_fear) = SplitDisc("", mhr_df_happiness, Disc_happiness_res_dfs, mhr_df_sadness, Disc_sadness_res_dfs, mhr_df_anger, Disc_anger_res_dfs, mhr_df_tenderness, Disc_tenderness_res_dfs, mhr_df_fear, Disc_fear_res_dfs)

Data collection for IDim
Shape of X (features) after implicit alignment: (3835, 512)
Shape of y (labels) after implicit alignment: (3835, 1)

Shape of X (features) after implicit alignment: (3835, 512)
Shape of y (labels) after implicit alignment: (3835, 1)

Shape of X (features) after implicit alignment: (3835, 512)
Shape of y (labels) after implicit alignment: (3835, 1)

Shape of X (features) after implicit alignment: (3835, 512)
Shape of y (labels) after implicit alignment: (3835, 1)

Training set size for each parameter: (3068, 512), (3068, 1)
Testing set size for each parameter: (767, 512), (767, 1)


--------------------------------------------------------------------------------

Data collection for PDim
Shape of X (features) after implicit alignment: (3953, 512)
Shape of y (labels) after implicit alignment: (3953, 1)

Shape of X (features) after implicit alignment: (3953, 512)
Shape of y (labels) after implicit alignment: (3953, 1)

Shape of X (features) after implicit alignmen

# Train regression head (=MLP, a few projection layers)

In [16]:
from sklearn.neural_network import MLPRegressor

def Run_MLP(X_train, y_train, parameter):
  """Fit an MLPRegressor (hidden layers of 100 and 50 units) on one rating column.

  Args:
    X_train: Feature matrix, one row per training sample.
    y_train: Training targets. A single-column (n, 1) array is flattened to
      (n,) before fitting, which is the shape scikit-learn expects for a
      single-output regressor; other shapes are passed through unchanged.
    parameter: Name of the rating being modelled; used only in progress messages.

  Returns:
    The fitted MLPRegressor.
  """
  mlp_regressor = MLPRegressor(
      hidden_layer_sizes=(100, 50),
      activation='relu',
      solver='adam',
      max_iter=1000,
      random_state=42, #Determines random number generation for weights and bias
      verbose=False,
      early_stopping=True,
      n_iter_no_change=50,
      tol=1e-4
  )

  # Passing a column vector makes scikit-learn flatten it internally and emit a
  # DataConversionWarning on every fit; flattening here gives the same fit without the warning.
  y_fit = np.asarray(y_train)
  if y_fit.ndim == 2 and y_fit.shape[1] == 1:
      y_fit = y_fit.ravel()

  print(f"Starting MLP Regressor {parameter} training...")
  mlp_regressor.fit(X_train, y_fit)
  print(f"\nMLP Regressor training {parameter} complete.")
  print(f"  → Final loss: {mlp_regressor.loss_:.4f}")
  print(f"  → Iterations: {mlp_regressor.n_iter_}\n")
  return mlp_regressor

In [17]:
def MLPDim(Type, X_train_positive, y_train_positive, X_train_relaxed, y_train_relaxed, X_train_awake, y_train_awake, X_train_like, y_train_like):
  """Train one regressor per dimensional rating via Run_MLP.

  Args:
    Type: Label prefix used only in the printed heading.
    X_train_* / y_train_*: Training features and targets for 'positive',
      'relaxed', 'awake' and 'like'.

  Returns:
    A 4-tuple of fitted regressors in the order positive, relaxed, awake, like.
  """
  print(f"Data collection for {Type}Dim")
  training_sets = (
      (X_train_positive, y_train_positive, 'positive'),
      (X_train_relaxed, y_train_relaxed, 'relaxed'),
      (X_train_awake, y_train_awake, 'awake'),
      (X_train_like, y_train_like, 'like'),
  )
  # Models are trained sequentially, in the order listed above
  return tuple(Run_MLP(features, targets, name) for features, targets, name in training_sets)

def MLPDisc(Type, X_train_happiness, y_train_happiness, X_train_sadness, y_train_sadness, X_train_anger, y_train_anger, X_train_tenderness, y_train_tenderness, X_train_fear, y_train_fear):
  """Train one regressor per discrete-emotion rating via Run_MLP.

  Args:
    Type: Label prefix used only in the printed heading.
    X_train_* / y_train_*: Training features and targets for 'happiness',
      'sadness', 'anger', 'tenderness' and 'fear'.

  Returns:
    A 5-tuple of fitted regressors in the order happiness, sadness, anger,
    tenderness, fear.
  """
  print(f"Data collection for {Type}Disc")
  training_sets = (
      (X_train_happiness, y_train_happiness, 'happiness'),
      (X_train_sadness, y_train_sadness, 'sadness'),
      (X_train_anger, y_train_anger, 'anger'),
      (X_train_tenderness, y_train_tenderness, 'tenderness'),
      (X_train_fear, y_train_fear, 'fear'),
  )
  # Models are trained sequentially, in the order listed above
  return tuple(Run_MLP(features, targets, name) for features, targets, name in training_sets)


In [18]:
# Train one regressor per rating column for each dataset.
# Imlp_*/Pmlp_* are fitted on the "I"/"P" splits; the unprefixed mlp_* models are
# fitted on the splits built from the aggregate file.
Imlp_positive, Imlp_relaxed, Imlp_awake, Imlp_like = MLPDim("I", IX_train_positive, Iy_train_positive, IX_train_relaxed, Iy_train_relaxed, IX_train_awake, Iy_train_awake, IX_train_like, Iy_train_like)
print("\n" + "-"*80 + "\n")
Pmlp_positive, Pmlp_relaxed, Pmlp_awake, Pmlp_like = MLPDim("P", PX_train_positive, Py_train_positive, PX_train_relaxed, Py_train_relaxed, PX_train_awake, Py_train_awake, PX_train_like, Py_train_like)
print("\n" + "-"*80 + "\n")
Imlp_happiness, Imlp_sadness, Imlp_anger, Imlp_tenderness, Imlp_fear = MLPDisc("I", IX_train_happiness, Iy_train_happiness, IX_train_sadness, Iy_train_sadness, IX_train_anger, Iy_train_anger, IX_train_tenderness, Iy_train_tenderness, IX_train_fear, Iy_train_fear)
print("\n" + "-"*80 + "\n")
Pmlp_happiness, Pmlp_sadness, Pmlp_anger, Pmlp_tenderness, Pmlp_fear = MLPDisc("P", PX_train_happiness, Py_train_happiness, PX_train_sadness, Py_train_sadness, PX_train_anger, Py_train_anger, PX_train_tenderness, Py_train_tenderness, PX_train_fear, Py_train_fear)
print("\n" + "-"*80 + "\n")
# Empty Type prefix: the printed headings read "Dim" / "Disc"
mlp_positive, mlp_relaxed, mlp_awake, mlp_like = MLPDim("", X_train_positive, y_train_positive, X_train_relaxed, y_train_relaxed, X_train_awake, y_train_awake, X_train_like, y_train_like)
print("\n" + "-"*80 + "\n")
mlp_happiness, mlp_sadness, mlp_anger, mlp_tenderness, mlp_fear = MLPDisc("", X_train_happiness, y_train_happiness, X_train_sadness, y_train_sadness, X_train_anger, y_train_anger, X_train_tenderness, y_train_tenderness, X_train_fear, y_train_fear)

Data collection for IDim
Starting MLP Regressor positive training...


  y = column_or_1d(y, warn=True)



MLP Regressor training positive complete.
  → Final loss: 1.3681
  → Iterations: 112

Starting MLP Regressor relaxed training...


  y = column_or_1d(y, warn=True)



MLP Regressor training relaxed complete.
  → Final loss: 1.4085
  → Iterations: 90

Starting MLP Regressor awake training...


  y = column_or_1d(y, warn=True)



MLP Regressor training awake complete.
  → Final loss: 1.6303
  → Iterations: 89

Starting MLP Regressor like training...


  y = column_or_1d(y, warn=True)



MLP Regressor training like complete.
  → Final loss: 1.4381
  → Iterations: 84


--------------------------------------------------------------------------------

Data collection for PDim
Starting MLP Regressor positive training...


  y = column_or_1d(y, warn=True)



MLP Regressor training positive complete.
  → Final loss: 1.3562
  → Iterations: 68

Starting MLP Regressor relaxed training...


  y = column_or_1d(y, warn=True)



MLP Regressor training relaxed complete.
  → Final loss: 1.7376
  → Iterations: 190

Starting MLP Regressor awake training...


  y = column_or_1d(y, warn=True)



MLP Regressor training awake complete.
  → Final loss: 1.6656
  → Iterations: 66

Starting MLP Regressor like training...


  y = column_or_1d(y, warn=True)



MLP Regressor training like complete.
  → Final loss: 1.6031
  → Iterations: 88


--------------------------------------------------------------------------------

Data collection for IDisc
Starting MLP Regressor happiness training...


  y = column_or_1d(y, warn=True)



MLP Regressor training happiness complete.
  → Final loss: 1.9734
  → Iterations: 79

Starting MLP Regressor sadness training...


  y = column_or_1d(y, warn=True)



MLP Regressor training sadness complete.
  → Final loss: 2.0236
  → Iterations: 62

Starting MLP Regressor anger training...


  y = column_or_1d(y, warn=True)



MLP Regressor training anger complete.
  → Final loss: 1.9458
  → Iterations: 80

Starting MLP Regressor tenderness training...


  y = column_or_1d(y, warn=True)



MLP Regressor training tenderness complete.
  → Final loss: 2.2959
  → Iterations: 56

Starting MLP Regressor fear training...


  y = column_or_1d(y, warn=True)



MLP Regressor training fear complete.
  → Final loss: 1.8425
  → Iterations: 137


--------------------------------------------------------------------------------

Data collection for PDisc
Starting MLP Regressor happiness training...


  y = column_or_1d(y, warn=True)



MLP Regressor training happiness complete.
  → Final loss: 1.8824
  → Iterations: 74

Starting MLP Regressor sadness training...


  y = column_or_1d(y, warn=True)



MLP Regressor training sadness complete.
  → Final loss: 2.5125
  → Iterations: 61

Starting MLP Regressor anger training...


  y = column_or_1d(y, warn=True)



MLP Regressor training anger complete.
  → Final loss: 2.0753
  → Iterations: 71

Starting MLP Regressor tenderness training...


  y = column_or_1d(y, warn=True)



MLP Regressor training tenderness complete.
  → Final loss: 2.3384
  → Iterations: 82

Starting MLP Regressor fear training...


  y = column_or_1d(y, warn=True)



MLP Regressor training fear complete.
  → Final loss: 2.3160
  → Iterations: 82


--------------------------------------------------------------------------------

Data collection for Dim
Starting MLP Regressor positive training...


  y = column_or_1d(y, warn=True)



MLP Regressor training positive complete.
  → Final loss: 0.0573
  → Iterations: 132

Starting MLP Regressor relaxed training...


  y = column_or_1d(y, warn=True)



MLP Regressor training relaxed complete.
  → Final loss: 0.0205
  → Iterations: 235

Starting MLP Regressor awake training...


  y = column_or_1d(y, warn=True)



MLP Regressor training awake complete.
  → Final loss: 0.0601
  → Iterations: 103

Starting MLP Regressor like training...


  y = column_or_1d(y, warn=True)



MLP Regressor training like complete.
  → Final loss: 0.0523
  → Iterations: 137


--------------------------------------------------------------------------------

Data collection for Disc
Starting MLP Regressor happiness training...


  y = column_or_1d(y, warn=True)



MLP Regressor training happiness complete.
  → Final loss: 0.0199
  → Iterations: 110

Starting MLP Regressor sadness training...

MLP Regressor training sadness complete.
  → Final loss: 0.0284
  → Iterations: 84

Starting MLP Regressor anger training...


  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)



MLP Regressor training anger complete.
  → Final loss: 0.0011
  → Iterations: 328

Starting MLP Regressor tenderness training...


  y = column_or_1d(y, warn=True)



MLP Regressor training tenderness complete.
  → Final loss: 0.0017
  → Iterations: 303

Starting MLP Regressor fear training...


  y = column_or_1d(y, warn=True)



MLP Regressor training fear complete.
  → Final loss: 0.0004
  → Iterations: 496



# Evaluate

In [19]:
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from scipy.stats import pearsonr
import numpy as np

def Evaluate(mlp_regressor, X_test, y_test, parameter):
  """Score a fitted regressor on held-out data.

  Args:
    mlp_regressor: Fitted estimator exposing ``predict(X)``.
    X_test: Test features, passed unchanged to ``predict``.
    y_test: Ground-truth targets. Any array-like; flattened to 1-D.
    parameter: Name of the target being evaluated. Not used in the
      computation; kept so existing callers keep working.

  Returns:
    Tuple ``(mae, corr, mape, rmse, r2)``. ``corr`` is ``nan`` when either
    the targets or the predictions are (near-)constant, because Pearson
    correlation is undefined in that case. ``mape`` is a percentage.
  """
  # np.asarray(...).ravel() also accepts pandas objects and lists, which do
  # not provide ndarray.flatten().
  y_pred = np.asarray(mlp_regressor.predict(X_test)).ravel()
  y_true = np.asarray(y_test).ravel()

  # Evaluation Metrics
  mae = mean_absolute_error(y_true, y_pred)
  rmse = np.sqrt(mean_squared_error(y_true, y_pred))
  r2 = r2_score(y_true, y_pred)

  # Pearson Correlation
  if np.std(y_true) > 1e-6 and np.std(y_pred) > 1e-6:
      corr, _ = pearsonr(y_true, y_pred)
  else:
      print("Pearson Correlation: Cannot calculate (insufficient variance)")
      # Always bind corr so the return statement below cannot raise
      # UnboundLocalError; NaN marks the value as undefined.
      corr = float("nan")

  # Mean Absolute Percentage Error
  # Floor the *magnitude* of the target: clipping the signed value would turn
  # every negative target into a 1e-6 denominator and blow the metric up.
  denominator = np.maximum(np.abs(y_true), 1e-6)
  mape = np.mean(np.abs((y_true - y_pred) / denominator)) * 100

  return  mae, corr, mape, rmse, r2

In [20]:
def format4(x):
    """Return ``x`` converted to float and rendered with four decimal places."""
    value = float(x)
    return format(value, ".4f")

def format2(x):
    """Return ``x`` converted to float and rendered with two decimal places."""
    value = float(x)
    return format(value, ".2f")

def format_tuple(t):
    """Render every item of ``t`` with ``format4`` and join them with ``", "``."""
    rendered = map(format4, t)
    return ", ".join(rendered)

def DisplayDimResults(Type, mlp_positive, X_test_positive, y_test_positive, mlp_relaxed, X_test_relaxed, y_test_relaxed, mlp_awake, X_test_awake, y_test_awake, mlp_like, X_test_like, y_test_like):
  """Evaluate the four dimensional regressors and display a summary table.

  Each (model, X_test, y_test) triple is scored with ``Evaluate``. The table
  shows the mean of each metric across the four targets, plus the per-target
  Pearson correlations and R-squared scores. The targets positive, relaxed,
  awake and like are labelled Valence, Tension, Energy and Like in the table.

  Args:
    Type: Prefix prepended to ``"Dim"`` to form the first column header.
    mlp_*, X_test_*, y_test_*: Fitted model, test features and test targets
      for each of the four targets.
  """
  targets = (
    ('positive', mlp_positive, X_test_positive, y_test_positive),
    ('relaxed', mlp_relaxed, X_test_relaxed, y_test_relaxed),
    ('awake', mlp_awake, X_test_awake, y_test_awake),
    ('like', mlp_like, X_test_like, y_test_like),
  )

  # One (mae, corr, mape, rmse, r2) tuple per target, in the order above.
  per_target = [Evaluate(model, X, y, name) for name, model, X, y in targets]
  # Transpose so each metric becomes its own list across targets.
  maes, corrs, mapes, rmses, r2s = (list(column) for column in zip(*per_target))

  summary = pd.DataFrame(
    [
      ("MAE", format4(np.mean(maes))),
      ("Pearson Correlation Coefficient (Valence, Tension, Energy, Like)",
       format_tuple(corrs)),
      ("Average Correlation", format4(np.mean(corrs))),
      ("MAPE", f"{format2(np.mean(mapes))}%"),
      ("RMSE", format4(np.mean(rmses))),
      ("R-Squared Scores (Valence, Tension, Energy, Like)",
       format_tuple(r2s)),
      ("R-Squared Average", format4(np.mean(r2s))),
    ],
    columns=[f"{Type}Dim", "Statistics"],
  )

  # Render with cell borders and without the row index.
  borders = [{"selector": "td, th", "props": [("border", "1px solid gray")]}]
  display(summary.style.hide(axis="index").set_table_styles(borders))

def DisplayDiscResults(Type, mlp_happiness, X_test_happiness, y_test_happiness, mlp_sadness, X_test_sadness, y_test_sadness, mlp_anger, X_test_anger, y_test_anger, mlp_tenderness, X_test_tenderness, y_test_tenderness, mlp_fear, X_test_fear, y_test_fear):
  """Evaluate the five discrete-emotion regressors and display a summary table.

  Each (model, X_test, y_test) triple is scored with ``Evaluate``. The table
  shows the mean of each metric across the five targets, plus the per-target
  Pearson correlations and R-squared scores, in the order happiness, sadness,
  anger, tenderness, fear.

  Args:
    Type: Prefix prepended to ``"Disc"`` to form the first column header.
    mlp_*, X_test_*, y_test_*: Fitted model, test features and test targets
      for each of the five targets.
  """
  targets = (
    ('happiness', mlp_happiness, X_test_happiness, y_test_happiness),
    ('sadness', mlp_sadness, X_test_sadness, y_test_sadness),
    ('anger', mlp_anger, X_test_anger, y_test_anger),
    ('tenderness', mlp_tenderness, X_test_tenderness, y_test_tenderness),
    ('fear', mlp_fear, X_test_fear, y_test_fear),
  )

  # One (mae, corr, mape, rmse, r2) tuple per target, in the order above.
  per_target = [Evaluate(model, X, y, name) for name, model, X, y in targets]
  # Transpose so each metric becomes its own list across targets.
  maes, corrs, mapes, rmses, r2s = (list(column) for column in zip(*per_target))

  summary = pd.DataFrame(
    [
      ("MAE", format4(np.mean(maes))),
      ("Pearson Correlation Coefficient (Happiness, Sadness, Anger, Tenderness, Fear)",
       format_tuple(corrs)),
      ("Average Correlation", format4(np.mean(corrs))),
      ("MAPE", f"{format2(np.mean(mapes))}%"),
      ("RMSE", format4(np.mean(rmses))),
      ("R-Squared Scores (Happiness, Sadness, Anger, Tenderness, Fear)",
       format_tuple(r2s)),
      ("R-Squared Average", format4(np.mean(r2s))),
    ],
    columns=[f"{Type}Disc", "Statistics"],
  )

  # Render with cell borders and without the row index.
  borders = [{"selector": "td, th", "props": [("border", "1px solid gray")]}]
  display(summary.style.hide(axis="index").set_table_styles(borders))

In [21]:
# Render one summary table per model family, with a dashed rule printed
# between consecutive tables. The "I"- and "P"-prefixed and the unprefixed
# models/test splits come from earlier cells of the notebook.

# "I" variant, dimensional targets
DisplayDimResults(
    "I",
    Imlp_positive, IX_test_positive, Iy_test_positive,
    Imlp_relaxed, IX_test_relaxed, Iy_test_relaxed,
    Imlp_awake, IX_test_awake, Iy_test_awake,
    Imlp_like, IX_test_like, Iy_test_like,
)
print(f"\n{'-' * 80}\n")

# "P" variant, dimensional targets
DisplayDimResults(
    "P",
    Pmlp_positive, PX_test_positive, Py_test_positive,
    Pmlp_relaxed, PX_test_relaxed, Py_test_relaxed,
    Pmlp_awake, PX_test_awake, Py_test_awake,
    Pmlp_like, PX_test_like, Py_test_like,
)
print(f"\n{'-' * 80}\n")

# "I" variant, discrete targets
DisplayDiscResults(
    "I",
    Imlp_happiness, IX_test_happiness, Iy_test_happiness,
    Imlp_sadness, IX_test_sadness, Iy_test_sadness,
    Imlp_anger, IX_test_anger, Iy_test_anger,
    Imlp_tenderness, IX_test_tenderness, Iy_test_tenderness,
    Imlp_fear, IX_test_fear, Iy_test_fear,
)
print(f"\n{'-' * 80}\n")

# "P" variant, discrete targets
DisplayDiscResults(
    "P",
    Pmlp_happiness, PX_test_happiness, Py_test_happiness,
    Pmlp_sadness, PX_test_sadness, Py_test_sadness,
    Pmlp_anger, PX_test_anger, Py_test_anger,
    Pmlp_tenderness, PX_test_tenderness, Py_test_tenderness,
    Pmlp_fear, PX_test_fear, Py_test_fear,
)
print(f"\n{'-' * 80}\n")

# Unprefixed variant, dimensional targets
DisplayDimResults(
    "",
    mlp_positive, X_test_positive, y_test_positive,
    mlp_relaxed, X_test_relaxed, y_test_relaxed,
    mlp_awake, X_test_awake, y_test_awake,
    mlp_like, X_test_like, y_test_like,
)
print(f"\n{'-' * 80}\n")

# Unprefixed variant, discrete targets
DisplayDiscResults(
    "",
    mlp_happiness, X_test_happiness, y_test_happiness,
    mlp_sadness, X_test_sadness, y_test_sadness,
    mlp_anger, X_test_anger, y_test_anger,
    mlp_tenderness, X_test_tenderness, y_test_tenderness,
    mlp_fear, X_test_fear, y_test_fear,
)

IDim,Statistics
MAE,1.3838
"Pearson Correlation Coefficient (Valence, Tension, Energy, Like)","0.5559, 0.6108, 0.2576, 0.5703"
Average Correlation,0.4987
MAPE,42.33%
RMSE,1.7388
"R-Squared Scores (Valence, Tension, Energy, Like)","0.3086, 0.3719, 0.0546, 0.3252"
R-Squared Average,0.2651



--------------------------------------------------------------------------------



PDim,Statistics
MAE,1.4774
"Pearson Correlation Coefficient (Valence, Tension, Energy, Like)","0.5596, 0.5174, 0.4509, 0.5791"
Average Correlation,0.5268
MAPE,45.39%
RMSE,1.8366
"R-Squared Scores (Valence, Tension, Energy, Like)","0.3111, 0.2641, 0.2021, 0.3353"
R-Squared Average,0.2781



--------------------------------------------------------------------------------



IDisc,Statistics
MAE,1.5888
"Pearson Correlation Coefficient (Happiness, Sadness, Anger, Tenderness, Fear)","0.4016, 0.2175, 0.4880, 0.2667, 0.3231"
Average Correlation,0.3394
MAPE,82.44%
RMSE,2.0346
"R-Squared Scores (Happiness, Sadness, Anger, Tenderness, Fear)","0.1612, 0.0427, 0.2289, 0.0705, 0.0989"
R-Squared Average,0.1205



--------------------------------------------------------------------------------



PDisc,Statistics
MAE,1.7996
"Pearson Correlation Coefficient (Happiness, Sadness, Anger, Tenderness, Fear)","0.4128, 0.3628, 0.4230, 0.3342, 0.4076"
Average Correlation,0.3881
MAPE,88.25%
RMSE,2.1917
"R-Squared Scores (Happiness, Sadness, Anger, Tenderness, Fear)","0.1589, 0.1297, 0.1655, 0.1080, 0.1605"
R-Squared Average,0.1445



--------------------------------------------------------------------------------



Dim,Statistics
MAE,0.5212
"Pearson Correlation Coefficient (Valence, Tension, Energy, Like)","0.9355, 0.9282, 0.3446, 0.9299"
Average Correlation,0.7845
MAPE,11.25%
RMSE,0.6575
"R-Squared Scores (Valence, Tension, Energy, Like)","0.8253, 0.8103, -0.1352, 0.7651"
R-Squared Average,0.5664



--------------------------------------------------------------------------------



Disc,Statistics
MAE,0.3549
"Pearson Correlation Coefficient (Happiness, Sadness, Anger, Tenderness, Fear)","0.8877, 0.8546, 0.9354, 0.9180, 0.9254"
Average Correlation,0.9042
MAPE,11.89%
RMSE,0.4698
"R-Squared Scores (Happiness, Sadness, Anger, Tenderness, Fear)","0.7013, 0.6903, 0.8249, 0.7476, 0.7177"
R-Squared Average,0.7364
