In [1]:
from transformers import ClapModel, AutoProcessor
import torch
import librosa
import os
import pandas as pd
import numpy as np

In [2]:
# Load CLAP model + processor
model = ClapModel.from_pretrained("laion/larger_clap_music_and_speech")
processor = AutoProcessor.from_pretrained("laion/larger_clap_music_and_speech")

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json:   0%|          | 0.00/635 [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/776M [00:00<?, ?B/s]

preprocessor_config.json:   0%|          | 0.00/541 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/776M [00:00<?, ?B/s]

tokenizer_config.json: 0.00B [00:00, ?B/s]

vocab.json: 0.00B [00:00, ?B/s]

merges.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/280 [00:00<?, ?B/s]

# Process audio

In [3]:
import zipfile
import os

zip_files = ["Exp1.zip", "Exp2.zip", "Analysis.zip"]

extract_dir = "/content/" # You can change this if you want to extract elsewhere
os.makedirs(extract_dir, exist_ok=True)

for zip_file in zip_files:
    if os.path.exists(zip_file):
        with zipfile.ZipFile(zip_file, 'r') as zip_ref:
            zip_ref.extractall(extract_dir)
        print(f"Extracted {zip_file} to {extract_dir}")
    else:
        print(f"Error: {zip_file} not found.")

Extracted Exp1.zip to /content/
Extracted Exp2.zip to /content/
Extracted Analysis.zip to /content/


In [4]:
audio_stimuli = []
stimuli_path = "/content/Exp1/Stimuli/"

for file in sorted(os.listdir(stimuli_path)):
    if file.endswith(".wav"):
        wav_path = os.path.join(stimuli_path, file)
        #Clap has already been trained on a sample rate of 48,000 so we should use what it knows already
        audio, sample_rate = librosa.load(wav_path, sr=48000)
        audio_stimuli.append(audio)

In [5]:
inputs = processor(audios=audio_stimuli, return_tensors="pt", padding=True, sampling_rate=48000)
audio_embeddings = model.get_audio_features(**inputs)

In [6]:
print(audio_embeddings.shape)

torch.Size([59, 512])


# Process text

In [7]:
discrete_tags = ["happiness", "sadness", "anger", "tenderness", "fear"]

discrete_captions_perceived = ["I perceive this sound as " + tag for tag in discrete_tags]
print(discrete_captions_perceived)
discrete_captions_induced = ["This sound makes me feel " + tag for tag in discrete_tags]
print(discrete_captions_induced)

dimensional_tags = ["positive", "relaxed", "awake"]

dimensional_captions_perceived = ["I perceive this sound as " + tag for tag in dimensional_tags]
print(dimensional_captions_perceived)
dimensional_captions_induced = ["This sound makes me feel " + tag for tag in dimensional_tags]
print(dimensional_captions_induced)


['I perceive this sound as happiness', 'I perceive this sound as sadness', 'I perceive this sound as anger', 'I perceive this sound as tenderness', 'I perceive this sound as fear']
['This sound makes me feel happiness', 'This sound makes me feel sadness', 'This sound makes me feel anger', 'This sound makes me feel tenderness', 'This sound makes me feel fear']
['I perceive this sound as positive', 'I perceive this sound as relaxed', 'I perceive this sound as awake']
['This sound makes me feel positive', 'This sound makes me feel relaxed', 'This sound makes me feel awake']


In [8]:
all_tags = discrete_captions_perceived + discrete_captions_induced + dimensional_captions_perceived + dimensional_captions_induced

tag_inputs = processor(text=dimensional_captions_induced, return_tensors="pt", padding=True)
tag_embeds = model.get_text_features(**tag_inputs)

In [9]:
tag_inputs_disc_i = processor(text=discrete_captions_induced, return_tensors="pt", padding=True)
tag_embeds_disc_i = model.get_text_features(**tag_inputs_disc_i)

In [10]:
tag_inputs_dim_p = processor(text=dimensional_captions_perceived, return_tensors="pt", padding=True)
tag_embeds_dim_p = model.get_text_features(**tag_inputs_dim_p)

In [11]:
tag_inputs_disc_p = processor(text=discrete_captions_perceived, return_tensors="pt", padding=True)
tag_embeds_disc_p = model.get_text_features(**tag_inputs_disc_p)

## Load csv files and extract related columns

In [13]:
IDim_path = '/content/Exp1/Data/IDim/'
IDim_response_dfs = []

for file in sorted(os.listdir(IDim_path)):
    if file.endswith(".csv"):
        file_path = os.path.join(IDim_path, file)
        try:
            df = pd.read_csv(file_path, sep=r'\s*,\s*', engine='python')
            # Crucial: Strip whitespace from column names
            df.columns = df.columns.str.strip()

            # Ensure required rating columns exist
            required_cols = ['positive', 'relaxed', 'awake']
            if all(col in df.columns for col in required_cols):
                # Select only the relevant columns and append to our list
                IDim_response_dfs.append(df[required_cols])
            else:
                print(f"Skipping file '{file_path}': Missing required columns ({required_cols}). Found columns: {df.columns.tolist()}")

        except Exception as e:
            print(f"Error reading or processing file {file_path}: {e}")


# Concatenate all individual DataFrames into one master DataFrame for human responses
if IDim_response_dfs:
    master_human_responses_df = pd.concat(IDim_response_dfs, ignore_index=True)
    print(f"Master human responses DataFrame shape: {master_human_responses_df.shape}\n")
    print(f"Master human responses (first 5 rows):\n{master_human_responses_df.head()}\n")
else:
    raise ValueError("No valid CSV files found or processed in IDim_path.")

Master human responses DataFrame shape: (3835, 3)

Master human responses (first 5 rows):
   positive  relaxed  awake
0      3.68     3.78   4.42
1      5.88     5.98   3.89
2      6.53     5.59   6.59
3      6.26     5.71   6.88
4      2.80     2.62   5.15



In [14]:
PDim_path = '/content/Exp1/Data/PDim/'
PDim_response_dfs = []

for file in sorted(os.listdir(PDim_path)):
    if file.endswith(".csv"):
        file_path = os.path.join(PDim_path, file)
        try:
            df = pd.read_csv(file_path, sep=r'\s*,\s*', engine='python')
            # Crucial: Strip whitespace from column names
            df.columns = df.columns.str.strip()

            # Ensure required rating columns exist
            required_cols = ['positive', 'relaxed', 'awake']
            if all(col in df.columns for col in required_cols):
                # Select only the relevant columns and append to our list
                PDim_response_dfs.append(df[required_cols])
            else:
                print(f"Skipping file '{file_path}': Missing required columns ({required_cols}). Found columns: {df.columns.tolist()}")

        except Exception as e:
            print(f"Error reading or processing file {file_path}: {e}")


# Concatenate all individual DataFrames into one master DataFrame for human responses
if PDim_response_dfs:
    master_human_responses_df_dim_p = pd.concat(PDim_response_dfs, ignore_index=True)
    print(f"Master human responses DataFrame shape: {master_human_responses_df_dim_p.shape}\n")
    print(f"Master human responses (first 5 rows):\n{master_human_responses_df_dim_p.head()}\n")
else:
    raise ValueError("No valid CSV files found or processed in IDim_path.")

Master human responses DataFrame shape: (3953, 3)

Master human responses (first 5 rows):
   positive  relaxed  awake
0      2.23     9.00   2.37
1      6.56     5.64   4.12
2      5.11     6.23   4.04
3      7.14     7.67   2.95
4      1.85     1.77   2.98



In [15]:
IDisc_path = '/content/Exp1/Data/IDisc/'
IDisc_response_dfs = []

for file in sorted(os.listdir(IDisc_path)):
    if file.endswith(".csv"):
        file_path = os.path.join(IDisc_path, file)
        try:
            df = pd.read_csv(file_path, sep=r'\s*,\s*', engine='python')
            # Crucial: Strip whitespace from column names
            df.columns = df.columns.str.strip()

            # Ensure required rating columns exist
            required_cols = ["happiness", "sadness", "anger", "tenderness", "fear"]
            if all(col in df.columns for col in required_cols):
                # Select only the relevant columns and append to our list
                IDisc_response_dfs.append(df[required_cols])
            else:
                print(f"Skipping file '{file_path}': Missing required columns ({required_cols}). Found columns: {df.columns.tolist()}")

        except Exception as e:
            print(f"Error reading or processing file {file_path}: {e}")


# Concatenate all individual DataFrames into one master DataFrame for human responses
if IDisc_response_dfs:
    master_human_responses_df_disc_i = pd.concat(IDisc_response_dfs, ignore_index=True)
    print(f"Master human responses DataFrame shape: {master_human_responses_df_disc_i.shape}\n")
    print(f"Master human responses (first 5 rows):\n{master_human_responses_df_disc_i.head()}\n")
else:
    raise ValueError("No valid CSV files found or processed in IDim_path.")

Master human responses DataFrame shape: (3894, 5)

Master human responses (first 5 rows):
   happiness  sadness  anger  tenderness  fear
0       1.00     6.97   5.01        9.00  7.89
1       1.00     6.00   5.80        6.78  1.00
2       1.00     2.42   5.99        6.44  1.09
3       3.92     6.13   2.25        5.96  1.22
4       1.99     7.92   6.82        5.83  1.39



In [16]:
PDisc_path = '/content/Exp1/Data/PDisc/'
PDisc_response_dfs = []

for file in sorted(os.listdir(PDisc_path)):
    if file.endswith(".csv"):
        file_path = os.path.join(PDisc_path, file)
        try:
            df = pd.read_csv(file_path, sep=r'\s*,\s*', engine='python')
            # Crucial: Strip whitespace from column names
            df.columns = df.columns.str.strip()

            # Ensure required rating columns exist
            required_cols = ["happiness", "sadness", "anger", "tenderness", "fear"]
            if all(col in df.columns for col in required_cols):
                # Select only the relevant columns and append to our list
                PDisc_response_dfs.append(df[required_cols])
            else:
                print(f"Skipping file '{file_path}': Missing required columns ({required_cols}). Found columns: {df.columns.tolist()}")

        except Exception as e:
            print(f"Error reading or processing file {file_path}: {e}")


# Concatenate all individual DataFrames into one master DataFrame for human responses
if PDisc_response_dfs:
    master_human_responses_df_disc_p = pd.concat(PDisc_response_dfs, ignore_index=True)
    print(f"Master human responses DataFrame shape: {master_human_responses_df_disc_p.shape}\n")
    print(f"Master human responses (first 5 rows):\n{master_human_responses_df_disc_p.head()}\n")
else:
    raise ValueError("No valid CSV files found or processed in IDim_path.")

Master human responses DataFrame shape: (3835, 5)

Master human responses (first 5 rows):
   happiness  sadness  anger  tenderness  fear
0       1.32     1.30   1.27        1.25  3.18
1       1.00     1.44   1.76        1.00  1.24
2       1.00     5.87   1.23        1.28  1.56
3       1.92     3.36   4.82        1.20  2.32
4       1.23     2.31   7.10        1.61  2.86



# Prepare features X and targets y

In [17]:
from sklearn.model_selection import train_test_split


num_participants = len(IDim_response_dfs)
if master_human_responses_df.shape[0] % len(audio_stimuli) != 0:
    print("Warning: Total responses is not a perfect multiple of unique audio files. This might indicate inconsistent data or that not all participants rated all items, which could break implicit ordering.")

X_list = []
for _ in range(num_participants):
    X_list.extend(audio_embeddings.detach()) # Add a full set of embeddings for each participant

# Convert to NumPy array
X = np.array(X_list)

# Extract y from the concatenated DataFrame
y = master_human_responses_df[['positive', 'relaxed', 'awake']].values

print(f"Shape of X (features) after implicit alignment: {X.shape}")
print(f"Shape of y (labels) after implicit alignment: {y.shape}\n")

# Sanity check: X and y must have the same number of rows
if X.shape[0] != y.shape[0]:
    raise ValueError("Number of rows in X and y do not match after implicit alignment. This indicates an issue with the implicit ordering assumption or data loading.")

# --- Split Data into Training and Testing Sets ---
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

print(f"Training set size (X_train, y_train): {X_train.shape}, {y_train.shape}")
print(f"Testing set size (X_test, y_test): {X_test.shape}, {y_test.shape}\n")

Shape of X (features) after implicit alignment: (3835, 512)
Shape of y (labels) after implicit alignment: (3835, 3)

Training set size (X_train, y_train): (3068, 512), (3068, 3)
Testing set size (X_test, y_test): (767, 512), (767, 3)



In [18]:
from sklearn.model_selection import train_test_split

num_participants_dim_p = len(PDim_response_dfs)
if master_human_responses_df_dim_p.shape[0] % len(audio_stimuli) != 0:
    print("Warning: Total responses is not a perfect multiple of unique audio files. This might indicate inconsistent data or that not all participants rated all items, which could break implicit ordering.")

X_list = []
for _ in range(num_participants_dim_p):
    X_list.extend(audio_embeddings.detach()) # Add a full set of embeddings for each participant

# Convert to NumPy array
X_dim_p = np.array(X_list)

# Extract y from the concatenated DataFrame
y_dim_p = master_human_responses_df_dim_p[['positive', 'relaxed', 'awake']].values

print(f"Shape of X (features) after implicit alignment: {X_dim_p.shape}")
print(f"Shape of y (labels) after implicit alignment: {y_dim_p.shape}\n")

# Sanity check: X and y must have the same number of rows
if X_dim_p.shape[0] != y_dim_p.shape[0]:
    raise ValueError("Number of rows in X and y do not match after implicit alignment. This indicates an issue with the implicit ordering assumption or data loading.")

# --- Split Data into Training and Testing Sets ---
X_train_dim_p, X_test_dim_p, y_train_dim_p, y_test_dim_p = train_test_split(
    X_dim_p, y_dim_p, test_size=0.2, random_state=42
)

print(f"Training set size (X_train, y_train): {X_dim_p.shape}, {y_train_dim_p.shape}")
print(f"Testing set size (X_test, y_test): {X_test_dim_p.shape}, {y_test_dim_p.shape}\n")

Shape of X (features) after implicit alignment: (3953, 512)
Shape of y (labels) after implicit alignment: (3953, 3)

Training set size (X_train, y_train): (3953, 512), (3162, 3)
Testing set size (X_test, y_test): (791, 512), (791, 3)



In [19]:
from sklearn.model_selection import train_test_split

num_participants_disc_i = len(IDisc_response_dfs)
if master_human_responses_df_disc_i.shape[0] % len(audio_stimuli) != 0:
    print("Warning: Total responses is not a perfect multiple of unique audio files. This might indicate inconsistent data or that not all participants rated all items, which could break implicit ordering.")

X_list = []
for _ in range(num_participants_disc_i):
    X_list.extend(audio_embeddings.detach()) # Add a full set of embeddings for each participant

# Convert to NumPy array
X_disc_i = np.array(X_list)

# Extract y from the concatenated DataFrame
y_disc_i = master_human_responses_df_disc_i[["happiness", "sadness", "anger", "tenderness", "fear"]].values

print(f"Shape of X (features) after implicit alignment: {X_disc_i.shape}")
print(f"Shape of y (labels) after implicit alignment: {y_disc_i.shape}\n")

# Sanity check: X and y must have the same number of rows
if X_disc_i.shape[0] != y_disc_i.shape[0]:
    raise ValueError("Number of rows in X and y do not match after implicit alignment. This indicates an issue with the implicit ordering assumption or data loading.")

# --- Split Data into Training and Testing Sets ---
X_train_disc_i, X_test_disc_i, y_train_disc_i, y_test_disc_i = train_test_split(
    X_disc_i, y_disc_i, test_size=0.2, random_state=42
)

print(f"Training set size (X_train, y_train): {X_disc_i.shape}, {y_train_disc_i.shape}")
print(f"Testing set size (X_test, y_test): {X_test_disc_i.shape}, {y_test_disc_i.shape}\n")

Shape of X (features) after implicit alignment: (3894, 512)
Shape of y (labels) after implicit alignment: (3894, 5)

Training set size (X_train, y_train): (3894, 512), (3115, 5)
Testing set size (X_test, y_test): (779, 512), (779, 5)



In [20]:
from sklearn.model_selection import train_test_split

num_participants_disc_p = len(PDisc_response_dfs)
if master_human_responses_df_disc_p.shape[0] % len(audio_stimuli) != 0:
    print("Warning: Total responses is not a perfect multiple of unique audio files. This might indicate inconsistent data or that not all participants rated all items, which could break implicit ordering.")

X_list = []
for _ in range(num_participants_disc_p):
    X_list.extend(audio_embeddings.detach()) # Add a full set of embeddings for each participant

# Convert to NumPy array
X_disc_p = np.array(X_list)

# Extract y from the concatenated DataFrame
y_disc_p = master_human_responses_df_disc_p[["happiness", "sadness", "anger", "tenderness", "fear"]].values

print(f"Shape of X (features) after implicit alignment: {X_disc_p.shape}")
print(f"Shape of y (labels) after implicit alignment: {y_disc_p.shape}\n")

# Sanity check: X and y must have the same number of rows
if X_disc_p.shape[0] != y_disc_p.shape[0]:
    raise ValueError("Number of rows in X and y do not match after implicit alignment. This indicates an issue with the implicit ordering assumption or data loading.")

# --- Split Data into Training and Testing Sets ---
X_train_disc_p, X_test_disc_p, y_train_disc_p, y_test_disc_p = train_test_split(
    X_disc_p, y_disc_p, test_size=0.2, random_state=42
)

print(f"Training set size (X_train, y_train): {X_disc_p.shape}, {y_train_disc_p.shape}")
print(f"Testing set size (X_test, y_test): {X_test_disc_p.shape}, {y_test_disc_p.shape}\n")

Shape of X (features) after implicit alignment: (3835, 512)
Shape of y (labels) after implicit alignment: (3835, 5)

Training set size (X_train, y_train): (3835, 512), (3068, 5)
Testing set size (X_test, y_test): (767, 512), (767, 5)



# Train regression head (=MLP, a few projection layers)

In [21]:
from sklearn.neural_network import MLPRegressor

mlp_regressor = MLPRegressor(
    hidden_layer_sizes=(100, 50),
    activation='relu',
    solver='adam',
    max_iter=1000,
    random_state=42,
    verbose=True,
    early_stopping=True,
    n_iter_no_change=50,
    tol=1e-4
)

print("Starting MLP Regressor training...")
mlp_regressor.fit(X_train, y_train)
print("\nMLP Regressor training complete.")

Starting MLP Regressor training...
Iteration 1, loss = 13.68906455
Validation score: -4.831260
Iteration 2, loss = 9.83946168
Validation score: -2.425618
Iteration 3, loss = 4.73465400
Validation score: -0.368227
Iteration 4, loss = 2.26078164
Validation score: -0.003795
Iteration 5, loss = 1.82134018
Validation score: 0.135535
Iteration 6, loss = 1.70161423
Validation score: 0.173879
Iteration 7, loss = 1.63992036
Validation score: 0.189991
Iteration 8, loss = 1.60110944
Validation score: 0.199207
Iteration 9, loss = 1.57957796
Validation score: 0.200993
Iteration 10, loss = 1.56349974
Validation score: 0.202945
Iteration 11, loss = 1.55099120
Validation score: 0.205171
Iteration 12, loss = 1.54031398
Validation score: 0.204983
Iteration 13, loss = 1.53214779
Validation score: 0.204596
Iteration 14, loss = 1.52387270
Validation score: 0.206138
Iteration 15, loss = 1.51690921
Validation score: 0.206186
Iteration 16, loss = 1.51262390
Validation score: 0.205948
Iteration 17, loss = 1.50

In [22]:
from sklearn.neural_network import MLPRegressor

mlp_regressor_dim_p = MLPRegressor(
    hidden_layer_sizes=(100, 50),
    activation='relu',
    solver='adam',
    max_iter=1000,
    random_state=42,
    verbose=True,
    early_stopping=True,
    n_iter_no_change=50,
    tol=1e-4
)

print("Starting MLP Regressor training...")
mlp_regressor_dim_p.fit(X_train_dim_p, y_train_dim_p)
print("\nMLP Regressor training complete.")

Starting MLP Regressor training...
Iteration 1, loss = 13.29651569
Validation score: -3.822067
Iteration 2, loss = 9.15599585
Validation score: -1.550501
Iteration 3, loss = 4.13765814
Validation score: -0.167171
Iteration 4, loss = 2.31053973
Validation score: 0.114527
Iteration 5, loss = 1.92286767
Validation score: 0.180841
Iteration 6, loss = 1.82775863
Validation score: 0.217558
Iteration 7, loss = 1.77224910
Validation score: 0.236532
Iteration 8, loss = 1.74008453
Validation score: 0.244478
Iteration 9, loss = 1.71494031
Validation score: 0.249819
Iteration 10, loss = 1.70113811
Validation score: 0.252221
Iteration 11, loss = 1.68950966
Validation score: 0.254274
Iteration 12, loss = 1.67910325
Validation score: 0.255392
Iteration 13, loss = 1.66955134
Validation score: 0.258016
Iteration 14, loss = 1.66572256
Validation score: 0.261545
Iteration 15, loss = 1.65797840
Validation score: 0.262881
Iteration 16, loss = 1.65124472
Validation score: 0.264680
Iteration 17, loss = 1.646

In [23]:
from sklearn.neural_network import MLPRegressor

mlp_regressor_disc_i = MLPRegressor(
    hidden_layer_sizes=(100, 50),
    activation='relu',
    solver='adam',
    max_iter=1000,
    random_state=42,
    verbose=True,
    early_stopping=True,
    n_iter_no_change=50,
    tol=1e-4
)

print("Starting MLP Regressor training...")
mlp_regressor_disc_i.fit(X_train_disc_i, y_train_disc_i)
print("\nMLP Regressor training complete.")

Starting MLP Regressor training...
Iteration 1, loss = 4.85298300
Validation score: -0.664445
Iteration 2, loss = 3.17610768
Validation score: -0.056651
Iteration 3, loss = 2.35837605
Validation score: 0.011791
Iteration 4, loss = 2.26199389
Validation score: 0.049503
Iteration 5, loss = 2.19379551
Validation score: 0.059949
Iteration 6, loss = 2.15225022
Validation score: 0.076332
Iteration 7, loss = 2.11703625
Validation score: 0.085135
Iteration 8, loss = 2.10640310
Validation score: 0.090544
Iteration 9, loss = 2.09623319
Validation score: 0.093481
Iteration 10, loss = 2.08039104
Validation score: 0.095981
Iteration 11, loss = 2.07408082
Validation score: 0.096367
Iteration 12, loss = 2.06570605
Validation score: 0.099986
Iteration 13, loss = 2.05693983
Validation score: 0.101610
Iteration 14, loss = 2.05225393
Validation score: 0.102312
Iteration 15, loss = 2.04492254
Validation score: 0.103174
Iteration 16, loss = 2.04205637
Validation score: 0.103249
Iteration 17, loss = 2.04271

In [24]:
from sklearn.neural_network import MLPRegressor

mlp_regressor_disc_p = MLPRegressor(
    hidden_layer_sizes=(100, 50),
    activation='relu',
    solver='adam',
    max_iter=1000,
    random_state=42,
    verbose=True,
    early_stopping=True,
    n_iter_no_change=50,
    tol=1e-4
)

print("Starting MLP Regressor training...")
mlp_regressor_disc_p.fit(X_train_disc_p, y_train_disc_p)
print("\nMLP Regressor training complete.")

Starting MLP Regressor training...
Iteration 1, loss = 6.95472112
Validation score: -1.092302
Iteration 2, loss = 4.81698396
Validation score: -0.250312
Iteration 3, loss = 3.06460751
Validation score: 0.016783
Iteration 4, loss = 2.71255594
Validation score: 0.056093
Iteration 5, loss = 2.59072169
Validation score: 0.093906
Iteration 6, loss = 2.50906781
Validation score: 0.118181
Iteration 7, loss = 2.45767942
Validation score: 0.133673
Iteration 8, loss = 2.42123180
Validation score: 0.146897
Iteration 9, loss = 2.38853040
Validation score: 0.153862
Iteration 10, loss = 2.36086373
Validation score: 0.164638
Iteration 11, loss = 2.34177864
Validation score: 0.166421
Iteration 12, loss = 2.32466442
Validation score: 0.173175
Iteration 13, loss = 2.31219982
Validation score: 0.174849
Iteration 14, loss = 2.30231923
Validation score: 0.177161
Iteration 15, loss = 2.29452250
Validation score: 0.178580
Iteration 16, loss = 2.29129819
Validation score: 0.179430
Iteration 17, loss = 2.28962

# Evaluate

In [25]:
from sklearn.metrics import mean_absolute_error, mean_squared_error
from scipy.stats import pearsonr

y_pred = mlp_regressor.predict(X_test)

print(f"\nShape of predictions (y_pred): {y_pred.shape}")
print(f"First 5 actual values (y_test):\n{y_test[:5]}")
print(f"First 5 predicted values (y_pred):\n{y_pred[:5]}\n")

# Evaluation Metrics:

# Mean Absolute Percentage Error (MAPE)
absolute_percentage_error = np.abs((y_test - y_pred) / y_test) * 100
mape = np.mean(absolute_percentage_error)
print(f"Mean Absolute Percentage Error (MAPE): {mape:.2f}%\n")

# Mean Absolute Error (MAE)
mae = mean_absolute_error(y_test, y_pred)
print(f"Mean Absolute Error (MAE): {mae:.4f}")

# Root Mean Squared Error (RMSE)
rmse = np.sqrt(mean_squared_error(y_test, y_pred))
print(f"Root Mean Squared Error (RMSE): {rmse:.4f}")

# Pearson Correlation Coefficient (per dimension)
print("\nPearson Correlation Coefficients (per dimension):")
for i, dim_name in enumerate(['Positive', 'Relaxed', 'Awake']):
    # Check for sufficient variance to calculate correlation
    if np.std(y_test[:, i]) > 1e-6 and np.std(y_pred[:, i]) > 1e-6:
        correlation, _ = pearsonr(y_test[:, i], y_pred[:, i])
        print(f"  {dim_name} Dimension: {correlation:.4f}")
    else:
        print(f"  {dim_name} Dimension: Cannot calculate (insufficient variance in data for this dimension)")

correlations = []
for i in range(y_test.shape[1]):
    if np.std(y_test[:, i]) > 1e-6 and np.std(y_pred[:, i]) > 1e-6:
        correlations.append(pearsonr(y_test[:, i], y_pred[:, i])[0])
if correlations:
    average_correlation = np.mean(correlations)
    print(f"  Average Pearson Correlation across dimensions: {average_correlation:.4f}")
else:
    print("  No correlations could be calculated for averaging.")

from sklearn.metrics import r2_score

# R-squared
print("\nR-squared scores:")
r2_valence = r2_score(y_test[:, 0], y_pred[:, 0])
print("  valence =", r2_valence)

r2_tension = r2_score(y_test[:, 1], y_pred[:, 1])
print("  tension =", r2_tension)

r2_energy = r2_score(y_test[:, 2], y_pred[:, 2])
print("  energy =", r2_energy)



Shape of predictions (y_pred): (767, 3)
First 5 actual values (y_test):
[[3.96 8.32 7.05]
 [7.11 7.01 8.11]
 [4.68 5.04 5.88]
 [2.08 4.04 7.6 ]
 [6.34 6.43 5.32]]
First 5 predicted values (y_pred):
[[5.3356724 5.56115   5.0585966]
 [4.59401   4.912153  4.644649 ]
 [3.4659464 2.873736  6.4419518]
 [2.9004986 2.1290884 7.2228007]
 [4.947168  4.594723  6.2508817]]

Mean Absolute Percentage Error (MAPE): 40.13%

Mean Absolute Error (MAE): 1.3746
Root Mean Squared Error (RMSE): 1.7348

Pearson Correlation Coefficients (per dimension):
  Positive Dimension: 0.5566
  Relaxed Dimension: 0.6125
  Awake Dimension: 0.2533
  Average Pearson Correlation across dimensions: 0.4741

R-squared scores:
  valence = 0.3065415430014429
  tension = 0.3699566074085471
  energy = 0.055864487744964775


In [26]:
from sklearn.metrics import mean_absolute_error, mean_squared_error
from scipy.stats import pearsonr

y_pred_dim_p = mlp_regressor_dim_p.predict(X_test_dim_p)

print(f"\nShape of predictions (y_pred): {y_pred_dim_p.shape}")
print(f"First 5 actual values (y_test):\n{y_test_dim_p[:5]}")
print(f"First 5 predicted values (y_pred):\n{y_pred_dim_p[:5]}\n")

# Evaluation Metrics:

# Mean Absolute Percentage Error (MAPE)
absolute_percentage_error = np.abs((y_test_dim_p - y_pred_dim_p) / y_test_dim_p) * 100
mape_dim_p = np.mean(absolute_percentage_error)
print(f"Mean Absolute Percentage Error (MAPE): {mape_dim_p:.2f}%\n")

# Mean Absolute Error (MAE)
mae_dim_p = mean_absolute_error(y_test_dim_p, y_pred_dim_p)
print(f"Mean Absolute Error (MAE): {mae_dim_p:.4f}")

# Root Mean Squared Error (RMSE)
rmse_dim_p = np.sqrt(mean_squared_error(y_test_dim_p, y_pred_dim_p))
print(f"Root Mean Squared Error (RMSE): {rmse_dim_p:.4f}")

# Pearson Correlation Coefficient (per dimension)
print("\nPearson Correlation Coefficients (per dimension):")
for i, dim_name in enumerate(['Positive', 'Relaxed', 'Awake']):
    # Check for sufficient variance to calculate correlation
    if np.std(y_test_dim_p[:, i]) > 1e-6 and np.std(y_pred_dim_p[:, i]) > 1e-6:
        correlation, _ = pearsonr(y_test_dim_p[:, i], y_pred_dim_p[:, i])
        print(f"  {dim_name} Dimension: {correlation:.4f}")
    else:
        print(f"  {dim_name} Dimension: Cannot calculate (insufficient variance in data for this dimension)")

correlations = []
for i in range(y_test_dim_p.shape[1]):
    if np.std(y_test_dim_p[:, i]) > 1e-6 and np.std(y_pred_dim_p[:, i]) > 1e-6:
        correlations.append(pearsonr(y_test_dim_p[:, i], y_pred_dim_p[:, i])[0])
if correlations:
    average_correlation = np.mean(correlations)
    print(f"  Average Pearson Correlation across dimensions: {average_correlation:.4f}")
else:
    print("  No correlations could be calculated for averaging.")

from sklearn.metrics import r2_score

# R-squared
print("\nR-squared scores:")
r2_valence_dim_p = r2_score(y_test_dim_p[:, 0], y_pred_dim_p[:, 0])
print("  valence =", r2_valence_dim_p)

r2_tension_dim_p = r2_score(y_test_dim_p[:, 1], y_pred_dim_p[:, 1])
print("  tension =", r2_tension_dim_p)

r2_energy_dim_p = r2_score(y_test_dim_p[:, 2], y_pred_dim_p[:, 2])
print("  energy =", r2_energy_dim_p)



Shape of predictions (y_pred): (791, 3)
First 5 actual values (y_test):
[[3.16 4.31 2.02]
 [8.86 8.86 8.78]
 [8.14 5.35 3.35]
 [5.02 5.01 5.99]
 [5.04 4.31 6.31]]
First 5 predicted values (y_pred):
[[4.640198  3.8658514 6.338495 ]
 [6.8048773 5.7487364 6.2818694]
 [4.502307  3.3811917 6.7949224]
 [4.9354835 4.696111  5.584033 ]
 [3.069755  3.35467   4.034505 ]]

Mean Absolute Percentage Error (MAPE): 46.12%

Mean Absolute Error (MAE): 1.4803
Root Mean Squared Error (RMSE): 1.8518

Pearson Correlation Coefficients (per dimension):
  Positive Dimension: 0.5648
  Relaxed Dimension: 0.5276
  Awake Dimension: 0.4417
  Average Pearson Correlation across dimensions: 0.5114

R-squared scores:
  valence = 0.3185251918150134
  tension = 0.27695789333761633
  energy = 0.19278185488977184


In [27]:
from sklearn.metrics import mean_absolute_error, mean_squared_error
from scipy.stats import pearsonr

y_pred_disc_i = mlp_regressor_disc_i.predict(X_test_disc_i)

print(f"\nShape of predictions (y_pred): {y_pred_disc_i.shape}")
print(f"First 5 actual values (y_test):\n{y_test_disc_i[:5]}")
print(f"First 5 predicted values (y_pred):\n{y_pred_disc_i[:5]}\n")

# Evaluation Metrics:

# Mean Absolute Percentage Error (MAPE)
absolute_percentage_error_disc_i = np.abs((y_test_disc_i - y_pred_disc_i) / y_test_disc_i) * 100
mape_disc_i = np.mean(absolute_percentage_error_disc_i)
print(f"Mean Absolute Percentage Error (MAPE): {mape_disc_i:.2f}%\n")

# Mean Absolute Error (MAE)
mae_disc_i = mean_absolute_error(y_test_disc_i, y_pred_disc_i)
print(f"Mean Absolute Error (MAE): {mae_disc_i:.4f}")

# Root Mean Squared Error (RMSE)
rmse_disc_i = np.sqrt(mean_squared_error(y_test_disc_i, y_pred_disc_i))
print(f"Root Mean Squared Error (RMSE): {rmse_disc_i:.4f}")

# Pearson Correlation Coefficient (per dimension)
print("\nPearson Correlation Coefficients (per dimension):")
for i, dim_name in enumerate(['Happiness', 'Sadness', 'Anger', 'Tenderness', 'Fear']):
    # Check for sufficient variance to calculate correlation
    if np.std(y_test_disc_i[:, i]) > 1e-6 and np.std(y_pred_disc_i[:, i]) > 1e-6:
        correlation, _ = pearsonr(y_test_disc_i[:, i], y_pred_disc_i[:, i])
        print(f"  {dim_name} Dimension: {correlation:.4f}")
    else:
        print(f"  {dim_name} Dimension: Cannot calculate (insufficient variance in data for this dimension)")

correlations = []
for i in range(y_test_disc_i.shape[1]):
    if np.std(y_test_disc_i[:, i]) > 1e-6 and np.std(y_pred_disc_i[:, i]) > 1e-6:
        correlations.append(pearsonr(y_test_disc_i[:, i], y_pred_disc_i[:, i])[0])
if correlations:
    average_correlation = np.mean(correlations)
    print(f"  Average Pearson Correlation across dimensions: {average_correlation:.4f}")
else:
    print("  No correlations could be calculated for averaging.")

from sklearn.metrics import r2_score

# R-squared IDisc
print("\nR-squared scores (IDisc):")
r2_happiness_disc_i = r2_score(y_test_disc_i[:, 0], y_pred_disc_i[:, 0])
print("  happiness =", r2_happiness_disc_i)
r2_sadness_disc_i = r2_score(y_test_disc_i[:, 1], y_pred_disc_i[:, 1])
print("  sadness =", r2_sadness_disc_i)
r2_anger_disc_i = r2_score(y_test_disc_i[:, 2], y_pred_disc_i[:, 2])
print("  anger =", r2_anger_disc_i)
r2_tenderness_disc_i = r2_score(y_test_disc_i[:, 3], y_pred_disc_i[:, 3])
print("  tenderness =", r2_tenderness_disc_i)
r2_fear_disc_i = r2_score(y_test_disc_i[:, 4], y_pred_disc_i[:, 4])
print("  fear =", r2_fear_disc_i)



Shape of predictions (y_pred): (779, 5)
First 5 actual values (y_test):
[[1.   1.   1.   1.   1.  ]
 [4.23 6.82 5.75 5.13 5.69]
 [1.28 1.24 1.15 1.18 1.23]
 [1.   1.   2.4  1.   1.54]
 [1.36 4.73 1.07 3.37 1.94]]
First 5 predicted values (y_pred):
[[1.8501853 3.0874586 3.2099712 2.557175  2.5936894]
 [1.6310865 2.5259123 4.554597  1.9569632 2.8771925]
 [2.4532912 2.3176894 1.7827998 2.0493135 2.960183 ]
 [1.6570232 2.7581933 5.517244  2.1639774 3.6358066]
 [2.0829694 2.780674  2.4351175 2.5725403 1.929653 ]]

Mean Absolute Percentage Error (MAPE): 80.92%

Mean Absolute Error (MAE): 1.5825
Root Mean Squared Error (RMSE): 2.0329

Pearson Correlation Coefficients (per dimension):
  Happiness Dimension: 0.3999
  Sadness Dimension: 0.2123
  Anger Dimension: 0.4937
  Tenderness Dimension: 0.2754
  Fear Dimension: 0.3168
  Average Pearson Correlation across dimensions: 0.3396

R-squared scores (IDisc):
  happiness = 0.15631216968167538
  sadness = 0.040887521923138825
  anger = 0.24369817889

In [28]:
from sklearn.metrics import mean_absolute_error, mean_squared_error
from scipy.stats import pearsonr

y_pred_disc_p = mlp_regressor_disc_p.predict(X_test_disc_p)

print(f"\nShape of predictions (y_pred): {y_pred_disc_p.shape}")
print(f"First 5 actual values (y_test):\n{y_test_disc_p[:5]}")
print(f"First 5 predicted values (y_pred):\n{y_pred_disc_p[:5]}\n")

# Evaluation Metrics:

# Mean Absolute Percentage Error (MAPE)
absolute_percentage_error_disc_p = np.abs((y_test_disc_p - y_pred_disc_p) / y_test_disc_p) * 100
mape_disc_p = np.mean(absolute_percentage_error_disc_p)
print(f"Mean Absolute Percentage Error (MAPE): {mape_disc_p:.2f}%\n")

# Mean Absolute Error (MAE)
mae_disc_p = mean_absolute_error(y_test_disc_p, y_pred_disc_p)
print(f"Mean Absolute Error (MAE): {mae_disc_p:.4f}")

# Root Mean Squared Error (RMSE)
rmse_disc_p = np.sqrt(mean_squared_error(y_test_disc_p, y_pred_disc_p))
print(f"Root Mean Squared Error (RMSE): {rmse_disc_p:.4f}")

# Pearson Correlation Coefficient (per dimension)
print("\nPearson Correlation Coefficients (per dimension):")
for i, dim_name in enumerate(['Happiness', 'Sadness', 'Anger', 'Tenderness', 'Fear']):
    # Check for sufficient variance to calculate correlation
    if np.std(y_test_disc_p[:, i]) > 1e-6 and np.std(y_pred_disc_p[:, i]) > 1e-6:
        correlation, _ = pearsonr(y_test_disc_p[:, i], y_pred_disc_p[:, i])
        print(f"  {dim_name} Dimension: {correlation:.4f}")
    else:
        print(f"  {dim_name} Dimension: Cannot calculate (insufficient variance in data for this dimension)")

correlations = []
for i in range(y_test_disc_p.shape[1]):
    if np.std(y_test_disc_p[:, i]) > 1e-6 and np.std(y_pred_disc_p[:, i]) > 1e-6:
        correlations.append(pearsonr(y_test_disc_p[:, i], y_pred_disc_p[:, i])[0])
if correlations:
    average_correlation = np.mean(correlations)
    print(f"  Average Pearson Correlation across dimensions: {average_correlation:.4f}")
else:
    print("  No correlations could be calculated for averaging.")

from sklearn.metrics import r2_score

# R-squared PDisc
print("\nR-squared scores (PDisc):")
r2_happiness_disc_p = r2_score(y_test_disc_p[:, 0], y_pred_disc_p[:, 0])
print("  happiness =", r2_happiness_disc_p)
r2_sadness_disc_p = r2_score(y_test_disc_p[:, 1], y_pred_disc_p[:, 1])
print("  sadness =", r2_sadness_disc_p)
r2_anger_disc_p = r2_score(y_test_disc_p[:, 2], y_pred_disc_p[:, 2])
print("  anger =", r2_anger_disc_p)
r2_tenderness_disc_p = r2_score(y_test_disc_p[:, 3], y_pred_disc_p[:, 3])
print("  tenderness =", r2_tenderness_disc_p)
r2_fear_disc_p = r2_score(y_test_disc_p[:, 4], y_pred_disc_p[:, 4])
print("  fear =", r2_fear_disc_p)



Shape of predictions (y_pred): (767, 5)
First 5 actual values (y_test):
[[2.   6.04 1.86 1.01 1.01]
 [1.   6.06 1.   2.99 1.  ]
 [1.   2.01 4.02 1.   1.  ]
 [1.81 5.81 4.99 3.35 1.61]
 [1.26 3.29 2.45 1.26 6.45]]
First 5 predicted values (y_pred):
[[1.9534453 4.9007325 3.0334053 2.9748595 3.0762045]
 [1.3315947 4.5025153 4.360565  2.2526305 4.197736 ]
 [2.0934818 4.159861  4.037998  2.5864096 4.4016156]
 [2.0913436 3.828976  5.184698  2.2727654 5.476361 ]
 [3.029634  4.605835  2.3737216 3.690449  3.1222017]]

Mean Absolute Percentage Error (MAPE): 87.77%

Mean Absolute Error (MAE): 1.7958
Root Mean Squared Error (RMSE): 2.1943

Pearson Correlation Coefficients (per dimension):
  Happiness Dimension: 0.4143
  Sadness Dimension: 0.3715
  Anger Dimension: 0.4208
  Tenderness Dimension: 0.3239
  Fear Dimension: 0.4083
  Average Pearson Correlation across dimensions: 0.3878

R-squared scores (PDisc):
  happiness = 0.1610085772035228
  sadness = 0.137855013456473
  anger = 0.162922488410992