In [1]:
from transformers import ClapModel, AutoProcessor
import torch
import librosa
import os
import pandas as pd
import numpy as np

In [2]:
# Load CLAP model + processor
model = ClapModel.from_pretrained("laion/larger_clap_general")
processor = AutoProcessor.from_pretrained("laion/larger_clap_general")

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json:   0%|          | 0.00/643 [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/776M [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/776M [00:00<?, ?B/s]

preprocessor_config.json:   0%|          | 0.00/541 [00:00<?, ?B/s]

tokenizer_config.json: 0.00B [00:00, ?B/s]

vocab.json: 0.00B [00:00, ?B/s]

merges.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/280 [00:00<?, ?B/s]

# Process audio

In [3]:
import zipfile
import os

zip_files = ["Exp1.zip", "Exp2.zip", "Analysis.zip"]

extract_dir = "/content/" # You can change this if you want to extract elsewhere
os.makedirs(extract_dir, exist_ok=True)

for zip_file in zip_files:
    if os.path.exists(zip_file):
        with zipfile.ZipFile(zip_file, 'r') as zip_ref:
            zip_ref.extractall(extract_dir)
        print(f"Extracted {zip_file} to {extract_dir}")
    else:
        print(f"Error: {zip_file} not found.")

Extracted Exp1.zip to /content/
Extracted Exp2.zip to /content/
Extracted Analysis.zip to /content/


In [4]:
audio_stimuli = []
stimuli_path = "/content/Exp2/Stimuli/"

for file in sorted(os.listdir(stimuli_path)):
    if file.endswith(".wav"):
        wav_path = os.path.join(stimuli_path, file)
        #Clap has already been trained on a sample rate of 48,000 so we should use what it knows already
        audio, sample_rate = librosa.load(wav_path, sr=48000)
        audio_stimuli.append(audio)

In [5]:
inputs = processor(audios=audio_stimuli, return_tensors="pt", padding=True, sampling_rate=48000)
audio_embeddings = model.get_audio_features(**inputs)

In [6]:
print(audio_embeddings.shape)

torch.Size([32, 512])


# Process text

In [7]:
discrete_tags = ["happiness", "sadness", "anger", "tenderness", "fear"]

discrete_captions_perceived = ["This sound expresses " + tag for tag in discrete_tags]
print(discrete_captions_perceived)
discrete_captions_induced = ["This sound makes me feel " + tag for tag in discrete_tags]
print(discrete_captions_induced)

dimensional_tags = ["positive", "relaxed", "awake"]

dimensional_captions_perceived = ["This sound expresses " + tag for tag in dimensional_tags]
print(dimensional_captions_perceived)
dimensional_captions_induced = ["This sound makes me feel " + tag for tag in dimensional_tags]
print(dimensional_captions_induced)


['This sound expresses happiness', 'This sound expresses sadness', 'This sound expresses anger', 'This sound expresses tenderness', 'This sound expresses fear']
['This sound makes me feel happiness', 'This sound makes me feel sadness', 'This sound makes me feel anger', 'This sound makes me feel tenderness', 'This sound makes me feel fear']
['This sound expresses positive', 'This sound expresses relaxed', 'This sound expresses awake']
['This sound makes me feel positive', 'This sound makes me feel relaxed', 'This sound makes me feel awake']


In [9]:
all_tags = discrete_captions_perceived + discrete_captions_induced + dimensional_captions_perceived + dimensional_captions_induced

tag_inputs = processor(text=dimensional_captions_induced, return_tensors="pt", padding=True)
tag_embeds = model.get_text_features(**tag_inputs)

In [None]:
tag_inputs_disc_i = processor(text=discrete_captions_induced, return_tensors="pt", padding=True)
tag_embeds_disc_i = model.get_text_features(**tag_inputs_disc_i)

In [None]:
tag_inputs_dim_p = processor(text=dimensional_captions_perceived, return_tensors="pt", padding=True)
tag_embeds_dim_p = model.get_text_features(**tag_inputs_dim_p)

In [None]:
tag_inputs_disc_p = processor(text=discrete_captions_perceived, return_tensors="pt", padding=True)
tag_embeds_disc_p = model.get_text_features(**tag_inputs_disc_p)

## Load csv files and extract related columns

In [11]:
IDim_path = '/content/Exp2/Data/Dim/'
IDim_response_dfs = []

for file in sorted(os.listdir(IDim_path)):
    if file.endswith(".csv"):
        file_path = os.path.join(IDim_path, file)
        try:
            df = pd.read_csv(file_path, sep=r'\s*,\s*', engine='python')
            # Crucial: Strip whitespace from column names
            df.columns = df.columns.str.strip()

            # Ensure required rating columns exist
            required_cols = ['Ipositive', 'Irelaxed', 'Iawake', 'stim']
            if all(col in df.columns for col in required_cols):
                # Select only the relevant columns and append to our list
                IDim_response_dfs.append(df[required_cols])
            else:
                print(f"Skipping file '{file_path}': Missing required columns ({required_cols}). Found columns: {df.columns.tolist()}")

        except Exception as e:
            print(f"Error reading or processing file {file_path}: {e}")

# Verify we found files
if not IDim_response_dfs:
    raise ValueError("No valid CSV files found with the required columns")

# Sort each DataFrame by stimNo
for df in IDim_response_dfs:
    df.sort_values('stim', inplace=True)
    df.reset_index(drop=True, inplace=True)

# Concatenate all individual DataFrames into one master DataFrame for human responses
if IDim_response_dfs:
    master_human_responses_df = pd.concat(IDim_response_dfs, ignore_index=True)
    print(f"Master human responses DataFrame shape: {master_human_responses_df.shape}\n")
    print(f"Master human responses (first 5 rows):\n{master_human_responses_df.head()}\n")
else:
    raise ValueError("No valid CSV files found or processed in IDim_path.")

Master human responses DataFrame shape: (2432, 4)

Master human responses (first 5 rows):
   Ipositive  Irelaxed  Iawake  stim
0       7.97      7.62    8.51     1
1       6.72      5.80    6.96     2
2       4.56      2.93    4.94     3
3       7.71      7.45    8.25     4
4       6.67      6.18    4.16     5



In [15]:
PDim_path = '/content/Exp2/Data/Dim/'
PDim_response_dfs = []

for file in sorted(os.listdir(PDim_path)):
    if file.endswith(".csv"):
        file_path = os.path.join(PDim_path, file)
        try:
            df = pd.read_csv(file_path, sep=r'\s*,\s*', engine='python')
            # Crucial: Strip whitespace from column names
            df.columns = df.columns.str.strip()

            # Ensure required rating columns exist
            required_cols = ['Ppositive', 'Prelaxed', 'Pawake','stim']
            if all(col in df.columns for col in required_cols):
                # Select only the relevant columns and append to our list
                PDim_response_dfs.append(df[required_cols])
            else:
                print(f"Skipping file '{file_path}': Missing required columns ({required_cols}). Found columns: {df.columns.tolist()}")

        except Exception as e:
            print(f"Error reading or processing file {file_path}: {e}")

# Verify we found files
if not PDim_response_dfs:
    raise ValueError("No valid CSV files found with the required columns")

# Sort each DataFrame by stimNo
for df in PDim_response_dfs:
    df.sort_values('stim', inplace=True)
    df.reset_index(drop=True, inplace=True)


# Concatenate all individual DataFrames into one master DataFrame for human responses
if PDim_response_dfs:
    master_human_responses_df_dim_p = pd.concat(PDim_response_dfs, ignore_index=True)
    print(f"Master human responses DataFrame shape: {master_human_responses_df_dim_p.shape}\n")
    print(f"Master human responses (first 5 rows):\n{master_human_responses_df_dim_p.head()}\n")
else:
    raise ValueError("No valid CSV files found or processed in IDim_path.")

Master human responses DataFrame shape: (2432, 4)

Master human responses (first 5 rows):
   Ppositive  Prelaxed  Pawake  stim
0       4.09      4.37    3.76     1
1       3.96      2.33    3.08     2
2       7.95      7.48    8.19     3
3       6.43      6.11    6.34     4
4       5.59      5.97    4.97     5



A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.sort_values('stim', inplace=True)


In [16]:
IDisc_path = '/content/Exp2/Data/Disc/'
IDisc_response_dfs = []

for file in sorted(os.listdir(IDisc_path)):
    if file.endswith(".csv"):
        file_path = os.path.join(IDisc_path, file)
        try:
            df = pd.read_csv(file_path, sep=r'\s*,\s*', engine='python')
            # Crucial: Strip whitespace from column names
            df.columns = df.columns.str.strip()

            # Ensure required rating columns exist
            required_cols = ["Ihappiness", "Isadness", "Ianger", "Itenderness", "Ifear",'stim']
            if all(col in df.columns for col in required_cols):
                # Select only the relevant columns and append to our list
                IDisc_response_dfs.append(df[required_cols])
            else:
                print(f"Skipping file '{file_path}': Missing required columns ({required_cols}). Found columns: {df.columns.tolist()}")

        except Exception as e:
            print(f"Error reading or processing file {file_path}: {e}")

# Verify we found files
if not IDisc_response_dfs:
    raise ValueError("No valid CSV files found with the required columns")

# Sort each DataFrame by stimNo
for df in IDisc_response_dfs:
    df.sort_values('stim', inplace=True)
    df.reset_index(drop=True, inplace=True)


# Concatenate all individual DataFrames into one master DataFrame for human responses
if IDisc_response_dfs:
    master_human_responses_df_disc_i = pd.concat(IDisc_response_dfs, ignore_index=True)
    print(f"Master human responses DataFrame shape: {master_human_responses_df_disc_i.shape}\n")
    print(f"Master human responses (first 5 rows):\n{master_human_responses_df_disc_i.head()}\n")
else:
    raise ValueError("No valid CSV files found or processed in IDim_path.")

Master human responses DataFrame shape: (2432, 6)

Master human responses (first 5 rows):
   Ihappiness  Isadness  Ianger  Itenderness  Ifear  stim
0        2.07      4.04    6.09         2.40   6.42     1
1        2.15      2.33    2.36         2.10   2.41     2
2        1.55      1.34    1.60         1.28   1.41     3
3        2.06      2.26    2.10         3.29   2.08     4
4        1.82      1.66    1.84         1.79   1.95     5



In [18]:
PDisc_path = '/content/Exp2/Data/Disc/'
PDisc_response_dfs = []

for file in sorted(os.listdir(PDisc_path)):
    if file.endswith(".csv"):
        file_path = os.path.join(PDisc_path, file)
        try:
            df = pd.read_csv(file_path, sep=r'\s*,\s*', engine='python')
            # Crucial: Strip whitespace from column names
            df.columns = df.columns.str.strip()

            # Ensure required rating columns exist
            required_cols = ["Phappiness", "Psadness", "Panger", "Ptenderness", "Pfear",'stim']
            if all(col in df.columns for col in required_cols):
                # Select only the relevant columns and append to our list
                PDisc_response_dfs.append(df[required_cols])
            else:
                print(f"Skipping file '{file_path}': Missing required columns ({required_cols}). Found columns: {df.columns.tolist()}")

        except Exception as e:
            print(f"Error reading or processing file {file_path}: {e}")

# Verify we found files
if not PDisc_response_dfs:
    raise ValueError("No valid CSV files found with the required columns")

# Sort each DataFrame by stimNo
for df in PDisc_response_dfs:
    df.sort_values('stim', inplace=True)
    df.reset_index(drop=True, inplace=True)

# Concatenate all individual DataFrames into one master DataFrame for human responses
if PDisc_response_dfs:
    master_human_responses_df_disc_p = pd.concat(PDisc_response_dfs, ignore_index=True)
    print(f"Master human responses DataFrame shape: {master_human_responses_df_disc_p.shape}\n")
    print(f"Master human responses (first 5 rows):\n{master_human_responses_df_disc_p.head()}\n")
else:
    raise ValueError("No valid CSV files found or processed in IDim_path.")

Master human responses DataFrame shape: (2432, 6)

Master human responses (first 5 rows):
   Phappiness  Psadness  Panger  Ptenderness  Pfear  stim
0        1.88      5.52    3.87         5.53   6.23     1
1        1.95      6.07    3.45         4.33   2.58     2
2        2.35      6.03    3.08         4.88   2.91     3
3        2.89      5.29    3.71         2.40   6.96     4
4        1.23      3.46    2.68         3.73   5.68     5



A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.sort_values('stim', inplace=True)


# Prepare features X and targets y

In [12]:
from sklearn.model_selection import train_test_split
"""
For each participant:
1. Takes all 59 audio embeddings
2. Pairs them with that participant's 59 ratings
3. Stacks these pairs for all participants.
"""

# Checks the number of participants is an exact multiple of the number of audio files (ensuring every participant rated every stimuli)
num_participants = len(IDim_response_dfs)
if master_human_responses_df.shape[0] % len(audio_stimuli) != 0:
    print("Warning: Total responses is not a perfect multiple of unique audio files. This might indicate inconsistent data or that not all participants rated all items, which could break implicit ordering.")

# Audio embeddings generated by CLAP
# Creates one copy of all embeddings per participant
X_list = []
for _ in range(num_participants):
    X_list.extend(audio_embeddings.detach()) # Add a full set of embeddings for each participant

# Convert to NumPy array
X = np.array(X_list)

# Extract y from the concatenated DataFrame
y = master_human_responses_df[['Ipositive', 'Irelaxed', 'Iawake']].values

print(f"Shape of X (features) after implicit alignment: {X.shape}")
print(f"Shape of y (labels) after implicit alignment: {y.shape}\n")

# Sanity check: X and y must have the same number of rows
# Critical check that we have exactly one set of features per label
if X.shape[0] != y.shape[0]:
    raise ValueError("Number of rows in X and y do not match after implicit alignment. This indicates an issue with the implicit ordering assumption or data loading.")

# --- Split Data into Training and Testing Sets ---
# 80% for training, 20% for testing
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

"""
Example:
Result (for 2 participants × 59 files = 118 total samples):
1. X_train: 94 audio embeddings (80% of 118)
2. y_train: 94 corresponding rating vectors
3. X_test: 24 audio embeddings (20%)
4. y_test: 24 rating vectors
'''

# X_train = combination of audio embeddings stacked on top of eachother
# y_train = combination of participants ratings for an audio file stacked ontop of eachother
# x_test = audio embedding for one audio file only
# y_test = participant rating for one audio file only

# Goal: check if x_test and y_test match

print(f"Training set size (X_train, y_train): {X_train.shape}, {y_train.shape}")
print(f"Testing set size (X_test, y_test): {X_test.shape}, {y_test.shape}\n")

In this example:
y_train = [
    [5.0, 4.0, 6.0],  # P1-A
    [7.0, 6.0, 4.0],   # P1-C
    [2.0, 1.0, 6.0],   # P2-B
    [4.0, 3.0, 5.0],   # P2-A
    [6.0, 7.0, 3.0]    # P2-C
]

X_test = [
    [0.5, 0.6, 0.7, 0.8]  # B.wav (P1)
]  # 1 sample

y_test = [
    [3.0, 2.0, 5.0]  # P1-B
]
- Training:
  X_train[0] = [0.1, 0.2, 0.3, 0.4] → Predict y_train[0] = [5.0, 4.0, 6.0]
"""

Shape of X (features) after implicit alignment: (2432, 512)
Shape of y (labels) after implicit alignment: (2432, 3)



'\nExample:\nResult (for 2 participants × 59 files = 118 total samples):\n1. X_train: 94 audio embeddings (80% of 118)\n2. y_train: 94 corresponding rating vectors\n3. X_test: 24 audio embeddings (20%)\n4. y_test: 24 rating vectors\n\'\'\'\n\n# X_train = combination of audio embeddings stacked on top of eachother\n# y_train = combination of participants ratings for an audio file stacked ontop of eachother\n# x_test = audio embedding for one audio file only\n# y_test = participant rating for one audio file only\n\n# Goal: check if x_test and y_test match\n\nprint(f"Training set size (X_train, y_train): {X_train.shape}, {y_train.shape}")\nprint(f"Testing set size (X_test, y_test): {X_test.shape}, {y_test.shape}\n")\n\nIn this example:\ny_train = [\n    [5.0, 4.0, 6.0],  # P1-A\n    [7.0, 6.0, 4.0],   # P1-C\n    [2.0, 1.0, 6.0],   # P2-B\n    [4.0, 3.0, 5.0],   # P2-A\n    [6.0, 7.0, 3.0]    # P2-C\n]\n\nX_test = [\n    [0.5, 0.6, 0.7, 0.8]  # B.wav (P1)\n]  # 1 sample\n\ny_test = [\n   

In [19]:
from sklearn.model_selection import train_test_split

num_participants_dim_p = len(PDim_response_dfs)
if master_human_responses_df_dim_p.shape[0] % len(audio_stimuli) != 0:
    print("Warning: Total responses is not a perfect multiple of unique audio files. This might indicate inconsistent data or that not all participants rated all items, which could break implicit ordering.")

X_list = []
for _ in range(num_participants_dim_p):
    X_list.extend(audio_embeddings.detach()) # Add a full set of embeddings for each participant

# Convert to NumPy array
X_dim_p = np.array(X_list)

# Extract y from the concatenated DataFrame
y_dim_p = master_human_responses_df_dim_p[['Ppositive', 'Prelaxed', 'Pawake']].values

print(f"Shape of X (features) after implicit alignment: {X_dim_p.shape}")
print(f"Shape of y (labels) after implicit alignment: {y_dim_p.shape}\n")

# Sanity check: X and y must have the same number of rows
if X_dim_p.shape[0] != y_dim_p.shape[0]:
    raise ValueError("Number of rows in X and y do not match after implicit alignment. This indicates an issue with the implicit ordering assumption or data loading.")

# --- Split Data into Training and Testing Sets ---
X_train_dim_p, X_test_dim_p, y_train_dim_p, y_test_dim_p = train_test_split(
    X_dim_p, y_dim_p, test_size=0.2, random_state=42
)

print(f"Training set size (X_train, y_train): {X_dim_p.shape}, {y_train_dim_p.shape}")
print(f"Testing set size (X_test, y_test): {X_test_dim_p.shape}, {y_test_dim_p.shape}\n")

Shape of X (features) after implicit alignment: (2432, 512)
Shape of y (labels) after implicit alignment: (2432, 3)

Training set size (X_train, y_train): (2432, 512), (1945, 3)
Testing set size (X_test, y_test): (487, 512), (487, 3)



In [20]:
from sklearn.model_selection import train_test_split

num_participants_disc_i = len(IDisc_response_dfs)
if master_human_responses_df_disc_i.shape[0] % len(audio_stimuli) != 0:
    print("Warning: Total responses is not a perfect multiple of unique audio files. This might indicate inconsistent data or that not all participants rated all items, which could break implicit ordering.")

X_list = []
for _ in range(num_participants_disc_i):
    X_list.extend(audio_embeddings.detach()) # Add a full set of embeddings for each participant

# Convert to NumPy array
X_disc_i = np.array(X_list)

# Extract y from the concatenated DataFrame
y_disc_i = master_human_responses_df_disc_i[["Ihappiness", "Isadness", "Ianger", "Itenderness", "Ifear"]].values

print(f"Shape of X (features) after implicit alignment: {X_disc_i.shape}")
print(f"Shape of y (labels) after implicit alignment: {y_disc_i.shape}\n")

# Sanity check: X and y must have the same number of rows
if X_disc_i.shape[0] != y_disc_i.shape[0]:
    raise ValueError("Number of rows in X and y do not match after implicit alignment. This indicates an issue with the implicit ordering assumption or data loading.")

# --- Split Data into Training and Testing Sets ---
X_train_disc_i, X_test_disc_i, y_train_disc_i, y_test_disc_i = train_test_split(
    X_disc_i, y_disc_i, test_size=0.2, random_state=42
)

print(f"Training set size (X_train, y_train): {X_disc_i.shape}, {y_train_disc_i.shape}")
print(f"Testing set size (X_test, y_test): {X_test_disc_i.shape}, {y_test_disc_i.shape}\n")

Shape of X (features) after implicit alignment: (2432, 512)
Shape of y (labels) after implicit alignment: (2432, 5)

Training set size (X_train, y_train): (2432, 512), (1945, 5)
Testing set size (X_test, y_test): (487, 512), (487, 5)



In [21]:
from sklearn.model_selection import train_test_split

num_participants_disc_p = len(PDisc_response_dfs)
if master_human_responses_df_disc_p.shape[0] % len(audio_stimuli) != 0:
    print("Warning: Total responses is not a perfect multiple of unique audio files. This might indicate inconsistent data or that not all participants rated all items, which could break implicit ordering.")

X_list = []
for _ in range(num_participants_disc_p):
    X_list.extend(audio_embeddings.detach()) # Add a full set of embeddings for each participant

# Convert to NumPy array
X_disc_p = np.array(X_list)

# Extract y from the concatenated DataFrame
y_disc_p = master_human_responses_df_disc_p[["Phappiness", "Psadness", "Panger", "Ptenderness", "Pfear"]].values

print(f"Shape of X (features) after implicit alignment: {X_disc_p.shape}")
print(f"Shape of y (labels) after implicit alignment: {y_disc_p.shape}\n")

# Sanity check: X and y must have the same number of rows
if X_disc_p.shape[0] != y_disc_p.shape[0]:
    raise ValueError("Number of rows in X and y do not match after implicit alignment. This indicates an issue with the implicit ordering assumption or data loading.")

# --- Split Data into Training and Testing Sets ---
X_train_disc_p, X_test_disc_p, y_train_disc_p, y_test_disc_p = train_test_split(
    X_disc_p, y_disc_p, test_size=0.2, random_state=42
)

print(f"Training set size (X_train, y_train): {X_disc_p.shape}, {y_train_disc_p.shape}")
print(f"Testing set size (X_test, y_test): {X_test_disc_p.shape}, {y_test_disc_p.shape}\n")

Shape of X (features) after implicit alignment: (2432, 512)
Shape of y (labels) after implicit alignment: (2432, 5)

Training set size (X_train, y_train): (2432, 512), (1945, 5)
Testing set size (X_test, y_test): (487, 512), (487, 5)



# Train regression head (=MLP, a few projection layers)

In [13]:
from sklearn.neural_network import MLPRegressor

mlp_regressor = MLPRegressor(
    hidden_layer_sizes=(100, 50),
    activation='relu',
    solver='adam',
    max_iter=1000,
    random_state=42,
    verbose=True,
    early_stopping=True,
    n_iter_no_change=50,
    tol=1e-4
)

print("Starting MLP Regressor training...")
mlp_regressor.fit(X_train, y_train)
print("\nMLP Regressor training complete.")

Starting MLP Regressor training...
Iteration 1, loss = 12.89702636
Validation score: -4.086255
Iteration 2, loss = 10.72005921
Validation score: -2.943097
Iteration 3, loss = 7.77162849
Validation score: -1.516762
Iteration 4, loss = 4.60717141
Validation score: -0.459025
Iteration 5, loss = 2.79293806
Validation score: -0.180837
Iteration 6, loss = 2.20455742
Validation score: 0.000399
Iteration 7, loss = 1.91821049
Validation score: 0.036583
Iteration 8, loss = 1.89785432
Validation score: 0.054384
Iteration 9, loss = 1.85038438
Validation score: 0.077270
Iteration 10, loss = 1.82006631
Validation score: 0.085209
Iteration 11, loss = 1.80358376
Validation score: 0.094090
Iteration 12, loss = 1.79155364
Validation score: 0.099488
Iteration 13, loss = 1.77873260
Validation score: 0.100451
Iteration 14, loss = 1.76664280
Validation score: 0.105794
Iteration 15, loss = 1.75567407
Validation score: 0.108268
Iteration 16, loss = 1.74707620
Validation score: 0.113209
Iteration 17, loss = 1.

In [22]:
from sklearn.neural_network import MLPRegressor

mlp_regressor_dim_p = MLPRegressor(
    hidden_layer_sizes=(100, 50),
    activation='relu',
    solver='adam',
    max_iter=1000,
    random_state=42,
    verbose=True,
    early_stopping=True,
    n_iter_no_change=50,
    tol=1e-4
)

print("Starting MLP Regressor training...")
mlp_regressor_dim_p.fit(X_train_dim_p, y_train_dim_p)
print("\nMLP Regressor training complete.")

Starting MLP Regressor training...
Iteration 1, loss = 12.39903080
Validation score: -3.185585
Iteration 2, loss = 10.26906667
Validation score: -2.191859
Iteration 3, loss = 7.43309832
Validation score: -1.014117
Iteration 4, loss = 4.42951077
Validation score: -0.278766
Iteration 5, loss = 2.80571729
Validation score: -0.205556
Iteration 6, loss = 2.28775937
Validation score: -0.034014
Iteration 7, loss = 2.02075867
Validation score: 0.036595
Iteration 8, loss = 1.99787999
Validation score: 0.064107
Iteration 9, loss = 1.94673805
Validation score: 0.068438
Iteration 10, loss = 1.92541810
Validation score: 0.075106
Iteration 11, loss = 1.90682159
Validation score: 0.096342
Iteration 12, loss = 1.89355683
Validation score: 0.094443
Iteration 13, loss = 1.87788012
Validation score: 0.088639
Iteration 14, loss = 1.86647670
Validation score: 0.093267
Iteration 15, loss = 1.85434386
Validation score: 0.096215
Iteration 16, loss = 1.84299814
Validation score: 0.104885
Iteration 17, loss = 1

In [23]:
from sklearn.neural_network import MLPRegressor

mlp_regressor_disc_i = MLPRegressor(
    hidden_layer_sizes=(100, 50),
    activation='relu',
    solver='adam',
    max_iter=1000,
    random_state=42,
    verbose=True,
    early_stopping=True,
    n_iter_no_change=50,
    tol=1e-4
)

print("Starting MLP Regressor training...")
mlp_regressor_disc_i.fit(X_train_disc_i, y_train_disc_i)
print("\nMLP Regressor training complete.")

Starting MLP Regressor training...
Iteration 1, loss = 5.06720855
Validation score: -1.165687
Iteration 2, loss = 4.03908020
Validation score: -0.643977
Iteration 3, loss = 2.99016759
Validation score: -0.211171
Iteration 4, loss = 2.31196211
Validation score: -0.039858
Iteration 5, loss = 2.13476792
Validation score: -0.005441
Iteration 6, loss = 2.10896921
Validation score: 0.007306
Iteration 7, loss = 2.05203052
Validation score: 0.013899
Iteration 8, loss = 2.02624310
Validation score: 0.016955
Iteration 9, loss = 2.00098072
Validation score: 0.029156
Iteration 10, loss = 1.98043258
Validation score: 0.036762
Iteration 11, loss = 1.96932651
Validation score: 0.039205
Iteration 12, loss = 1.95837893
Validation score: 0.035927
Iteration 13, loss = 1.95203606
Validation score: 0.036040
Iteration 14, loss = 1.94663145
Validation score: 0.038894
Iteration 15, loss = 1.94286755
Validation score: 0.036849
Iteration 16, loss = 1.93842028
Validation score: 0.034074
Iteration 17, loss = 1.93

In [24]:
from sklearn.neural_network import MLPRegressor

mlp_regressor_disc_p = MLPRegressor(
    hidden_layer_sizes=(100, 50),
    activation='relu',
    solver='adam',
    max_iter=1000,
    random_state=42,
    verbose=True,
    early_stopping=True,
    n_iter_no_change=50,
    tol=1e-4
)

print("Starting MLP Regressor training...")
mlp_regressor_disc_p.fit(X_train_disc_p, y_train_disc_p)
print("\nMLP Regressor training complete.")

Starting MLP Regressor training...
Iteration 1, loss = 7.15253715
Validation score: -1.549268
Iteration 2, loss = 5.84377591
Validation score: -0.964648
Iteration 3, loss = 4.31599936
Validation score: -0.382912
Iteration 4, loss = 3.10378318
Validation score: -0.079184
Iteration 5, loss = 2.65206066
Validation score: 0.012475
Iteration 6, loss = 2.57130749
Validation score: 0.022226
Iteration 7, loss = 2.49590157
Validation score: 0.043917
Iteration 8, loss = 2.43332462
Validation score: 0.055084
Iteration 9, loss = 2.39060821
Validation score: 0.074762
Iteration 10, loss = 2.34765653
Validation score: 0.089315
Iteration 11, loss = 2.31669717
Validation score: 0.095599
Iteration 12, loss = 2.29389392
Validation score: 0.097863
Iteration 13, loss = 2.27607765
Validation score: 0.099153
Iteration 14, loss = 2.26526803
Validation score: 0.100165
Iteration 15, loss = 2.25713205
Validation score: 0.102367
Iteration 16, loss = 2.24913694
Validation score: 0.102306
Iteration 17, loss = 2.241

# Evaluate

In [27]:
from sklearn.metrics import mean_absolute_error, mean_squared_error
from scipy.stats import pearsonr

y_pred = mlp_regressor.predict(X_test)

print(f"\nShape of predictions (y_pred): {y_pred.shape}")
print(f"First 5 actual values (y_test):\n{y_test[:5]}")
print(f"First 5 predicted values (y_pred):\n{y_pred[:5]}\n")

# Evaluation Metrics:

# Mean Absolute Percentage Error (MAPE)
absolute_percentage_error = np.abs((y_test - y_pred) / y_test) * 100
mape = np.mean(absolute_percentage_error)
print(f"Mean Absolute Percentage Error (MAPE): {mape:.2f}%\n")

# Mean Absolute Error (MAE)
mae = mean_absolute_error(y_test, y_pred)
print(f"Mean Absolute Error (MAE): {mae:.4f}")

# Root Mean Squared Error (RMSE)
rmse = np.sqrt(mean_squared_error(y_test, y_pred))
print(f"Root Mean Squared Error (RMSE): {rmse:.4f}")

# Pearson Correlation Coefficient (per dimension)
print("\nPearson Correlation Coefficients (per dimension):")
for i, dim_name in enumerate(['IPositive', 'IRelaxed', 'IAwake']):
    # Check for sufficient variance to calculate correlation
    if np.std(y_test[:, i]) > 1e-6 and np.std(y_pred[:, i]) > 1e-6:
        correlation, _ = pearsonr(y_test[:, i], y_pred[:, i])
        print(f"  {dim_name} Dimension: {correlation:.4f}")
    else:
        print(f"  {dim_name} Dimension: Cannot calculate (insufficient variance in data for this dimension)")

correlations = []
for i in range(y_test.shape[1]):
    if np.std(y_test[:, i]) > 1e-6 and np.std(y_pred[:, i]) > 1e-6:
        correlations.append(pearsonr(y_test[:, i], y_pred[:, i])[0])
if correlations:
    average_correlation = np.mean(correlations)
    print(f"  Average Pearson Correlation across dimensions: {average_correlation:.4f}")
else:
    print("  No correlations could be calculated for averaging.")

from sklearn.metrics import r2_score

# R-squared
print("\nR-squared scores:")
r2_valence = r2_score(y_test[:, 0], y_pred[:, 0])
print("  valence =", r2_valence)

r2_tension = r2_score(y_test[:, 1], y_pred[:, 1])
print("  tension =", r2_tension)

r2_energy = r2_score(y_test[:, 2], y_pred[:, 2])
print("  energy =", r2_energy)



Shape of predictions (y_pred): (487, 3)
First 5 actual values (y_test):
[[4.98 4.98 6.01]
 [7.76 7.42 4.6 ]
 [5.76 1.93 8.87]
 [3.31 2.62 6.98]
 [6.66 7.04 3.94]]
First 5 predicted values (y_pred):
[[3.3982959 3.4968197 4.058697 ]
 [3.8996508 4.145374  4.743466 ]
 [5.456677  5.2343655 5.829736 ]
 [3.8141704 3.7635353 5.0609345]
 [5.834269  5.4987755 5.5807896]]

Mean Absolute Percentage Error (MAPE): 46.09%

Mean Absolute Error (MAE): 1.5568
Root Mean Squared Error (RMSE): 1.8988

Pearson Correlation Coefficients (per dimension):
  IPositive Dimension: 0.4394
  IRelaxed Dimension: 0.4889
  IAwake Dimension: 0.3699
  Average Pearson Correlation across dimensions: 0.4327

R-squared scores:
  valence = 0.19194432434378828
  tension = 0.23607378881747076
  energy = 0.13536355057557314


In [28]:
from sklearn.metrics import mean_absolute_error, mean_squared_error
from scipy.stats import pearsonr

y_pred_dim_p = mlp_regressor_dim_p.predict(X_test_dim_p)

print(f"\nShape of predictions (y_pred): {y_pred_dim_p.shape}")
print(f"First 5 actual values (y_test):\n{y_test_dim_p[:5]}")
print(f"First 5 predicted values (y_pred):\n{y_pred_dim_p[:5]}\n")

# Evaluation Metrics:

# Mean Absolute Percentage Error (MAPE)
absolute_percentage_error = np.abs((y_test_dim_p - y_pred_dim_p) / y_test_dim_p) * 100
mape_dim_p = np.mean(absolute_percentage_error)
print(f"Mean Absolute Percentage Error (MAPE): {mape_dim_p:.2f}%\n")

# Mean Absolute Error (MAE)
mae_dim_p = mean_absolute_error(y_test_dim_p, y_pred_dim_p)
print(f"Mean Absolute Error (MAE): {mae_dim_p:.4f}")

# Root Mean Squared Error (RMSE)
rmse_dim_p = np.sqrt(mean_squared_error(y_test_dim_p, y_pred_dim_p))
print(f"Root Mean Squared Error (RMSE): {rmse_dim_p:.4f}")

# Pearson Correlation Coefficient (per dimension)
print("\nPearson Correlation Coefficients (per dimension):")
for i, dim_name in enumerate(['PPositive', 'PRelaxed', 'PAwake']):
    # Check for sufficient variance to calculate correlation
    if np.std(y_test_dim_p[:, i]) > 1e-6 and np.std(y_pred_dim_p[:, i]) > 1e-6:
        correlation, _ = pearsonr(y_test_dim_p[:, i], y_pred_dim_p[:, i])
        print(f"  {dim_name} Dimension: {correlation:.4f}")
    else:
        print(f"  {dim_name} Dimension: Cannot calculate (insufficient variance in data for this dimension)")

correlations = []
for i in range(y_test_dim_p.shape[1]):
    if np.std(y_test_dim_p[:, i]) > 1e-6 and np.std(y_pred_dim_p[:, i]) > 1e-6:
        correlations.append(pearsonr(y_test_dim_p[:, i], y_pred_dim_p[:, i])[0])
if correlations:
    average_correlation = np.mean(correlations)
    print(f"  Average Pearson Correlation across dimensions: {average_correlation:.4f}")
else:
    print("  No correlations could be calculated for averaging.")

from sklearn.metrics import r2_score

# R-squared
print("\nR-squared scores:")
r2_valence_dim_p = r2_score(y_test_dim_p[:, 0], y_pred_dim_p[:, 0])
print("  valence =", r2_valence_dim_p)

r2_tension_dim_p = r2_score(y_test_dim_p[:, 1], y_pred_dim_p[:, 1])
print("  tension =", r2_tension_dim_p)

r2_energy_dim_p = r2_score(y_test_dim_p[:, 2], y_pred_dim_p[:, 2])
print("  energy =", r2_energy_dim_p)



Shape of predictions (y_pred): (487, 3)
First 5 actual values (y_test):
[[4.03 2.01 4.99]
 [5.85 6.88 4.44]
 [6.96 3.1  8.27]
 [1.33 1.66 4.82]
 [6.54 6.54 3.52]]
First 5 predicted values (y_pred):
[[2.8029613 2.71952   4.2462153]
 [3.8562493 3.7868454 4.5949645]
 [5.151534  4.8653774 5.7963486]
 [3.6671257 3.3543458 5.059963 ]
 [5.742116  5.341697  5.9792323]]

Mean Absolute Percentage Error (MAPE): 47.73%

Mean Absolute Error (MAE): 1.4897
Root Mean Squared Error (RMSE): 1.8298

Pearson Correlation Coefficients (per dimension):
  PPositive Dimension: 0.5284
  PRelaxed Dimension: 0.5336
  PAwake Dimension: 0.4411
  Average Pearson Correlation across dimensions: 0.5010

R-squared scores:
  valence = 0.2774593770252486
  tension = 0.2811321287935318
  energy = 0.18777406905203597


In [29]:
from sklearn.metrics import mean_absolute_error, mean_squared_error
from scipy.stats import pearsonr

y_pred_disc_i = mlp_regressor_disc_i.predict(X_test_disc_i)

print(f"\nShape of predictions (y_pred): {y_pred_disc_i.shape}")
print(f"First 5 actual values (y_test):\n{y_test_disc_i[:5]}")
print(f"First 5 predicted values (y_pred):\n{y_pred_disc_i[:5]}\n")

# Evaluation Metrics:

# Mean Absolute Percentage Error (MAPE)
absolute_percentage_error_disc_i = np.abs((y_test_disc_i - y_pred_disc_i) / y_test_disc_i) * 100
mape_disc_i = np.mean(absolute_percentage_error_disc_i)
print(f"Mean Absolute Percentage Error (MAPE): {mape_disc_i:.2f}%\n")

# Mean Absolute Error (MAE)
mae_disc_i = mean_absolute_error(y_test_disc_i, y_pred_disc_i)
print(f"Mean Absolute Error (MAE): {mae_disc_i:.4f}")

# Root Mean Squared Error (RMSE)
rmse_disc_i = np.sqrt(mean_squared_error(y_test_disc_i, y_pred_disc_i))
print(f"Root Mean Squared Error (RMSE): {rmse_disc_i:.4f}")

# Pearson Correlation Coefficient (per dimension)
print("\nPearson Correlation Coefficients (per dimension):")
for i, dim_name in enumerate(['IHappiness', 'ISadness', 'IAnger', 'ITenderness', 'IFear']):
    # Check for sufficient variance to calculate correlation
    if np.std(y_test_disc_i[:, i]) > 1e-6 and np.std(y_pred_disc_i[:, i]) > 1e-6:
        correlation, _ = pearsonr(y_test_disc_i[:, i], y_pred_disc_i[:, i])
        print(f"  {dim_name} Dimension: {correlation:.4f}")
    else:
        print(f"  {dim_name} Dimension: Cannot calculate (insufficient variance in data for this dimension)")

correlations = []
for i in range(y_test_disc_i.shape[1]):
    if np.std(y_test_disc_i[:, i]) > 1e-6 and np.std(y_pred_disc_i[:, i]) > 1e-6:
        correlations.append(pearsonr(y_test_disc_i[:, i], y_pred_disc_i[:, i])[0])
if correlations:
    average_correlation = np.mean(correlations)
    print(f"  Average Pearson Correlation across dimensions: {average_correlation:.4f}")
else:
    print("  No correlations could be calculated for averaging.")

from sklearn.metrics import r2_score

# R-squared IDisc
print("\nR-squared scores (IDisc):")
r2_happiness_disc_i = r2_score(y_test_disc_i[:, 0], y_pred_disc_i[:, 0])
print("  happiness =", r2_happiness_disc_i)
r2_sadness_disc_i = r2_score(y_test_disc_i[:, 1], y_pred_disc_i[:, 1])
print("  sadness =", r2_sadness_disc_i)
r2_anger_disc_i = r2_score(y_test_disc_i[:, 2], y_pred_disc_i[:, 2])
print("  anger =", r2_anger_disc_i)
r2_tenderness_disc_i = r2_score(y_test_disc_i[:, 3], y_pred_disc_i[:, 3])
print("  tenderness =", r2_tenderness_disc_i)
r2_fear_disc_i = r2_score(y_test_disc_i[:, 4], y_pred_disc_i[:, 4])
print("  fear =", r2_fear_disc_i)



Shape of predictions (y_pred): (487, 5)
First 5 actual values (y_test):
[[2.84 5.63 5.79 6.41 6.94]
 [1.   1.02 1.06 1.   3.95]
 [1.   1.   1.   1.   5.01]
 [1.08 1.09 3.95 1.03 3.99]
 [7.01 3.01 1.05 9.   2.02]]
First 5 predicted values (y_pred):
[[1.8166515 3.1745896 3.6641743 2.100478  4.316547 ]
 [2.067399  3.1770773 3.2965858 2.4111142 4.0920696]
 [2.5023654 2.8941157 1.9739139 2.9772735 2.8429325]
 [2.4264126 2.833293  2.8747592 2.7422407 3.8061655]
 [3.0192971 2.211254  1.7187824 3.455167  2.768572 ]]

Mean Absolute Percentage Error (MAPE): 80.30%

Mean Absolute Error (MAE): 1.6060
Root Mean Squared Error (RMSE): 2.0425

Pearson Correlation Coefficients (per dimension):
  IHappiness Dimension: 0.3564
  ISadness Dimension: 0.1584
  IAnger Dimension: 0.3333
  ITenderness Dimension: 0.2590
  IFear Dimension: 0.1834
  Average Pearson Correlation across dimensions: 0.2581

R-squared scores (IDisc):
  happiness = 0.11479119434572693
  sadness = 0.02393513297945382
  anger = 0.1069163

In [30]:
from sklearn.metrics import mean_absolute_error, mean_squared_error
from scipy.stats import pearsonr

y_pred_disc_p = mlp_regressor_disc_p.predict(X_test_disc_p)

print(f"\nShape of predictions (y_pred): {y_pred_disc_p.shape}")
print(f"First 5 actual values (y_test):\n{y_test_disc_p[:5]}")
print(f"First 5 predicted values (y_pred):\n{y_pred_disc_p[:5]}\n")

# Evaluation Metrics:

# Mean Absolute Percentage Error (MAPE)
absolute_percentage_error_disc_p = np.abs((y_test_disc_p - y_pred_disc_p) / y_test_disc_p) * 100
mape_disc_p = np.mean(absolute_percentage_error_disc_p)
print(f"Mean Absolute Percentage Error (MAPE): {mape_disc_p:.2f}%\n")

# Mean Absolute Error (MAE)
mae_disc_p = mean_absolute_error(y_test_disc_p, y_pred_disc_p)
print(f"Mean Absolute Error (MAE): {mae_disc_p:.4f}")

# Root Mean Squared Error (RMSE)
rmse_disc_p = np.sqrt(mean_squared_error(y_test_disc_p, y_pred_disc_p))
print(f"Root Mean Squared Error (RMSE): {rmse_disc_p:.4f}")

# Pearson Correlation Coefficient (per dimension)
print("\nPearson Correlation Coefficients (per dimension):")
for i, dim_name in enumerate(['Happiness', 'Sadness', 'Anger', 'Tenderness', 'Fear']):
    # Check for sufficient variance to calculate correlation
    if np.std(y_test_disc_p[:, i]) > 1e-6 and np.std(y_pred_disc_p[:, i]) > 1e-6:
        correlation, _ = pearsonr(y_test_disc_p[:, i], y_pred_disc_p[:, i])
        print(f"  {dim_name} Dimension: {correlation:.4f}")
    else:
        print(f"  {dim_name} Dimension: Cannot calculate (insufficient variance in data for this dimension)")

correlations = []
for i in range(y_test_disc_p.shape[1]):
    if np.std(y_test_disc_p[:, i]) > 1e-6 and np.std(y_pred_disc_p[:, i]) > 1e-6:
        correlations.append(pearsonr(y_test_disc_p[:, i], y_pred_disc_p[:, i])[0])
if correlations:
    average_correlation = np.mean(correlations)
    print(f"  Average Pearson Correlation across dimensions: {average_correlation:.4f}")
else:
    print("  No correlations could be calculated for averaging.")

from sklearn.metrics import r2_score

# R-squared PDisc
print("\nR-squared scores (PDisc):")
r2_happiness_disc_p = r2_score(y_test_disc_p[:, 0], y_pred_disc_p[:, 0])
print("  happiness =", r2_happiness_disc_p)
r2_sadness_disc_p = r2_score(y_test_disc_p[:, 1], y_pred_disc_p[:, 1])
print("  sadness =", r2_sadness_disc_p)
r2_anger_disc_p = r2_score(y_test_disc_p[:, 2], y_pred_disc_p[:, 2])
print("  anger =", r2_anger_disc_p)
r2_tenderness_disc_p = r2_score(y_test_disc_p[:, 3], y_pred_disc_p[:, 3])
print("  tenderness =", r2_tenderness_disc_p)
r2_fear_disc_p = r2_score(y_test_disc_p[:, 4], y_pred_disc_p[:, 4])
print("  fear =", r2_fear_disc_p)



Shape of predictions (y_pred): (487, 5)
First 5 actual values (y_test):
[[1.95 4.97 6.8  7.5  6.57]
 [1.05 1.03 4.13 1.01 4.99]
 [1.   5.02 1.   1.   5.97]
 [1.   2.96 7.88 1.94 8.01]
 [4.99 1.86 1.12 5.97 1.99]]
First 5 predicted values (y_pred):
[[1.4284275 4.1894703 5.459589  2.1959016 5.557076 ]
 [1.8164551 4.1166615 4.6339135 2.7697976 5.186108 ]
 [2.9684944 3.8716683 2.165728  3.9806304 3.522064 ]
 [1.5210605 3.2192855 4.389944  2.2771811 5.655737 ]
 [4.296932  2.5220885 1.8008424 4.642335  2.9493792]]

Mean Absolute Percentage Error (MAPE): 81.86%

Mean Absolute Error (MAE): 1.7292
Root Mean Squared Error (RMSE): 2.1224

Pearson Correlation Coefficients (per dimension):
  Happiness Dimension: 0.4021
  Sadness Dimension: 0.2025
  Anger Dimension: 0.5184
  Tenderness Dimension: 0.3020
  Fear Dimension: 0.3806
  Average Pearson Correlation across dimensions: 0.3611

R-squared scores (PDisc):
  happiness = 0.1575281426462084
  sadness = 0.03575449869858083
  anger = 0.2661319495757