In [1]:
!pip install --upgrade gspread gspread_dataframe oauth2client



In [95]:
# Authenticate with Google in Colab
from google.colab import auth
#auth.authenticate_user()

# Authorize with gspread
import gspread
from google.auth import default

creds, _ = default()
gc = gspread.authorize(creds)

# Open Google Sheet by URL
sh = gc.open_by_url("https://docs.google.com/spreadsheets/d/1DG3jWi9nWZ2hEzOsO5t6xtNgsmDJ8sdKOmGxJZKkMl8/edit#gid=1706329183")

worksheet = sh.worksheet("htsat-unfused")

In [96]:
from transformers import ClapModel, AutoProcessor
import torch
import librosa
import os
import pandas as pd
import numpy as np

In [97]:
# Load CLAP model + processor
model = ClapModel.from_pretrained("laion/clap-htsat-unfused")
processor = AutoProcessor.from_pretrained("laion/clap-htsat-unfused")

config.json: 0.00B [00:00, ?B/s]

pytorch_model.bin:   0%|          | 0.00/615M [00:00<?, ?B/s]

preprocessor_config.json:   0%|          | 0.00/541 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/384 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/614M [00:00<?, ?B/s]

vocab.json: 0.00B [00:00, ?B/s]

merges.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/280 [00:00<?, ?B/s]

# Process audio

In [5]:
import zipfile
import os

zip_files = ["Exp1.zip", "Exp2.zip", "Analysis.zip"]

extract_dir = "/content/" # You can change this if you want to extract elsewhere
os.makedirs(extract_dir, exist_ok=True)

for zip_file in zip_files:
    if os.path.exists(zip_file):
        with zipfile.ZipFile(zip_file, 'r') as zip_ref:
            zip_ref.extractall(extract_dir)
        print(f"Extracted {zip_file} to {extract_dir}")
    else:
        print(f"Error: {zip_file} not found.")

Extracted Exp1.zip to /content/
Extracted Exp2.zip to /content/
Extracted Analysis.zip to /content/


In [98]:
audio_stimuli = []
stimuli_path = "/content/Exp1/Stimuli/"

for file in sorted(os.listdir(stimuli_path)):
    if file.endswith(".wav"):
        wav_path = os.path.join(stimuli_path, file)
        #Clap has already been trained on a sample rate of 48,000 so we should use what it knows already
        audio, sample_rate = librosa.load(wav_path, sr=48000)
        audio_stimuli.append(audio)

In [99]:
inputs = processor(audios=audio_stimuli, return_tensors="pt", padding=True, sampling_rate=48000)
audio_embeddings = model.get_audio_features(**inputs)

In [100]:
print(audio_embeddings.shape)

torch.Size([59, 512])


# Process text

In [101]:
discrete_tags = ["happiness", "sadness", "anger", "tenderness", "fear"]

discrete_captions_perceived = [tag for tag in discrete_tags]
print(discrete_captions_perceived)
discrete_captions_induced = [tag for tag in discrete_tags]
print(discrete_captions_induced)

dimensional_tags = ["positive", "relaxed", "awake", "like"]

dimensional_captions_perceived = [tag for tag in dimensional_tags]
print(dimensional_captions_perceived)
dimensional_captions_induced = [tag for tag in dimensional_tags]
print(dimensional_captions_induced)


['happiness', 'sadness', 'anger', 'tenderness', 'fear']
['happiness', 'sadness', 'anger', 'tenderness', 'fear']
['positive', 'relaxed', 'awake', 'like']
['positive', 'relaxed', 'awake', 'like']


In [102]:
all_tags = discrete_captions_perceived + discrete_captions_induced + dimensional_captions_perceived + dimensional_captions_induced

tag_inputs = processor(text=dimensional_captions_induced, return_tensors="pt", padding=True)
tag_embeds = model.get_text_features(**tag_inputs)

In [103]:
tag_inputs_disc_i = processor(text=discrete_captions_induced, return_tensors="pt", padding=True)
tag_embeds_disc_i = model.get_text_features(**tag_inputs_disc_i)

In [104]:
tag_inputs_dim_p = processor(text=dimensional_captions_perceived, return_tensors="pt", padding=True)
tag_embeds_dim_p = model.get_text_features(**tag_inputs_dim_p)

In [105]:
tag_inputs_disc_p = processor(text=discrete_captions_perceived, return_tensors="pt", padding=True)
tag_embeds_disc_p = model.get_text_features(**tag_inputs_disc_p)

## Load csv files and extract related columns

In [106]:
IDim_path = '/content/Exp1/Data/IDim/'
IDim_response_dfs = []

for file in sorted(os.listdir(IDim_path)):
    if file.endswith(".csv"):
        file_path = os.path.join(IDim_path, file)
        try:
            df = pd.read_csv(file_path, sep=r'\s*,\s*', engine='python')
            # Crucial: Strip whitespace from column names
            df.columns = df.columns.str.strip()

            # Ensure required rating columns exist
            required_cols = ['positive', 'relaxed', 'awake','like']
            if all(col in df.columns for col in required_cols):
                # Select only the relevant columns and append to our list
                IDim_response_dfs.append(df[required_cols])
            else:
                print(f"Skipping file '{file_path}': Missing required columns ({required_cols}). Found columns: {df.columns.tolist()}")

        except Exception as e:
            print(f"Error reading or processing file {file_path}: {e}")


# Concatenate all individual DataFrames into one master DataFrame for human responses
if IDim_response_dfs:
    master_human_responses_df = pd.concat(IDim_response_dfs, ignore_index=True)
    print(f"Master human responses DataFrame shape: {master_human_responses_df.shape}\n")
    print(f"Master human responses (first 5 rows):\n{master_human_responses_df.head()}\n")
else:
    raise ValueError("No valid CSV files found or processed in IDim_path.")

Master human responses DataFrame shape: (3835, 4)

Master human responses (first 5 rows):
   positive  relaxed  awake  like
0      3.68     3.78   4.42  3.41
1      5.88     5.98   3.89  5.54
2      6.53     5.59   6.59  6.17
3      6.26     5.71   6.88  6.18
4      2.80     2.62   5.15  1.87



In [107]:
PDim_path = '/content/Exp1/Data/PDim/'
PDim_response_dfs = []

for file in sorted(os.listdir(PDim_path)):
    if file.endswith(".csv"):
        file_path = os.path.join(PDim_path, file)
        try:
            df = pd.read_csv(file_path, sep=r'\s*,\s*', engine='python')
            # Crucial: Strip whitespace from column names
            df.columns = df.columns.str.strip()

            # Ensure required rating columns exist
            required_cols = ['positive', 'relaxed', 'awake', 'like']
            if all(col in df.columns for col in required_cols):
                # Select only the relevant columns and append to our list
                PDim_response_dfs.append(df[required_cols])
            else:
                print(f"Skipping file '{file_path}': Missing required columns ({required_cols}). Found columns: {df.columns.tolist()}")

        except Exception as e:
            print(f"Error reading or processing file {file_path}: {e}")


# Concatenate all individual DataFrames into one master DataFrame for human responses
if PDim_response_dfs:
    master_human_responses_df_dim_p = pd.concat(PDim_response_dfs, ignore_index=True)
    print(f"Master human responses DataFrame shape: {master_human_responses_df_dim_p.shape}\n")
    print(f"Master human responses (first 5 rows):\n{master_human_responses_df_dim_p.head()}\n")
else:
    raise ValueError("No valid CSV files found or processed in IDim_path.")

Master human responses DataFrame shape: (3953, 4)

Master human responses (first 5 rows):
   positive  relaxed  awake  like
0      2.23     9.00   2.37  1.68
1      6.56     5.64   4.12  7.52
2      5.11     6.23   4.04  6.72
3      7.14     7.67   2.95  7.41
4      1.85     1.77   2.98  1.40



In [108]:
IDisc_path = '/content/Exp1/Data/IDisc/'
IDisc_response_dfs = []

for file in sorted(os.listdir(IDisc_path)):
    if file.endswith(".csv"):
        file_path = os.path.join(IDisc_path, file)
        try:
            df = pd.read_csv(file_path, sep=r'\s*,\s*', engine='python')
            # Crucial: Strip whitespace from column names
            df.columns = df.columns.str.strip()

            # Ensure required rating columns exist
            required_cols = ["happiness", "sadness", "anger", "tenderness", "fear"]
            if all(col in df.columns for col in required_cols):
                # Select only the relevant columns and append to our list
                IDisc_response_dfs.append(df[required_cols])
            else:
                print(f"Skipping file '{file_path}': Missing required columns ({required_cols}). Found columns: {df.columns.tolist()}")

        except Exception as e:
            print(f"Error reading or processing file {file_path}: {e}")


# Concatenate all individual DataFrames into one master DataFrame for human responses
if IDisc_response_dfs:
    master_human_responses_df_disc_i = pd.concat(IDisc_response_dfs, ignore_index=True)
    print(f"Master human responses DataFrame shape: {master_human_responses_df_disc_i.shape}\n")
    print(f"Master human responses (first 5 rows):\n{master_human_responses_df_disc_i.head()}\n")
else:
    raise ValueError("No valid CSV files found or processed in IDim_path.")

Master human responses DataFrame shape: (3894, 5)

Master human responses (first 5 rows):
   happiness  sadness  anger  tenderness  fear
0       1.00     6.97   5.01        9.00  7.89
1       1.00     6.00   5.80        6.78  1.00
2       1.00     2.42   5.99        6.44  1.09
3       3.92     6.13   2.25        5.96  1.22
4       1.99     7.92   6.82        5.83  1.39



In [109]:
PDisc_path = '/content/Exp1/Data/PDisc/'
PDisc_response_dfs = []

for file in sorted(os.listdir(PDisc_path)):
    if file.endswith(".csv"):
        file_path = os.path.join(PDisc_path, file)
        try:
            df = pd.read_csv(file_path, sep=r'\s*,\s*', engine='python')
            # Crucial: Strip whitespace from column names
            df.columns = df.columns.str.strip()

            # Ensure required rating columns exist
            required_cols = ["happiness", "sadness", "anger", "tenderness", "fear"]
            if all(col in df.columns for col in required_cols):
                # Select only the relevant columns and append to our list
                PDisc_response_dfs.append(df[required_cols])
            else:
                print(f"Skipping file '{file_path}': Missing required columns ({required_cols}). Found columns: {df.columns.tolist()}")

        except Exception as e:
            print(f"Error reading or processing file {file_path}: {e}")


# Concatenate all individual DataFrames into one master DataFrame for human responses
if PDisc_response_dfs:
    master_human_responses_df_disc_p = pd.concat(PDisc_response_dfs, ignore_index=True)
    print(f"Master human responses DataFrame shape: {master_human_responses_df_disc_p.shape}\n")
    print(f"Master human responses (first 5 rows):\n{master_human_responses_df_disc_p.head()}\n")
else:
    raise ValueError("No valid CSV files found or processed in IDim_path.")

Master human responses DataFrame shape: (3835, 5)

Master human responses (first 5 rows):
   happiness  sadness  anger  tenderness  fear
0       1.32     1.30   1.27        1.25  3.18
1       1.00     1.44   1.76        1.00  1.24
2       1.00     5.87   1.23        1.28  1.56
3       1.92     3.36   4.82        1.20  2.32
4       1.23     2.31   7.10        1.61  2.86



# Prepare features X and targets y

In [110]:
from sklearn.model_selection import train_test_split
"""
For each participant:
1. Takes all 59 audio embeddings
2. Pairs them with that participant's 59 ratings
3. Stacks these pairs for all participants.
"""

# Checks the number of participants is an exact multiple of the number of audio files (ensuring every participant rated every stimuli)
num_participants = len(IDim_response_dfs)
if master_human_responses_df.shape[0] % len(audio_stimuli) != 0:
    print("Warning: Total responses is not a perfect multiple of unique audio files. This might indicate inconsistent data or that not all participants rated all items, which could break implicit ordering.")

# Audio embeddings generated by CLAP
# Creates one copy of all embeddings per participant
X_list = []
for _ in range(num_participants):
    X_list.extend(audio_embeddings.detach()) # Add a full set of embeddings for each participant

# Convert to NumPy array
X = np.array(X_list)

# Extract y from the concatenated DataFrame
y = master_human_responses_df[['positive', 'relaxed', 'awake', 'like']].values

print(f"Shape of X (features) after implicit alignment: {X.shape}")
print(f"Shape of y (labels) after implicit alignment: {y.shape}\n")

# Sanity check: X and y must have the same number of rows
# Critical check that we have exactly one set of features per label
if X.shape[0] != y.shape[0]:
    raise ValueError("Number of rows in X and y do not match after implicit alignment. This indicates an issue with the implicit ordering assumption or data loading.")

# --- Split Data into Training and Testing Sets ---
# 80% for training, 20% for testing
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

"""
Example:
Result (for 2 participants × 59 files = 118 total samples):
1. X_train: 94 audio embeddings (80% of 118)
2. y_train: 94 corresponding rating vectors
3. X_test: 24 audio embeddings (20%)
4. y_test: 24 rating vectors
'''

# X_train = combination of audio embeddings stacked on top of eachother
# y_train = combination of participants ratings for an audio file stacked ontop of eachother
# x_test = audio embedding for one audio file only
# y_test = participant rating for one audio file only

# Goal: check if x_test and y_test match

print(f"Training set size (X_train, y_train): {X_train.shape}, {y_train.shape}")
print(f"Testing set size (X_test, y_test): {X_test.shape}, {y_test.shape}\n")

In this example:
y_train = [
    [5.0, 4.0, 6.0],  # P1-A
    [7.0, 6.0, 4.0],   # P1-C
    [2.0, 1.0, 6.0],   # P2-B
    [4.0, 3.0, 5.0],   # P2-A
    [6.0, 7.0, 3.0]    # P2-C
]

X_test = [
    [0.5, 0.6, 0.7, 0.8]  # B.wav (P1)
]  # 1 sample

y_test = [
    [3.0, 2.0, 5.0]  # P1-B
]
- Training:
  X_train[0] = [0.1, 0.2, 0.3, 0.4] → Predict y_train[0] = [5.0, 4.0, 6.0]
"""

Shape of X (features) after implicit alignment: (3835, 512)
Shape of y (labels) after implicit alignment: (3835, 4)



'\nExample:\nResult (for 2 participants × 59 files = 118 total samples):\n1. X_train: 94 audio embeddings (80% of 118)\n2. y_train: 94 corresponding rating vectors\n3. X_test: 24 audio embeddings (20%)\n4. y_test: 24 rating vectors\n\'\'\'\n\n# X_train = combination of audio embeddings stacked on top of eachother\n# y_train = combination of participants ratings for an audio file stacked ontop of eachother\n# x_test = audio embedding for one audio file only\n# y_test = participant rating for one audio file only\n\n# Goal: check if x_test and y_test match\n\nprint(f"Training set size (X_train, y_train): {X_train.shape}, {y_train.shape}")\nprint(f"Testing set size (X_test, y_test): {X_test.shape}, {y_test.shape}\n")\n\nIn this example:\ny_train = [\n    [5.0, 4.0, 6.0],  # P1-A\n    [7.0, 6.0, 4.0],   # P1-C\n    [2.0, 1.0, 6.0],   # P2-B\n    [4.0, 3.0, 5.0],   # P2-A\n    [6.0, 7.0, 3.0]    # P2-C\n]\n\nX_test = [\n    [0.5, 0.6, 0.7, 0.8]  # B.wav (P1)\n]  # 1 sample\n\ny_test = [\n   

In [111]:
from sklearn.model_selection import train_test_split

num_participants_dim_p = len(PDim_response_dfs)
if master_human_responses_df_dim_p.shape[0] % len(audio_stimuli) != 0:
    print("Warning: Total responses is not a perfect multiple of unique audio files. This might indicate inconsistent data or that not all participants rated all items, which could break implicit ordering.")

X_list = []
for _ in range(num_participants_dim_p):
    X_list.extend(audio_embeddings.detach()) # Add a full set of embeddings for each participant

# Convert to NumPy array
X_dim_p = np.array(X_list)

# Extract y from the concatenated DataFrame
y_dim_p = master_human_responses_df_dim_p[['positive', 'relaxed', 'awake', 'like']].values

print(f"Shape of X (features) after implicit alignment: {X_dim_p.shape}")
print(f"Shape of y (labels) after implicit alignment: {y_dim_p.shape}\n")

# Sanity check: X and y must have the same number of rows
if X_dim_p.shape[0] != y_dim_p.shape[0]:
    raise ValueError("Number of rows in X and y do not match after implicit alignment. This indicates an issue with the implicit ordering assumption or data loading.")

# --- Split Data into Training and Testing Sets ---
X_train_dim_p, X_test_dim_p, y_train_dim_p, y_test_dim_p = train_test_split(
    X_dim_p, y_dim_p, test_size=0.2, random_state=42
)

print(f"Training set size (X_train, y_train): {X_dim_p.shape}, {y_train_dim_p.shape}")
print(f"Testing set size (X_test, y_test): {X_test_dim_p.shape}, {y_test_dim_p.shape}\n")

Shape of X (features) after implicit alignment: (3953, 512)
Shape of y (labels) after implicit alignment: (3953, 4)

Training set size (X_train, y_train): (3953, 512), (3162, 4)
Testing set size (X_test, y_test): (791, 512), (791, 4)



In [112]:
from sklearn.model_selection import train_test_split

num_participants_disc_i = len(IDisc_response_dfs)
if master_human_responses_df_disc_i.shape[0] % len(audio_stimuli) != 0:
    print("Warning: Total responses is not a perfect multiple of unique audio files. This might indicate inconsistent data or that not all participants rated all items, which could break implicit ordering.")

X_list = []
for _ in range(num_participants_disc_i):
    X_list.extend(audio_embeddings.detach()) # Add a full set of embeddings for each participant

# Convert to NumPy array
X_disc_i = np.array(X_list)

# Extract y from the concatenated DataFrame
y_disc_i = master_human_responses_df_disc_i[["happiness", "sadness", "anger", "tenderness", "fear"]].values

print(f"Shape of X (features) after implicit alignment: {X_disc_i.shape}")
print(f"Shape of y (labels) after implicit alignment: {y_disc_i.shape}\n")

# Sanity check: X and y must have the same number of rows
if X_disc_i.shape[0] != y_disc_i.shape[0]:
    raise ValueError("Number of rows in X and y do not match after implicit alignment. This indicates an issue with the implicit ordering assumption or data loading.")

# --- Split Data into Training and Testing Sets ---
X_train_disc_i, X_test_disc_i, y_train_disc_i, y_test_disc_i = train_test_split(
    X_disc_i, y_disc_i, test_size=0.2, random_state=42
)

print(f"Training set size (X_train, y_train): {X_disc_i.shape}, {y_train_disc_i.shape}")
print(f"Testing set size (X_test, y_test): {X_test_disc_i.shape}, {y_test_disc_i.shape}\n")

Shape of X (features) after implicit alignment: (3894, 512)
Shape of y (labels) after implicit alignment: (3894, 5)

Training set size (X_train, y_train): (3894, 512), (3115, 5)
Testing set size (X_test, y_test): (779, 512), (779, 5)



In [113]:
from sklearn.model_selection import train_test_split

num_participants_disc_p = len(PDisc_response_dfs)
if master_human_responses_df_disc_p.shape[0] % len(audio_stimuli) != 0:
    print("Warning: Total responses is not a perfect multiple of unique audio files. This might indicate inconsistent data or that not all participants rated all items, which could break implicit ordering.")

X_list = []
for _ in range(num_participants_disc_p):
    X_list.extend(audio_embeddings.detach()) # Add a full set of embeddings for each participant

# Convert to NumPy array
X_disc_p = np.array(X_list)

# Extract y from the concatenated DataFrame
y_disc_p = master_human_responses_df_disc_p[["happiness", "sadness", "anger", "tenderness", "fear"]].values

print(f"Shape of X (features) after implicit alignment: {X_disc_p.shape}")
print(f"Shape of y (labels) after implicit alignment: {y_disc_p.shape}\n")

# Sanity check: X and y must have the same number of rows
if X_disc_p.shape[0] != y_disc_p.shape[0]:
    raise ValueError("Number of rows in X and y do not match after implicit alignment. This indicates an issue with the implicit ordering assumption or data loading.")

# --- Split Data into Training and Testing Sets ---
X_train_disc_p, X_test_disc_p, y_train_disc_p, y_test_disc_p = train_test_split(
    X_disc_p, y_disc_p, test_size=0.2, random_state=42
)

print(f"Training set size (X_train, y_train): {X_disc_p.shape}, {y_train_disc_p.shape}")
print(f"Testing set size (X_test, y_test): {X_test_disc_p.shape}, {y_test_disc_p.shape}\n")

Shape of X (features) after implicit alignment: (3835, 512)
Shape of y (labels) after implicit alignment: (3835, 5)

Training set size (X_train, y_train): (3835, 512), (3068, 5)
Testing set size (X_test, y_test): (767, 512), (767, 5)



# Train regression head (=MLP, a few projection layers)

In [114]:
from sklearn.neural_network import MLPRegressor

mlp_regressor = MLPRegressor(
    hidden_layer_sizes=(100, 50),
    activation='relu',
    solver='adam',
    max_iter=1000,
    random_state=42,
    verbose=True,
    early_stopping=True,
    n_iter_no_change=50,
    tol=1e-4
)

print("Starting MLP Regressor training...")
mlp_regressor.fit(X_train, y_train)
print("\nMLP Regressor training complete.")

Starting MLP Regressor training...
Iteration 1, loss = 13.31063317
Validation score: -5.276756
Iteration 2, loss = 9.50447936
Validation score: -2.533854
Iteration 3, loss = 4.64468634
Validation score: -0.499038
Iteration 4, loss = 2.42140757
Validation score: 0.014932
Iteration 5, loss = 1.91904397
Validation score: 0.112737
Iteration 6, loss = 1.80165730
Validation score: 0.153599
Iteration 7, loss = 1.74180513
Validation score: 0.181373
Iteration 8, loss = 1.69694443
Validation score: 0.198502
Iteration 9, loss = 1.66553251
Validation score: 0.212963
Iteration 10, loss = 1.63934738
Validation score: 0.223290
Iteration 11, loss = 1.61809549
Validation score: 0.233029
Iteration 12, loss = 1.60105698
Validation score: 0.238656
Iteration 13, loss = 1.58780991
Validation score: 0.244438
Iteration 14, loss = 1.57615172
Validation score: 0.248751
Iteration 15, loss = 1.56580804
Validation score: 0.250390
Iteration 16, loss = 1.55807191
Validation score: 0.254624
Iteration 17, loss = 1.552

In [115]:
from sklearn.neural_network import MLPRegressor

mlp_regressor_dim_p = MLPRegressor(
    hidden_layer_sizes=(100, 50),
    activation='relu',
    solver='adam',
    max_iter=1000,
    random_state=42,
    verbose=True,
    early_stopping=True,
    n_iter_no_change=50,
    tol=1e-4
)

print("Starting MLP Regressor training...")
mlp_regressor_dim_p.fit(X_train_dim_p, y_train_dim_p)
print("\nMLP Regressor training complete.")

Starting MLP Regressor training...
Iteration 1, loss = 13.16822443
Validation score: -4.030982
Iteration 2, loss = 9.08194970
Validation score: -1.658484
Iteration 3, loss = 4.20499245
Validation score: -0.221894
Iteration 4, loss = 2.39141605
Validation score: 0.102459
Iteration 5, loss = 1.97599856
Validation score: 0.175871
Iteration 6, loss = 1.86374944
Validation score: 0.208090
Iteration 7, loss = 1.81065370
Validation score: 0.223908
Iteration 8, loss = 1.76901794
Validation score: 0.236534
Iteration 9, loss = 1.73829367
Validation score: 0.245435
Iteration 10, loss = 1.71555496
Validation score: 0.251179
Iteration 11, loss = 1.69658502
Validation score: 0.256528
Iteration 12, loss = 1.68489370
Validation score: 0.260585
Iteration 13, loss = 1.67630221
Validation score: 0.263238
Iteration 14, loss = 1.66496718
Validation score: 0.266137
Iteration 15, loss = 1.65721305
Validation score: 0.268962
Iteration 16, loss = 1.65273472
Validation score: 0.272534
Iteration 17, loss = 1.648

In [116]:
from sklearn.neural_network import MLPRegressor

mlp_regressor_disc_i = MLPRegressor(
    hidden_layer_sizes=(100, 50),
    activation='relu',
    solver='adam',
    max_iter=1000,
    random_state=42,
    verbose=True,
    early_stopping=True,
    n_iter_no_change=50,
    tol=1e-4
)

print("Starting MLP Regressor training...")
mlp_regressor_disc_i.fit(X_train_disc_i, y_train_disc_i)
print("\nMLP Regressor training complete.")

Starting MLP Regressor training...
Iteration 1, loss = 4.81931376
Validation score: -0.663295
Iteration 2, loss = 3.20863654
Validation score: -0.090467
Iteration 3, loss = 2.39530736
Validation score: -0.006781
Iteration 4, loss = 2.27529629
Validation score: 0.028188
Iteration 5, loss = 2.20639567
Validation score: 0.041990
Iteration 6, loss = 2.16044005
Validation score: 0.057209
Iteration 7, loss = 2.12641917
Validation score: 0.064880
Iteration 8, loss = 2.11196261
Validation score: 0.070191
Iteration 9, loss = 2.10091115
Validation score: 0.075291
Iteration 10, loss = 2.08293156
Validation score: 0.079461
Iteration 11, loss = 2.07412656
Validation score: 0.080414
Iteration 12, loss = 2.06453683
Validation score: 0.083076
Iteration 13, loss = 2.05353259
Validation score: 0.084033
Iteration 14, loss = 2.04938903
Validation score: 0.084405
Iteration 15, loss = 2.04049992
Validation score: 0.085302
Iteration 16, loss = 2.03701609
Validation score: 0.084146
Iteration 17, loss = 2.0347

In [117]:
from sklearn.neural_network import MLPRegressor

mlp_regressor_disc_p = MLPRegressor(
    hidden_layer_sizes=(100, 50),
    activation='relu',
    solver='adam',
    max_iter=1000,
    random_state=42,
    verbose=True,
    early_stopping=True,
    n_iter_no_change=50,
    tol=1e-4
)

print("Starting MLP Regressor training...")
mlp_regressor_disc_p.fit(X_train_disc_p, y_train_disc_p)
print("\nMLP Regressor training complete.")

Starting MLP Regressor training...
Iteration 1, loss = 6.91828901
Validation score: -1.085125
Iteration 2, loss = 4.87238555
Validation score: -0.294462
Iteration 3, loss = 3.15787867
Validation score: -0.008771
Iteration 4, loss = 2.74362637
Validation score: 0.055817
Iteration 5, loss = 2.61120647
Validation score: 0.097934
Iteration 6, loss = 2.52269483
Validation score: 0.127194
Iteration 7, loss = 2.46112987
Validation score: 0.148778
Iteration 8, loss = 2.42320569
Validation score: 0.163036
Iteration 9, loss = 2.39032095
Validation score: 0.171804
Iteration 10, loss = 2.36399982
Validation score: 0.181710
Iteration 11, loss = 2.34465841
Validation score: 0.185066
Iteration 12, loss = 2.32646997
Validation score: 0.189875
Iteration 13, loss = 2.31281265
Validation score: 0.191257
Iteration 14, loss = 2.30193293
Validation score: 0.193851
Iteration 15, loss = 2.29448443
Validation score: 0.194295
Iteration 16, loss = 2.29127324
Validation score: 0.194041
Iteration 17, loss = 2.2900

# Evaluate

In [118]:
from sklearn.metrics import mean_absolute_error, mean_squared_error
from scipy.stats import pearsonr

y_pred = mlp_regressor.predict(X_test)

print(f"\nShape of predictions (y_pred): {y_pred.shape}")
print(f"First 5 actual values (y_test):\n{y_test[:5]}")
print(f"First 5 predicted values (y_pred):\n{y_pred[:5]}\n")

# Evaluation Metrics:

# Mean Absolute Percentage Error (MAPE)
absolute_percentage_error = np.abs((y_test - y_pred) / y_test) * 100
mape = np.mean(absolute_percentage_error)
worksheet.update('E5', [[f"{mape:.2f}%"]])
print(f"Mean Absolute Percentage Error (MAPE): {mape:.2f}%\n")

# Mean Absolute Error (MAE)
mae = mean_absolute_error(y_test, y_pred)
worksheet.update('E2', [[f"{mae:.4f}"]])
print(f"Mean Absolute Error (MAE): {mae:.4f}")

# Root Mean Squared Error (RMSE)
rmse = np.sqrt(mean_squared_error(y_test, y_pred))
worksheet.update('E6', [[f"{rmse:.4f}"]])
print(f"Root Mean Squared Error (RMSE): {rmse:.4f}")

# Pearson Correlation Coefficient (per dimension)
print("\nPearson Correlation Coefficients (per dimension):")
for i, dim_name in enumerate(['Positive', 'Relaxed', 'Awake', 'Like']):
    # Check for sufficient variance to calculate correlation
    if np.std(y_test[:, i]) > 1e-6 and np.std(y_pred[:, i]) > 1e-6:
        correlation, _ = pearsonr(y_test[:, i], y_pred[:, i])
        print(f"  {dim_name} Dimension: {correlation:.4f}")
    else:
        print(f"  {dim_name} Dimension: Cannot calculate (insufficient variance in data for this dimension)")

correlations = []
for i in range(y_test.shape[1]):
    if np.std(y_test[:, i]) > 1e-6 and np.std(y_pred[:, i]) > 1e-6:
        correlations.append(pearsonr(y_test[:, i], y_pred[:, i])[0])
if correlations:
    average_correlation = np.mean(correlations)
    print(f"  Average Pearson Correlation across dimensions: {average_correlation:.4f}")
    worksheet.update(
    range_name='E4',
    values=[[f"{average_correlation:.4f}"]]
    )
else:
    print("  No correlations could be calculated for averaging.")

correlation_string = ', '.join([f"{c:.4f}" for c in correlations])
# Update cell E3 with the full string
worksheet.update('E3', [[correlation_string]])

from sklearn.metrics import r2_score

# R-squared
print("\nR-squared scores:")
r2_valence = r2_score(y_test[:, 0], y_pred[:, 0])
print("  valence =", r2_valence)

r2_tension = r2_score(y_test[:, 1], y_pred[:, 1])
print("  tension =", r2_tension)

r2_energy = r2_score(y_test[:, 2], y_pred[:, 2])
print("  energy =", r2_energy)

r2_like = r2_score(y_test[:, 3], y_pred[:, 3])
print("  like =", r2_like)

worksheet.update(
    range_name='E7',
    values=[[f"{r2_valence:.4f}, {r2_tension:.4f}, {r2_energy:.4f}, {r2_like:.4f}"]]
)



Shape of predictions (y_pred): (767, 4)
First 5 actual values (y_test):
[[3.96 8.32 7.05 8.  ]
 [7.11 7.01 8.11 7.01]
 [4.68 5.04 5.88 5.1 ]
 [2.08 4.04 7.6  1.26]
 [6.34 6.43 5.32 4.8 ]]
First 5 predicted values (y_pred):
[[5.3032618 5.6494355 5.039844  5.319792 ]
 [4.4887786 4.9056044 4.847345  4.8941174]
 [3.798002  3.290676  6.40048   3.418524 ]
 [2.9240682 2.1566882 7.0895853 2.395229 ]
 [5.11155   4.874712  6.0350957 4.7625284]]



  worksheet.update('E5', [[f"{mape:.2f}%"]])


Mean Absolute Percentage Error (MAPE): 42.22%

Mean Absolute Error (MAE): 1.3852


  worksheet.update('E2', [[f"{mae:.4f}"]])
  worksheet.update('E6', [[f"{rmse:.4f}"]])


Root Mean Squared Error (RMSE): 1.7378

Pearson Correlation Coefficients (per dimension):
  Positive Dimension: 0.5599
  Relaxed Dimension: 0.6094
  Awake Dimension: 0.2598
  Like Dimension: 0.5706
  Average Pearson Correlation across dimensions: 0.5000


  worksheet.update('E3', [[correlation_string]])



R-squared scores:
  valence = 0.31265230774326647
  tension = 0.3702616538666077
  energy = 0.06310839897144582
  like = 0.32487007011651037


{'spreadsheetId': '1DG3jWi9nWZ2hEzOsO5t6xtNgsmDJ8sdKOmGxJZKkMl8',
 'updatedRange': "'htsat-unfused'!E7",
 'updatedRows': 1,
 'updatedColumns': 1,
 'updatedCells': 1}

In [119]:
from sklearn.metrics import mean_absolute_error, mean_squared_error
from scipy.stats import pearsonr

y_pred_dim_p = mlp_regressor_dim_p.predict(X_test_dim_p)

print(f"\nShape of predictions (y_pred): {y_pred_dim_p.shape}")
print(f"First 5 actual values (y_test):\n{y_test_dim_p[:5]}")
print(f"First 5 predicted values (y_pred):\n{y_pred_dim_p[:5]}\n")

# Evaluation Metrics:

# Mean Absolute Percentage Error (MAPE)
absolute_percentage_error = np.abs((y_test_dim_p - y_pred_dim_p) / y_test_dim_p) * 100
mape_dim_p = np.mean(absolute_percentage_error)
worksheet.update('E12', [[f"{mape_dim_p:.2f}%"]])
print(f"Mean Absolute Percentage Error (MAPE): {mape_dim_p:.2f}%\n")

# Mean Absolute Error (MAE)
mae_dim_p = mean_absolute_error(y_test_dim_p, y_pred_dim_p)
worksheet.update('E9', [[f"{mae_dim_p:.4f}"]])
print(f"Mean Absolute Error (MAE): {mae_dim_p:.4f}")

# Root Mean Squared Error (RMSE)
rmse_dim_p = np.sqrt(mean_squared_error(y_test_dim_p, y_pred_dim_p))
worksheet.update('E13', [[f"{rmse_dim_p:.4f}"]])
print(f"Root Mean Squared Error (RMSE): {rmse_dim_p:.4f}")

# Pearson Correlation Coefficient (per dimension)
print("\nPearson Correlation Coefficients (per dimension):")
for i, dim_name in enumerate(['Positive', 'Relaxed', 'Awake', 'Like']):
    # Check for sufficient variance to calculate correlation
    if np.std(y_test_dim_p[:, i]) > 1e-6 and np.std(y_pred_dim_p[:, i]) > 1e-6:
        correlation, _ = pearsonr(y_test_dim_p[:, i], y_pred_dim_p[:, i])
        print(f"  {dim_name} Dimension: {correlation:.4f}")
    else:
        print(f"  {dim_name} Dimension: Cannot calculate (insufficient variance in data for this dimension)")

correlations = []
for i in range(y_test_dim_p.shape[1]):
    if np.std(y_test_dim_p[:, i]) > 1e-6 and np.std(y_pred_dim_p[:, i]) > 1e-6:
        correlations.append(pearsonr(y_test_dim_p[:, i], y_pred_dim_p[:, i])[0])
if correlations:
    average_correlation = np.mean(correlations)
    worksheet.update(
    range_name='E11',
    values=[[f"{average_correlation:.4f}"]]
    )
    print(f"  Average Pearson Correlation across dimensions: {average_correlation:.4f}")
else:
    print("  No correlations could be calculated for averaging.")

correlation_string_dim_p = ', '.join([f"{c:.4f}" for c in correlations])
# Update cell E3 with the full string
worksheet.update('E10', [[correlation_string_dim_p]])

from sklearn.metrics import r2_score

# R-squared
print("\nR-squared scores:")
r2_valence_dim_p = r2_score(y_test_dim_p[:, 0], y_pred_dim_p[:, 0])
print("  valence =", r2_valence_dim_p)

r2_tension_dim_p = r2_score(y_test_dim_p[:, 1], y_pred_dim_p[:, 1])
print("  tension =", r2_tension_dim_p)

r2_energy_dim_p = r2_score(y_test_dim_p[:, 2], y_pred_dim_p[:, 2])
print("  energy =", r2_energy_dim_p)

r2_like_dim_p = r2_score(y_test_dim_p[:, 3], y_pred_dim_p[:, 3])
print("  like =", r2_like_dim_p)

worksheet.update(
    range_name='E14',
    values=[[f"{r2_valence_dim_p:.4f}, {r2_tension_dim_p:.4f}, {r2_energy_dim_p:.4f}, {r2_like_dim_p:.4f}"]]
)


Shape of predictions (y_pred): (791, 4)
First 5 actual values (y_test):
[[3.16 4.31 2.02 5.18]
 [8.86 8.86 8.78 8.53]
 [8.14 5.35 3.35 7.12]
 [5.02 5.01 5.99 5.98]
 [5.04 4.31 6.31 5.45]]
First 5 predicted values (y_pred):
[[4.688132  3.7876284 6.550198  3.8233213]
 [6.7588215 5.9544535 6.2454295 6.5196877]
 [4.533147  3.3442233 6.956966  3.627676 ]
 [5.015488  4.7724524 5.993473  4.9061947]
 [3.3465247 3.6918082 3.5279071 3.935307 ]]

Mean Absolute Percentage Error (MAPE): 46.64%



  worksheet.update('E12', [[f"{mape_dim_p:.2f}%"]])
  worksheet.update('E9', [[f"{mae_dim_p:.4f}"]])


Mean Absolute Error (MAE): 1.4678
Root Mean Squared Error (RMSE): 1.8376

Pearson Correlation Coefficients (per dimension):
  Positive Dimension: 0.5678
  Relaxed Dimension: 0.5231
  Awake Dimension: 0.4450
  Like Dimension: 0.5764


  worksheet.update('E13', [[f"{rmse_dim_p:.4f}"]])


  Average Pearson Correlation across dimensions: 0.5281

R-squared scores:
  valence = 0.3218567852526789
  tension = 0.26821870545759763
  energy = 0.193145514413954
  like = 0.32939065104294774


  worksheet.update('E10', [[correlation_string_dim_p]])


{'spreadsheetId': '1DG3jWi9nWZ2hEzOsO5t6xtNgsmDJ8sdKOmGxJZKkMl8',
 'updatedRange': "'htsat-unfused'!E14",
 'updatedRows': 1,
 'updatedColumns': 1,
 'updatedCells': 1}

In [120]:
from sklearn.metrics import mean_absolute_error, mean_squared_error
from scipy.stats import pearsonr

y_pred_disc_i = mlp_regressor_disc_i.predict(X_test_disc_i)

print(f"\nShape of predictions (y_pred): {y_pred_disc_i.shape}")
print(f"First 5 actual values (y_test):\n{y_test_disc_i[:5]}")
print(f"First 5 predicted values (y_pred):\n{y_pred_disc_i[:5]}\n")

# Evaluation Metrics:

# Mean Absolute Percentage Error (MAPE)
absolute_percentage_error_disc_i = np.abs((y_test_disc_i - y_pred_disc_i) / y_test_disc_i) * 100
mape_disc_i = np.mean(absolute_percentage_error_disc_i)
worksheet.update('E19', [[f"{mape_disc_i:.2f}%"]])
print(f"Mean Absolute Percentage Error (MAPE): {mape_disc_i:.2f}%\n")

# Mean Absolute Error (MAE)
mae_disc_i = mean_absolute_error(y_test_disc_i, y_pred_disc_i)
worksheet.update('E16', [[f"{mae_disc_i:.4f}"]])
print(f"Mean Absolute Error (MAE): {mae_disc_i:.4f}")

# Root Mean Squared Error (RMSE)
rmse_disc_i = np.sqrt(mean_squared_error(y_test_disc_i, y_pred_disc_i))
worksheet.update('E20', [[f"{rmse_disc_i:.4f}"]])
print(f"Root Mean Squared Error (RMSE): {rmse_disc_i:.4f}")

# Pearson Correlation Coefficient (per dimension)
print("\nPearson Correlation Coefficients (per dimension):")
for i, dim_name in enumerate(['Happiness', 'Sadness', 'Anger', 'Tenderness', 'Fear']):
    # Check for sufficient variance to calculate correlation
    if np.std(y_test_disc_i[:, i]) > 1e-6 and np.std(y_pred_disc_i[:, i]) > 1e-6:
        correlation, _ = pearsonr(y_test_disc_i[:, i], y_pred_disc_i[:, i])
        print(f"  {dim_name} Dimension: {correlation:.4f}")
    else:
        print(f"  {dim_name} Dimension: Cannot calculate (insufficient variance in data for this dimension)")

correlations = []
for i in range(y_test_disc_i.shape[1]):
    if np.std(y_test_disc_i[:, i]) > 1e-6 and np.std(y_pred_disc_i[:, i]) > 1e-6:
        correlations.append(pearsonr(y_test_disc_i[:, i], y_pred_disc_i[:, i])[0])
if correlations:
    average_correlation = np.mean(correlations)
    worksheet.update(
        range_name='E18',
        values=[[f"{average_correlation:.4f}"]]
      )
    print(f"  Average Pearson Correlation across dimensions: {average_correlation:.4f}")
else:
    print("  No correlations could be calculated for averaging.")

correlation_string_disc_i = ', '.join([f"{c:.4f}" for c in correlations])
# Update cell E3 with the full string
worksheet.update('E17', [[correlation_string_disc_i]])

from sklearn.metrics import r2_score

# R-squared IDisc
print("\nR-squared scores (IDisc):")
r2_happiness_disc_i = r2_score(y_test_disc_i[:, 0], y_pred_disc_i[:, 0])
print("  happiness =", r2_happiness_disc_i)
r2_sadness_disc_i = r2_score(y_test_disc_i[:, 1], y_pred_disc_i[:, 1])
print("  sadness =", r2_sadness_disc_i)
r2_anger_disc_i = r2_score(y_test_disc_i[:, 2], y_pred_disc_i[:, 2])
print("  anger =", r2_anger_disc_i)
r2_tenderness_disc_i = r2_score(y_test_disc_i[:, 3], y_pred_disc_i[:, 3])
print("  tenderness =", r2_tenderness_disc_i)
r2_fear_disc_i = r2_score(y_test_disc_i[:, 4], y_pred_disc_i[:, 4])
print("  fear =", r2_fear_disc_i)

worksheet.update(
    range_name='E21',
    values=[[f"{r2_happiness_disc_i:.4f}, {r2_sadness_disc_i:.4f}, {r2_anger_disc_i:.4f}, {r2_tenderness_disc_i:.4f}, {r2_fear_disc_i:.4f}"]]
)


Shape of predictions (y_pred): (779, 5)
First 5 actual values (y_test):
[[1.   1.   1.   1.   1.  ]
 [4.23 6.82 5.75 5.13 5.69]
 [1.28 1.24 1.15 1.18 1.23]
 [1.   1.   2.4  1.   1.54]
 [1.36 4.73 1.07 3.37 1.94]]
First 5 predicted values (y_pred):
[[1.8899946 3.2895775 3.0191157 2.535004  2.6511407]
 [1.8770005 2.957187  4.0346847 2.291461  3.0450397]
 [2.8302734 2.1073637 1.4187726 2.465417  2.6810124]
 [1.797071  2.999306  4.9705105 2.2446218 3.549974 ]
 [2.498596  3.0057156 2.2795243 2.9316654 2.039636 ]]

Mean Absolute Percentage Error (MAPE): 84.41%



  worksheet.update('E19', [[f"{mape_disc_i:.2f}%"]])
  worksheet.update('E16', [[f"{mae_disc_i:.4f}"]])


Mean Absolute Error (MAE): 1.6081
Root Mean Squared Error (RMSE): 2.0361

Pearson Correlation Coefficients (per dimension):
  Happiness Dimension: 0.3998
  Sadness Dimension: 0.2101
  Anger Dimension: 0.4797
  Tenderness Dimension: 0.2714
  Fear Dimension: 0.3112


  worksheet.update('E20', [[f"{rmse_disc_i:.4f}"]])


  Average Pearson Correlation across dimensions: 0.3345

R-squared scores (IDisc):
  happiness = 0.15979907409434768
  sadness = 0.04216904075514838
  anger = 0.2267517744816837
  tenderness = 0.0720347921747786
  fear = 0.09626577986871132


  worksheet.update('E17', [[correlation_string_disc_i]])


{'spreadsheetId': '1DG3jWi9nWZ2hEzOsO5t6xtNgsmDJ8sdKOmGxJZKkMl8',
 'updatedRange': "'htsat-unfused'!E21",
 'updatedRows': 1,
 'updatedColumns': 1,
 'updatedCells': 1}

In [121]:
from sklearn.metrics import mean_absolute_error, mean_squared_error
from scipy.stats import pearsonr

y_pred_disc_p = mlp_regressor_disc_p.predict(X_test_disc_p)

print(f"\nShape of predictions (y_pred): {y_pred_disc_p.shape}")
print(f"First 5 actual values (y_test):\n{y_test_disc_p[:5]}")
print(f"First 5 predicted values (y_pred):\n{y_pred_disc_p[:5]}\n")

# Evaluation Metrics:

# Mean Absolute Percentage Error (MAPE)
absolute_percentage_error_disc_p = np.abs((y_test_disc_p - y_pred_disc_p) / y_test_disc_p) * 100
mape_disc_p = np.mean(absolute_percentage_error_disc_p)
worksheet.update('E26', [[f"{mape_disc_p:.2f}%"]])
print(f"Mean Absolute Percentage Error (MAPE): {mape_disc_p:.2f}%\n")

# Mean Absolute Error (MAE)
mae_disc_p = mean_absolute_error(y_test_disc_p, y_pred_disc_p)
worksheet.update('E23', [[f"{mae_disc_p:.4f}"]])
print(f"Mean Absolute Error (MAE): {mae_disc_p:.4f}")

# Root Mean Squared Error (RMSE)
rmse_disc_p = np.sqrt(mean_squared_error(y_test_disc_p, y_pred_disc_p))
worksheet.update('E27', [[f"{rmse_disc_p:.4f}"]])
print(f"Root Mean Squared Error (RMSE): {rmse_disc_p:.4f}")

# Pearson Correlation Coefficient (per dimension)
print("\nPearson Correlation Coefficients (per dimension):")
for i, dim_name in enumerate(['Happiness', 'Sadness', 'Anger', 'Tenderness', 'Fear']):
    # Check for sufficient variance to calculate correlation
    if np.std(y_test_disc_p[:, i]) > 1e-6 and np.std(y_pred_disc_p[:, i]) > 1e-6:
        correlation, _ = pearsonr(y_test_disc_p[:, i], y_pred_disc_p[:, i])
        print(f"  {dim_name} Dimension: {correlation:.4f}")
    else:
        print(f"  {dim_name} Dimension: Cannot calculate (insufficient variance in data for this dimension)")

correlations = []
for i in range(y_test_disc_p.shape[1]):
    if np.std(y_test_disc_p[:, i]) > 1e-6 and np.std(y_pred_disc_p[:, i]) > 1e-6:
        correlations.append(pearsonr(y_test_disc_p[:, i], y_pred_disc_p[:, i])[0])
if correlations:
    average_correlation = np.mean(correlations)
    worksheet.update(
        range_name='E25',
        values=[[f"{average_correlation:.4f}"]]
      )
    print(f"  Average Pearson Correlation across dimensions: {average_correlation:.4f}")
else:
    print("  No correlations could be calculated for averaging.")

correlation_string_disc_p = ', '.join([f"{c:.4f}" for c in correlations])
# Update cell E3 with the full string
worksheet.update('E24', [[correlation_string_disc_p]])

from sklearn.metrics import r2_score

# R-squared PDisc
print("\nR-squared scores (PDisc):")
r2_happiness_disc_p = r2_score(y_test_disc_p[:, 0], y_pred_disc_p[:, 0])
print("  happiness =", r2_happiness_disc_p)
r2_sadness_disc_p = r2_score(y_test_disc_p[:, 1], y_pred_disc_p[:, 1])
print("  sadness =", r2_sadness_disc_p)
r2_anger_disc_p = r2_score(y_test_disc_p[:, 2], y_pred_disc_p[:, 2])
print("  anger =", r2_anger_disc_p)
r2_tenderness_disc_p = r2_score(y_test_disc_p[:, 3], y_pred_disc_p[:, 3])
print("  tenderness =", r2_tenderness_disc_p)
r2_fear_disc_p = r2_score(y_test_disc_p[:, 4], y_pred_disc_p[:, 4])
print("  fear =", r2_fear_disc_p)

worksheet.update(
    range_name='E28',
    values=[[f"{r2_happiness_disc_p:.4f}, {r2_sadness_disc_p:.4f}, {r2_anger_disc_p:.4f}, {r2_tenderness_disc_p:.4f}, {r2_fear_disc_p:.4f}"]]
)

average_r2 = (r2_valence + r2_energy + r2_tension) /3
worksheet.update('E8', [[f"{average_r2:.4f}"]])

average_r2_dim_p = (r2_valence_dim_p + r2_energy_dim_p + r2_tension_dim_p) /3
worksheet.update('E15', [[f"{average_r2_dim_p:.4f}"]])

average_r2_disc_i = (r2_happiness_disc_i + r2_sadness_disc_i + r2_anger_disc_i + r2_tenderness_disc_i + r2_fear_disc_i) / 5
worksheet.update('E22', [[f"{average_r2_disc_i:.4f}"]])

average_r2_disc_p = (r2_happiness_disc_p + r2_sadness_disc_p + r2_anger_disc_p + r2_tenderness_disc_p + r2_fear_disc_p) / 5
worksheet.update('E29', [[f"{average_r2_disc_p:.4f}"]])

print("\nAverage R-squared scores:")
print("IDim: ", average_r2)
print("PDim: ", average_r2_dim_p)
print("IDisc: ", average_r2_disc_i)
print("PDisc: ", average_r2_disc_p)


Shape of predictions (y_pred): (767, 5)
First 5 actual values (y_test):
[[2.   6.04 1.86 1.01 1.01]
 [1.   6.06 1.   2.99 1.  ]
 [1.   2.01 4.02 1.   1.  ]
 [1.81 5.81 4.99 3.35 1.61]
 [1.26 3.29 2.45 1.26 6.45]]
First 5 predicted values (y_pred):
[[1.9326186 4.9623857 3.2431555 2.8338242 3.2725585]
 [1.4263269 4.5049353 5.2519917 2.200772  4.7984333]
 [2.3997126 4.1089883 3.6924589 2.9121454 4.206098 ]
 [2.2625172 3.5101078 4.7747307 2.3624127 5.0357676]
 [2.9475935 4.446949  2.378245  3.5748682 3.1526537]]

Mean Absolute Percentage Error (MAPE): 89.25%



  worksheet.update('E26', [[f"{mape_disc_p:.2f}%"]])
  worksheet.update('E23', [[f"{mae_disc_p:.4f}"]])


Mean Absolute Error (MAE): 1.8062
Root Mean Squared Error (RMSE): 2.1977

Pearson Correlation Coefficients (per dimension):
  Happiness Dimension: 0.4022
  Sadness Dimension: 0.3674
  Anger Dimension: 0.4205
  Tenderness Dimension: 0.3262
  Fear Dimension: 0.4113


  worksheet.update('E27', [[f"{rmse_disc_p:.4f}"]])


  Average Pearson Correlation across dimensions: 0.3855

R-squared scores (PDisc):
  happiness = 0.14399436021302658
  sadness = 0.13394152221845168
  anger = 0.16294447496930975
  tenderness = 0.10269006724394403
  fear = 0.16583585931881928


  worksheet.update('E24', [[correlation_string_disc_p]])
  worksheet.update('E8', [[f"{average_r2:.4f}"]])
  worksheet.update('E15', [[f"{average_r2_dim_p:.4f}"]])
  worksheet.update('E22', [[f"{average_r2_disc_i:.4f}"]])



Average R-squared scores:
IDim:  0.24867412019377333
PDim:  0.2610736683747435
IDisc:  0.11940409227493394
PDisc:  0.14188125679271027


  worksheet.update('E29', [[f"{average_r2_disc_p:.4f}"]])
