In [42]:
# NOTE: change this to the path of the Korsmit Exp1 data folder in your setup.
# Keep the trailing "/": a later cell builds the ratings folder path by plain
# string concatenation (korsmit_exp1_path + "Data/IDim/"), not os.path.join.
korsmit_exp1_path = "../../data/Korsmit/Exp1/"

In [24]:
import os
import pandas as pd
import numpy as np

In [64]:
# Load the model predictions and arrange them as one matrix with a column per
# rating dimension, in the fixed order (positive, relaxed, awake).
preds_path = "centaur_responses_pos_rel_awa_170725.csv"
df = pd.read_csv(preds_path)

# One 1-D array per rating dimension; the individual names are kept so that
# other cells can keep referring to them.
positive_preds, relaxed_preds, awake_preds = (
    df[dimension].to_numpy() for dimension in ("positive", "relaxed", "awake")
)

# Join the three 1-D arrays as columns -> shape (n_rows, 3).
all_preds = np.stack((positive_preds, relaxed_preds, awake_preds), axis=1)

In [51]:
# Load every per-file rating table from the IDim folder and average the
# (positive, relaxed, awake) ratings element-wise across files.
IDim_path = korsmit_exp1_path+"Data/IDim/"
IDim_responses = []  # not used in this cell; kept in case other cells rely on it

required_columns = ['positive', 'relaxed', 'awake']
all_dfs = []

# os.listdir() returns entries in arbitrary order; sorting makes the stacking
# order (and therefore the floating-point rounding of the mean) reproducible.
for filename in sorted(os.listdir(IDim_path)):
    if not filename.endswith(".csv"):
        continue
    # Use a dedicated name so the global `df` (the predictions table loaded in
    # another cell) is not silently overwritten by the last rating file.
    ratings_df = pd.read_csv(os.path.join(IDim_path, filename))
    # Header names may carry stray whitespace; normalise before matching.
    ratings_df.columns = ratings_df.columns.str.strip()
    # Files lacking any of the three rating columns are skipped.
    if all(col in ratings_df.columns for col in required_columns):
        all_dfs.append(ratings_df[required_columns])

# Fail with a clear message instead of np.stack's generic
# "need at least one array to stack".
if not all_dfs:
    raise ValueError(
        f"No CSV file with columns {required_columns} found in {IDim_path!r}"
    )

# Stack the tables along a new third axis and average over it.
# np.stack raises ValueError if the tables do not all have the same shape.
stacked = np.stack(
    [frame.values for frame in all_dfs], axis=2
)  # shape: (rows, cols, num_dfs)
IDim_mean_reponses = np.mean(stacked, axis=2)  # shape: (rows, cols)

In [72]:
from scipy.stats import pearsonr  # Pearson correlation coefficient

# ---------------------------------------------------------------------------
# Metric 1: Mean Absolute Error (MAE)
# Average size of the prediction errors, ignoring their sign.
# ---------------------------------------------------------------------------
mae = np.abs(all_preds - IDim_mean_reponses).mean()
print(f"Mean Absolute Error (MAE) between all_preds and human_ratings: {mae:.4f}\n")

# ---------------------------------------------------------------------------
# Metric 2: Mean Absolute Percentage Error (MAPE)
# MAPE = (1/n) * sum(|(Actual - Forecast) / Actual|) * 100%
# The original author notes that human responses lie between 1 and 9, so the
# denominator is not expected to be zero.
# ---------------------------------------------------------------------------
relative_error = (IDim_mean_reponses - all_preds) / IDim_mean_reponses
absolute_percentage_error = np.abs(relative_error) * 100
mape = absolute_percentage_error.mean()
print(f"Mean Absolute Percentage Error (MAPE): {mape:.2f}%\n")

# ---------------------------------------------------------------------------
# Metric 3: Root Mean Squared Error (RMSE)
# RMSE = sqrt(mean((Actual - Forecast)^2)); weights large errors more heavily.
# ---------------------------------------------------------------------------
squared_differences = np.square(IDim_mean_reponses - all_preds)
mse = squared_differences.mean()  # Mean Squared Error
rmse = np.sqrt(mse)
print(f"Root Mean Squared Error (RMSE): {rmse:.4f}\n")

# ---------------------------------------------------------------------------
# Metric 4: Pearson correlation, one coefficient per column.
# Column order is (positive, relaxed, awake); index [0] of pearsonr's result
# is the correlation coefficient.
# ---------------------------------------------------------------------------
correlation_positive, correlation_relaxed, correlation_awake = (
    pearsonr(all_preds[:, column], IDim_mean_reponses[:, column])[0]
    for column in range(3)
)

# Each coefficient must be a NumPy float scalar.
assert all(
    isinstance(coefficient, np.float64)
    for coefficient in (correlation_positive, correlation_relaxed, correlation_awake)
)

print("Pearson Correlation Coefficients (between all_preds and human_ratings):")
print(f"  Positive Dimension: {correlation_positive:.4f}")
print(f"  Relaxed Dimension:  {correlation_relaxed:.4f}")
print(f"  Awake Dimension:    {correlation_awake:.4f}")

average_correlation = (correlation_positive + correlation_relaxed + correlation_awake) / 3
print(f"  Average Correlation: {average_correlation:.4f}")

from sklearn.metrics import r2_score

# ---------------------------------------------------------------------------
# Metric 5: R-squared, per column and over all columns at once.
# Human mean ratings are passed as y_true, model predictions as y_pred.
# ---------------------------------------------------------------------------
print("\nR-squared scores:")
r2_valence, r2_tension, r2_energy = (
    r2_score(IDim_mean_reponses[:, column], all_preds[:, column])
    for column in range(3)
)
print("  valence =", r2_valence)
print("  tension =", r2_tension)
print("  energy =", r2_energy)

r2_overall = r2_score(IDim_mean_reponses, all_preds)
print("\nOverall R-squared_score =", r2_overall)


Mean Absolute Error (MAE) between all_preds and human_ratings: 1.9235

Mean Absolute Percentage Error (MAPE): 42.39%

Root Mean Squared Error (RMSE): 2.3560

Pearson Correlation Coefficients (between all_preds and human_ratings):
  Positive Dimension: 0.0472
  Relaxed Dimension:  -0.0066
  Awake Dimension:    0.0352
  Average Correlation: 0.0253

R-squared scores:
  valence = -3.5813092321065625
  tension = -3.4057039405999605
  energy = -10.366481227825673

Overall R-squared_score = -5.784498133510733
