In [None]:
import os
import glob
import pandas as pd
import numpy as np
from scipy.stats import pearsonr

LOGFOLDER = "logs/"

In [None]:
# Report MAE and Pearson correlation for every `out.csv` found under LOGFOLDER.
#
# `recursive=True` is required for "**" to match nested directories; without
# it glob treats "**" like a plain "*" and only looks exactly one level deep.
# The paths are sorted so the report order is reproducible across runs.
for filename in sorted(
    glob.glob(os.path.join(LOGFOLDER, "**", "out.csv"), recursive=True)
):
    # Load the CSV file
    data = pd.read_csv(filename)

    # Convert 'pred' and 'ref' to numeric; unparseable entries become NaN
    data['pred'] = pd.to_numeric(data['pred'], errors='coerce')
    data['ref'] = pd.to_numeric(data['ref'], errors='coerce')

    # Keep only rows where both values survived the conversion
    filtered_data = data.dropna(subset=['pred', 'ref'])

    # Mean absolute error (MAE). Series.mean() yields NaN for an empty
    # selection instead of emitting a RuntimeWarning like np.mean does.
    mae = (filtered_data['pred'] - filtered_data['ref']).abs().mean()

    # Pearson correlation needs at least two samples; pearsonr raises
    # ValueError otherwise, which would abort the loop and hide the results
    # of every remaining file. Report NaN for such files instead.
    if len(filtered_data) >= 2:
        correlation, p_value = pearsonr(
            filtered_data['pred'], filtered_data['ref']
        )
    else:
        correlation, p_value = np.nan, np.nan

    print(f"---- {filename} ----")
    if len(filtered_data) < 2:
        print(
            f"Only {len(filtered_data)} valid row(s); "
            "correlation is undefined"
        )
    print(f"Mean average error (MAE): {mae}")
    print(f"Pearson correlation coefficient (R): {correlation}")
    print(f"P-value: {p_value}")
    print("-" * 38)