In [31]:
import pandas as pd
import re
import numpy as np
from scipy.stats import wilcoxon


def parse_gain_tuple(gain_str):
    """Parse the two floats out of a stringified gain tuple.

    The expected input is the text form of a 2-tuple of NumPy scalars, e.g.
    ``(np.float64(360419120.4397801), np.float64(360334000.0))``.
    Non-finite entries (``np.float64(nan)``, ``np.float64(inf)``,
    ``np.float64(-inf)``) are accepted as well.

    Args:
        gain_str: Text containing exactly two ``np.float64(...)`` entries.

    Returns:
        A ``(first, second)`` tuple of Python floats, in order of appearance.

    Raises:
        ValueError: If ``gain_str`` does not contain exactly two
            ``np.float64(...)`` entries, or an entry is not a valid float
            literal.
    """
    # First alternative: ordinary decimal / scientific notation.
    # Second alternative: the non-finite tokens, which the numeric character
    # class alone can never match.
    matches = re.findall(
        r'np\.float64\(([\d\.eE+-]+|[+-]?(?:inf|nan))\)', gain_str
    )
    if len(matches) != 2:
        raise ValueError(f"Unexpected gain string format: {gain_str}")
    return float(matches[0]), float(matches[1])


In [32]:
results = pd.read_csv('results.csv')

# 'results.csv' is rewritten further down without the raw 'gain' column, so on
# a re-run the file may already hold the parsed columns. Only parse when the
# raw column is actually present, which keeps this cell safe to execute again.
gain_columns = ['moving_average_rev', 'mean_over_best_gain']
if 'gain' in results.columns:
    parsed_gains = results['gain'].apply(parse_gain_tuple)
    # Build the two columns in one shot; expanding each row with
    # .apply(pd.Series) creates one Series per row and is much slower.
    results[gain_columns] = pd.DataFrame(
        parsed_gains.tolist(), columns=gain_columns, index=results.index
    )
elif not set(gain_columns).issubset(results.columns):
    raise KeyError(
        "results.csv has neither a 'gain' column nor the parsed columns "
        f"{gain_columns}"
    )


In [34]:
# Remove the raw 'gain' text column before saving. errors='ignore' keeps this
# cell idempotent: executing it again after 'gain' is already gone would
# otherwise raise KeyError.
results = results.drop(columns=['gain'], errors='ignore')
# NOTE: this overwrites the input file in place, so the raw 'gain' strings are
# no longer available on disk afterwards.
results.to_csv('results.csv', index=False)

In [27]:
# One sub-frame per experiment, selected by exact match on 'experiment_name'.
# The unpacking order mirrors the order of the labels below.
baseline, zero_clean, percentile_1, percentile_5 = (
    results.loc[results['experiment_name'].eq(label)]
    for label in (
        'zero_shot_baseline',
        'zero_shot_zero_clean',
        'zero_shot_percentiles_1',
        'zero_shot_percentiles_5',
    )
)


In [29]:
# Significance threshold applied to each comparison individually.
alpha = 0.05
baseline_ganancias = baseline["moving_average_rev"].values

# Gains of every alternative experiment, keyed by the label used in the report.
candidate_gains = {
    "zero_clean": zero_clean["moving_average_rev"].values,
    "percentile_1": percentile_1["moving_average_rev"].values,
    "percentile_5": percentile_5["moving_average_rev"].values,
}

# One-sided Wilcoxon signed-rank test per candidate; alternative='greater'
# asks whether the baseline gains tend to exceed the candidate's.
# NOTE(review): the test pairs observations by position, so this assumes the
# rows of every experiment are in the same order (same runs/seeds) - confirm.
for name, other_ganancias in candidate_gains.items():
    statistic, p_value = wilcoxon(
        baseline_ganancias,
        other_ganancias,
        alternative='greater',
    )
    is_significant = p_value < alpha
    print(f"Experimento: {name}, p-value: {p_value}, significativo: {is_significant}, statistic: {statistic}")

Experimento: zero_clean, p-value: 9.5367431640625e-07, significativo: True, statistic: 210.0
Experimento: percentile_1, p-value: 9.5367431640625e-07, significativo: True, statistic: 210.0
Experimento: percentile_5, p-value: 9.5367431640625e-07, significativo: True, statistic: 210.0
