In [None]:
import pandas as pd
import numpy as np
import os
from scipy import stats
import statsmodels.api as sm
from statsmodels.formula.api import ols
from statsmodels.stats.multicomp import pairwise_tukeyhsd

Drive already mounted at /content/drive/; to attempt to forcibly remount, call drive.mount("/content/drive/", force_remount=True).


In [None]:
# Root folder holding one sub-folder per experiment.
file_path = '/content/drive/My Drive/HSE Agents RL/Experiments/'

# Aggregated-results CSV of each experiment, relative to `file_path`.
result_files = (
    'Exp_1/Results/exp_1_aggregated_results.csv',
    'Exp_2/Results/exp_2_aggregated_results.csv',
    'Exp_3/Results/exp_3_aggregated_results.csv',
    'Exp_4/Results/exp_4_aggregated_results.csv',
)

# Read the four files in order and bind each frame to its own name.
exp_1_df, exp_2_df, exp_3_df, exp_4_df = (
    pd.read_csv(os.path.join(file_path, relative_path))
    for relative_path in result_files
)

In [None]:
def calculate_mean_confidence_interval(data, confidence=0.9):
    """Return the sample mean and the half-width of its Student-t confidence interval.

    Args:
        data: Sized collection of numeric observations.
        confidence: Two-sided confidence level, e.g. 0.9 for a 90% interval.

    Returns:
        A ``(mean, margin)`` tuple; the interval is ``mean ± margin``.
    """
    degrees_of_freedom = len(data) - 1
    # Two-sided interval: split the remaining probability evenly between both tails.
    upper_quantile = (1 + confidence) / 2.
    t_critical = stats.t.ppf(upper_quantile, degrees_of_freedom)
    half_width = stats.sem(data) * t_critical
    return np.mean(data), half_width

metrics = ['total_reward', 'total_regret', 'average_reward', 'best_handle_probability']
exp_list = [exp_1_df, exp_2_df, exp_3_df, exp_4_df]

# Every unordered pair (i, j) with i < j, in the order the tests are stored and later printed.
experiment_pairs = [
    (i, j)
    for i in range(len(exp_list))
    for j in range(i + 1, len(exp_list))
]

# Per metric: mean/margin of each experiment plus the t-test of every experiment pair.
# NOTE(review): the pairwise tests are not corrected for multiple comparisons — confirm this is intended.
results = {}
for metric in metrics:
    means_margins = [
        calculate_mean_confidence_interval(exp_df[metric])
        for exp_df in exp_list
    ]
    pair_tests = [
        stats.ttest_ind(exp_list[i][metric], exp_list[j][metric])
        for i, j in experiment_pairs
    ]
    results[metric] = {
        'means_margins': means_margins,
        't_stats': [t_stat for t_stat, _ in pair_tests],
        'p_values': [p_value for _, p_value in pair_tests],
    }

# Report: one line per experiment, then one line per pairwise comparison.
significance_level = 0.1
for metric, result in results.items():
    print(f"{metric}:")
    for number, (mean, margin) in enumerate(result['means_margins'], start=1):
        print(f"  Experiment {number}: {mean} ± {margin}")
    for (i, j), t_stat, p_value in zip(experiment_pairs, result['t_stats'], result['p_values']):
        if p_value < significance_level:
            significance = 'significant'
        else:
            significance = 'not significant'
        print(f"  t-statistic (Exp {i+1} vs Exp {j+1}): {t_stat}, p-value: {p_value} ({significance})")
    print()

total_reward:
  Experiment 1: 12.9 ± 1.3906809093262
  Experiment 2: 15.2 ± 1.915587448333623
  Experiment 3: 13.0 ± 2.0777027090722364
  Experiment 4: 14.2 ± 2.1131657179951486
  t-statistic (Exp 1 vs Exp 2): -1.7400432399688577, p-value: 0.09117122678853691 (significant)
  t-statistic (Exp 1 vs Exp 3): -0.0730420000787267, p-value: 0.9422419543010665 (not significant)
  t-statistic (Exp 1 vs Exp 4): -0.9291641968594214, p-value: 0.3607456612179011 (not significant)
  t-statistic (Exp 2 vs Exp 3): 1.3796851419714535, p-value: 0.17943469980226617 (not significant)
  t-statistic (Exp 2 vs Exp 4): 0.6123724356957945, p-value: 0.5462986147180486 (not significant)
  t-statistic (Exp 3 vs Exp 4): -0.7179453559770287, p-value: 0.48070248458823084 (not significant)

total_regret:
  Experiment 1: 17.1 ± 1.3906809093262
  Experiment 2: 14.8 ± 1.915587448333623
  Experiment 3: 17.0 ± 2.0777027090722364
  Experiment 4: 15.8 ± 2.1131657179951486
  t-statistic (Exp 1 vs Exp 2): 1.740043239968859, p

In [None]:
# Label paired with the frame it tags.
labelled_frames = (
    ('Experiment 1', exp_1_df),
    ('Experiment 2', exp_2_df),
    ('Experiment 3', exp_3_df),
    ('Experiment 4', exp_4_df),
)
for label, frame in labelled_frames:
    # In-place assignment: the per-experiment frames themselves gain the 'Experiment' column.
    frame['Experiment'] = label

# Stack all experiments into one long frame (each frame keeps its own row index).
combined_df = pd.concat([frame for _, frame in labelled_frames])

In [None]:
def bootstrap(data, num_samples=10000, rng=None):
    """Draw bootstrap resamples of ``data`` and return the mean of each resample.

    Args:
        data: 1-D sized collection of observations to resample with replacement.
        num_samples: Number of bootstrap resamples to draw.
        rng: Source of randomness. ``None`` (default) uses NumPy's global
            ``np.random`` state, exactly as before; an object exposing
            ``choice`` (e.g. ``np.random.Generator``) is used as-is; anything
            else (e.g. an int seed) is passed to ``np.random.default_rng``.

    Returns:
        1-D array of length ``num_samples`` with the mean of every resample.
    """
    if rng is None:
        # Backward-compatible default: honour any np.random.seed() set by the caller.
        sampler = np.random
    elif hasattr(rng, 'choice'):
        sampler = rng
    else:
        sampler = np.random.default_rng(rng)

    n = len(data)
    # Each row is one resample with the same size as the original data.
    samples = sampler.choice(data, (num_samples, n), replace=True)
    return np.mean(samples, axis=1)

# Fixed seed for NumPy's global RNG (which `bootstrap` draws from by default),
# so Restart & Run All reproduces the same intervals.
BOOTSTRAP_SEED = 42
# A two-sided 90% percentile interval leaves 5% in each tail, so its bounds are
# the 5th and 95th percentiles ([10, 90] would only be an 80% interval).
CI_PERCENTILES = [5, 95]

np.random.seed(BOOTSTRAP_SEED)

# Per metric and experiment: the bootstrap distribution of the mean and its 90% interval.
bootstrap_results = {}
ci_results = {}
for metric in metrics:
    bootstrap_results[metric] = {}
    ci_results[metric] = {}
    for i, exp_df in enumerate(exp_list):
        bootstrapped_data = bootstrap(exp_df[metric].values)
        bootstrap_results[metric][f'exp_{i+1}'] = bootstrapped_data
        ci_results[metric][f'exp_{i+1}'] = np.percentile(bootstrapped_data, CI_PERCENTILES)

    for i in range(len(exp_list)):
        print(f"{metric} - Эксперимент {i+1}: {np.mean(exp_list[i][metric]):.3f} (90% CI: {ci_results[metric][f'exp_{i+1}']})")
    print()

total_reward - Эксперимент 1: 12.900 (90% CI: [11.9 13.9]])
total_reward - Эксперимент 2: 15.200 (90% CI: [13.8        16.53333333]])
total_reward - Эксперимент 3: 13.000 (90% CI: [11.53846154 14.38461538]])
total_reward - Эксперимент 4: 14.200 (90% CI: [12.8 15.6]])

total_regret - Эксперимент 1: 17.100 (90% CI: [16.1 18.1]])
total_regret - Эксперимент 2: 14.800 (90% CI: [13.46666667 16.2       ]])
total_regret - Эксперимент 3: 17.000 (90% CI: [15.53846154 18.46153846]])
total_regret - Эксперимент 4: 15.800 (90% CI: [14.4 17.2]])

average_reward - Эксперимент 1: 0.429 (90% CI: [0.396  0.4625]])
average_reward - Эксперимент 2: 0.505 (90% CI: [0.45866667 0.54933333]])
average_reward - Эксперимент 3: 0.433 (90% CI: [0.38384615 0.48076923]])
average_reward - Эксперимент 4: 0.472 (90% CI: [0.425 0.518]])

best_handle_probability - Эксперимент 1: 0.121 (90% CI: [0.099 0.143]])
best_handle_probability - Эксперимент 2: 0.221 (90% CI: [0.174      0.26866667]])
best_handle_probability - Экспери

In [None]:
# One-way ANOVA per metric with the experiment label as the only (categorical) factor.
anova_results = {}
for metric in metrics:
    formula = f'{metric} ~ C(Experiment)'
    fitted_model = ols(formula, data=combined_df).fit()
    anova_results[metric] = sm.stats.anova_lm(fitted_model, typ=2)

    print(f"ANOVA для {metric}:")
    print(anova_results[metric])
    print()

ANOVA для total_reward:
                  sum_sq    df         F    PR(>F)
C(Experiment)   55.32069   3.0  1.205828  0.316448
Residual       825.80000  54.0       NaN       NaN

ANOVA для total_regret:
                  sum_sq    df         F    PR(>F)
C(Experiment)   55.32069   3.0  1.205828  0.316448
Residual       825.80000  54.0       NaN       NaN

ANOVA для average_reward:
                 sum_sq    df         F    PR(>F)
C(Experiment)  0.059853   3.0  1.178596  0.326494
Residual       0.914105  54.0       NaN       NaN

ANOVA для best_handle_probability:
                 sum_sq    df         F    PR(>F)
C(Experiment)  0.243254   3.0  6.336632  0.000924
Residual       0.690994  54.0       NaN       NaN



In [None]:
metrics = ['total_reward', 'total_regret', 'average_reward', 'best_handle_probability']

# The group labels are the same for every metric, so look them up once.
experiment_labels = combined_df['Experiment']

# NOTE(review): alpha is 0.05 here while the pairwise t-test cell uses 0.1 — confirm intended.
for metric in metrics:
    print(f"Tukey's HSD for {metric}:")
    tukey_result = pairwise_tukeyhsd(
        endog=combined_df[metric],
        groups=experiment_labels,
        alpha=0.05,
    )
    print(tukey_result)
    print("\n")

Tukey's HSD for total_reward:
      Multiple Comparison of Means - Tukey HSD, FWER=0.05      
   group1       group2    meandiff p-adj   lower  upper  reject
---------------------------------------------------------------
Experiment 1 Experiment 2      2.3 0.3225 -1.2408 5.8408  False
Experiment 1 Experiment 3      0.1 0.9999 -3.5932 3.7932  False
Experiment 1 Experiment 4      1.3 0.8261 -2.7149 5.3149  False
Experiment 2 Experiment 3     -2.2 0.4537 -6.1282 1.7282  False
Experiment 2 Experiment 4     -1.0 0.9231 -5.2321 3.2321  False
Experiment 3 Experiment 4      1.2 0.8848 -3.1604 5.5604  False
---------------------------------------------------------------


Tukey's HSD for total_regret:
      Multiple Comparison of Means - Tukey HSD, FWER=0.05      
   group1       group2    meandiff p-adj   lower  upper  reject
---------------------------------------------------------------
Experiment 1 Experiment 2     -2.3 0.3225 -5.8408 1.2408  False
Experiment 1 Experiment 3     -0.1 0.9999 

In [None]:
experiments = ['Experiment 1', 'Experiment 2', 'Experiment 3', 'Experiment 4']
exp_data_frames = [exp_1_df, exp_2_df, exp_3_df, exp_4_df]
metrics = ['total_reward', 'total_regret', 'average_reward', 'best_handle_probability']


def _format_mean_std(values):
    """Render a column as 'mean ± std' with three decimals, using its own .mean()/.std()."""
    return f"{values.mean():.3f} ± {values.std():.3f}"


# One row per metric, one column per experiment.
summary_data = {'Metric': metrics}
summary_data.update(
    (exp, [_format_mean_std(df[metric]) for metric in metrics])
    for exp, df in zip(experiments, exp_data_frames)
)

summary_df = pd.DataFrame(summary_data)

print(summary_df)

Unnamed: 0,Metric,Experiment 1,Experiment 2,Experiment 3,Experiment 4
0,total_reward,12.900 ± 3.597,15.200 ± 4.212,13.000 ± 4.203,14.200 ± 3.645
1,total_regret,17.100 ± 3.597,14.800 ± 4.212,17.000 ± 4.203,15.800 ± 3.645
2,average_reward,0.429 ± 0.119,0.505 ± 0.140,0.433 ± 0.140,0.472 ± 0.122
3,best_handle_probability,0.121 ± 0.079,0.221 ± 0.148,0.122 ± 0.113,0.283 ± 0.111
