In [1]:
import os
import pandas as pd
import pickle
from scalers import scaler_identity

# Load the stored grid-search results.
# NOTE: pickle.load can execute arbitrary code - only use it on trusted files.
results_path = os.path.join('logs', 'scalers_07', 'gridsearch_results.pkl')
with open(results_path, 'rb') as results_file:
    grid_search_results = pickle.load(results_file)

In [2]:
# Dump every experiment: the scaler used and, for each fold, every metric as
# "scaled (unscaled)" followed by the per-sample values.
# Dictionary lookups are done outside the f-strings: reusing the enclosing
# double quote inside a replacement field is a SyntaxError before Python 3.12.
print(len(grid_search_results))
for exp in grid_search_results:
    scaler = exp["dataset-scaler"]
    print(f"scaler: {scaler}")
    # print(f"epochs: {exp['training-epochs']}")
    folds = exp["metrics_per_fold"]
    for fold_number, fold in enumerate(folds, start=1):
        print(f"\tFold {fold_number}:")
        for metric, values in fold.items():
            scaled_val = values["scaled"]
            unscaled_val = values["unscaled"]
            # One print call so the emitted text is identical to a single
            # multi-line string.
            print(
                f"\t\t{metric}: {scaled_val.metric:.6f} ({unscaled_val.metric:.6f})\n"
                "\t\t\tper_sample s (u):\n"
                f"\t\t\t\t {scaled_val.metric_per_sample}\n"
                f"\t\t\t\t({unscaled_val.metric_per_sample})"
            )
    print()
    print("######################################")

9
scaler: FunctionTransformer(func=<function scaler_identity at 0x00000195B6E52FC0>,
                    inverse_func=<function scaler_identity at 0x00000195B6E52FC0>)
	Fold 1:
		mean_squared_error: 476430.562500 (476430.562500)
			per_sample s (u):
				 [454893.5, 505970.2, 463131.1, 461699.3, 453427.25, 456546.72, 471480.8, 497924.56, 488925.06, 481929.9, 511027.53, 462497.12, 453615.66, 527666.7, 465418.75, 514712.9, 455429.44, 453316.47, 453430.62, 461307.44, 498950.47, 453398.97, 459644.62, 488783.53, 481660.4, 510914.5, 455067.0, 468686.22, 515603.0, 459818.28, 484999.34, 455157.53, 461684.75, 492656.1, 483121.7, 453395.78, 453223.44, 517964.16, 455741.53, 482657.4, 510406.03, 486037.16, 461100.16, 479535.97, 474816.1]
				([454893.5, 505970.2, 463131.1, 461699.3, 453427.25, 456546.72, 471480.8, 497924.56, 488925.06, 481929.9, 511027.53, 462497.12, 453615.66, 527666.7, 465418.75, 514712.9, 455429.44, 453316.47, 453430.62, 461307.44, 498950.47, 453398.97, 459644.62, 488783.53, 481

1. 
   - For each experiment, collect the per-fold metric values (the samples).
   - The samples appear in the same fold order in every experiment, so values at the same position can be compared as pairs.

In [3]:
from sklearn.preprocessing import FunctionTransformer
import re

def _scaler_label(scaler):
    """Turn the stored "dataset-scaler" entry into a short display name.

    A FunctionTransformer is named after its wrapped function (without the
    "scaler_" prefix), a string is used as is, and any other object is named
    after its class. "()" and "Scaler" are then stripped, a space is inserted
    before every capital letter except a leading one, and the result is
    lower-cased with only its first character capitalised.
    """
    if isinstance(scaler, FunctionTransformer):
        raw_name = scaler.func.__name__.replace("scaler_", "")
    elif isinstance(scaler, str):
        raw_name = scaler
    else:
        raw_name = scaler.__class__.__name__
    raw_name = raw_name.replace("()", "").replace("Scaler", "")
    spaced_name = re.sub(r'(?<!^)(?=[A-Z])', ' ', raw_name)
    return spaced_name.lower().capitalize()


# Display name -> list with the unscaled root-mean-squared-error of each fold.
exp_samples = {}
for experiment in grid_search_results:
    label = _scaler_label(experiment["dataset-scaler"])
    exp_samples[label] = [
        fold_metrics["root_mean_squared_error"]["unscaled"].metric
        for fold_metrics in experiment["metrics_per_fold"]
    ]

# Per-scaler summary: number of values, their mean and their standard
# deviation as computed by pandas.
for label, fold_values in exp_samples.items():
    print(f"{label}: {len(fold_values)}")
    print(f"mean: {sum(fold_values)/len(fold_values)}")
    print(f"std: {pd.Series(fold_values).std()}")
    


Identity: 10
mean: 703.7726013183594
std: 171.64804077148438
Global min max: 10
mean: 3.614452528953552
std: 0.7339891791343689
Global max abs: 10
mean: 6.6939128875732425
std: 1.3422720432281494
Global standard: 10
mean: 1.704558402299881
std: 0.6470454335212708
Global robust: 10
mean: 1.778327763080597
std: 0.45307657122612
Min max: 10
mean: 4.093721401691437
std: 2.513179063796997
Max abs: 10
mean: 6.315600490570068
std: 1.3525644540786743
Standard: 10
mean: 1.6160909414291382
std: 0.3287510871887207
Robust: 10
mean: 1.8564866483211517
std: 0.5554167032241821


In [4]:
from scipy import stats
import statsmodels.stats.multitest
from critdd import Diagram
from itertools import combinations
import numpy as np
import pandas as pd

# Friedman test over all scalers: each scaler contributes one sequence of
# per-fold values, passed as a separate positional argument.
samples = list(exp_samples.values())
sample_matrix = np.array(samples)
print(sample_matrix.shape)  # (number of scalers, values per scaler)
friedman_result = stats.friedmanchisquare(*samples)
friedman_result

(9, 10)


FriedmanchisquareResult(statistic=67.89333333333337, pvalue=1.2891451827333526e-11)

In [5]:
# Pairwise Wilcoxon signed-rank tests between all scalers, followed by a Holm
# step-down correction for the multiple comparisons.
ALPHA = 0.05  # family-wise significance level, used for every threshold below

# Pair up the scaler *names* (not the bare value lists) so every p-value stays
# attributable to the two scalers it compares.
scaler_pairs = list(combinations(exp_samples.keys(), 2))
labelled_pvals = [
    (stats.wilcoxon(exp_samples[first], exp_samples[second]).pvalue, (first, second))
    for first, second in scaler_pairs
]

# Sort ascending by p-value only (stable), keeping each label with its value.
labelled_pvals.sort(key=lambda item: item[0])
pair_names = [pair for _, pair in labelled_pvals]
pvals = np.array([pval for pval, _ in labelled_pvals])
print(pvals)

# Manual Holm step-down: the k-th smallest p-value (k = 0, 1, ...) is compared
# with ALPHA / (m - k); the procedure stops at the first failure.
# NOTE(review): in the recorded run the smallest p-value is 0.001953125, which
# is above 0.05 / 36, so the very first step fails and no pair is declared
# significant - confirm whether 10 values per scaler give enough resolution
# for 36 comparisons.
n_tests = len(pvals)
for step, pval in enumerate(pvals):
    threshold = ALPHA / (n_tests - step)
    if pval > threshold:
        print(f"{pval} is not st. significant")
        print(f"{threshold} is the threshold")
        break
    print(f"{pval} is st. significant")

holm_result = statsmodels.stats.multitest.multipletests(pvals, alpha=ALPHA, method="holm")

# Same information as holm_result, but labelled with the scaler pair of each
# comparison (rows are in ascending order of the raw p-value).
pairwise_holm = pd.DataFrame({
    "scaler_a": [first for first, _ in pair_names],
    "scaler_b": [second for _, second in pair_names],
    "p_value": pvals,
    "p_holm": holm_result[1],
    "reject": holm_result[0],
})

holm_result

[0.00195312 0.00195312 0.00195312 0.00195312 0.00195312 0.00195312
 0.00195312 0.00195312 0.00195312 0.00195312 0.00195312 0.00195312
 0.00195312 0.00195312 0.00195312 0.00195312 0.00195312 0.00195312
 0.00195312 0.00195312 0.00195312 0.00195312 0.00195312 0.00585938
 0.01367188 0.02734375 0.02734375 0.02734375 0.06445312 0.55664062
 0.625      0.625      0.6953125  0.76953125 0.921875   1.        ]
0.001953125 is not st. significant
0.001388888888888889 is the threshold


(array([False, False, False, False, False, False, False, False, False,
        False, False, False, False, False, False, False, False, False,
        False, False, False, False, False, False, False, False, False,
        False, False, False, False, False, False, False, False, False]),
 array([0.0703125 , 0.0703125 , 0.0703125 , 0.0703125 , 0.0703125 ,
        0.0703125 , 0.0703125 , 0.0703125 , 0.0703125 , 0.0703125 ,
        0.0703125 , 0.0703125 , 0.0703125 , 0.0703125 , 0.0703125 ,
        0.0703125 , 0.0703125 , 0.0703125 , 0.0703125 , 0.0703125 ,
        0.0703125 , 0.0703125 , 0.0703125 , 0.07617188, 0.1640625 ,
        0.30078125, 0.30078125, 0.30078125, 0.515625  , 1.        ,
        1.        , 1.        , 1.        , 1.        , 1.        ,
        1.        ]),
 0.0014237991678133222,
 0.001388888888888889)

In [6]:
# One column per scaler, one row per stored value; show the column labels.
diag_df = pd.DataFrame(data=exp_samples)
diag_df.columns

Index(['Identity', 'Global min max', 'Global max abs', 'Global standard',
       'Global robust', 'Min max', 'Max abs', 'Standard', 'Robust'],
      dtype='object')

In [7]:
# Display the complete value list stored for every scaler (full dump, in the
# dictionary's insertion order).
exp_samples.values()

dict_values([[690.2395, 533.7556, 635.63354, 516.0511, 638.2398, 561.492, 851.0142, 662.54456, 1004.5744, 944.1813], [4.059056, 3.069059, 2.7742832, 4.3655853, 2.9209526, 4.027797, 2.4854548, 4.0626245, 4.5732493, 3.8064637], [4.9465475, 5.825639, 7.46285, 5.812009, 7.688271, 7.0280805, 8.978247, 4.689832, 7.0574856, 7.4501677], [1.4960804, 1.915028, 1.6664538, 0.9441982, 1.0882552, 1.6468424, 1.5249391, 3.3313396, 1.8550345, 1.577413], [2.5345964, 1.5269696, 1.355432, 1.419731, 2.3250601, 1.4881922, 2.22599, 1.2926453, 2.0121274, 1.6025335], [3.1120903, 8.727079, 7.085992, 3.6923654, 6.9630866, 1.9099075, 2.639161, 2.248191, 2.590037, 1.9693036], [5.8246846, 6.935252, 4.8240685, 7.737423, 4.5136, 5.59569, 8.948892, 5.5933933, 6.259735, 6.923267], [1.361229, 1.9114362, 1.4924686, 1.8523979, 1.7234977, 1.9993126, 0.9739499, 1.4738141, 1.4138962, 1.9589071], [1.8852024, 1.9978439, 1.6722597, 1.9970117, 0.9601031, 2.9457135, 1.1991915, 2.0120327, 1.6020036, 2.2935045]])

In [8]:
# Critical-difference diagram of the scalers, exported as a .tex file.
treatment_names = list(exp_samples)
# Stack one row per scaler, then transpose so that every scaler is a column.
x = np.array([exp_samples[name] for name in treatment_names]).T
diagram = Diagram(x, treatment_names=treatment_names, maximize_outcome=False)

print(diagram.maximize_outcome)

# List the scalers by ascending average rank.
name_rank_pairs = zip(diagram.treatment_names, diagram.average_ranks)
sorted_treatments = sorted(name_rank_pairs, key=lambda pair: pair[1])
for name, avg_r in sorted_treatments:
    print(f"{name}: {avg_r}")

# Optional checks, kept disabled:
# groups = diagram.get_groups(alpha=0.05, adjustment="holm", return_names=True)
# print("Groups:", groups)
# print(diagram.average_ranks)

diagram.to_file(
    "scalers_per_fold.tex",
    alpha=0.05,
    adjustment="holm",
    reverse_x=True,
    # axis_options={"title": "Normalization methods"},
)

False
Standard: 2.2
Global robust: 2.4
Global standard: 2.5
Robust: 3.3
Min max: 5.4
Global min max: 5.6
Max abs: 7.1
Global max abs: 7.5
Identity: 9.0
