In [2]:
import os
import pandas as pd
import pickle


#exp_name = 'preprocessing_type'
#experiments = ["adam", "other", "rmsprop", "sgd"]
# Directories holding grid-search logs; each one is passed to
# load_gridsearch_results(), which reads 'gridsearch_results.pkl' from it.
layernorm_path = os.path.join('logs_mac', 'multi_layer_layernorm')
general_multi_path = os.path.join('logs_mac', 'run_pending_experiments_small')
# Lives in a sibling folder ('07_single_layer_rnns'), not under 'logs_mac'.
general_single_path = os.path.join('..', '07_single_layer_rnns', 'mac_logs', 'general_rnn_gridsearch')

def load_gridsearch_results(logs_path):
    """Read and return the pickled grid-search results found in ``logs_path``.

    The results are expected in a file named ``gridsearch_results.pkl`` located
    directly inside ``logs_path``; whatever object was pickled there is returned
    unchanged.
    """
    pickle_file = os.path.join(logs_path, 'gridsearch_results.pkl')
    with open(pickle_file, 'rb') as handle:
        return pickle.load(handle)

def add_param_to_results(results, param_name, param_value):
    """Give every result dict a ``param_name`` entry, keeping existing values.

    Args:
        results: iterable of result dicts; each dict is modified in place.
        param_name: key to add.
        param_value: value stored under ``param_name`` for dicts that do not
            have that key yet.

    Returns:
        The same ``results`` object that was passed in.
    """
    for res in results:
        # Membership has to be checked on the key *name*. Checking the value
        # (as an earlier version did) overwrote existing entries and failed
        # outright for unhashable values.
        res.setdefault(param_name, param_value)
    return results

def get_non_layernorm_data(layernorm_results: list, general_results: list):
    """Pick the general results that mirror a layer-norm result.

    A general result qualifies when

    * its ``model-layer_normalization``, ``model-smyl_std`` and
      ``model-smyl_residual`` entries compare equal to ``False`` and its
      ``model-dilation_base`` and ``model-residual_block_size`` entries are
      ``None``; and
    * for some layer-norm result, every key the two dicts share (other than
      ``model-layer_normalization`` and ``metrics_per_fold``) holds the same
      value.

    Matches are returned in layer-norm order, then general order; a general
    result is listed once per layer-norm result it matches.
    """
    skipped_keys = ("model-layer_normalization", "metrics_per_fold")

    def is_plain(candidate):
        # Same checks, in the same order, as a single short-circuiting chain.
        return (
            candidate['model-layer_normalization'] == False
            and candidate["model-dilation_base"] is None
            and candidate["model-residual_block_size"] is None
            and candidate["model-smyl_std"] == False
            and candidate["model-smyl_residual"] == False
        )

    def shares_params(reference, candidate):
        # Stops at the first shared key whose values differ.
        return not any(
            reference[name] != candidate[name]
            for name in reference.keys()
            if name not in skipped_keys and name in candidate
        )

    return [
        candidate
        for reference in layernorm_results
        for candidate in general_results
        if is_plain(candidate) and shares_params(reference, candidate)
    ]

def remove_layernorm_excessive_results(layernorm_results: list, general_results: list, excessive_keys: list = ["model-rnn_cell_params-units", "model-num_layers", "model-bidirectional"]):
    """Drop, in place, layer-norm results that have no counterpart in ``general_results``.

    A layer-norm result is kept only when at least one general result holds the
    same value for every key in ``excessive_keys``. The function was written
    because the layer-norm results contain configurations (100 units, 8 layers)
    that are not in the general results.

    Args:
        layernorm_results: list of result dicts; modified in place.
        general_results: list of result dicts to match against.
        excessive_keys: keys that must all agree for two results to count as
            the same configuration. The default list is only read, never
            mutated.

    Returns:
        None. ``layernorm_results`` keeps its identity; only its contents change.

    Raises:
        KeyError: if a compared dict lacks one of ``excessive_keys``. Nothing
            is removed in that case.
    """
    def has_counterpart(lres):
        return any(
            all(lres[key] == gres[key] for key in excessive_keys)
            for gres in general_results
        )

    # The survivors are computed in full before the slice assignment, so an
    # exception leaves the caller's list untouched. The slice assignment itself
    # keeps the same list object.
    layernorm_results[:] = [lres for lres in layernorm_results if has_counterpart(lres)]

# Pull one configuration out of the single-layer grid search so it can be
# analysed together with the multi-layer runs.
single_layer_results = load_gridsearch_results(general_single_path)
lstm_50 = single_layer_results[2]
# Guards the hard-coded index above: entry 2 must be the 50-unit LSTM.
assert lstm_50["model-rnn_cell_params-units"] == 50 and lstm_50["model-cell_type"] == "lstm"
# add_param_to_results() iterates over a list of result dicts, so wrap the single dict.
lstm_50 = [lstm_50]
lstm_50 = add_param_to_results(lstm_50, 'model-layer_normalization', False)
lstm_50 = add_param_to_results(lstm_50, 'model-bidirectional', False)
lstm_50 = add_param_to_results(lstm_50, 'model-num_layers', 1)

general_results = load_gridsearch_results(general_multi_path)
general_results = add_param_to_results(general_results, 'model-layer_normalization', False)

layernorm_results = load_gridsearch_results(layernorm_path)
# General runs whose shared parameters equal those of some layer-norm run.
non_layernorm_results = get_non_layernorm_data(layernorm_results, general_results) 
# Mutates layernorm_results in place: entries with no counterpart in
# non_layernorm_results (on the three listed keys) are dropped.
remove_layernorm_excessive_results(layernorm_results, non_layernorm_results, excessive_keys=["model-rnn_cell_params-units", "model-num_layers", "model-bidirectional"])

# Sanity checks: the keys of one layer-norm result and the sizes of both groups.
print(layernorm_results[0].keys())
print(len(layernorm_results))
print(len(non_layernorm_results))

def print_results_units_layers_and_bi(results):
    """Print each result's position followed by its ``key value`` pairs.

    Entries for ``model-layer_normalization``, ``metrics_per_fold``,
    ``model-dilation_base``, ``model-residual_block_size``, ``model-smyl_std``
    and ``model-smyl_residual`` are left out of the listing.
    """
    hidden_keys = (
        "model-layer_normalization",
        "metrics_per_fold",
        "model-dilation_base",
        "model-residual_block_size",
        "model-smyl_std",
        "model-smyl_residual",
    )
    for position, entry in enumerate(results):
        print(position)
        for name, value in entry.items():
            if name in hidden_keys:
                continue
            print(name, value)

# Dump both groups so their configurations can be compared side by side.
print("LAYERNORM")
print_results_units_layers_and_bi(layernorm_results)
print("NON LAYERNORM")
print_results_units_layers_and_bi(non_layernorm_results)
    
# Pool the layer-norm runs, the matching non-layer-norm runs and the single-layer LSTM.
grid_search_results = layernorm_results + non_layernorm_results + lstm_50

def remove_units20(results):
    """Remove every result whose ``model-rnn_cell_params-units`` equals 20.

    The list is edited in place and the same list object is returned.
    """
    survivors = [
        entry for entry in results
        if not (entry["model-rnn_cell_params-units"] == 20)
    ]
    # Slice assignment rewrites the caller's list instead of rebinding a new one.
    results[:] = survivors
    return results

# remove_units20() edits the list in place and returns it; the reassignment
# just rebinds the same object. Print how many results remain.
grid_search_results = remove_units20(grid_search_results)
print(len(grid_search_results))
    

FOUND
dict_keys(['model-rnn_cell_params-units', 'model-bidirectional', 'model-num_layers', 'model-layer_normalization', 'metrics_per_fold'])
8
8
LAYERNORM
0
model-rnn_cell_params-units 20
model-bidirectional False
model-num_layers 4
1
model-rnn_cell_params-units 20
model-bidirectional False
model-num_layers 6
2
model-rnn_cell_params-units 50
model-bidirectional True
model-num_layers 2
3
model-rnn_cell_params-units 50
model-bidirectional True
model-num_layers 4
4
model-rnn_cell_params-units 50
model-bidirectional True
model-num_layers 6
5
model-rnn_cell_params-units 50
model-bidirectional False
model-num_layers 2
6
model-rnn_cell_params-units 50
model-bidirectional False
model-num_layers 4
7
model-rnn_cell_params-units 50
model-bidirectional False
model-num_layers 6
NON LAYERNORM
0
model-rnn_cell_params-units 20
model-bidirectional False
model-num_layers 4
1
model-rnn_cell_params-units 20
model-bidirectional False
model-num_layers 6
2
model-rnn_cell_params-units 50
model-bidirectional T

In [28]:
# Sub-folders of 'logs_mac' to inspect.
logs_folders = [
    "general_multi_layer_100",
    "general_multi_layer_100_pending",
    "general_multi_layer_part1",
    "multi_layer_layernorm",
    "run_pending_experiments copy",
    "run_pending_experiments_small",
]

# Print the number of result entries stored in each folder's pickle.
for f in logs_folders:
    path = os.path.join('logs_mac', f)
    results = load_gridsearch_results(path)
    print(len(results))

9
9
288
24
128
128


In [64]:
# Full text dump of every experiment: its parameters, then each fold's metrics.
print(len(grid_search_results))
for exp in grid_search_results:
    # Parameters first; the fold metrics are printed separately below.
    for key in exp.keys():
        if key != 'metrics_per_fold':
            print(f"{key}: {exp[key]}")
    #print(f"epochs: {exp["training-epochs"]}")
    folds = exp["metrics_per_fold"]
    for i, fold in enumerate(folds):
        print(f"\tFold {i+1}:")
        # Each fold maps a metric name to a dict with "scaled" and "unscaled"
        # entries, each exposing .metric and .metric_per_sample.
        for metric, values in fold.items():
            scaled_val = values["scaled"]
            unscaled_val = values["unscaled"]
            # Output format: scaled value, with the unscaled value in parentheses.
            print(f"\t\t{metric}: {scaled_val.metric:.6f} ({unscaled_val.metric:.6f})\n\t\t\tper_sample s (u):\n\t\t\t\t {scaled_val.metric_per_sample}\n\t\t\t\t({unscaled_val.metric_per_sample})")
    print()
    print("######################################")

13
model-rnn_cell_params-units: 50
model-bidirectional: True
model-num_layers: 2
model-layer_normalization: True
	Fold 1:
		mean_squared_error: 0.000009 (0.274517)
			per_sample s (u):
				 [5.084199e-06, 2.33666e-06, 5.143929e-06, 2.3445568e-06, 1.6240303e-05, 6.499413e-06, 2.3382152e-06, 6.623767e-06, 2.5519082e-06, 3.3557467e-06, 1.2841824e-05, 1.6391288e-05, 2.5498925e-06, 3.1039685e-06, 6.6556267e-06, 6.59539e-06, 4.112513e-06, 6.591317e-06, 2.2903193e-06, 4.143334e-06, 2.69291e-06, 2.82782e-06, 2.3539944e-06, 3.514689e-06, 3.1425202e-06, 4.946416e-05, 1.751962e-05, 7.733783e-06, 1.901166e-05, 1.6304115e-05, 1.3181969e-05, 2.3497464e-06, 6.600339e-06, 2.3891407e-06, 2.6391567e-06, 2.5805966e-05, 2.60298e-06, 2.473527e-06, 2.3941628e-05, 2.3369962e-06, 4.9723294e-05, 2.3050466e-06, 1.5536803e-05, 6.9855378e-06, 2.3306688e-06]
				([0.17345384, 0.09870128, 0.1710714, 0.09827823, 0.48118752, 0.20052607, 0.10031207, 0.20229882, 0.09200877, 0.09463786, 0.41979042, 0.46990934, 0.1556771

1. 
   - For each experiment, collect the per-sample errors from all folds.
   - The samples are in the same order for every experiment, so they can be compared pairwise across experiments.

In [65]:
from sklearn.preprocessing import FunctionTransformer
from types import FunctionType
import re

def get_exp_name(exp):
    """Build a short, human-readable label for one experiment dict.

    Every key except ``'metrics_per_fold'`` contributes one piece, in dict
    order, and the pieces are joined with single spaces:

    * ``str`` value: the value itself;
    * ``bool`` value: the key, prefixed with ``"not "`` when the value is false;
    * plain function value: its ``__qualname__``;
    * anything else: ``<shortcut>=<value>``, where the shortcut comes from the
      part of the key after the last ``-``. It is the initials of that part's
      ``_``-separated words, or the word itself when there is only one.

    A ``FunctionTransformer`` value short-circuits all of that: the name of the
    wrapped function is returned immediately and unmodified.

    The joined label goes through ``str.capitalize``, so its first character is
    upper-cased and everything after it is lower-cased (``None`` is rendered as
    ``none``).

    Args:
        exp: mapping of parameter names to values, optionally including
            ``'metrics_per_fold'``.

    Returns:
        The label as a string.
    """
    parts = []
    for key, value in exp.items():
        if key == 'metrics_per_fold':
            continue
        if isinstance(value, str):
            parts.append(value)
        elif isinstance(value, FunctionTransformer):
            return value.func.__name__
        elif isinstance(value, bool):
            # bool is tested before the generic branch so that True/False are
            # not rendered as "<shortcut>=True".
            parts.append(key if value else "not " + key)
        elif isinstance(value, FunctionType):
            parts.append(value.__qualname__)
        else:
            key_words = key.split("-")[-1].split("_")
            if len(key_words) > 1:
                # word[:1] tolerates empty words (e.g. a doubled or trailing
                # underscore) where word[0] would raise IndexError.
                shortcut = "".join(word[:1] for word in key_words)
            else:
                shortcut = key_words[0]
            parts.append(shortcut + "=" + str(value))

    # Joining the collected pieces keeps the separator independent of where
    # 'metrics_per_fold' sits in the dict. A leading 'metrics_per_fold' key
    # used to produce a leading space, which also defeated capitalize().
    return " ".join(parts).capitalize()

def filter_arch_level_params(exp):
    """Report whether ``exp`` sets any of four model options to a non-neutral value.

    Returns ``True`` when ``model-dilation_base`` or ``model-residual_block_size``
    is present and differs from ``None``, or when ``model-smyl_std`` or
    ``model-smyl_residual`` is present and differs from ``False``. Returns
    ``False`` otherwise, including when none of these keys is present.
    """
    neutral_values = {
        "model-dilation_base": None,
        "model-residual_block_size": None,
        "model-smyl_std": False,
        "model-smyl_residual": False,
    }
    return any(
        name in neutral_values and setting != neutral_values[name]
        for name, setting in exp.items()
    )

# Maps each experiment label to the flat list of its per-sample unscaled RMSE
# values, concatenated across folds in fold order.
exp_samples = {}
for i, exp in enumerate(grid_search_results):
    # Skip experiments for which filter_arch_level_params() returns True.
    if filter_arch_level_params(exp):
        continue
    exp_name = get_exp_name(exp)
    # NOTE: two experiments that yield the same label overwrite each other here.
    exp_samples[exp_name] = []
    folds = exp["metrics_per_fold"]
    for fold in folds:
        exp_samples[exp_name].extend(fold["root_mean_squared_error"]["unscaled"].metric_per_sample)
    #print(len(exp_samples[exp_name]))

#for exp_name, samples in exp_samples.items():
#    print(f"{exp_name}: {len(samples)}")
#    print(f"mean: {sum(samples)/len(samples)}")
#    print(f"std: {pd.Series(samples).std()}")
    


In [66]:
from scipy import stats
import statsmodels.stats.multitest
from critdd import Diagram
from itertools import combinations
import numpy as np
import pandas as pd

# One list of per-sample values per experiment, in exp_samples insertion order.
samples = list(exp_samples.values())
# Printed as (number of experiments, samples per experiment).
print(np.array(samples).shape)
# Friedman test over all experiments at once; each experiment is one argument.
friedman_result = stats.friedmanchisquare(*samples)
friedman_result

(13, 450)


FriedmanchisquareResult(statistic=1831.6955311355305, pvalue=0.0)

In [67]:
# Wilcoxon signed-rank test for every unordered pair of experiments.
combs = combinations(samples, 2)

pvals = []
for comb in combs:
    pvals.append(stats.wilcoxon(comb[0], comb[1]).pvalue)

pvals = np.array(pvals)
# NOTE: sorting in place discards which pair each p-value belongs to.
pvals.sort()
print(pvals)

# Manual Holm step-down at alpha = 0.05: the i-th smallest p-value is compared
# against 0.05 / (m - i); reporting stops at the first one above its threshold.
for i in range(len(pvals)):
    if pvals[i] > 0.05/(len(pvals)-i):
        print(f"{pvals[i]} is not st. significant")
        print(f"{0.05/(len(pvals)-i)} is the threshold")
        break
    else:
        print(f"{pvals[i]} is st. significant")

# Holm correction computed by statsmodels; shown as the cell's output.
statsmodels.stats.multitest.multipletests(pvals, method="holm")

[3.81344340e-70 4.46931400e-68 2.20452830e-64 1.62066339e-62
 3.17554155e-62 7.20187400e-60 5.97625468e-56 2.55208292e-55
 3.80042238e-53 7.90433945e-51 3.19896460e-47 2.36992269e-46
 5.19207613e-46 9.08156127e-46 1.26624440e-44 2.74612062e-44
 8.57521091e-43 1.63297902e-40 3.63153277e-40 1.97438534e-39
 5.20371972e-39 9.97785886e-39 1.46534582e-37 1.61623215e-37
 9.42725487e-37 1.92821767e-35 8.05287821e-35 1.23277786e-34
 4.49013889e-34 1.10252400e-33 2.87570954e-33 8.94950653e-33
 1.68651383e-32 2.03487899e-29 6.65641760e-28 5.31674961e-27
 3.97735146e-24 1.31725723e-23 2.27742861e-22 1.47115808e-21
 3.15107067e-21 2.09760780e-20 5.92620086e-20 6.84419764e-20
 2.33223384e-19 3.45761445e-18 4.79965877e-18 3.61410990e-16
 5.09265625e-14 1.13756158e-11 5.03719640e-11 1.55088658e-10
 2.14911613e-10 7.22065156e-10 1.13059306e-07 2.23915860e-07
 2.56432586e-07 4.76450961e-07 6.63173466e-06 7.65021584e-06
 9.11922277e-06 2.16777920e-05 1.14762497e-04 2.09673376e-04
 3.52884840e-04 3.913475

(array([ True,  True,  True,  True,  True,  True,  True,  True,  True,
         True,  True,  True,  True,  True,  True,  True,  True,  True,
         True,  True,  True,  True,  True,  True,  True,  True,  True,
         True,  True,  True,  True,  True,  True,  True,  True,  True,
         True,  True,  True,  True,  True,  True,  True,  True,  True,
         True,  True,  True,  True,  True,  True,  True,  True,  True,
         True,  True,  True,  True,  True,  True,  True,  True,  True,
         True,  True,  True,  True, False, False, False, False, False,
        False, False, False, False, False, False]),
 array([2.97448585e-68, 3.44137178e-66, 1.67544151e-62, 1.21549754e-60,
        2.34990074e-60, 5.25736802e-58, 4.30290337e-54, 1.81197888e-53,
        2.66029567e-51, 5.45399422e-49, 2.17529593e-45, 1.58784820e-44,
        3.42677025e-44, 5.90301482e-44, 8.10396414e-43, 1.73005599e-42,
        5.31663076e-41, 9.96117205e-39, 2.17891966e-38, 1.16488735e-37,
        3.01815744e-

In [68]:
# One column per experiment label; display the labels to check them.
diag_df = pd.DataFrame(exp_samples)
diag_df.columns

Index(['Units=50 model-bidirectional nl=2 model-layer_normalization',
       'Units=50 model-bidirectional nl=4 model-layer_normalization',
       'Units=50 model-bidirectional nl=6 model-layer_normalization',
       'Units=50 not model-bidirectional nl=2 model-layer_normalization',
       'Units=50 not model-bidirectional nl=4 model-layer_normalization',
       'Units=50 not model-bidirectional nl=6 model-layer_normalization',
       'Units=50 model-bidirectional nl=2 db=none rbs=none not model-smyl_std not model-smyl_residual not model-layer_normalization',
       'Units=50 model-bidirectional nl=4 db=none rbs=none not model-smyl_std not model-smyl_residual not model-layer_normalization',
       'Units=50 model-bidirectional nl=6 db=none rbs=none not model-smyl_std not model-smyl_residual not model-layer_normalization',
       'Units=50 not model-bidirectional nl=2 db=none rbs=none not model-smyl_std not model-smyl_residual not model-layer_normalization',
       'Units=50 not model-b

In [69]:
# Displays every per-sample list in full (large output).
exp_samples.values()

dict_values([[0.4164764, 0.31416583, 0.41360858, 0.3134937, 0.6936707, 0.4478029, 0.3167213, 0.44977674, 0.30333048, 0.30763444, 0.64791334, 0.6854965, 0.3945604, 0.4379366, 0.45169643, 0.45282394, 0.353314, 0.4625183, 0.30735576, 0.35631585, 0.4065419, 0.29385486, 0.31226298, 0.31895182, 0.43997413, 1.1371069, 0.68252224, 0.58074325, 0.6933969, 0.6887019, 0.64378685, 0.31513909, 0.44831973, 0.31500217, 0.3337528, 0.8361114, 0.32650346, 0.31294551, 0.8108447, 0.3016943, 1.0988537, 0.3167312, 0.6933697, 0.52510816, 0.31127858, 0.48504895, 0.48102885, 0.65500104, 0.81237257, 0.6249906, 0.65965796, 0.9233435, 0.59645504, 0.74249995, 0.91233623, 0.9037091, 0.9261311, 0.7629932, 0.6302397, 0.8276532, 0.47576118, 0.49383017, 0.60456806, 0.90516907, 0.9428388, 0.80958295, 0.6479046, 0.90268475, 0.48815435, 0.9250841, 0.90582716, 0.8116614, 0.87049145, 0.85141397, 0.90524673, 0.48317316, 0.48124287, 0.48274285, 0.55564606, 0.47753915, 0.5121567, 0.49076027, 0.4820469, 0.4838947, 0.8974338, 0.4

In [70]:
# Rows are experiments here; transposed below so each column is one treatment.
x=np.array(list(exp_samples.values()))
treatment_names = list(exp_samples.keys())
print(treatment_names)
x = x.T
# maximize_outcome=False: presumably because the values are errors, where
# lower is better. TODO confirm against the critdd documentation.
diagram = Diagram(x,
    treatment_names=treatment_names,
    maximize_outcome = False,
)

# Write the diagram to a .tex file, using alpha = 0.05 and the "holm" adjustment.
diagram.to_file(
    "critdd_layernorm_multilayer_50added.tex",
    alpha=0.05,
    adjustment="holm",
    reverse_x=True,
)

['Units=50 model-bidirectional nl=2 model-layer_normalization', 'Units=50 model-bidirectional nl=4 model-layer_normalization', 'Units=50 model-bidirectional nl=6 model-layer_normalization', 'Units=50 not model-bidirectional nl=2 model-layer_normalization', 'Units=50 not model-bidirectional nl=4 model-layer_normalization', 'Units=50 not model-bidirectional nl=6 model-layer_normalization', 'Units=50 model-bidirectional nl=2 db=none rbs=none not model-smyl_std not model-smyl_residual not model-layer_normalization', 'Units=50 model-bidirectional nl=4 db=none rbs=none not model-smyl_std not model-smyl_residual not model-layer_normalization', 'Units=50 model-bidirectional nl=6 db=none rbs=none not model-smyl_std not model-smyl_residual not model-layer_normalization', 'Units=50 not model-bidirectional nl=2 db=none rbs=none not model-smyl_std not model-smyl_residual not model-layer_normalization', 'Units=50 not model-bidirectional nl=4 db=none rbs=none not model-smyl_std not model-smyl_residua

In [29]:
def get_models_without_dropout(diagram: Diagram):
    """Collect the treatments of ``diagram`` whose name ends with ``"dropout=0.0"``.

    Treatments are visited from lowest to highest average rank, so the returned
    dict is ordered that way as well.

    Returns:
        dict mapping each selected treatment name to a ``(name, average_rank)``
        tuple.
    """
    ranked = list(zip(diagram.treatment_names, diagram.average_ranks))
    ranked.sort(key=lambda pair: pair[1])
    return {
        label: (label, rank)
        for label, rank in ranked
        if label.endswith("dropout=0.0")
    }

def get_partial_diagram(subset_names, samples):
    """Create a ``Diagram`` restricted to the treatments listed in ``subset_names``.

    ``samples`` maps a treatment name to its sequence of values. The selected
    sequences are stacked and transposed so that each column of the matrix
    handed to ``Diagram`` belongs to one treatment, in ``subset_names`` order.
    """
    selected_rows = []
    for treatment in subset_names:
        selected_rows.append(samples[treatment])
    matrix = np.transpose(np.array(selected_rows))
    return Diagram(
        matrix,
        treatment_names=subset_names,
        maximize_outcome=False,
    )

# NOTE(review): this selects treatments whose name ends with "dropout=0.0".
# The treatment names printed for `diagram` in the cell above do not have that
# suffix, and the output shown for this cell lists different names, so it looks
# like this cell was run against an earlier `diagram`. Confirm before re-running.
no_dropout_models = get_models_without_dropout(diagram)
partial_diagram = get_partial_diagram([name for name, _ in no_dropout_models.values()], exp_samples)

# Average ranks as computed within the full diagram.
print("Previous average ranks:")
for name, avg_r in no_dropout_models.values():
    print(f"{name}: {avg_r}")

# Average ranks recomputed over the selected subset only, lowest rank first.
print("\nNew average ranks:")
sorted_treatments = sorted(zip(partial_diagram.treatment_names, partial_diagram.average_ranks), key=lambda x: x[1])
for name, avg_r in sorted_treatments:
    print(f"{name}: {avg_r}")

# Write the subset diagram to its own .tex file with the same settings as the full one.
partial_diagram.to_file(
    "critdd_layernorm_no_drop.tex",
    alpha=0.05,
    adjustment="holm",
    reverse_x=True,
)

Previous average ranks:
Not model-layer_normalization rd=0.0 dropout=0.0: 4.302222222222222
Model-layer_normalization rd=0.2 dropout=0.0: 4.377777777777778
Model-layer_normalization rd=0.1 dropout=0.0: 4.988888888888889
Model-layer_normalization rd=0.0 dropout=0.0: 5.015555555555555
Not model-layer_normalization rd=0.2 dropout=0.0: 5.3933333333333335
Model-layer_normalization rd=0.05 dropout=0.0: 5.595555555555555
Model-layer_normalization rd=0.3 dropout=0.0: 6.0311111111111115
Not model-layer_normalization rd=0.3 dropout=0.0: 6.153333333333333
Not model-layer_normalization rd=0.1 dropout=0.0: 6.388888888888889
Not model-layer_normalization rd=0.05 dropout=0.0: 6.753333333333333

New average ranks:
Not model-layer_normalization rd=0.0 dropout=0.0: 4.302222222222222
Model-layer_normalization rd=0.2 dropout=0.0: 4.377777777777778
Model-layer_normalization rd=0.1 dropout=0.0: 4.988888888888889
Model-layer_normalization rd=0.0 dropout=0.0: 5.015555555555555
Not model-layer_normalization rd