In [2]:
from experiments_utils import get_experiments_samples, load_logs_pickle
from critdd import Diagram
import numpy as np

# Directories that hold the pickled test results of each experiment family.
l1_path = "logs/multi_layer_dynamic_1l_300"
l10_dyn_payh = "logs/multi_layer_dynamic_10l"  # NOTE(review): name looks like a typo of "path"; kept unchanged for other cells
static_path = "logs/multi_layer_static"

# The three default result files share one pickle name, so load them in a single pass.
l1_logs, l10_dyn_logs, static_logs = (
    load_logs_pickle(log_dir, pickle_filename="testing_results.pkl")
    for log_dir in (l1_path, l10_dyn_payh, static_path)
)
# The static directory additionally contains a separate "_1l" results pickle.
static_logs_1l = load_logs_pickle(static_path, pickle_filename="testing_results_1l.pkl")




In [3]:
# List the label of every experiment found in static_logs_1l.
for entry in static_logs_1l:
    print(entry["label"])

l1 lrFixed


In [4]:
# Inspect how metrics are stored on one test result (first log, first entry of its
# "test_results"). The attribute is called `metrics_values` and, per the output below,
# maps metric name -> {"scaled" | "unscaled"} -> MetricResults.
l1_logs[0]["test_results"][0].metrics_values

{'mean_squared_error': {'scaled': <optimization.data_classes.MetricResults at 0x1ca25587ad0>,
  'unscaled': <optimization.data_classes.MetricResults at 0x1ca2574eff0>},
 'root_mean_squared_error': {'scaled': <optimization.data_classes.MetricResults at 0x1ca2574fb00>,
  'unscaled': <optimization.data_classes.MetricResults at 0x1ca25794650>},
 'mean_absolute_error': {'scaled': <optimization.data_classes.MetricResults at 0x1ca25795160>,
  'unscaled': <optimization.data_classes.MetricResults at 0x1ca25795c70>}}

In [75]:
def get_experiment_metrics_for_model_n(test_results, model_n: int = 0, metric_summarization="metric_per_sample"):
    """Return one summarization of the unscaled RMSE for a single test result.

    Args:
        test_results: Indexable collection of test-result objects, each exposing a
            ``metrics_values`` mapping of metric name -> {"scaled"/"unscaled"} -> results.
        model_n: Index into ``test_results`` of the result to read.
        metric_summarization: Name of the attribute to read from the unscaled
            ``root_mean_squared_error`` results object (default ``"metric_per_sample"``).

    Returns:
        The value of the requested attribute.

    Raises:
        AttributeError: If the results object has no attribute ``metric_summarization``.
    """
    # The attribute is `metrics_values` (plural "metrics"); the previous spelling
    # `metric_values` does not exist on the test-result objects.
    unscaled_metrics = test_results[model_n].metrics_values["root_mean_squared_error"]["unscaled"]
    return getattr(unscaled_metrics, metric_summarization)

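# From l1_logs and l10_dyn_logs only the dynamic-LR experiments are needed, and from the static logs only the fixed-LR ones (see the dynamic_lr_ok / static_lr_ok filters below). The other runs were trained with the same parameters as the dynamic/static experiments, are therefore suboptimal, and were not discussed in the text.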
def get_experiment_samples(logs, fold_k, dynamic_lr_ok=True, static_lr_ok=True, scaled=False):
    """Collect per-sample RMSE values and labels of the experiments that pass the LR filter.

    An experiment counts as "dynamic LR" when its ``training_results`` contain a truthy
    ``"training-additional_callbacks"`` entry; otherwise it counts as "static LR".

    Args:
        logs: Iterable of experiment logs (mappings with "label", "training_results"
            and "test_results").
        fold_k: Index into each log's "test_results".
        dynamic_lr_ok: Keep dynamic-LR experiments.
        static_lr_ok: Keep static-LR experiments.
        scaled: Read the "scaled" RMSE results instead of the "unscaled" ones.

    Returns:
        Tuple ``(samples, labels)`` of two equally long lists, in the order of ``logs``.
    """
    callbacks_key = "training-additional_callbacks"
    variant = "scaled" if scaled else "unscaled"

    def _keep(entry):
        training = entry["training_results"]
        if callbacks_key in training and training[callbacks_key]:
            return dynamic_lr_ok
        return static_lr_ok

    selected = [entry for entry in logs if _keep(entry)]
    labels = [entry["label"] for entry in selected]
    samples = [
        entry["test_results"][fold_k].metrics_values["root_mean_squared_error"][variant].metric_per_sample
        for entry in selected
    ]
    return samples, labels

# --- Configuration ---
model_k = 0  # fold index stored in the kwargs dicts below; the loop overrides it per fold
number_of_models = 1  # number of models to average over, if 1 then use only model trained on first fold
scaled = False

# Keyword arguments for get_experiment_samples, one dict per loaded results file.
# The two dynamic result files contribute only dynamic-LR experiments, the two static
# result files only static-LR ones.
l1_vars = {"logs": l1_logs, "fold_k": model_k, "dynamic_lr_ok": True, "static_lr_ok": False, "scaled": scaled}
l10_dyn_vars = {"logs": l10_dyn_logs, "fold_k": model_k, "dynamic_lr_ok": True, "static_lr_ok": False, "scaled": scaled}
static_vars = {"logs": static_logs, "fold_k": model_k, "dynamic_lr_ok": False, "static_lr_ok": True, "scaled": scaled}
static_1l_vars = {"logs": static_logs_1l, "fold_k": model_k, "dynamic_lr_ok": False, "static_lr_ok": True, "scaled": scaled}
vars_for_exps = [l1_vars, l10_dyn_vars, static_vars, static_1l_vars]

# Without at least one fold the loop below would have nothing to collect.
if number_of_models < 1:
    raise ValueError("number_of_models must be at least 1")

# --- Collect samples and labels of every selected experiment ---
all_exp_samples, all_exp_labels = [], []
for exp_kwargs in vars_for_exps:  # not named `vars`, which would shadow the builtin
    model_samples = None
    for fold_k in range(number_of_models):  # separate name so the `model_k` setting is not clobbered
        # Override the fold on a copy so the kwargs dicts are not mutated between re-runs.
        fold_samples, fold_labels = get_experiment_samples(**{**exp_kwargs, "fold_k": fold_k})
        if model_samples is None:
            model_samples = fold_samples
        else:
            # Append this fold's values to each experiment's row
            # (assumes equally long 1-D per-sample arrays — TODO confirm).
            model_samples = np.concatenate((model_samples, fold_samples), axis=1)
    all_exp_samples.extend(model_samples)
    # Labels do not depend on the fold, so the ones from the last fold are used.
    all_exp_labels.extend(fold_labels)

In [67]:
#all_exp_samples[0].shape

In [76]:
# Sanity check: number of collected experiments, then their labels on the next line.
print(len(all_exp_samples), all_exp_labels, sep="\n")

10
['l1 lrDynamic', 'l10 lrDynamic', 'l6 lrDynamic', 'l4 lrDynamic', 'l2 lrDynamic', 'l2 lrFixed', 'l4 lrFixed', 'l6 lrFixed', 'l10 lrFixed', 'l1 lrFixed']


In [77]:
# Per-experiment summary (label, mean, median, std) of the per-sample RMSE values,
# ordered by ascending mean.
mean_samples = sorted(
    (
        (name, np.mean(values), np.median(values), np.std(values))
        for name, values in zip(all_exp_labels, all_exp_samples)
    ),
    key=lambda row: row[1],
)
print("Is scaled: ", scaled, "... We use standardisation (sklearn StandardScaler) for features scaling")
for name, rmse_mean, rmse_median, rmse_std in mean_samples:
    print(f"{name}: mean RMSE: {rmse_mean:.5f}, median RMSE: {rmse_median:.5f}, std: {rmse_std:.5f}")

Is scaled:  False ... We use standardisation (sklearn StandardScaler) for features scaling
l10 lrDynamic: mean RMSE: 0.35402, median RMSE: 0.36042, std: 0.07552
l6 lrFixed: mean RMSE: 0.36204, median RMSE: 0.37019, std: 0.06047
l10 lrFixed: mean RMSE: 0.36491, median RMSE: 0.38415, std: 0.09361
l6 lrDynamic: mean RMSE: 0.38862, median RMSE: 0.39463, std: 0.07743
l4 lrDynamic: mean RMSE: 0.43002, median RMSE: 0.40741, std: 0.08443
l1 lrFixed: mean RMSE: 0.44926, median RMSE: 0.47081, std: 0.10754
l4 lrFixed: mean RMSE: 0.45630, median RMSE: 0.43610, std: 0.07885
l2 lrFixed: mean RMSE: 0.48277, median RMSE: 0.43424, std: 0.10027
l2 lrDynamic: mean RMSE: 0.48691, median RMSE: 0.50039, std: 0.05285
l1 lrDynamic: mean RMSE: 0.53380, median RMSE: 0.54210, std: 0.12083


In [78]:
# Critical difference diagram over the raw per-sample RMSE values.
treatment_names = all_exp_labels
print(treatment_names)

# all_exp_samples holds one row per experiment; transpose so that each experiment
# (treatment) becomes a column.
x = np.array(all_exp_samples).T

# RMSE is an error measure, hence maximize_outcome=False.
diagram = Diagram(x, treatment_names=treatment_names, maximize_outcome=False)

export_options = {"alpha": 0.05, "adjustment": "holm", "reverse_x": True}
diagram.to_file("critdd_test_results.tex", **export_options)

['l1 lrDynamic', 'l10 lrDynamic', 'l6 lrDynamic', 'l4 lrDynamic', 'l2 lrDynamic', 'l2 lrFixed', 'l4 lrFixed', 'l6 lrFixed', 'l10 lrFixed', 'l1 lrFixed']


In [60]:
# Critical difference diagram over RMSE values averaged in consecutive groups.
samples_per_group = 5

# Replace every run of `samples_per_group` consecutive values by its mean
# (the last group is shorter when the length is not a multiple of the group size).
all_exp_samples_grouped = [
    [
        np.mean(values[start:start + samples_per_group])
        for start in range(0, len(values), samples_per_group)
    ]
    for values in all_exp_samples
]

treatment_names = all_exp_labels
print(treatment_names)

# One row per experiment -> transpose so that each experiment (treatment) is a column.
x = np.array(all_exp_samples_grouped).T

# RMSE is an error measure, hence maximize_outcome=False.
diagram = Diagram(x, treatment_names=treatment_names, maximize_outcome=False)

export_options = {"alpha": 0.05, "adjustment": "holm", "reverse_x": True}
diagram.to_file("critdd_test_results_grouped.tex", **export_options)

['l1 lrDynamic', 'l10 lrDynamic', 'l6 lrDynamic', 'l4 lrDynamic', 'l2 lrDynamic', 'l2 lrFixed', 'l4 lrFixed', 'l6 lrFixed', 'l10 lrFixed', 'l1 lrFixed']
