# Notebook for Multiple Experiment Results

### Libraries import

In [1]:
import os
import sys
import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn.metrics import auc

sys.path.append("../../")

2022-10-22 15:53:28.312508: I tensorflow/core/util/util.cc:169] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.


In [2]:
from utils.metrics import tpr_fpr_curve, all_metrics_curve
from utils.common import format_index, repeat_vector_to_size
from utils.metrics import homocedasticity_level, shapeness_level
from utils.metrics import accuracy, precision, recall, specificity, f1_score

### Experiments selection

In [4]:
# Experiment selection: map every "G3D_<major>_<minor>" id to its results folder.
major_exps_ids = [format_index(i) for i in [12, 10, 8, 13, 11, 9]]
minor_exps_ids = [format_index(i) for i in range(1, 22)]
save_path_2_excels = "../../results/Results_Reports/"

exps = {}
seed = 8128
root_path = "../../results/Ganomaly_3D/"

# Folder names are expected to start with the formatted id
# (e.g. "0012_train_healthy/0001_Ganomaly3D-64x64x64x1"), so ids are matched as
# prefixes: a plain substring test could also hit digits elsewhere in a name.
major_dirs = sorted(os.listdir(root_path))
for i in major_exps_ids:
    for exp_id in major_dirs:
        if not exp_id.startswith(i):
            continue
        sub_path = os.path.join(root_path, exp_id)
        if not os.path.isdir(sub_path):
            # Stray files next to the experiment folders cannot hold sub-experiments.
            continue
        # Listed once per major folder instead of once per minor id.
        minor_dirs = sorted(os.listdir(sub_path))
        for j in minor_exps_ids:
            for subexp_id in minor_dirs:
                if subexp_id.startswith(j):
                    # The minor id is stored without zero padding, e.g. "G3D_0012_1".
                    final_id = "G3D_{}_{}".format(i, int(j))
                    exps[final_id] = {
                        "path": os.path.join(sub_path, subexp_id)
                    }

exps

{'G3D_0012_1': {'path': '../../results/Ganomaly_3D/0012_train_healthy/0001_Ganomaly3D-64x64x64x1'},
 'G3D_0012_2': {'path': '../../results/Ganomaly_3D/0012_train_healthy/0002_Ganomaly3D-64x64x64x1'},
 'G3D_0012_3': {'path': '../../results/Ganomaly_3D/0012_train_healthy/0003_Ganomaly3D-64x64x64x1'},
 'G3D_0012_4': {'path': '../../results/Ganomaly_3D/0012_train_healthy/0004_Ganomaly3D-64x64x64x1'},
 'G3D_0012_5': {'path': '../../results/Ganomaly_3D/0012_train_healthy/0005_Ganomaly3D-64x64x64x1'},
 'G3D_0012_6': {'path': '../../results/Ganomaly_3D/0012_train_healthy/0006_Ganomaly3D-64x64x64x1'},
 'G3D_0012_7': {'path': '../../results/Ganomaly_3D/0012_train_healthy/0007_Ganomaly3D-64x64x64x1'},
 'G3D_0012_8': {'path': '../../results/Ganomaly_3D/0012_train_healthy/0008_Ganomaly3D-64x64x64x1'},
 'G3D_0012_9': {'path': '../../results/Ganomaly_3D/0012_train_healthy/0009_Ganomaly3D-64x64x64x1'},
 'G3D_0012_10': {'path': '../../results/Ganomaly_3D/0012_train_healthy/0010_Ganomaly3D-64x64x64x1'},

### Errors loading

In [6]:
# Load the stored errors of every experiment and group them per patient.
# Resulting layout:
#   exps[exp_id]["<element>_losses"][class][partition][patient_id] -> list of errors
loss_elements = ["encoder", "contextual", "adversarial"]
partitions = ["train", "val", "test"]

for exp_id, exp_info in exps.items():
    errors_path = os.path.join(exp_info["path"], "outputs/errors")
    vectors_path = os.path.join(exp_info["path"], "outputs/latent_vectors/input_generator")

    # Create the empty containers for every element / class / partition up front
    for element in loss_elements:
        exp_info["{}_losses".format(element)] = {
            label: {partition: {} for partition in partitions}
            for label in ["normal", "abnormal"]
        }

    for element in loss_elements:
        losses = exp_info["{}_losses".format(element)]
        for partition in partitions:
            # Only one class is looked up for the train partition: "normal" when
            # its file exists, "abnormal" otherwise. Other partitions use both.
            if partition != "train":
                labels = ["normal", "abnormal"]
            elif os.path.isfile(os.path.join(errors_path, element, partition, "normal.npy")):
                labels = ["normal"]
            else:
                labels = ["abnormal"]

            for label in labels:
                error_file = os.path.join(errors_path, element, partition, label + ".npy")
                if not os.path.isfile(error_file):
                    continue

                sample_errors = np.load(error_file)
                # The patient id is parsed from each latent-vector file name; the
                # sorted file order is assumed to match the order of the stored
                # errors (only the lengths are checked) - TODO confirm.
                vector_files = sorted(os.listdir(os.path.join(vectors_path, partition, label)))
                patient_of_sample = [
                    int(file_name.split("_")[1].split("-")[1]) for file_name in vector_files
                ]
                assert len(sample_errors) == len(patient_of_sample)

                per_patient = losses[label][partition]
                for patient_id in np.unique(patient_of_sample):
                    per_patient[patient_id] = []
                for patient_id, sample_error in zip(patient_of_sample, sample_errors):
                    per_patient[patient_id].append(sample_error)

### Quantitative metrics

In [12]:
# Quantitative metrics: one row per (loss element, experiment, partition).
data_table = []
# Weight of the alternative per-patient score "mean + lambda * std"; it is not
# used while the per-patient score is the maximum error.
lambda_value = 5
data_columns = ["Exp ID", "Major ID", "Minor ID", "Group", "Partition", "AUC", "Threshold", "Acc", "Pre", "Rec", "Spe", "F1", "Homo", "Class"]

for t in ["encoder", "contextual", "adversarial"]:
    data = "{}_losses".format(t)
    for exp_id in exps:
        losses = exps[exp_id][data]

        # The class that has training samples gets label 0, the other class label 1.
        if len(losses["abnormal"]["train"]) != 0:
            errors = ["abnormal", "normal"]
        else:
            errors = ["normal", "abnormal"]

        # The distribution-level metrics pool the per-patient errors of every
        # partition, so they do not depend on `part` and are computed once per
        # (element, experiment) instead of once per partition.
        classes_data = [
            [
                np.r_[patient_errors]
                for g in ["train", "val", "test"]
                for patient_errors in losses[c][g].values()
            ]
            for c in ["normal", "abnormal"]
        ]
        homo_level = round(homocedasticity_level(*classes_data), 3)
        shape_level = round(shapeness_level(*classes_data, seed=seed), 3)

        # "val" must come first: the threshold picked there is reused on "test".
        for part in ["val", "test"]:
            y_true = []
            scores = []
            for ci, c in enumerate(errors):
                samples = losses[c][part]
                # One score per patient: the largest error among its samples.
                scores += [np.max(patient_errors) for patient_errors in samples.values()]
                y_true += [ci] * len(samples)
            y_true = np.r_[y_true]
            scores = np.r_[scores]
            tpr, fpr, _ = tpr_fpr_curve(y_true, scores)

            if part == "val":
                accs, _, recs, _, _, thresholds = all_metrics_curve(y_true, scores)
                # Operating point: threshold with the best mean of accuracy and recall.
                table_metrics = np.concatenate([
                    accs.reshape([-1, 1]),
                    recs.reshape([-1, 1]),
                ], axis=1)
                threshold = thresholds[np.argmax(np.mean(table_metrics, axis=1))]

            y_pred = (scores > threshold).astype(np.int64)

            TP = tf.keras.metrics.TruePositives()
            TN = tf.keras.metrics.TrueNegatives()
            FP = tf.keras.metrics.FalsePositives()
            FN = tf.keras.metrics.FalseNegatives()
            for counter in (TP, TN, FP, FN):
                counter.update_state(y_true, y_pred)
            tp, tn, fp, fn = (counter.result().numpy() for counter in (TP, TN, FP, FN))

            data_table.append([
                exp_id,
                exp_id.split("_")[1],
                exp_id.split("_")[2],
                t,
                part,
                round(auc(fpr, tpr), 3),
                round(threshold, 3),
                round(accuracy(tp, tn, fp, fn), 3),
                round(precision(tp, fp), 3),
                round(recall(tp, fn), 3),
                round(specificity(tn, fp), 3),
                round(f1_score(tp, fp, fn), 3),
                homo_level,
                shape_level
            ])
df = pd.DataFrame(data_table, columns=data_columns)

# Average the metric columns of paired runs and append them under a new id.
# Rows of both runs are paired by position, so both runs must contribute the
# same rows in the same order; the descriptive columns ("Major ID", "Minor ID",
# "Group", "Partition") of the averaged rows are copied from the first run.
experiment_pairs = [
    ("G3D_0012_1", "G3D_0010_1", "G3D_Control_1"),
    ("G3D_0012_11", "G3D_0010_2", "G3D_Control_1000"),
    ("G3D_0012_21", "G3D_0010_3", "G3D_Control_2000"),
    ("G3D_0008_1", "G3D_0010_6", "G3D_Control_5000"),
    ("G3D_0008_2", "G3D_0010_11", "G3D_Control_10000"),
    ("G3D_0008_3", "G3D_0010_16", "G3D_Control_15000"),
    ("G3D_0008_4", "G3D_0010_21", "G3D_Control_20000"),
    ("G3D_0013_1", "G3D_0011_1", "G3D_Park_1"),
    ("G3D_0013_11", "G3D_0011_2", "G3D_Park_1000"),
    ("G3D_0013_21", "G3D_0011_3", "G3D_Park_2000"),
    ("G3D_0009_1", "G3D_0011_6", "G3D_Park_5000"),
    ("G3D_0009_2", "G3D_0011_11", "G3D_Park_10000"),
    ("G3D_0009_3", "G3D_0011_16", "G3D_Park_15000"),
    ("G3D_0009_4", "G3D_0011_21", "G3D_Park_20000"),
]
first_metric_column = 5  # columns 0-4 identify the row, the rest are numeric metrics
id_columns = list(df.columns[1:first_metric_column])
metric_columns = list(df.columns[first_metric_column:])

averaged_frames = []
for exp_id_1, exp_id_2, new_id in experiment_pairs:
    rows_1 = df.loc[df["Exp ID"] == exp_id_1].reset_index(drop=True)
    rows_2 = df.loc[df["Exp ID"] == exp_id_2].reset_index(drop=True)
    if len(rows_1) != len(rows_2):
        raise ValueError(
            "Cannot average {} ({} rows) with {} ({} rows): row counts differ".format(
                exp_id_1, len(rows_1), exp_id_2, len(rows_2)
            )
        )
    if rows_1.empty:
        # Neither run is loaded: nothing to append for this pair.
        continue
    averaged_rows = pd.concat(
        [rows_1[id_columns], (rows_1[metric_columns] + rows_2[metric_columns]) / 2],
        axis=1,
    )
    averaged_rows.insert(0, "Exp ID", new_id)
    averaged_frames.append(averaged_rows)

# Single concat (instead of one per pair) with a fresh, unique row index.
df = pd.concat([df] + averaged_frames, axis=0, ignore_index=True)

# "_max" in the file name refers to the per-patient score (maximum error).
os.makedirs(save_path_2_excels, exist_ok=True)
df.to_excel(os.path.join(save_path_2_excels, "quantitative_metrics_max.xlsx"), index=False)
df

Unnamed: 0,Exp ID,Major ID,Minor ID,Group,Partition,AUC,Threshold,Acc,Pre,Rec,Spe,F1,Homo,Class
0,G3D_0012_1,0012,1,encoder,val,0.667,5.963,0.833,1.0,0.667,1.0,0.8,0.538,0.417
1,G3D_0012_1,0012,1,encoder,test,-0.0,5.963,0.75,0.667,1.0,0.5,0.8,0.538,0.417
2,G3D_0012_2,0012,2,encoder,val,0.389,0.764,0.333,0.4,0.667,0.0,0.5,0.533,0.588
3,G3D_0012_2,0012,2,encoder,test,-0.0,0.764,0.75,0.667,1.0,0.5,0.8,0.533,0.588
4,G3D_0012_3,0012,3,encoder,val,0.222,0.118,0.667,0.6,1.0,0.333,0.75,0.55,0.636
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1,G3D_Park_20000,0009,4,encoder,test,0.125,0.151,0.5,0.5,1.0,0.0,0.667,0.582,0.591
2,G3D_Park_20000,0009,4,contextual,val,0.333,0.133,0.833,0.75,1.0,0.667,0.857,0.545,0.6185
3,G3D_Park_20000,0009,4,contextual,test,0.125,0.133,0.5,0.5,1.0,0.0,0.667,0.545,0.6185
4,G3D_Park_20000,0009,4,adversarial,val,0.222,0.3815,0.5,0.5,0.667,0.333,0.571,0.5295,0.591


### Classing Qualitative Metrics

In [None]:
# Classing metrics: for every loss element, partition pair (g1, g2) and
# experiment, compare the "normal" errors against the "abnormal" errors and
# store the resulting test flags and values as one table row.
#
# NOTE(review): this cell has no execution count and looks stale - confirm before running:
#   * chiSquare_test, brownForsythe_test and levene_test are not among the names
#     imported in the import cells of this notebook.
#   * The flat keys read here ("train_<element>_normal", ...) are not created by
#     the "Errors loading" cell, which stores the errors under
#     exps[exp_id]["<element>_losses"][class][partition][patient_id].
data_table = []
data_columns = [
    "Exp ID", "Element", "Group", "vs Group", "Homo", "Class", 
    "Chi2 N -> A", "Delta Chi2 N -> A", "Chi2 A -> N", "Delta Chi2 A -> N"
]
for t in ["encoder", "contextual", "adversarial"]:
    for g1 in ["train", "val", "test"]:
        for g2 in ["val", "test"]:
            for exp_id in exps:
                # data1 always holds "normal" errors and data2 "abnormal" errors;
                # which of the two is read from partition g1 depends on whether
                # the experiment has a "train_<element>_normal" entry.
                if "train_{}_normal".format(t) in exps[exp_id].keys():
                    data1 = exps[exp_id]["{}_{}_normal".format(g1, t)]
                    data2 = exps[exp_id]["{}_{}_abnormal".format(g2, t)]
                else:
                    data2 = exps[exp_id]["{}_{}_abnormal".format(g1, t)]
                    data1 = exps[exp_id]["{}_{}_normal".format(g2, t)]
                # The chi-square helper is fed two sorted vectors of equal length:
                # the one with fewer rows goes through repeat_vector_to_size first
                # (presumably resampled up to the larger size using `seed` - TODO confirm).
                if data1.shape[0] > data2.shape[0]:
                    sub_data1 = np.r_[sorted(data1)]
                    sub_data2 = np.r_[sorted(repeat_vector_to_size(data2, data1.shape[0], seed))]
                elif data1.shape[0] < data2.shape[0]:
                    sub_data1 = np.r_[sorted(repeat_vector_to_size(data1, data2.shape[0], seed))]
                    sub_data2 = np.r_[sorted(data2)]
                else:
                    sub_data1 = np.r_[sorted(data1)]
                    sub_data2 = np.r_[sorted(data2)]
                # The test is run in both directions (normal -> abnormal and back).
                chi_test_1 = chiSquare_test(sub_data1, sub_data2)
                chi_test_2 = chiSquare_test(sub_data2, sub_data1)
                data1 = np.r_[sorted(data1)]
                data2 = np.r_[sorted(data2)]
                # Each test outcome is cast to 0/1 and inverted (1 - flag) before
                # the two outcomes are averaged into a single level.
                homo_level = [abs(1 - int(brownForsythe_test(data1, data2))), abs(1 - int(levene_test(data1, data2)))]
                class_level = [abs(1 - int(chi_test_1[0])), abs(1 - int(chi_test_2[0]))]
                data_table.append([
                    exp_id,
                    t,
                    g1,
                    g2, 
                    np.mean(homo_level),
                    np.mean(class_level),
                    # Element [0] of each chi-square result fills the "Chi2" column
                    # and element [1] the "Delta Chi2" column.
                    int(chi_test_1[0]), 
                    round(chi_test_1[1], 5),
                    int(chi_test_2[0]), 
                    round(chi_test_2[1], 5),
                ])
df = pd.DataFrame(data_table, columns=data_columns)

# Average the metric columns of paired runs and append them under a new id.
# Rows of both runs are paired by position, so both runs must contribute the
# same rows in the same order; the descriptive columns ("Element", "Group",
# "vs Group") of the averaged rows are copied from the first run.
experiment_pairs = [
    ("G3D_0012_1", "G3D_0010_1", "G3D_Control_1"),
    ("G3D_0012_11", "G3D_0010_2", "G3D_Control_1000"),
    ("G3D_0012_21", "G3D_0010_3", "G3D_Control_2000"),
    ("G3D_0008_1", "G3D_0010_6", "G3D_Control_5000"),
    ("G3D_0008_2", "G3D_0010_11", "G3D_Control_10000"),
    ("G3D_0008_3", "G3D_0010_16", "G3D_Control_15000"),
    ("G3D_0008_4", "G3D_0010_21", "G3D_Control_20000"),
    ("G3D_0013_1", "G3D_0011_1", "G3D_Park_1"),
    ("G3D_0013_11", "G3D_0011_2", "G3D_Park_1000"),
    ("G3D_0013_21", "G3D_0011_3", "G3D_Park_2000"),
    ("G3D_0009_1", "G3D_0011_6", "G3D_Park_5000"),
    ("G3D_0009_2", "G3D_0011_11", "G3D_Park_10000"),
    ("G3D_0009_3", "G3D_0011_16", "G3D_Park_15000"),
    ("G3D_0009_4", "G3D_0011_21", "G3D_Park_20000"),
]
first_metric_column = 4  # columns 0-3 identify the row, the rest are numeric metrics
id_columns = list(df.columns[1:first_metric_column])
metric_columns = list(df.columns[first_metric_column:])

averaged_frames = []
for exp_id_1, exp_id_2, new_id in experiment_pairs:
    rows_1 = df.loc[df["Exp ID"] == exp_id_1].reset_index(drop=True)
    rows_2 = df.loc[df["Exp ID"] == exp_id_2].reset_index(drop=True)
    if len(rows_1) != len(rows_2):
        raise ValueError(
            "Cannot average {} ({} rows) with {} ({} rows): row counts differ".format(
                exp_id_1, len(rows_1), exp_id_2, len(rows_2)
            )
        )
    if rows_1.empty:
        # Neither run is loaded: nothing to append for this pair.
        continue
    averaged_rows = pd.concat(
        [rows_1[id_columns], (rows_1[metric_columns] + rows_2[metric_columns]) / 2],
        axis=1,
    )
    averaged_rows.insert(0, "Exp ID", new_id)
    averaged_frames.append(averaged_rows)

# Single concat (instead of one per pair) with a fresh, unique row index.
df = pd.concat([df] + averaged_frames, axis=0, ignore_index=True)

os.makedirs(save_path_2_excels, exist_ok=True)
df.to_excel(os.path.join(save_path_2_excels, "classing_metrics.xlsx"), index=False)
df

### Grouping Qualitative Metrics

In [None]:
# Grouping metrics: for every loss element, partition pair (g1, g2) and
# experiment, compare the errors of a single class between the two partitions
# and store the resulting test flags and values as one table row.
#
# NOTE(review): this cell has no execution count and looks stale - confirm before running:
#   * chiSquare_test, brownForsythe_test and levene_test are not among the names
#     imported in the import cells of this notebook.
#   * The flat keys read here ("train_<element>_normal", ...) are not created by
#     the "Errors loading" cell, which stores the errors under
#     exps[exp_id]["<element>_losses"][class][partition][patient_id].
data_table = []
data_columns = [
    "Exp ID", "Element", "G1", "G2", "Homo", "Class", 
    "Chi2 G1 -> G2", "Delta Chi2 G1 -> G2", "Chi2 G2 -> G1", "Delta Chi2 G2 -> G1"
]

for t in ["encoder", "contextual", "adversarial"]:
    for g1, g2 in [
        ("train", "val"), ("train", "test"), ("val", "test"),
    ]:
        for exp_id in exps:
            # The compared class is "normal" when the experiment has a
            # "train_<element>_normal" entry and "abnormal" otherwise.
            if "train_{}_normal".format(t) in exps[exp_id].keys():
                c = "normal"
            else:
                c = "abnormal"
            data1 = exps[exp_id]["{}_{}_{}".format(g1, t, c)]
            data2 = exps[exp_id]["{}_{}_{}".format(g2, t, c)]
            # The chi-square helper is fed two sorted vectors of equal length:
            # the one with fewer rows goes through repeat_vector_to_size first
            # (presumably resampled up to the larger size using `seed` - TODO confirm).
            if data1.shape[0] > data2.shape[0]:
                sub_data1 = np.r_[sorted(data1)]
                sub_data2 = np.r_[sorted(repeat_vector_to_size(data2, data1.shape[0], seed))]
            elif data1.shape[0] < data2.shape[0]:
                sub_data1 = np.r_[sorted(repeat_vector_to_size(data1, data2.shape[0], seed))]
                sub_data2 = np.r_[sorted(data2)]
            else:
                sub_data1 = np.r_[sorted(data1)]
                sub_data2 = np.r_[sorted(data2)]
            # The test is run in both directions (g1 -> g2 and g2 -> g1).
            chi_test_1 = chiSquare_test(sub_data1, sub_data2)
            chi_test_2 = chiSquare_test(sub_data2, sub_data1)
            data1 = np.r_[sorted(data1)]
            data2 = np.r_[sorted(data2)]
            # Each test outcome is cast to 0/1 and used as is (no inversion)
            # before the two outcomes are averaged into a single level.
            homo_level = [int(brownForsythe_test(data1, data2)), int(levene_test(data1, data2))]
            class_level = [int(chi_test_1[0]), int(chi_test_2[0])]
            data_table.append([
                exp_id,
                t,
                g1,
                g2,
                np.mean(homo_level),
                np.mean(class_level),
                # Element [0] of each chi-square result fills the "Chi2" column
                # and element [1] the "Delta Chi2" column.
                int(chi_test_1[0]), 
                round(chi_test_1[1], 5),
                int(chi_test_2[0]), 
                round(chi_test_2[1], 5),
            ])
df = pd.DataFrame(data_table, columns=data_columns)

# Average the metric columns of paired runs and append them under a new id.
# Rows of both runs are paired by position, so both runs must contribute the
# same rows in the same order; the descriptive columns ("Element", "G1", "G2")
# of the averaged rows are copied from the first run.
experiment_pairs = [
    ("G3D_0012_1", "G3D_0010_1", "G3D_Control_1"),
    ("G3D_0012_11", "G3D_0010_2", "G3D_Control_1000"),
    ("G3D_0012_21", "G3D_0010_3", "G3D_Control_2000"),
    ("G3D_0008_1", "G3D_0010_6", "G3D_Control_5000"),
    ("G3D_0008_2", "G3D_0010_11", "G3D_Control_10000"),
    ("G3D_0008_3", "G3D_0010_16", "G3D_Control_15000"),
    ("G3D_0008_4", "G3D_0010_21", "G3D_Control_20000"),
    ("G3D_0013_1", "G3D_0011_1", "G3D_Park_1"),
    ("G3D_0013_11", "G3D_0011_2", "G3D_Park_1000"),
    ("G3D_0013_21", "G3D_0011_3", "G3D_Park_2000"),
    ("G3D_0009_1", "G3D_0011_6", "G3D_Park_5000"),
    ("G3D_0009_2", "G3D_0011_11", "G3D_Park_10000"),
    ("G3D_0009_3", "G3D_0011_16", "G3D_Park_15000"),
    ("G3D_0009_4", "G3D_0011_21", "G3D_Park_20000"),
]
first_metric_column = 4  # columns 0-3 identify the row, the rest are numeric metrics
id_columns = list(df.columns[1:first_metric_column])
metric_columns = list(df.columns[first_metric_column:])

averaged_frames = []
for exp_id_1, exp_id_2, new_id in experiment_pairs:
    rows_1 = df.loc[df["Exp ID"] == exp_id_1].reset_index(drop=True)
    rows_2 = df.loc[df["Exp ID"] == exp_id_2].reset_index(drop=True)
    if len(rows_1) != len(rows_2):
        raise ValueError(
            "Cannot average {} ({} rows) with {} ({} rows): row counts differ".format(
                exp_id_1, len(rows_1), exp_id_2, len(rows_2)
            )
        )
    if rows_1.empty:
        # Neither run is loaded: nothing to append for this pair.
        continue
    averaged_rows = pd.concat(
        [rows_1[id_columns], (rows_1[metric_columns] + rows_2[metric_columns]) / 2],
        axis=1,
    )
    averaged_rows.insert(0, "Exp ID", new_id)
    averaged_frames.append(averaged_rows)

# Single concat (instead of one per pair) with a fresh, unique row index.
df = pd.concat([df] + averaged_frames, axis=0, ignore_index=True)

os.makedirs(save_path_2_excels, exist_ok=True)
df.to_excel(os.path.join(save_path_2_excels, "grouping_metrics.xlsx"), index=False)
df