# Notebook for Multiple Experiment Results

### Library imports

In [None]:
import os
import sys
import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn.metrics import auc

sys.path.append("../../")

In [None]:
from utils.common import format_index, repeat_vector_to_size
from utils.metrics import precision_recall_curve, tpr_fpr_curve
from utils.metrics import chiSquare_test, brownForsythe_test, levene_test
from utils.metrics import accuracy, precision, recall, specificity, f1_score

### Experiments selection

In [None]:
# Experiment families (major ids) and their sub-experiments (minor ids), both
# rendered through the project's `format_index` helper.
major_exps_ids = [format_index(idx) for idx in [12, 10, 8, 13, 11, 9]]
minor_exps_ids = [format_index(idx) for idx in range(1, 22)]
save_path_2_excels = "../../results/Results_Reports/"

exps = {}
seed = 8128
root_path = "../../results/Ganomaly_3D/"

# The root listing is the same for every major id, so it is read only once.
root_entries = sorted(os.listdir(root_path))

for major_id in major_exps_ids:
    for exp_dir in root_entries:
        # A folder belongs to a family when its name contains the major id.
        if major_id not in exp_dir:
            continue
        sub_path = os.path.join(root_path, exp_dir)
        sub_entries = sorted(os.listdir(sub_path))
        for minor_id in minor_exps_ids:
            for sub_dir in sub_entries:
                # Same substring rule for the sub-experiment folders.
                if minor_id not in sub_dir:
                    continue
                # Key layout: G3D_<major id as formatted>_<minor id as int>.
                final_id = "G3D_{}_{}".format(major_id, int(minor_id))
                exps[final_id] = {"path": os.path.join(sub_path, sub_dir)}

# Notebook display of the selected experiments.
exps

### Loading errors

In [None]:
# Load the stored error vectors of every selected experiment.
#
# For each experiment the following keys are written into its dictionary:
#   "<partition>_<type>_<class>"  -> array loaded from
#                                    outputs/errors/<type>/<partition>/<class>.npy
#   "all_<type>_<class>"          -> concatenation of the val and test arrays
error_kinds = ("encoder", "contextual", "adversarial")

for exp_id, exp_data in exps.items():
    base_path = os.path.join(exp_data["path"], "outputs/errors/")

    # Start every "all" accumulator as an empty array.
    for t in error_kinds:
        for c in ("normal", "abnormal"):
            exp_data["all_{}_{}".format(t, c)] = np.r_[[]]

    for t in error_kinds:
        for m in ("train", "val", "test"):
            # The train partition holds a single class: "normal" when its
            # file exists, "abnormal" otherwise. val/test hold both classes.
            if m != "train":
                classes = ["normal", "abnormal"]
            elif os.path.isfile(os.path.join(base_path, t, m, "normal.npy")):
                classes = ["normal"]
            else:
                classes = ["abnormal"]

            for c in classes:
                errors = np.load(os.path.join(base_path, t, m, c + ".npy"))
                exp_data["{}_{}_{}".format(m, t, c)] = errors
                # Train errors are kept out of the "all" aggregate.
                if m == "train":
                    continue
                all_data = "all_{}_{}".format(t, c)
                exp_data[all_data] = np.concatenate([exp_data[all_data], errors])

### Loading errors by patient

In [None]:
# Group the previously loaded errors by patient.
#
# For each experiment the following keys are written into its dictionary:
#   "<partition>_<type>_<class>_patients" -> {patient id: [errors]}
#   "all_<type>_<class>_patients"         -> {patient id: [val + test errors]}
#
# The patient of the i-th error is taken from the i-th file name (sorted) of
# outputs/latent_vectors/input_generator/<partition>/<class>.
# NOTE(review): this assumes the error arrays were saved in that same sorted
# file order -- confirm against the code that wrote them.
for exp_id in exps:
    exp = exps[exp_id]
    base_path = os.path.join(exp["path"], "outputs/latent_vectors/input_generator")
    for t in ["encoder", "contextual", "adversarial"]:
        for c in ["normal", "abnormal"]:
            exp["all_{}_{}_patients".format(t, c)] = {}

    for t in ["encoder", "contextual", "adversarial"]:
        for m in ["train", "val", "test"]:
            if m == "train":
                # The train partition only holds the class the model was
                # trained on, detected through the errors loaded earlier.
                if "train_encoder_normal" in exp.keys():
                    classes = ["normal"]
                else:
                    classes = ["abnormal"]
            else:
                classes = ["normal", "abnormal"]
            for c in classes:
                # The id is the integer between the "-" that follows the
                # first "_" and the next "." of the file name.
                patients_ids_positions = [
                    int(i.split("_")[1].split("-")[1].split(".")[0]) for i in sorted(
                        os.listdir(os.path.join(base_path, m, c))
                    )
                ]
                all_data = "all_{}_{}_patients".format(t, c)
                data = "{}_{}_{}".format(m, t, c)
                key = "{}_{}".format(data, "patients")
                exp[key] = {}

                for p_id in np.unique(patients_ids_positions):
                    exp[key][p_id] = []
                    # setdefault (instead of rebinding to []) keeps the val
                    # errors of a patient that also shows up in test; the
                    # previous reset silently dropped them.
                    exp[all_data].setdefault(p_id, [])

                for i, p_id in enumerate(patients_ids_positions):
                    exp[key][p_id].append(exp[data][i])
                    # Train errors are not part of the "all" aggregate.
                    if m != "train":
                        exp[all_data][p_id].append(exp[data][i])

### Quantitative metrics

In [None]:
# Build one row of classification and distribution metrics per
# (error type, experiment, partition).
data_table = []
data_columns = ["Exp ID", "Group", "Partition", "AUC", "Threshold", "Acc", "Pre", "Rec", "Spe", "F1", "Homo", "Class"]

# Class names in label order: index 0 -> "normal", index 1 -> "abnormal".
errors = ["normal", "abnormal"]


def _sorted_vector(values):
    """Return `values` as a NumPy array in ascending order."""
    return np.r_[sorted(values)]


def _pair_flags(data1, data2):
    """Run the variance and chi-square tests on two error vectors.

    The chi-square tests receive sorted vectors of equal length (the shorter
    one is expanded with `repeat_vector_to_size` and the notebook seed);
    Brown-Forsythe and Levene receive the sorted original vectors.

    Returns:
        A pair of lists of ints:
        [brownForsythe flag, levene flag] and
        [chi2(data1 -> data2) flag, chi2(data2 -> data1) flag].
    """
    size1, size2 = data1.shape[0], data2.shape[0]
    if size1 > size2:
        sub_data1 = _sorted_vector(data1)
        sub_data2 = _sorted_vector(repeat_vector_to_size(data2, size1, seed))
    elif size1 < size2:
        sub_data1 = _sorted_vector(repeat_vector_to_size(data1, size2, seed))
        sub_data2 = _sorted_vector(data2)
    else:
        sub_data1 = _sorted_vector(data1)
        sub_data2 = _sorted_vector(data2)
    chi_test_1 = chiSquare_test(sub_data1, sub_data2)
    chi_test_2 = chiSquare_test(sub_data2, sub_data1)

    data1 = _sorted_vector(data1)
    data2 = _sorted_vector(data2)
    homo_flags = [int(brownForsythe_test(data1, data2)), int(levene_test(data1, data2))]
    class_flags = [int(chi_test_1[0]), int(chi_test_2[0])]
    return homo_flags, class_flags


def _distribution_levels(exp, t, trained_class):
    """Collect the homoscedasticity and chi-square flags of one experiment.

    Partitions of the trained class are compared with each other (flags kept
    as returned) and with the partitions of the other class (flags inverted,
    `abs(1 - flag)`). Finally val vs test of the non-trained class is added
    with the flags kept as returned.
    NOTE(review): presumably the tests return a truthy value when both
    samples are compatible -- verify against utils.metrics.

    Returns:
        (homo_level, class_level): two lists of 0/1 ints.
    """
    homo_level = []
    class_level = []
    for c in ["normal", "abnormal"]:
        if c == trained_class:
            groups = [
                ("train", "val"), ("train", "test"), ("val", "test")
            ]
            prefix = 0
        else:
            groups = [
                ("train", "val"), ("train", "test"),
                ("val", "val"), ("val", "test"),
                ("test", "val"), ("test", "test"),
            ]
            prefix = 1
        for g1, g2 in groups:
            homo_flags, class_flags = _pair_flags(
                exp["{}_{}_{}".format(g1, t, trained_class)],
                exp["{}_{}_{}".format(g2, t, c)],
            )
            homo_level += [abs(prefix - flag) for flag in homo_flags]
            class_level += [abs(prefix - flag) for flag in class_flags]

    # Last pair: val vs test of the class the model was NOT trained on.
    other_class = "abnormal" if trained_class == "normal" else "normal"
    homo_flags, class_flags = _pair_flags(
        exp["{}_{}_{}".format("val", t, other_class)],
        exp["{}_{}_{}".format("test", t, other_class)],
    )
    homo_level += homo_flags
    class_level += class_flags
    return homo_level, class_level


for t in ["encoder", "contextual", "adversarial"]:
    for exp_id in exps:
        exp = exps[exp_id]
        if "train_{}_normal".format(t) in exp.keys():
            trained_class = "normal"
        else:
            trained_class = "abnormal"

        # Homoscedasticity metric. It does not depend on the partition, so it
        # is computed once per (error type, experiment) and reused below.
        homo_level, class_level = _distribution_levels(exp, t, trained_class)

        for part in ["val", "test", "all"]:
            data = "{}_{}_".format(part, t)
            y_true = np.concatenate([[i] * exp[data + j].shape[0] for i, j in enumerate(errors)])
            y_pred = np.concatenate([exp[data + i] for i in errors])
            tpr, fpr, _ = tpr_fpr_curve(y_true, y_pred)

            # The threshold is chosen on the validation partition only and is
            # reused for "test" and "all" (which come later in the loop).
            if part == "val":
                precisions, recalls, thresholds = precision_recall_curve(y_true, y_pred)
                deltas_pre_4_rec = np.abs(precisions - recalls)
                # Positions with a non-zero precision/recall gap. The argmin
                # is taken over these candidates and mapped back to the
                # original position; indexing `thresholds` with the position
                # inside the filtered array picked the wrong threshold
                # whenever a zero gap preceded the minimum.
                candidates = np.flatnonzero(deltas_pre_4_rec != 0)
                # Ignore positions without a matching threshold in case the
                # curve helper returns fewer thresholds than precision values.
                candidates = candidates[candidates < len(thresholds)]
                threshold = thresholds[candidates[np.argmin(deltas_pre_4_rec[candidates])]]

            # Label 1 ("abnormal") is predicted above the threshold when the
            # model was trained on normal data, below it otherwise.
            if trained_class == "normal":
                y_pred = (y_pred > threshold).astype(np.int64)
            else:
                y_pred = (y_pred < threshold).astype(np.int64)

            TP = tf.keras.metrics.TruePositives()
            TN = tf.keras.metrics.TrueNegatives()
            FP = tf.keras.metrics.FalsePositives()
            FN = tf.keras.metrics.FalseNegatives()

            TP.update_state(y_true, y_pred)
            TN.update_state(y_true, y_pred)
            FP.update_state(y_true, y_pred)
            FN.update_state(y_true, y_pred)

            tp = TP.result().numpy()
            tn = TN.result().numpy()
            fp = FP.result().numpy()
            fn = FN.result().numpy()

            data_table.append([
                exp_id,
                t,
                part,
                round(auc(fpr, tpr), 3),
                round(threshold, 3),
                round(accuracy(tp, tn, fp, fn), 3),
                round(precision(tp, fp), 3),
                round(recall(tp, fn), 3),
                round(specificity(tn, fp), 3),
                round(f1_score(tp, fp, fn), 3),
                round(np.mean(homo_level), 3),
                round(np.mean(class_level), 3)
            ])
df = pd.DataFrame(data_table, columns=data_columns)

# Experiments whose metric rows are averaged into one synthetic experiment:
# (first experiment id, second experiment id, id given to the averaged rows).
averaged_experiments = [
    ("G3D_0012_1", "G3D_0010_1", "G3D_Control_1"),
    ("G3D_0012_11", "G3D_0010_2", "G3D_Control_1000"),
    ("G3D_0012_21", "G3D_0010_3", "G3D_Control_2000"),
    ("G3D_0008_1", "G3D_0010_6", "G3D_Control_5000"),
    ("G3D_0008_2", "G3D_0010_11", "G3D_Control_10000"),
    ("G3D_0008_3", "G3D_0010_16", "G3D_Control_15000"),
    ("G3D_0008_4", "G3D_0010_21", "G3D_Control_20000"),
    ("G3D_0013_1", "G3D_0011_1", "G3D_Park_1"),
    ("G3D_0013_11", "G3D_0011_2", "G3D_Park_1000"),
    ("G3D_0013_21", "G3D_0011_3", "G3D_Park_2000"),
    ("G3D_0009_1", "G3D_0011_6", "G3D_Park_5000"),
    ("G3D_0009_2", "G3D_0011_11", "G3D_Park_10000"),
    ("G3D_0009_3", "G3D_0011_16", "G3D_Park_15000"),
    ("G3D_0009_4", "G3D_0011_21", "G3D_Park_20000"),
]

for exp_id_1, exp_id_2, new_id in averaged_experiments:
    rows_1 = df.loc[df["Exp ID"] == exp_id_1].values
    rows_2 = df.loc[df["Exp ID"] == exp_id_2].values

    # One id cell per row of the first experiment.
    column_ids = np.r_[rows_1.shape[0] * [new_id]].reshape([-1, 1])
    # Descriptive columns (Group, Partition) are copied from the first one.
    columns_fields = rows_1[:, 1:3]
    # Every metric column is the element-wise mean of both experiments.
    data1 = rows_1[:, 3:]
    data2 = rows_2[:, 3:]
    average = (data1 + data2) / 2

    new_rows = np.concatenate([column_ids, columns_fields, average], axis=1)
    concat_data = pd.DataFrame(new_rows, columns=df.columns)
    df = pd.concat([df, concat_data], axis=0)

df.to_excel(os.path.join(save_path_2_excels, "quantitative_metrics.xlsx"), index=False)
df

### Classing Qualitative Metrics

In [None]:
# Compare the normal errors against the abnormal errors for every
# (error type, partition pair, experiment) and store the test outcomes.
data_table = []
data_columns = [
    "Exp ID", "Element", "Group", "vs Group", "Homo", "Class", 
    "Chi2 N -> A", "Delta Chi2 N -> A", "Chi2 A -> N", "Delta Chi2 A -> N"
]


def _sorted_vector(values):
    """Return `values` as a NumPy array in ascending order."""
    return np.r_[sorted(values)]


def _equalized(first, second):
    """Return sorted versions of both vectors with the same length.

    The shorter vector is expanded with `repeat_vector_to_size` (using the
    notebook seed) up to the length of the longer one.
    """
    size1, size2 = first.shape[0], second.shape[0]
    if size1 > size2:
        return _sorted_vector(first), _sorted_vector(repeat_vector_to_size(second, size1, seed))
    if size1 < size2:
        return _sorted_vector(repeat_vector_to_size(first, size2, seed)), _sorted_vector(second)
    return _sorted_vector(first), _sorted_vector(second)


for t in ["encoder", "contextual", "adversarial"]:
    for g1 in ["train", "val", "test"]:
        for g2 in ["val", "test"]:
            for exp_id in exps:
                exp = exps[exp_id]
                # g1 is the partition of the trained class and g2 the one of
                # the other class; data1 is always the normal vector and
                # data2 the abnormal one.
                if "train_{}_normal".format(t) in exp.keys():
                    normal_part, abnormal_part = g1, g2
                else:
                    normal_part, abnormal_part = g2, g1
                data1 = exp["{}_{}_normal".format(normal_part, t)]
                data2 = exp["{}_{}_abnormal".format(abnormal_part, t)]

                # Chi-square runs on equally sized vectors, in both directions.
                sub_data1, sub_data2 = _equalized(data1, data2)
                chi_test_1 = chiSquare_test(sub_data1, sub_data2)
                chi_test_2 = chiSquare_test(sub_data2, sub_data1)

                # The variance tests run on the sorted original vectors.
                data1 = _sorted_vector(data1)
                data2 = _sorted_vector(data2)
                brown_flag = int(brownForsythe_test(data1, data2))
                levene_flag = int(levene_test(data1, data2))

                # Flags are inverted (abs(1 - flag)) before being averaged.
                homo_level = [abs(1 - brown_flag), abs(1 - levene_flag)]
                class_level = [abs(1 - int(chi_test_1[0])), abs(1 - int(chi_test_2[0]))]

                row = [exp_id, t, g1, g2]
                row += [np.mean(homo_level), np.mean(class_level)]
                row += [int(chi_test_1[0]), round(chi_test_1[1], 5)]
                row += [int(chi_test_2[0]), round(chi_test_2[1], 5)]
                data_table.append(row)
df = pd.DataFrame(data_table, columns=data_columns)

# Experiments whose metric rows are averaged into one synthetic experiment:
# (first experiment id, second experiment id, id given to the averaged rows).
averaged_experiments = [
    ("G3D_0012_1", "G3D_0010_1", "G3D_Control_1"),
    ("G3D_0012_11", "G3D_0010_2", "G3D_Control_1000"),
    ("G3D_0012_21", "G3D_0010_3", "G3D_Control_2000"),
    ("G3D_0008_1", "G3D_0010_6", "G3D_Control_5000"),
    ("G3D_0008_2", "G3D_0010_11", "G3D_Control_10000"),
    ("G3D_0008_3", "G3D_0010_16", "G3D_Control_15000"),
    ("G3D_0008_4", "G3D_0010_21", "G3D_Control_20000"),
    ("G3D_0013_1", "G3D_0011_1", "G3D_Park_1"),
    ("G3D_0013_11", "G3D_0011_2", "G3D_Park_1000"),
    ("G3D_0013_21", "G3D_0011_3", "G3D_Park_2000"),
    ("G3D_0009_1", "G3D_0011_6", "G3D_Park_5000"),
    ("G3D_0009_2", "G3D_0011_11", "G3D_Park_10000"),
    ("G3D_0009_3", "G3D_0011_16", "G3D_Park_15000"),
    ("G3D_0009_4", "G3D_0011_21", "G3D_Park_20000"),
]

for exp_id_1, exp_id_2, new_id in averaged_experiments:
    rows_1 = df.loc[df["Exp ID"] == exp_id_1].values
    rows_2 = df.loc[df["Exp ID"] == exp_id_2].values

    # One id cell per row of the first experiment.
    column_ids = np.r_[rows_1.shape[0] * [new_id]].reshape([-1, 1])
    # Descriptive columns (Element, Group, vs Group) come from the first one.
    columns_fields = rows_1[:, 1:4]
    # Every remaining column is the element-wise mean of both experiments.
    data1 = rows_1[:, 4:]
    data2 = rows_2[:, 4:]
    average = (data1 + data2) / 2

    new_rows = np.concatenate([column_ids, columns_fields, average], axis=1)
    concat_data = pd.DataFrame(new_rows, columns=df.columns)
    df = pd.concat([df, concat_data], axis=0)

df.to_excel(os.path.join(save_path_2_excels, "classing_metrics.xlsx"), index=False)
df

### Grouping Qualitative Metrics

In [None]:
# Compare the partitions of the trained class with each other for every
# (error type, partition pair, experiment) and store the test outcomes.
data_table = []
data_columns = [
    "Exp ID", "Element", "G1", "G2", "Homo", "Class", 
    "Chi2 G1 -> G2", "Delta Chi2 G1 -> G2", "Chi2 G2 -> G1", "Delta Chi2 G2 -> G1"
]


def _sorted_vector(values):
    """Return `values` as a NumPy array in ascending order."""
    return np.r_[sorted(values)]


def _equalized(first, second):
    """Return sorted versions of both vectors with the same length.

    The shorter vector is expanded with `repeat_vector_to_size` (using the
    notebook seed) up to the length of the longer one.
    """
    size1, size2 = first.shape[0], second.shape[0]
    if size1 > size2:
        return _sorted_vector(first), _sorted_vector(repeat_vector_to_size(second, size1, seed))
    if size1 < size2:
        return _sorted_vector(repeat_vector_to_size(first, size2, seed)), _sorted_vector(second)
    return _sorted_vector(first), _sorted_vector(second)


partition_pairs = [
    ("train", "val"), ("train", "test"), ("val", "test"),
]

for t in ["encoder", "contextual", "adversarial"]:
    for g1, g2 in partition_pairs:
        for exp_id in exps:
            exp = exps[exp_id]
            # Only the class with a train partition is compared here.
            c = "normal" if "train_{}_normal".format(t) in exp.keys() else "abnormal"
            data1 = exp["{}_{}_{}".format(g1, t, c)]
            data2 = exp["{}_{}_{}".format(g2, t, c)]

            # Chi-square runs on equally sized vectors, in both directions.
            sub_data1, sub_data2 = _equalized(data1, data2)
            chi_test_1 = chiSquare_test(sub_data1, sub_data2)
            chi_test_2 = chiSquare_test(sub_data2, sub_data1)

            # The variance tests run on the sorted original vectors.
            data1 = _sorted_vector(data1)
            data2 = _sorted_vector(data2)
            homo_level = [int(brownForsythe_test(data1, data2)), int(levene_test(data1, data2))]
            class_level = [int(chi_test_1[0]), int(chi_test_2[0])]

            row = [exp_id, t, g1, g2]
            row += [np.mean(homo_level), np.mean(class_level)]
            row += [int(chi_test_1[0]), round(chi_test_1[1], 5)]
            row += [int(chi_test_2[0]), round(chi_test_2[1], 5)]
            data_table.append(row)
df = pd.DataFrame(data_table, columns=data_columns)

# Experiments whose metric rows are averaged into one synthetic experiment:
# (first experiment id, second experiment id, id given to the averaged rows).
averaged_experiments = [
    ("G3D_0012_1", "G3D_0010_1", "G3D_Control_1"),
    ("G3D_0012_11", "G3D_0010_2", "G3D_Control_1000"),
    ("G3D_0012_21", "G3D_0010_3", "G3D_Control_2000"),
    ("G3D_0008_1", "G3D_0010_6", "G3D_Control_5000"),
    ("G3D_0008_2", "G3D_0010_11", "G3D_Control_10000"),
    ("G3D_0008_3", "G3D_0010_16", "G3D_Control_15000"),
    ("G3D_0008_4", "G3D_0010_21", "G3D_Control_20000"),
    ("G3D_0013_1", "G3D_0011_1", "G3D_Park_1"),
    ("G3D_0013_11", "G3D_0011_2", "G3D_Park_1000"),
    ("G3D_0013_21", "G3D_0011_3", "G3D_Park_2000"),
    ("G3D_0009_1", "G3D_0011_6", "G3D_Park_5000"),
    ("G3D_0009_2", "G3D_0011_11", "G3D_Park_10000"),
    ("G3D_0009_3", "G3D_0011_16", "G3D_Park_15000"),
    ("G3D_0009_4", "G3D_0011_21", "G3D_Park_20000"),
]

for exp_id_1, exp_id_2, new_id in averaged_experiments:
    rows_1 = df.loc[df["Exp ID"] == exp_id_1].values
    rows_2 = df.loc[df["Exp ID"] == exp_id_2].values

    # One id cell per row of the first experiment.
    column_ids = np.r_[rows_1.shape[0] * [new_id]].reshape([-1, 1])
    # Descriptive columns (Element, G1, G2) are copied from the first one.
    columns_fields = rows_1[:, 1:4]
    # Every remaining column is the element-wise mean of both experiments.
    data1 = rows_1[:, 4:]
    data2 = rows_2[:, 4:]
    average = (data1 + data2) / 2

    new_rows = np.concatenate([column_ids, columns_fields, average], axis=1)
    concat_data = pd.DataFrame(new_rows, columns=df.columns)
    df = pd.concat([df, concat_data], axis=0)

df.to_excel(os.path.join(save_path_2_excels, "grouping_metrics.xlsx"), index=False)
df