# Notebook for Multiple Experiment Results

### Library imports

In [None]:
import os
import sys
import numpy as np
import pandas as pd
import tensorflow as tf
from scipy import stats
import matplotlib.pyplot as plt
from sklearn.metrics import auc
from sklearn.model_selection import train_test_split

sys.path.append("../../")

In [None]:
from utils.common import format_index, repeat_vector_to_size
from utils.metrics import precision_recall_curve, tpr_fpr_curve
from utils.metrics import chiSquare_test, brownForsythe_test, levene_test
from utils.metrics import accuracy, precision, recall, specificity, f1_score

### Experiments selection

In [None]:
# Experiments to analyse: every major id is combined with every minor id.
major_exps_ids = [format_index(i) for i in [10, 11]]
minor_exps_ids = [format_index(i) for i in range(1, 22)]

# Folder that receives the Excel reports written by the cells below.
save_path_2_excels = "../../results/Results_Reports/"

root_path = "../../results/Ganomaly_3D/"
seed = 8128
exps = {}

# Walk root_path/<major folder>/<minor folder>. A folder is selected when its
# name contains the formatted id as a substring; each hit is registered as
# "G3D_<major id>_<minor id as int>" together with its path.
for major_id in major_exps_ids:
    for major_dir in sorted(os.listdir(root_path)):
        if major_id not in major_dir:
            continue
        sub_path = os.path.join(root_path, major_dir)
        # Read the sub-folder names once; they are scanned for every minor id.
        minor_dirs = sorted(os.listdir(sub_path))
        for minor_id in minor_exps_ids:
            for minor_dir in minor_dirs:
                if minor_id not in minor_dir:
                    continue
                exps["G3D_{}_{}".format(major_id, int(minor_id))] = {
                    "path": os.path.join(sub_path, minor_dir)
                }

exps

### Errors loading

In [None]:
# Load the error arrays stored by every selected experiment.
#
# After this cell exps[exp_id] holds, besides "path":
#   "<partition>_<error type>_<class>" -> array read from
#       <path>/outputs/errors/<error type>/<partition>/<class>.npy
#   "all_<error type>_<class>"         -> those arrays concatenated in the
#       order the partitions are visited (train, val, test)
for exp_id in exps:
    experiment = exps[exp_id]
    base_path = os.path.join(experiment["path"], "outputs/errors/")

    # Every pooled array starts empty so it can be grown by concatenation.
    for t in ["encoder", "contextual", "adversarial"]:
        for c in ["normal", "abnormal"]:
            experiment["all_{}_{}".format(t, c)] = np.r_[[]]

    for t in ["encoder", "contextual", "adversarial"]:
        for m in ["train", "val", "test"]:
            if m != "train":
                classes = ["normal", "abnormal"]
            elif os.path.isfile(os.path.join(base_path, t, m, "normal.npy")):
                # Only one class is loaded for train: normal when normal.npy
                # exists, abnormal otherwise.
                classes = ["normal"]
            else:
                classes = ["abnormal"]

            for c in classes:
                pooled_key = "all_{}_{}".format(t, c)
                loaded = np.load(os.path.join(base_path, t, m, c + ".npy"))
                experiment["{}_{}_{}".format(m, t, c)] = loaded
                experiment[pooled_key] = np.concatenate([experiment[pooled_key], loaded])

### Errors by patients loading

In [None]:
# Group the errors loaded in the previous cell by patient.
#
# After this cell exps[exp_id] additionally holds:
#   "<partition>_<error type>_<class>_patients" -> {patient id: [errors]}
#   "all_<error type>_<class>_patients"         -> {patient id: [errors]}
#       accumulated over every partition visited
#
# NOTE(review): the i-th file name (sorted) in the latent-vector folder is
# assumed to correspond to the i-th entry of the matching error array - confirm
# against the code that writes both outputs.
for exp_id in exps:
    base_path = os.path.join(exps[exp_id]["path"], "outputs/latent_vectors/input_generator")
    for t in ["encoder", "contextual", "adversarial"]:
        for c in ["normal", "abnormal"]:
            exps[exp_id]["all_{}_{}_patients".format(t, c)] = {}

    for t in ["encoder", "contextual", "adversarial"]:
        for m in ["train", "val", "test"]:
            if m == "train":
                # Train errors exist for a single class; check the key of the
                # error type being processed (same test as the metric cells).
                if "train_{}_normal".format(t) in exps[exp_id]:
                    classes = ["normal"]
                else:
                    classes = ["abnormal"]
            else:
                classes = ["normal", "abnormal"]
            for c in classes:
                # Patient id per file: second "_"-separated token, the part
                # after its first "-", with the extension stripped, as int.
                patients_ids_positions = [
                    int(i.split("_")[1].split("-")[1].split(".")[0]) for i in sorted(
                        os.listdir(os.path.join(base_path, m, c))
                    )
                ]
                all_data = "all_{}_{}_patients".format(t, c)
                data = "{}_{}_{}".format(m, t, c)
                key = "{}_{}".format(data, "patients")
                exps[exp_id][key] = {}

                for p_id in np.unique(patients_ids_positions):
                    exps[exp_id][key][p_id] = []
                    # setdefault keeps errors already pooled for this patient
                    # from an earlier partition; assigning [] here would
                    # silently discard them.
                    exps[exp_id][all_data].setdefault(p_id, [])

                for i, p_id in enumerate(patients_ids_positions):
                    exps[exp_id][key][p_id].append(exps[exp_id][data][i])
                    exps[exp_id][all_data][p_id].append(exps[exp_id][data][i])

### Quantitative metrics

In [None]:
# Quantitative metrics: one row per (error type, experiment, partition) with
# the AUC, the decision threshold and the confusion-matrix metrics, plus a
# homoscedasticity score. The table is written to quantitative_metrics.xlsx.
data_table = []
data_columns = ["Exp ID", "Group", "Partition", "AUC", "Threshold", "Acc", "Pre", "Rec", "Spe", "F1", "Homo"]

# Position in this list is the label: normal -> 0, abnormal -> 1.
errors = ["normal", "abnormal"]

for t in ["encoder", "contextual", "adversarial"]:
    for exp_id in exps:
        # The class the model was trained on is the one with train errors.
        if "train_{}_normal".format(t) in exps[exp_id]:
            trained_class = "normal"
        else:
            trained_class = "abnormal"

        # Homoscedasticity metric. It does not depend on the partition, so it
        # is computed once per (error type, experiment) and reused for both
        # rows. Each test result counts as-is when the trained class is
        # compared with itself across partitions, and inverted (|1 - result|)
        # when it is compared with the other class.
        # NOTE(review): presumably the helpers return a truthy value when the
        # variances are judged equal - confirm in utils.metrics.
        homo_level = []
        for c in ["normal", "abnormal"]:
            if c == trained_class:
                groups = [("train", "val"), ("train", "test"), ("val", "test")]
                prefix = 0
            else:
                groups = [
                    ("train", "val"), ("train", "test"),
                    ("val", "val"), ("val", "test"),
                    ("test", "val"), ("test", "test")
                ]
                prefix = 1
            for g1, g2 in groups:
                data1 = np.r_[sorted(exps[exp_id]["{}_{}_{}".format(g1, t, trained_class)])]
                data2 = np.r_[sorted(exps[exp_id]["{}_{}_{}".format(g2, t, c)])]
                homo_level += [
                    abs(prefix - int(brownForsythe_test(data1, data2))),
                    abs(prefix - int(levene_test(data1, data2)))
                ]

        for part in ["val", "test"]:
            data = "{}_{}_".format(part, t)
            y_true = np.concatenate([[i]*exps[exp_id][data + j].shape[0] for i, j in enumerate(errors)])
            y_pred = np.concatenate([exps[exp_id][data + i] for i in errors])
            tpr, fpr, _ = tpr_fpr_curve(y_true, y_pred)

            # The threshold is chosen on validation only; "val" runs first, so
            # the same value is reused for the test row.
            if part == "val":
                precisions, recalls, thresholds = precision_recall_curve(y_true, y_pred)
                deltas_pre_4_rec = np.abs(precisions - recalls)
                # Smallest non-zero |precision - recall|. The candidate
                # positions are kept as indices into the ORIGINAL arrays: an
                # argmin over the filtered array alone would point at the wrong
                # threshold whenever a zero delta precedes the minimum.
                candidates = np.flatnonzero(deltas_pre_4_rec != 0)
                # Guard in case the curve helper returns fewer thresholds than
                # precision/recall points.
                candidates = candidates[candidates < len(thresholds)]
                threshold = thresholds[candidates[np.argmin(deltas_pre_4_rec[candidates])]]

            # Predicted label 1 means error above the threshold for models
            # trained on normal data and below it for the others.
            if trained_class == "normal":
                y_pred = (y_pred > threshold).astype(np.int64)
            else:
                y_pred = (y_pred < threshold).astype(np.int64)

            TP = tf.keras.metrics.TruePositives()
            TN = tf.keras.metrics.TrueNegatives()
            FP = tf.keras.metrics.FalsePositives()
            FN = tf.keras.metrics.FalseNegatives()

            TP.update_state(y_true, y_pred)
            TN.update_state(y_true, y_pred)
            FP.update_state(y_true, y_pred)
            FN.update_state(y_true, y_pred)

            tp = TP.result().numpy()
            tn = TN.result().numpy()
            fp = FP.result().numpy()
            fn = FN.result().numpy()

            data_table.append([
                exp_id,
                t,
                part,
                round(auc(fpr, tpr), 3),
                round(threshold, 3),
                round(accuracy(tp, tn, fp, fn), 3),
                round(precision(tp, fp), 3),
                round(recall(tp, fn), 3),
                round(specificity(tn, fp), 3),
                round(f1_score(tp, fp, fn), 3),
                round(np.mean(homo_level), 3)
            ])
df = pd.DataFrame(data_table, columns=data_columns)
df.to_excel(os.path.join(save_path_2_excels, "quantitative_metrics.xlsx"), index=False)
df

### Classing Qualitative Metrics

In [None]:
# Normal-vs-abnormal comparison inside the val and test partitions: one row
# per (error type, partition, experiment) with the Brown-Forsythe and Levene
# results and the chi-square test run in both directions. The table is written
# to classing_metrics.xlsx.
data_table = []
data_columns = ["Exp ID", "Group", "Element", "Brow", "Lev", "Chi2 N -> A", "Chi2 A -> N"]
for t in ["encoder", "contextual", "adversarial"]:
    for g in ["val", "test"]:
        for exp_id in exps:
            normal_errors = exps[exp_id]["{}_{}_normal".format(g, t)]
            abnormal_errors = exps[exp_id]["{}_{}_abnormal".format(g, t)]
            n_normal = normal_errors.shape[0]
            n_abnormal = abnormal_errors.shape[0]

            # The chi-square inputs get the same length: the shorter vector is
            # stretched to the size of the longer one, then both are sorted.
            if n_normal > n_abnormal:
                chi_normal = normal_errors
                chi_abnormal = repeat_vector_to_size(abnormal_errors, n_normal)
            elif n_normal < n_abnormal:
                chi_normal = repeat_vector_to_size(normal_errors, n_abnormal)
                chi_abnormal = abnormal_errors
            else:
                chi_normal = normal_errors
                chi_abnormal = abnormal_errors
            chi_normal = np.r_[sorted(chi_normal)]
            chi_abnormal = np.r_[sorted(chi_abnormal)]

            normal_vs_abnormal = chiSquare_test(chi_normal, chi_abnormal)
            abnormal_vs_normal = chiSquare_test(chi_abnormal, chi_normal)

            # The variance tests use the sorted vectors at their own sizes.
            sorted_normal = np.r_[sorted(normal_errors)]
            sorted_abnormal = np.r_[sorted(abnormal_errors)]

            row = [
                exp_id,
                g,
                t,
                int(brownForsythe_test(sorted_normal, sorted_abnormal)),
                int(levene_test(sorted_normal, sorted_abnormal)),
                "{} ({})".format(int(normal_vs_abnormal[0]), round(normal_vs_abnormal[1], 5)),
                "{} ({})".format(int(abnormal_vs_normal[0]), round(abnormal_vs_normal[1], 5)),
            ]
            data_table.append(row)

df = pd.DataFrame(data_table, columns=data_columns)
df.to_excel(os.path.join(save_path_2_excels, "classing_metrics.xlsx"), index=False)
df

### Grouping Qualitative Metrics

In [None]:
# Partition-vs-partition comparison for the class each model was trained on:
# one row per (error type, partition pair, experiment) with the results of
# the statistical tests. The table is written to grouping_metrics.xlsx.

# The four tests below are used in this cell but are not imported by the
# import cells at the top of the notebook.
# NOTE(review): assumes utils.metrics exposes them next to levene_test and
# brownForsythe_test - confirm, or point this import at their real module.
from utils.metrics import (
    bartlett_test,
    kolmogorovSmirnov_test,
    kruskalWallis_test,
    mannWhitney_test,
)

data_table = []
data_columns = ["Exp ID", "Element", "G1", "G2", "Brow", "Lev", "Bar", "MW", "KW", "KS", "Chi2 G1 -> G2", "Chi2 G2 -> G1"]

for t in ["encoder", "contextual", "adversarial"]:
    for g1, g2 in [
        ("train", "val"), ("train", "test"), ("train", "all"),
        ("val", "test"), ("val", "all"),
        ("test", "all")
    ]:
        for exp_id in exps:
            # Only the class with train errors can be compared across all
            # partitions.
            if "train_{}_normal".format(t) in exps[exp_id]:
                c = "normal"
            else:
                c = "abnormal"
            data1 = exps[exp_id]["{}_{}_{}".format(g1, t, c)]
            data2 = exps[exp_id]["{}_{}_{}".format(g2, t, c)]
            n1 = data1.shape[0]
            n2 = data2.shape[0]

            # The chi-square inputs get the same length: the larger group is
            # randomly sub-sampled down to the size of the smaller one. The
            # size is passed as an absolute count because a fractional
            # test_size is rounded up by train_test_split, and float error can
            # then return one sample too many (e.g. 7 / 100 * 100 -> 8).
            if n1 > n2:
                _, temporal = train_test_split(data1, test_size=n2, random_state=seed)
                sub_data1 = np.r_[sorted(temporal)]
                sub_data2 = np.r_[sorted(data2)]
            elif n1 < n2:
                _, temporal = train_test_split(data2, test_size=n1, random_state=seed)
                sub_data2 = np.r_[sorted(temporal)]
                sub_data1 = np.r_[sorted(data1)]
            else:
                # Sorted like the other two branches so chiSquare_test always
                # receives identically prepared inputs.
                sub_data1 = np.r_[sorted(data1)]
                sub_data2 = np.r_[sorted(data2)]
            chi_test_g1 = chiSquare_test(sub_data1, sub_data2)
            chi_test_g2 = chiSquare_test(sub_data2, sub_data1)

            # The remaining tests use the sorted vectors at their own sizes.
            data1 = np.r_[sorted(data1)]
            data2 = np.r_[sorted(data2)]
            data_table.append([
                exp_id,
                t,
                g1,
                g2,
                int(brownForsythe_test(data1, data2)),
                int(levene_test(data1, data2)),
                int(bartlett_test(data1, data2)),
                int(mannWhitney_test(data1, data2)),
                int(kruskalWallis_test(data1, data2)),
                int(kolmogorovSmirnov_test(data1, data2)),
                "{} ({})".format(int(chi_test_g1[0]), round(chi_test_g1[1], 5)),
                "{} ({})".format(int(chi_test_g2[0]), round(chi_test_g2[1], 5)),
            ])
df = pd.DataFrame(data_table, columns=data_columns)
df.to_excel(os.path.join(save_path_2_excels, "grouping_metrics.xlsx"), index=False)
df

### Homoscedasticity between patients and groups

In [None]:
# Homoscedasticity between each patient and a complete partition: one row per
# (error type, patient partition, complete partition, experiment, class) with
# the mean over patients of the averaged Brown-Forsythe and Levene results.
# The table is written to homocedasticity_metrics.xlsx.
data_table = []
data_columns = ["Exp ID", "Group", "Class", "Partition", "vs Complete Part", "Homocedasticity level"]

for group in ["encoder", "contextual", "adversarial"]:
    for partition in ["train", "val", "test", "all"]:
        for total_partition in ["train", "val", "test", "all"]:
            for exp_id in exps:
                if partition == "train" or total_partition == "train":
                    # Train errors exist for one class only. The key is built
                    # from the loop's own error type (`group`), not from a
                    # variable left over by an earlier cell.
                    if "train_{}_normal".format(group) in exps[exp_id]:
                        classes = ["normal"]
                    else:
                        classes = ["abnormal"]
                else:
                    classes = ["normal", "abnormal"]

                for test_class in classes:
                    patients_dict = exps[exp_id]["{}_{}_{}_patients".format(partition, group, test_class)]
                    data_group = np.r_[sorted(exps[exp_id]["{}_{}_{}".format(total_partition, group, test_class)])]
                    homo_by_patients = []
                    for p_id in patients_dict:
                        # Sort the patient's errors once and feed both tests.
                        patient_errors = np.r_[sorted(patients_dict[p_id])]
                        homo_by_patients.append(
                            int(brownForsythe_test(patient_errors, data_group))*0.5 +
                            int(levene_test(patient_errors, data_group))*0.5
                        )
                    data_table.append([
                        exp_id,
                        group,
                        test_class,
                        partition,
                        total_partition,
                        np.mean(homo_by_patients)
                    ])

df = pd.DataFrame(data_table, columns=data_columns)
df.to_excel(os.path.join(save_path_2_excels, "homocedasticity_metrics.xlsx"), index=False)
df