# GANomaly notebook: exporting the results of multiple experiments to Excel files

## Initial Configurations

### Libraries import

In [None]:
import os
import sys
import cv2
import random
import numpy as np
import pandas as pd
import tensorflow as tf
from scipy import stats

sys.path.append("../../")

In [None]:
from utils.metrics import accuracy, precision, recall, specificity, f1_score
from utils.metrics import dagostinoPearson_test, andersonDarling_test, shapiroWilks_test, chiSquare_test, fOneWay_test
from utils.metrics import brownForsythe_test, levene_test, bartlett_test
from utils.metrics import mannWhitney_test, kruskalWallis_test, kolmogorovSmirnov_test

### Functions definition

In [None]:
def precision_recall_curve(y_true, y_pred, num_thresholds=200):
    """Sweep evenly spaced thresholds over the scores and collect precision and recall.

    Args:
        y_true: Ground-truth labels; truthy entries are counted as positives.
        y_pred: Scores; a score strictly greater than the threshold counts as
            a positive prediction.
        num_thresholds (Int): How many thresholds to take between the minimum
            and the maximum score (both included).

    Returns:
        A tuple (precisions, recalls, thresholds) of arrays with one entry per
        threshold. Precision is reported as 0 when nothing is predicted
        positive, and recall as 0 when there are no positive labels.
    """
    thresholds = np.linspace(np.min(y_pred), np.max(y_pred), num_thresholds)
    negatives = np.logical_not(y_true)
    precisions, recalls = [], []
    for cut in thresholds:
        flagged = y_pred > cut
        tp = np.count_nonzero(np.logical_and(y_true, flagged))
        fp = np.count_nonzero(np.logical_and(negatives, flagged))
        fn = np.count_nonzero(np.logical_and(y_true, y_pred <= cut))
        # Skip the metric helpers when their denominator would be zero.
        precisions.append(precision(tp, fp) if tp + fp != 0 else 0)
        recalls.append(recall(tp, fn) if tp + fn != 0 else 0)
    return np.r_[precisions], np.r_[recalls], thresholds

def format_index(index, max_digits = 4):
    """Turn an integer into text left-padded with zeros up to a fixed width.

    Args:
        index (Int): Integer to be formatted.
        max_digits (Int): Minimum length of the result, e.g. 4 yields values
            from "0000" to "9999".

    Returns:
        The text of `index`, with '0' characters prepended until it is at
        least `max_digits` long. Longer values are returned unchanged.
    """
    return str(index).rjust(max_digits, '0')

### Experiments selection

In [None]:
# Alternative hand-picked selection, kept for reference:
# experiments_ids = [
#     "0028", "0032", "0034", "0035", # 3D B32 BN Control
#     "0038", "0042", "0047", "0052", # 2D B32 BN Control
#     "0029", "0033", "0036", "0037", # 3D B16 BN Control
#     "0039", "0043", "0050", "0055", # 2D B16 BN Control
#     "0044", "0046", "0048", "0051", # 3D B16 BN Parkinson
#     "0063", "0067", "0069", "0070", # 2D B16 BN Parkinson
#     "0040", "0041", "0045", "0049", # 3D B16 RGB Control
#     "0053", "0058", "0061", "0066", # 2D B16 RGB Control
#     "0059", "0062", "0065", "0068", # 3D B16 RGB Parkinson
#     "0054", "0057", "0060", "0064", # 2D B16 RGB Parkinson
# ]
save_path_2_excels = "/home/jefelitman/Results_Reports/"
root_path = "/home/jefelitman/Saved_Models/Anomaly_parkinson/"
experiments_ids = [format_index(number) for number in range(1, 28)]

# Map every experiment id to the folder that holds its saved outputs.
exps = {}
for exp_id in experiments_ids:
    for entry in sorted(os.listdir(root_path)):
        # Any entry whose name contains the id matches; when several match,
        # the one that sorts last is the one kept.
        if exp_id not in entry:
            continue
        exps[exp_id] = {"path": os.path.join(root_path, entry)}
exps

## Standard metrics Excel

### Errors loading

In [None]:
# Load the saved error arrays of every experiment into its `exps` entry.
# Each array is stored under "<mode>_<type>_<class>" and also appended to the
# "all_<type>_<class>" accumulator of the same type and class.
for exp_id in experiments_ids:
    record = exps[exp_id]
    base_path = os.path.join(record["path"], "outputs/errors/")

    # Every accumulator starts as an empty array.
    for t in ["encoder", "contextual", "adversarial"]:
        for c in ["normal", "abnormal"]:
            record["all_{}_{}".format(t, c)] = np.r_[[]]

    for t in ["encoder", "contextual", "adversarial"]:
        for m in ["train", "test"]:
            if m == "test":
                classes = ["normal", "abnormal"]
            elif os.path.isfile(os.path.join(base_path, t, m, "normal.npy")):
                # The train folder holds a single class; pick it by checking
                # which file exists.
                classes = ["normal"]
            else:
                classes = ["abnormal"]

            for c in classes:
                errors = np.load(os.path.join(base_path, t, m, c + ".npy"))
                record[f"{m}_{t}_{c}"] = errors
                all_data = f"all_{t}_{c}"
                record[all_data] = np.concatenate([record[all_data], errors])

In [None]:
# Group the loaded errors by patient id. The id of each sample is parsed from
# the sorted file names of the latent-vector folder, and the n-th file name is
# paired with the n-th error of the matching array.
for exp_id in experiments_ids:
    record = exps[exp_id]
    base_path = os.path.join(record["path"], "outputs/latent_vectors/input_generator")
    for t in ["encoder", "contextual", "adversarial"]:
        for m in ["train", "test"]:
            if m == "test":
                classes = ["normal", "abnormal"]
            elif "train_encoder_normal" in record:
                classes = ["normal"]
            else:
                classes = ["abnormal"]

            for c in classes:
                file_names = sorted(os.listdir(os.path.join(base_path, m, c)))
                # The patient id is the number between the first "-" and the
                # following "." of the file name.
                patients_ids_positions = [
                    int(name.split("-")[1].split(".")[0]) for name in file_names
                ]
                data = f"{m}_{t}_{c}"

                # One empty list per distinct patient, stored under "<data>_patients".
                grouped = {p_id: [] for p_id in np.unique(patients_ids_positions)}
                record[data + "_patients"] = grouped

                for position, p_id in enumerate(patients_ids_positions):
                    grouped[p_id].append(record[data][position])

### Building excel with standard metrics

In [None]:
# Build one row of standard metrics per experiment and save them all to
# "standard_metrics.xlsx".
data_table = []
data_columns = ["Exp ID", "L_gen", "L_disc", "Acc (t=0.5)", "Pre_orig", 
                "Rec_orig", "Spe_orig", "F1_orig", "AUC", "Threshold", 
                "Acc_thre", "Pre_thre", "Rec_thre", "Spe_thre", "F1_thre",
                "Homo Level"
               ]
for exp_id in experiments_ids:
    experiment = exps[exp_id]
    experiment_folder = experiment["path"]

    train_metrics = pd.read_csv(os.path.join(experiment_folder, "metrics/train.csv"))
    test_metrics = pd.read_csv(os.path.join(experiment_folder, "metrics/test.csv"))

    group = "encoder"
    data = "test_{}_".format(group)
    errors = ["normal", "abnormal"]
    # Not used in this cell; kept so the name stays defined for other cells.
    path = os.path.join(experiment_folder, "outputs/graphics/quantitative/")
    # Labels: 0 for every "normal" test error, 1 for every "abnormal" one.
    y_true = np.concatenate(
        [[label] * experiment[data + name].shape[0] for label, name in enumerate(errors)]
    )
    y_pred = np.concatenate([experiment[data + name] for name in errors])

    # Min-max scale the errors to [0, 1]; the scale is flipped when the
    # experiment has no "normal" training errors.
    scaled = (y_pred - np.min(y_pred)) / (np.max(y_pred) - np.min(y_pred))
    if "train_{}_normal".format(group) in experiment:
        y_pred = scaled
        test_class = "normal"
    else:
        y_pred = 1 - scaled
        test_class = "abnormal"

    # Pick the threshold where precision and recall are closest without being
    # exactly equal. The argmin must be mapped back to a position in the full
    # `thresholds` array: taking it on the filtered deltas alone gives an index
    # that is shifted by however many zero deltas precede it.
    precisions, recalls, thresholds = precision_recall_curve(y_true, y_pred)
    deltas_pre_4_rec = np.abs(precisions - recalls)
    candidates = np.flatnonzero(deltas_pre_4_rec != 0)
    if candidates.size == 0:
        # Precision equals recall everywhere, so every threshold is eligible.
        candidates = np.arange(deltas_pre_4_rec.size)
    threshold = thresholds[candidates[np.argmin(deltas_pre_4_rec[candidates])]]

    TP = tf.keras.metrics.TruePositives(threshold)
    TN = tf.keras.metrics.TrueNegatives(threshold)
    FP = tf.keras.metrics.FalsePositives(threshold)
    FN = tf.keras.metrics.FalseNegatives(threshold)
    AUC = tf.keras.metrics.AUC()

    for metric in (TP, TN, FP, FN, AUC):
        metric.update_state(y_true, y_pred)
    tp = TP.result().numpy()
    tn = TN.result().numpy()
    fp = FP.result().numpy()
    fn = FN.result().numpy()

    # Homogeneity level: for each test patient, the fraction of the three
    # variance tests whose result is truthy when comparing the patient's errors
    # with the training errors, averaged over all patients.
    homo_key = "{}{}_patients".format(data, test_class)
    train_sorted = sorted(experiment["train_{}_{}".format(group, test_class)])
    homo_metric = 0
    for patient_errors in experiment[homo_key].values():
        patient_sorted = sorted(patient_errors)
        homo_4_pat = sum(
            int(variance_test(patient_sorted, train_sorted))
            for variance_test in (brownForsythe_test, levene_test, bartlett_test)
        )
        homo_metric += homo_4_pat/3
    homo_metric /= len(experiment[homo_key])

    # The logged metrics are taken from the last row of each CSV.
    data_table.append([
        exp_id,
        train_metrics["gen_error"].iloc[-1],
        train_metrics["disc_error"].iloc[-1],
        test_metrics["accuracy"].iloc[-1],
        test_metrics["precision"].iloc[-1],
        test_metrics["recall"].iloc[-1],
        test_metrics["specificity"].iloc[-1],
        test_metrics["f1_score"].iloc[-1],
        AUC.result().numpy(),
        threshold,
        np.max(accuracy(tp, tn, fp, fn)),
        np.max(precision(tp, fp)),
        np.max(recall(tp, fn)),
        np.max(specificity(tn, fp)),
        np.max(f1_score(tp, fp, fn)),
        homo_metric
    ])

pd.DataFrame(data_table, columns=data_columns).to_excel(os.path.join(save_path_2_excels, "standard_metrics.xlsx"), index=False)

## Qualitative metrics excel

### Errors exploration

In [None]:
# Descriptive statistics of every error array, one table section per
# group/type/class combination, saved to "qualitative_metrics.xlsx".
data_table = []
data_columns = ["Exp ID", "Min", "Max", "Mean", "Std", "Ske", "Kur", "CDF (x > 0)"]
# Section header rows carry only a label. Pad them to the full column count so
# every row has the same width as `data_columns`, even when there are no
# experiment rows to set the table width.
blank_cells = [None] * (len(data_columns) - 1)

for g in ["train", "test", "all"]:
    # "check" means: use whichever class the experiment has training errors for.
    classes = ["check"] if g == "train" else ["normal", "abnormal"]
    for cl in classes:
        for t in ["encoder", "contextual", "adversarial"]:

            if cl == "check":
                data_table.append(["{} {}".format(g, t)] + blank_cells)
            else:
                data_table.append(["{} {} {}".format(g, t, cl)] + blank_cells)

            for exp_id in experiments_ids:
                if cl == "check":
                    if "train_{}_normal".format(t) in exps[exp_id]:
                        c = "normal"
                    else:
                        c = "abnormal"
                else:
                    c = cl

                data = exps[exp_id]["{}_{}_{}".format(g, t, c)]
                m = np.mean(data)
                s = np.std(data)

                data_table.append([
                    exp_id,
                    np.min(data),
                    np.max(data),
                    m,
                    s,
                    stats.skew(data),
                    stats.kurtosis(data),
                    # Probability above 0 under a normal fit with the sample mean and std.
                    1 - stats.norm(m, s).cdf(0)
                ])
pd.DataFrame(data_table, columns=data_columns).to_excel(
    os.path.join(save_path_2_excels, "qualitative_metrics.xlsx"), 
    index=False
)

### Normality tests

In [None]:
# Compare every error array against a normal sample drawn with the same mean,
# standard deviation and size, and save the test outcomes to
# "normality_tests.xlsx".
data_columns = ["Exp ID", "Brown", "Levene", "Barlett", "DAP", "AD", "SW", "Chi^2", "F test"]
data_table = []

for g in ["train", "test", "all"]:
    # "check" means: use whichever class the experiment has training errors for.
    classes = ["check"] if g == "train" else ["normal", "abnormal"]
    for cl in classes:
        for t in ["encoder", "contextual", "adversarial"]:

            # Section header row: label followed by blank cells.
            title = f"{g} {t}" if cl == "check" else f"{g} {t} {cl}"
            data_table.append([title] + [None]*8)

            for exp_id in experiments_ids:
                record = exps[exp_id]
                if cl != "check":
                    c = cl
                elif f"train_{t}_normal" in record:
                    c = "normal"
                else:
                    c = "abnormal"

                data = sorted(record[f"{g}_{t}_{c}"])
                # Fixed random_state keeps the reference sample reproducible.
                norm_dist = sorted(
                    stats.norm.rvs(
                        loc=np.mean(data),
                        scale=np.std(data),
                        size=len(data),
                        random_state=8128,
                    )
                )
                chi_test = chiSquare_test(data, norm_dist)
                results = [exp_id]
                results.append(int(brownForsythe_test(data, norm_dist)))
                results.append(int(levene_test(data, norm_dist)))
                results.append(int(bartlett_test(data, norm_dist)))
                results.append(int(dagostinoPearson_test(data)))
                results.append(int(andersonDarling_test(data)))
                results.append(int(shapiroWilks_test(data)))
                results.append("{} ({})".format(int(chi_test[0]), round(chi_test[1], 5)))
                results.append(int(fOneWay_test(data, norm_dist)))
                data_table.append(results)

normality_frame = pd.DataFrame(data_table, columns=data_columns)
normality_frame.to_excel(
    os.path.join(save_path_2_excels, "normality_tests.xlsx"),
    index=False
)

### Grouping tests

In [None]:
# Compare the train, test and "all" error arrays of the trained class against
# each other, pair by pair, and save the test outcomes to "grouping_tests.xlsx".
data_columns = ["Exp ID", "Brown", "Levene", "Barlett", "MW", "KW", "KS", "F test"]
data_table = []

for t in ["encoder", "contextual", "adversarial"]:
    for g1, g2 in [("train", "test"), ("test", "all"), ("train", "all")]:
        # Section header row: label followed by blank cells.
        data_table.append([f"{t} {g1} vs {g2}"] + [None]*7)
        for exp_id in experiments_ids:
            record = exps[exp_id]
            # Use whichever class the experiment has training errors for.
            c = "normal" if f"train_{t}_normal" in record else "abnormal"

            data1 = sorted(record[f"{g1}_{t}_{c}"])
            data2 = sorted(record[f"{g2}_{t}_{c}"])

            results = [exp_id]
            for two_sample_test in (
                brownForsythe_test,
                levene_test,
                bartlett_test,
                mannWhitney_test,
                kruskalWallis_test,
                kolmogorovSmirnov_test,
                fOneWay_test,
            ):
                results.append(int(two_sample_test(data1, data2)))
            data_table.append(results)

grouping_frame = pd.DataFrame(data_table, columns=data_columns)
grouping_frame.to_excel(
    os.path.join(save_path_2_excels, "grouping_tests.xlsx"),
    index=False
)

### Classing tests

In [None]:
# Compare the "normal" errors against the "abnormal" errors of every
# experiment and save the test outcomes to "classing_tests.xlsx".
data_table = []
data_columns = ["Exp ID", "Brown", "Levene", "Barlett", "Chi^2 N->A", "Chi^2 A->N", "MW", "KW", "KS", "F test"]

for g in ["test", "all"]:
    for t in ["encoder", "contextual", "adversarial"]:
        # Section header row: label followed by one blank cell per remaining column.
        data_table.append(["{} {}".format(g, t)] + [None] * (len(data_columns) - 1))
        for exp_id in experiments_ids:
            data1 = sorted(exps[exp_id]["{}_{}_normal".format(g, t)])
            data2 = sorted(exps[exp_id]["{}_{}_abnormal".format(g, t)])

            row = [
                exp_id,
                int(brownForsythe_test(data1, data2)),
                int(levene_test(data1, data2)),
                int(bartlett_test(data1, data2)),
            ]
            if g == "test":
                # Chi-square is only computed for the "all" group. The two
                # blank cells must sit under the Chi^2 columns so the
                # remaining results line up with their own headers.
                row += [None, None]
            else:
                chi_1_test = chiSquare_test(data1, data2)
                chi_2_test = chiSquare_test(data2, data1)
                row += [
                    "{} ({})".format(int(chi_1_test[0]), round(chi_1_test[1], 5)),
                    "{} ({})".format(int(chi_2_test[0]), round(chi_2_test[1], 5)),
                ]
            row += [
                int(mannWhitney_test(data1, data2)),
                int(kruskalWallis_test(data1, data2)),
                int(kolmogorovSmirnov_test(data1, data2)),
                int(fOneWay_test(data1, data2)),
            ]

            data_table.append(row)

pd.DataFrame(data_table, columns=data_columns).to_excel(
    os.path.join(save_path_2_excels, "classing_tests.xlsx"), 
    index=False
)