# Notebook to generate PCA Graphics

### Library imports

In [None]:
import os
import cv2
import numpy as np
from scipy import stats
import matplotlib.pyplot as plt
from sklearn.decomposition import PCA

### Experiment selection

In [None]:
# Resolve the results folder of the experiment whose directory name contains
# `experiment_id`.
experiment_id = "0006"
root_path = "../../results/Ganomaly_3D/"
matching_folders = [
    os.path.join(root_path, entry)
    for entry in sorted(os.listdir(root_path))
    if experiment_id in entry
]
if not matching_folders:
    # Fail loudly instead of leaving `experiment_folder` undefined (or, in a
    # notebook session, silently reusing the value from a previous run).
    raise FileNotFoundError(
        "No experiment folder containing '{}' found in '{}'".format(experiment_id, root_path)
    )
# When several folders match, the last one in sorted order is used.
experiment_folder = matching_folders[-1]
experiment_folder

### Latent Vectors (Embeddings) Loading

In [None]:
# Load every saved latent vector and expose it through module-level names:
#   "<split>_<network>_<stage>_<class>"  -> vectors of one data split
#   "all_<network>_<stage>_<class>"      -> vectors pooled over all splits
base_path = os.path.join(experiment_folder, "outputs/latent_vectors/")
networks = ["generator", "discriminator"]
stages = ["input", "output"]

# Start every pooled accumulator as an empty list.
for n in networks:
    for t in stages:
        for c in ["normal", "abnormal"]:
            globals()["all_{}_{}_{}".format(n, t, c)] = []

for n in networks:
    for t in stages:
        for m in ["train", "val", "test"]:
            # The train split holds a single class: "normal" when its folder
            # has files, otherwise "abnormal". Other splits hold both classes.
            if m != "train":
                classes = ["normal", "abnormal"]
            elif os.listdir(os.path.join(base_path, "{}_{}".format(t, n), "train", "normal")):
                classes = ["normal"]
            else:
                classes = ["abnormal"]
            for c in classes:
                pooled_key = "all_{}_{}_{}".format(n, t, c)
                split_key = "{}_{}_{}_{}".format(m, n, t, c)
                split_dir = os.path.join(base_path, t + "_" + n, m, c)
                vectors = [
                    np.load(os.path.join(split_dir, fname))
                    for fname in sorted(os.listdir(split_dir))
                ]
                globals()[pooled_key].extend(vectors)
                globals()[split_key] = np.r_[vectors]

# Turn the pooled lists into arrays once every split has been read.
for n in networks:
    for t in stages:
        for c in ["normal", "abnormal"]:
            pooled_key = "all_{}_{}_{}".format(n, t, c)
            globals()[pooled_key] = np.r_[globals()[pooled_key]]

plt.rc("text", usetex=True)
all_generator_input_normal.shape, all_generator_input_abnormal.shape

### Loading latent vectors (embeddings) by patient

In [None]:
# base_path = os.path.join(experiment_folder, "outputs/latent_vectors/input_generator")
# for t in ["encoder", "contextual", "adversarial"]:
#     for m in ["train", "val", "test"]:
#         if m == "train":
#             if "train_encoder_normal" in globals().keys():
#                 classes = ["normal"] 
#             else:
#                 classes = ["abnormal"]
#         else:
#             classes = ["normal", "abnormal"]
#         for c in classes:
#             patients_ids_positions = [
#                 int(i.split("_")[1].split("-")[1].split(".")[0]) for i in sorted(
#                     os.listdir(os.path.join(base_path, m, c))
#                 )
#             ]
#             data = "{}_{}_{}".format(m, t, c)
#             key = "{}_{}".format(data, "patients")
#             globals()[key] = {}

#             for p_id in np.unique(patients_ids_positions):
#                 globals()[key][p_id] = []

#             for i, p_id in enumerate(patients_ids_positions):
#                 globals()[key][p_id].append(globals()[data][i])

### PCA individual between classes

In [None]:
# For every network, stage (input/output) and data split, project the latent
# vectors of the available classes to 2-D with PCA and save a scatter plot
# (first class in blue, second class in red).
save_path = os.path.join(experiment_folder, "outputs/graphics/visuals/PCA_classes_individual")
# makedirs also creates missing parent folders and tolerates an existing one.
os.makedirs(save_path, exist_ok=True)
class_names = {"normal": "Control", "abnormal": "Parkinson"}
for n in ["generator", "discriminator"]:
    sufix = "G" if n == "generator" else "D"
    for t in ["input", "output"]:
        for m in ["train", "test", "val", "all"]:
            # The train split only has one class; which one depends on the
            # train variable created by the loading cell.
            if m != "train":
                classes = ["normal", "abnormal"]
            elif "train_{}_{}_normal".format(n, t) in globals():
                classes = ["normal"]
            else:
                classes = ["abnormal"]
            order = ["{} $Z_{}$".format(class_names[c], sufix) for c in classes]

            mapped = PCA(n_components=2, random_state=8128).fit_transform(
                np.concatenate(
                    [globals()["{}_{}_{}_{}".format(m, n, t, c)] for c in classes], axis=0
                )
            )
            # Keep the projection available under its module-level name.
            globals()["{}_mapped_{}_{}".format(m, n, t)] = mapped

            # Rows [0, divisor) belong to the first class, the rest to the second.
            divisor = globals()["{}_{}_{}_{}".format(m, n, t, classes[0])].shape[0]
            plt.scatter(mapped[:divisor, 0], mapped[:divisor, 1], color="blue", s=5, label=order[0])
            if len(classes) > 1:
                plt.scatter(mapped[divisor:, 0], mapped[divisor:, 1], color="red", s=5, label=order[1])
            plt.legend()
            plt.title("PCA for {} {} data in {}".format(n, t, m))
            filename = '{}_pca_{}_{}.png'.format(m, n, t)
            plt.savefig(os.path.join(save_path, filename), dpi=1200)
            plt.close()

### PCA combined between classes

In [None]:
# For every network and data split, run one PCA over the input (Z) and output
# (Z') latent vectors of the available classes and save a scatter plot with one
# series per (class, stage) pair.
save_path = os.path.join(experiment_folder, "outputs/graphics/visuals/PCA_classes_combined")
# makedirs also creates missing parent folders and tolerates an existing one.
os.makedirs(save_path, exist_ok=True)
class_names = {"normal": "Control", "abnormal": "Parkinson"}
for n in ["generator", "discriminator"]:
    sufix = "G" if n == "generator" else "D"
    for m in ["train", "test", "val", "all"]:
        # The train split only has one class. The check names the input stage
        # explicitly because this cell has no stage loop variable of its own.
        if m != "train":
            classes = ["normal", "abnormal"]
        elif "train_{}_input_normal".format(n) in globals():
            classes = ["normal"]
        else:
            classes = ["abnormal"]

        order = []
        for c in classes:
            order.append("{} $Z_{}$".format(class_names[c], sufix))
            order.append("{} $Z'_{}$".format(class_names[c], sufix))
            # Stack input vectors first, then output vectors, for this class.
            input_data = globals()["{}_{}_input_{}".format(m, n, c)]
            output_data = globals()["{}_{}_output_{}".format(m, n, c)]
            globals()["{}_{}_combined_{}".format(m, n, c)] = np.concatenate([input_data, output_data])

        mapped = PCA(n_components=2, random_state=8128).fit_transform(
            np.concatenate(
                [globals()["{}_{}_combined_{}".format(m, n, c)] for c in classes], axis=0
            )
        )
        # Keep the projection available under its module-level name.
        globals()["{}_mapped_{}_combined".format(m, n)] = mapped

        # Row layout of `mapped`:
        #   [0, divisor_normal)                first class, input
        #   [divisor_normal, divisor_classes)  first class, output
        #   [divisor_classes, divisor_abnormal) second class, input
        #   [divisor_abnormal, end)            second class, output
        divisor_normal = globals()["{}_{}_input_{}".format(m, n, classes[0])].shape[0]
        divisor_classes = globals()["{}_{}_combined_{}".format(m, n, classes[0])].shape[0]
        plt.scatter(
            mapped[:divisor_normal, 0],
            mapped[:divisor_normal, 1],
            color="blue",
            s=20,
            label=order[0],
            marker="|"
        )
        plt.scatter(
            mapped[divisor_normal:divisor_classes, 0],
            mapped[divisor_normal:divisor_classes, 1],
            color="cyan",
            s=20,
            label=order[1],
            marker="_"
        )
        if len(classes) > 1:
            divisor_abnormal = globals()["{}_{}_input_{}".format(m, n, classes[1])].shape[0] + divisor_classes
            plt.scatter(
                mapped[divisor_classes:divisor_abnormal, 0],
                mapped[divisor_classes:divisor_abnormal, 1],
                color="red",
                s=20,
                label=order[2],
                marker="|"
            )
            plt.scatter(
                mapped[divisor_abnormal:, 0],
                mapped[divisor_abnormal:, 1],
                color="orange",
                s=20,
                label=order[3],
                marker="_"
            )
        plt.legend()
        plt.title("PCA for {} combined data in {}".format(n, m))
        filename = '{}_pca_{}_combined.png'.format(m, n)
        plt.savefig(os.path.join(save_path, filename), dpi=1200)
        plt.close()

### PCA individual between groups

In [None]:
# For every network, stage and pair of data splits, project the latent vectors
# of both splits to 2-D with PCA (one projection per class) and save a scatter
# plot comparing the two splits (first split in blue, second in red).
save_path = os.path.join(experiment_folder, "outputs/graphics/visuals/PCA_groups_individual")
# makedirs also creates missing parent folders and tolerates an existing one.
os.makedirs(save_path, exist_ok=True)
for n in ["generator", "discriminator"]:
    sufix = "G" if n == "generator" else "D"
    for t in ["input", "output"]:
        for groups in [
            ("train", "val"), ("train", "test"), ("train", "all"),
            ("val", "test"), ("val", "all"),
            ("test", "all")
        ]:
            # Comparisons involving the train split are limited to the single
            # class that split contains.
            if groups[0] != "train":
                classes = ["normal", "abnormal"]
            elif "train_{}_{}_normal".format(n, t) in globals():
                classes = ["normal"]
            else:
                classes = ["abnormal"]
            order = ["{} $Z_{}$".format(g, sufix) for g in groups]
            for c in classes:
                mapped = PCA(n_components=2, random_state=8128).fit_transform(
                    np.concatenate(
                        [globals()["{}_{}_{}_{}".format(g, n, t, c)] for g in groups], axis=0
                    )
                )
                # Keep the latest projection available under its module-level name.
                globals()["mapped_{}_{}".format(n, t)] = mapped

                # Rows [0, divisor) come from the first split, the rest from the second.
                divisor = globals()["{}_{}_{}_{}".format(groups[0], n, t, c)].shape[0]
                plt.scatter(mapped[:divisor, 0], mapped[:divisor, 1], color="blue", s=5, label=order[0])
                plt.scatter(mapped[divisor:, 0], mapped[divisor:, 1], color="red", s=5, label=order[1])
                plt.legend()
                plt.title("PCA for {} {} {} data ({} vs {})".format(n, t, c, groups[0], groups[1]))
                # The class is part of the filename so that the figure of one
                # class does not overwrite the figure of the other.
                filename = '{}_vs_{}_pca_{}_{}_{}.png'.format(groups[0], groups[1], n, t, c)
                plt.savefig(os.path.join(save_path, filename), dpi=1200)
                plt.close()

### PCA combined between groups

In [None]:
# Work in progress (resume here).
# NOTE(review): this cell sits under the "combined between groups" heading but
# still iterates over classes and writes to the PCA_classes_combined folder
# with the class-combined filenames -- presumably the group comparison is not
# implemented yet; confirm before relying on these figures.
#
# For every network and data split, run one PCA over the input (Z) and output
# (Z') latent vectors of the available classes and save a scatter plot with one
# series per (class, stage) pair.
save_path = os.path.join(experiment_folder, "outputs/graphics/visuals/PCA_classes_combined")
# makedirs also creates missing parent folders and tolerates an existing one.
os.makedirs(save_path, exist_ok=True)
class_names = {"normal": "Control", "abnormal": "Parkinson"}
for n in ["generator", "discriminator"]:
    sufix = "G" if n == "generator" else "D"
    for m in ["train", "test", "val", "all"]:
        # The train split only has one class. The check names the input stage
        # explicitly because this cell has no stage loop variable of its own.
        if m != "train":
            classes = ["normal", "abnormal"]
        elif "train_{}_input_normal".format(n) in globals():
            classes = ["normal"]
        else:
            classes = ["abnormal"]

        order = []
        for c in classes:
            order.append("{} $Z_{}$".format(class_names[c], sufix))
            order.append("{} $Z'_{}$".format(class_names[c], sufix))
            # Stack input vectors first, then output vectors, for this class.
            input_data = globals()["{}_{}_input_{}".format(m, n, c)]
            output_data = globals()["{}_{}_output_{}".format(m, n, c)]
            globals()["{}_{}_combined_{}".format(m, n, c)] = np.concatenate([input_data, output_data])

        mapped = PCA(n_components=2, random_state=8128).fit_transform(
            np.concatenate(
                [globals()["{}_{}_combined_{}".format(m, n, c)] for c in classes], axis=0
            )
        )
        # Keep the projection available under its module-level name.
        globals()["{}_mapped_{}_combined".format(m, n)] = mapped

        # Row layout of `mapped`:
        #   [0, divisor_normal)                first class, input
        #   [divisor_normal, divisor_classes)  first class, output
        #   [divisor_classes, divisor_abnormal) second class, input
        #   [divisor_abnormal, end)            second class, output
        divisor_normal = globals()["{}_{}_input_{}".format(m, n, classes[0])].shape[0]
        divisor_classes = globals()["{}_{}_combined_{}".format(m, n, classes[0])].shape[0]
        plt.scatter(
            mapped[:divisor_normal, 0],
            mapped[:divisor_normal, 1],
            color="blue",
            s=20,
            label=order[0],
            marker="|"
        )
        plt.scatter(
            mapped[divisor_normal:divisor_classes, 0],
            mapped[divisor_normal:divisor_classes, 1],
            color="cyan",
            s=20,
            label=order[1],
            marker="_"
        )
        if len(classes) > 1:
            divisor_abnormal = globals()["{}_{}_input_{}".format(m, n, classes[1])].shape[0] + divisor_classes
            plt.scatter(
                mapped[divisor_classes:divisor_abnormal, 0],
                mapped[divisor_classes:divisor_abnormal, 1],
                color="red",
                s=20,
                label=order[2],
                marker="|"
            )
            plt.scatter(
                mapped[divisor_abnormal:, 0],
                mapped[divisor_abnormal:, 1],
                color="orange",
                s=20,
                label=order[3],
                marker="_"
            )
        plt.legend()
        plt.title("PCA for {} combined data in {}".format(n, m))
        filename = '{}_pca_{}_combined.png'.format(m, n)
        plt.savefig(os.path.join(save_path, filename), dpi=1200)
        plt.close()