# Parallel coordinate plots

In [None]:
%config Completer.use_jedi = False
%matplotlib inline

In [None]:
import numpy as np
import pandas as pd
import cv2
import copy
import time
import ast
import json
import tensorflow as tf
from sklearn.metrics import confusion_matrix
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import plotly.figure_factory as ff
from natsort import natsorted

In [None]:
import sys
import os

sys.path.append(os.path.abspath('../data_utils'))
sys.path.append(os.path.abspath('../models'))

In [None]:
from model_zoo.utils import check_gpu
from model_zoo.losses.dice import DiceLoss, DiceCoefficient
from data_utils.DataContainer import DataContainer
from data_utils.TestSet import TestSet

In [None]:
import logging
logging.basicConfig(level=logging.INFO)

# TestSet

In [None]:
# Grouped lists of model identifiers. The strings are used verbatim as column
# keys into the evaluation tables further down, so they must not be altered.
MODELS_SIMPLE1 = [
    "XNet_T2_relu",
    "XNet_T2_leaky",
    "XNet_T2_selu",
]
MODELS_SIMPLE2 = [
    "XNet_T1_relu",
    "XNet_T1_leaky",
    "XNet_T1_selu",
]
MODELS_SIMPLE = MODELS_SIMPLE1 + MODELS_SIMPLE2

MODELS_CG = [
    "CG_XNet_T1_relu",
    "CG_XNet_T2_relu",
]

MODELS_DA = [
    "SegmS2T_GAN1_relu",
    "SegmS2T_GAN2_relu",
    "SegmS2T_GAN5_relu",
    "CG_SegmS2T_GAN1_relu",
    "CG_SegmS2T_GAN2_relu",
    "CG_SegmS2T_GAN5_relu",
]

MODELS_GAN = [
    "GAN_1+XNet_T1_relu",
    "GAN_2+XNet_T1_relu",
    "GAN_5+XNet_T1_relu",
    "GAN_1+CG_XNet_T1_relu",
    "GAN_2+CG_XNet_T1_relu",
    "GAN_5+CG_XNet_T1_relu",
]

# Full registry; order is SIMPLE, CG, GAN, DA.
MODELS = MODELS_SIMPLE + MODELS_CG + MODELS_GAN + MODELS_DA

In [None]:
# Build the TestSet wrapper for the processed test directory. `load=True` is
# forwarded to TestSet as-is (presumably it loads previously computed
# evaluation tables instead of recomputing them — confirm in data_utils.TestSet).
testset = TestSet("/tf/workdir/data/VS_segm/VS_registered/test_processed/", load=True)

In [None]:
from io import StringIO

# Round-trip both tables through JSON so the rest of the notebook works on the
# same (de)serialised representation that `to_json` / `read_json` produce.
# The JSON text is wrapped in StringIO because passing a literal JSON string
# to `pd.read_json` is deprecated in recent pandas releases.
df_total = pd.read_json(StringIO(testset.df_total.to_json()))
intermediate = testset.df_signature_3d.to_json()
df_signature = pd.read_json(StringIO(intermediate))

In [None]:
df_total

In [None]:
df_signature.tail()

In [None]:
list(df_signature.columns)

# Features 3D

In [None]:
# Selection used by the 3D analysis below.
models = MODELS_DA    # model columns that are averaged per patient
features = "shape"    # feature family shown in the plot
metric = "dice"       # metric part of the performance column name
dataset = "all"       # dataset part of the performance column name

## Error metrics

In [None]:
# Every "<metric>_<dataset>" combination of these two lists is looked up as a
# column of df_total in the binning loop.
metrics = [
    "dice",
    "assd",
    "acc",
    "tnr",
    "tpr",
]
datasets = [
    "all",
    "only_tumor",
]

In [None]:
# Per-patient performance levels (1..3) for every metric/dataset combination.
df_performance = pd.DataFrame()
# The last entry of the stored table is dropped here and again below
# (presumably a summary row — confirm against TestSet).
df_performance["id"] = list(df_total["dice_all"][0]["id"].values())[:-1]

# np.digitize yields bin 1..3 (lowest..highest value). The lookups translate
# the bin into a level where 1 is the best third:
#   values_ascending  -> higher metric is better (bin 3 -> level 1)
#   values_descending -> lower metric is better  (bin 1 -> level 1)
values = [3, 2, 1]
values_ascending = dict(enumerate(values, 1))
values_descending = dict(enumerate(reversed(values), 1))
dict_performance_bins = {}

for m in metrics:
    for d in datasets:
        column = m + "_" + d
        # Explicit copy so adding columns never writes into a slice.
        df_metric = pd.DataFrame(df_total[column][0])[["id"] + models].copy()
        # Average over the model columns only. Averaging the whole frame would
        # also pull in "id" (wrong when numeric, an error when it is text).
        df_metric["mean_value"] = df_metric[models].mean(axis=1)
        # Three equal-width bins; the upper edge is nudged up so the maximum
        # falls into bin 3 instead of overflowing into bin 4.
        bins = np.linspace(np.min(df_metric["mean_value"]),
                           np.nextafter(np.max(df_metric["mean_value"]), np.inf), 4)
        level_of = values_descending if "assd" in m else values_ascending
        df_metric["mean"] = np.vectorize(level_of.get)(
            np.digitize(df_metric["mean_value"], bins=bins))
        df_performance[column] = df_metric.iloc[:-1]["mean"].values
        dict_performance_bins[column] = bins.tolist()

In [None]:
# Human-readable tick labels for the three dice_all bins: "<name>\n(<lo>-<hi>)",
# listed from the highest bin down to the lowest.
vals = [np.round(edge, 3) for edge in dict_performance_bins["dice_all"]]
bvals = [f"{lo}-{hi}" for lo, hi in zip(vals, vals[1:])]
ticks = [f"{a}\n({b})" for a, b in zip(["small", "medium", "large"], bvals)][::-1]
ticks

## Radiomics Features

In [None]:
# Keep "id" plus every signature column whose class prefix (text before the
# first "-") is "shape" or "firstorder".
radiomics_columns = [c for c in df_signature.columns
                     if c.split("-")[0] in ["shape", "firstorder"]]
# Explicit copy: assigning into a bare column selection of df_signature
# triggers pandas' SettingWithCopy warning and may not stick.
df_rad = df_signature[["id"] + radiomics_columns].copy()
# Ids are compared as strings when merging with the other tables.
df_rad["id"] = df_rad["id"].apply(str)
df_rad.head()

In [None]:
# Bare feature names: drop the "id" column and strip the "<class>-" prefix.
featureKeys = [k.rsplit("-", 1)[-1] for k in df_rad.keys() if k != "id"]
featureKeys

In [None]:
# Ids known to the performance table (last entry dropped) and, as strings,
# ids that have a radiomics signature.
list_patient_ids = [*df_total["dice_all"][0]["id"].values()][:-1]
list_patient_ids_radiomics = list(map(str, df_signature["id"].values))

In [None]:
# Patients present in the performance table but without a radiomics signature.
remove_id = [int(r) for r in natsorted(list(set(list_patient_ids) - set(list_patient_ids_radiomics)))]

# Row labels of df_performance that belong to those patients.
remove_idx = []
for idx, row in df_performance.iterrows():
    try:
        row_id = int(row["id"])
    except (TypeError, ValueError, OverflowError):
        # Ids that cannot be read as an integer are skipped. Only conversion
        # errors are swallowed, so interrupts and real bugs still surface.
        continue
    if row_id in remove_id:
        remove_idx.append(idx)
remove_idx

## Volumetric

In [None]:
# One row per patient: total number of slices and number of slices whose
# ground-truth class is tumor (VS_class_gt == 1).
# Rows are collected in a list and turned into a frame once:
# DataFrame.append no longer exists in pandas 2.x and copied the whole frame
# on every call.
volume_records = []
for patient_id in testset.list_patient_ids:
    df = pd.read_json(f"/tf/workdir/data/VS_segm/VS_registered/test_processed/vs_gk_{patient_id}/evaluation.json")
    volume_records.append({"id": patient_id,
                           "slice_number": len(df),
                           "tumor_slice_number": len(df[df["VS_class_gt"] == 1])})
# NOTE(review): "id" keeps the type found in testset.list_patient_ids; the
# merge further down needs it to match the id type of the other tables.
df_volume = pd.DataFrame(volume_records,
                         columns=["id", "slice_number", "tumor_slice_number"])

In [None]:
# Bucket the slice count per patient. np.digitize returns i such that
# bins[i-1] <= x < bins[i], i.e. 1: [0, 70), 2: [70, 79), 3: [79, 80), 4: >= 80.
bins = [0, 70, 79, 80]
res = np.digitize(df_volume["slice_number"], bins)
res

In [None]:
# Bucket the tumor slice count per patient:
# 1: [0, 10), 2: [10, 15), 3: [15, 20), 4: >= 20.
bins = [0, 10, 15, 20]
res = np.digitize(df_volume["tumor_slice_number"], bins)
res

## Merge

In [None]:
type(df_volume.iloc[0]["id"]), type(df_performance.iloc[0]["id"]), type(df_rad.iloc[0]["id"])

In [None]:
# Inner-join volume, performance and radiomics tables on "id", then replace
# every remaining missing value with 0.
df_new = pd.merge(
    df_volume,
    pd.merge(df_performance, df_rad, on="id"),
    on="id",
).fillna(value=0)
df_new.head()

In [None]:
df_new.columns

## Test for app

In [None]:
# Serialise the three TestSet tables to JSON strings.
df_total_json, df_signature_json, df_volume_json = (
    frame.to_json()
    for frame in (testset.df_total, testset.df_signature_3d, testset.df_volume)
)

In [None]:
from io import StringIO

# --- Deserialise the JSON payloads -------------------------------------------
# StringIO is used because passing a literal JSON string to pd.read_json is
# deprecated in recent pandas releases.
df_total = pd.read_json(StringIO(df_total_json))
df_signature = pd.read_json(StringIO(df_signature_json))
df_volume = pd.read_json(StringIO(df_volume_json))

# --- Selection parameters -----------------------------------------------------
models = MODELS_DA
features = "shape"
metric = "dice"
dataset = "all"
metrics = ["dice", "assd", "acc", "tnr", "tpr"]
datasets = ["all", "only_tumor"]

# --- Performance levels per patient --------------------------------------------
df_performance = pd.DataFrame()
# The last entry of the stored table is dropped here and again below
# (presumably a summary row — confirm against TestSet).
df_performance["id"] = list(df_total["dice_all"][0]["id"].values())[:-1]
# np.digitize yields bin 1..3 (lowest..highest value); level 1 is the best third:
#   values_ascending  -> higher metric is better (bin 3 -> level 1)
#   values_descending -> lower metric is better  (bin 1 -> level 1)
values = [3, 2, 1]
values_ascending = dict(enumerate(values, 1))
values_descending = dict(enumerate(reversed(values), 1))
dict_performance_bins = {}
for m in metrics:
    for d in datasets:
        column = m + "_" + d
        # Explicit copy so adding columns never writes into a slice.
        df_metric = pd.DataFrame(df_total[column][0])[["id"] + models].copy()
        # Average over the model columns only; the "id" column must not take
        # part in the mean (wrong when numeric, an error when it is text).
        df_metric["mean_value"] = df_metric[models].mean(axis=1)
        # Three equal-width bins; the upper edge is nudged up so the maximum
        # lands in bin 3.
        bins = np.linspace(np.min(df_metric["mean_value"]),
                           np.nextafter(np.max(df_metric["mean_value"]), np.inf), 4)
        level_of = values_descending if "assd" in m else values_ascending
        df_metric["mean"] = np.vectorize(level_of.get)(
            np.digitize(df_metric["mean_value"], bins=bins))
        df_performance[column] = df_metric.iloc[:-1]["mean"].values
        dict_performance_bins[column] = bins.tolist()

# --- Radiomics signature (shape + firstorder classes) -------------------------
radiomics_columns = [c for c in df_signature.columns
                     if c.split("-")[0] in ["shape", "firstorder"]]
# Copy before assigning so the write never targets a slice of df_signature.
df_rad = df_signature[["id"] + radiomics_columns].copy()
df_rad["id"] = df_rad["id"].apply(str)
featureKeys = list(df_rad.keys())
featureKeys.remove("id")
featureKeys = [k.split("-")[-1] for k in featureKeys]

# --- Merge everything on the string id -----------------------------------------
df_volume["id"] = df_volume["id"].apply(str)
df_new = df_volume.merge(df_performance.merge(df_rad, on="id"), on="id")
df_new = df_new.fillna(value=0)

In [None]:
list(df_new.columns)

# Parallel coordinate plot - overview

In [None]:
features = "shape"

# Pick the columns shown next to the performance axis. An unknown `features`
# value leaves feat_list untouched.
if features == "firstorder":
    # "Entropy" and "Uniformity" are deliberately left out.
    feat_list = [
        "Energy",
        "Skewness",
        "Kurtosis",
        "Variance",
        "Range",
    ]
elif features == "shape":
    feat_list = [
        "Elongation",
        "Flatness",
        "Sphericity",
        "SurfaceArea",
        "MeshVolume",
    ]
elif features == "performance":
    feat_list = [
        'dice_all', 'dice_only_tumor',
        'assd_all', 'assd_only_tumor',
        'acc_all', 'acc_only_tumor',
        'tnr_all',
        'tpr_all', 'tpr_only_tumor',
    ]
    # The selected performance column already has its own axis.
    feat_list.remove(metric + "_" + dataset)

feat_list

In [None]:
# Plot table: id, the selected performance column, then one column per feature.
df_plot = pd.DataFrame()
df_plot["id"] = df_new["id"]
performance_col = metric + "_" + dataset
df_plot[performance_col] = df_new[performance_col].values
for fk in feat_list:
    # Prefer a column whose name (or the part after the last "-") equals the
    # feature name. A pure substring test can pick the wrong column, e.g.
    # "Range" also occurs inside "InterquartileRange".
    matches = [c for c in df_new.columns if c == fk or c.split("-")[-1] == fk]
    if not matches:
        # Fall back to the previous substring lookup.
        matches = [c for c in df_new.columns if fk in c]
    df_plot[fk] = df_new[matches[0]].values
df_plot.head()

In [None]:
# All plotted columns, i.e. everything except the identifier.
plot_cols = list(df_plot.columns)
plot_cols.remove("id")
plot_cols

In [None]:
def _three_level():
    """Axis settings for features binned into three size levels."""
    return dict(ticktext=['large', 'medium', 'small'], categoryarray=[3, 2, 1])


def _two_level(upper, lower):
    """Axis settings for features split into two classes (code 2 and code 1)."""
    return dict(ticktext=[upper, lower], categoryarray=[2, 1])


def _quality(label):
    """Axis settings for a performance column; level 1 is shown as "good"."""
    return dict(ticktext=["good", "medium", "bad"], categoryarray=[1, 2, 3], label=label)


# Keyword arguments for go.parcats.Dimension, keyed by plot column.
# Each entry gets its own fresh lists. "Entropy", "Uniformity" and
# "tnr_only_tumor" intentionally have no entry.
lookup_dict = {
    # first-order / shape features
    "Energy": _three_level(),
    "Skewness": _two_level('pos', 'neg'),
    "Kurtosis": _two_level('lepto', 'platy'),
    "Variance": _three_level(),
    "Elongation": _two_level('>mean', '<=mean'),
    "Flatness": _two_level('non-flat', 'flat'),
    "Sphericity": _two_level('>mean', '<=mean'),
    "SurfaceArea": _three_level(),
    "MeshVolume": _three_level(),
    "Range": _three_level(),
    # performance columns
    "dice_only_tumor": _quality("DSC tumor"),
    "dice_all": _quality("DSC"),
    "acc_only_tumor": _quality("ACC tumor"),
    "acc_all": _quality("ACC"),
    "assd_only_tumor": _quality("ASSD tumor"),
    "assd_all": _quality("ASSD"),
    "tpr_only_tumor": _quality("TPR tumor"),
    "tpr_all": _quality("TPR"),
    "tnr_all": _quality("TNR"),
}

In [None]:
# --- Dimensions -----------------------------------------------------------------
# The performance axis comes first, configured from lookup_dict.
perf_dim = go.parcats.Dimension(values=df_plot[performance_col],
                                **lookup_dict[performance_col])

# One axis per remaining column (the first two columns are the id and the
# performance column). The column name is the label unless lookup_dict
# supplies its own.
feature_dim = []
for f in list(df_plot.keys())[2:]:
    dim_kwargs = {"label": f, **lookup_dict[f]}
    feature_dim.append(go.parcats.Dimension(values=df_plot[f], **dim_kwargs))
print(len(feature_dim))

# --- Trace and figure -----------------------------------------------------------
# Ribbons are coloured by the performance level.
color = df_plot[performance_col]
colorscale = [[0, 'mediumseagreen'], [1, 'lightsteelblue']]

# Only the first four feature axes are drawn.
parcats_trace = go.Parcats(
    dimensions=[perf_dim, *feature_dim[:4]],
    line={'color': color, 'colorscale': colorscale},
    bundlecolors=True,
    hoveron='category',
    hoverinfo='count+probability',
    arrangement='freeform',
)
fig = go.Figure(data=[parcats_trace])
fig.update_layout(margin=dict(l=5, r=5, b=5, t=20, pad=4))
fig.show()

# Features 2D

In [None]:
from sklearn.metrics import confusion_matrix

In [None]:
# Selection used by the per-slice (2D) analysis below.
patient_id = 205         # patient whose slices are analysed
models = MODELS_DA       # model columns that are averaged per slice
features = "shape"       # feature family shown in the plot
metric = "dice"          # metric part of the performance column name
dataset = "only_tumor"   # dataset part of the performance column name

## Error metric

In [None]:
# Metrics and dataset tags available for the per-slice analysis.
metrics = [
    "dice",
    "assd",
]
datasets = [
    "all",
    "only_tumor",
]

In [None]:
# Load the evaluation table of the selected patient. Note that this rebinds
# df_total, which held the test-set-wide table in the 3D section.
# Later cells read the columns "VS_class_gt" and "VS_segm_<metric>-<model>".
df_total = pd.read_json(f"/tf/workdir/data/VS_segm/VS_registered/test_processed/vs_gk_{patient_id}/evaluation.json")
df_total.tail()

In [None]:
def calculate_accuracy(conf_mat):
    """Return the accuracy (tp + tn) / (tp + tn + fp + fn).

    Args:
        conf_mat: Either a mapping with the keys "tp", "tn", "fp" and "fn"
            (any ``dict`` subclass is accepted), or an indexable sequence laid
            out as ``[tn, fp, fn, tp]``.

    Returns:
        The fraction of correct predictions.

    Note:
        An all-zero confusion matrix is not guarded; plain Python numbers
        raise ZeroDivisionError in that case.
    """
    if isinstance(conf_mat, dict):
        tp, tn = conf_mat["tp"], conf_mat["tn"]
        fp, fn = conf_mat["fp"], conf_mat["fn"]
    else:
        tn, fp, fn, tp = conf_mat[0], conf_mat[1], conf_mat[2], conf_mat[3]
    return (tp + tn) / (tp + tn + fp + fn)


def calculate_tpr(conf_mat):
    """Return the true positive rate tp / (tp + fn).

    Args:
        conf_mat: Either a mapping with at least the keys "tp" and "fn"
            (any ``dict`` subclass is accepted), or an indexable sequence laid
            out as ``[tn, fp, fn, tp]``.

    Returns:
        The fraction of actual positives that were predicted positive.

    Note:
        The case tp + fn == 0 is not guarded; plain Python numbers raise
        ZeroDivisionError in that case.
    """
    if isinstance(conf_mat, dict):
        tp, fn = conf_mat["tp"], conf_mat["fn"]
    else:
        tp, fn = conf_mat[3], conf_mat[2]
    return tp / (tp + fn)


def calculate_tnr(conf_mat):
    """Return the true negative rate tn / (tn + fp).

    Args:
        conf_mat: Either a mapping with at least the keys "tn" and "fp"
            (any ``dict`` subclass is accepted), or an indexable sequence laid
            out as ``[tn, fp, fn, tp]``.

    Returns:
        The fraction of actual negatives that were predicted negative.
        When there are no actual negatives at all (tn + fp == 0) the rate is
        undefined and 1.0 is returned by convention.
    """
    if isinstance(conf_mat, dict):
        tn, fp = conf_mat["tn"], conf_mat["fp"]
    else:
        tn, fp = conf_mat[0], conf_mat[1]
    negatives = tn + fp
    # Guard on the denominator, not on tn: with tn == 0 and fp > 0 the rate is
    # a well-defined 0.0, only 0 / 0 needs the fallback.
    if negatives == 0:
        return 1.0
    return tn / negatives

In [None]:
metrics, models, dataset

In [None]:
# Lookups that translate a np.digitize bin (1..3) into a level (see next cell).
values = [3, 2, 1]
values_ascending = {rank: level for rank, level in enumerate(values, 1)}
values_descending = {rank: level for rank, level in enumerate(reversed(values), 1)}
dict_performance_bins = {}

# One row per slice of the selected patient, identified by its position.
df_performance = pd.DataFrame({"id": [str(idx) for idx in range(len(df_total))]})

# Raw per-slice mean of each metric over the selected models.
# NOTE(review): `d` only names the output column, the rows are not filtered,
# so "<metric>_only_tumor" and "<metric>_all" hold identical values.
for d in ["only_tumor", "all"]:
    for met in metrics:
        df_tmp = pd.DataFrame({met + '_' + mod: df_total[f"VS_segm_{met}-{mod}"]
                               for mod in models})
        df_performance[met + '_' + d] = df_tmp.mean(axis=1).values
        

In [None]:
# Replace the raw per-slice means by a level 1..3 and remember the bin edges.
selected_models = models
for d in ["only_tumor", "all"]:
    for m in ["dice", "assd"]:
        target = m + '_' + d
        df_tmp = pd.DataFrame({m + '_' + model: df_total[f"VS_segm_{m}-{model}"]
                               for model in selected_models})
        df_performance[target] = df_tmp.mean(axis=1).values
        # Work on the stored Series so min/max keep pandas' NaN-skipping behaviour.
        raw_mean = df_performance[target]
        # Three equal-width bins; the upper edge is nudged up so the maximum
        # lands in bin 3.
        bins = np.linspace(np.min(raw_mean),
                           np.nextafter(np.max(raw_mean), np.inf), 4)
        # ASSD is better when lower, every other metric when higher.
        level_of = values_descending if "assd" in m else values_ascending
        df_performance[target] = np.vectorize(level_of.get)(
            np.digitize(raw_mean, bins=bins))
        # Keep the edges from the first time a column was binned.
        dict_performance_bins.setdefault(target, bins.tolist())

In [None]:
df_performance.head()

In [None]:
np.unique(df_performance["dice_only_tumor"])

## Radiomics features

In [None]:
# Load the per-slice radiomics of the selected patient. Despite its name,
# df_rad is a plain dict here (the result of json.load), not a DataFrame.
# The next cells index it as df_rad[<slice key>][<feature class>][<feature>].
with open(f"/tf/workdir/data/VS_segm/VS_registered/test_processed/vs_gk_{patient_id}/radiomics_2d.json") as json_file:
    df_rad = json.load(json_file)
df_rad.keys()

In [None]:
df_rad

In [None]:
# regenerate to list of values
df_radiomics = {"id": list(df_rad.keys())}
feature_classes = list(df_rad[list(df_rad.keys())[0]].keys())
feature_classes.remove("shape2D")
for cl in feature_classes:
    cl_dict = {}
    for key in df_rad.keys():
        cl_dict[key] = df_rad[str(key)][cl]
    tmp = {}
    for idx, d in cl_dict.items():
        for f, vals in d.items():
            if f in tmp.keys():
                tmp[f] = tmp[f] + [vals]
            else:
                tmp[f] = [vals]
    df_radiomics[cl] = tmp
df_radiomics

In [None]:
df_radiomics.keys()

In [None]:
# generate signature
df_sign = pd.DataFrame(columns=["id"])
df_sign["id"] = df_radiomics["id"]
for fc in feature_classes:
    for key, vals in df_radiomics[fc].items():
        vals = [float(v) for v in vals]
        if key == "Skewness":
            df_sign[f"{fc}-{key}"] = [1 if a <= 0 else 2 for a in vals]
        elif key == "Kurtosis":
            df_sign[f"{fc}-{key}"] = [1 if a <= 3 else 2 for a in vals]
        elif key == "Elongation":
            df_sign[f"{fc}-{key}"] = [1 if a <= np.mean(vals) else 2 for a in vals]
        elif key == "Flatness":
            df_sign[f"{fc}-{key}"] = [1 if a <= 0.5 else 2 for a in vals]
        elif key == "Sphericity":
            df_sign[f"{fc}-{key}"] = [1 if a <= np.mean(vals) else 2 for a in vals]
        else:
            df_sign[f"{fc}-{key}"] = np.digitize(vals, bins=np.linspace(np.min(vals),
                                                                   np.nextafter(np.max(vals), np.inf),
                                                                   4))

In [None]:
df_sign

## Volume features

In [None]:
# One row per slice: tumor presence flag and tumor area in pixels.
df = pd.read_json(f"/tf/workdir/data/VS_segm/VS_registered/test_processed/vs_gk_{patient_id}/evaluation.json")

# Rows are collected in a list and turned into a frame once:
# DataFrame.append no longer exists in pandas 2.x and copied the whole frame
# on every call.
volume_records = []
for idx, row in df.iterrows():
    contours = [np.array(s).astype(np.int64)
                for s in np.array(row["VS_segm_gt"], dtype="object")]
    # Rasterise the filled ground-truth contours and count the covered pixels.
    # NOTE(review): the canvas is hard-coded to 256x256 — confirm it matches
    # the slice size.
    filled = cv2.drawContours(np.zeros((256, 256)), contours, -1, (1), -1)
    volume_records.append({"id": str(row["slice"]),
                           "tumor_presence": row["VS_class_gt"],
                           "tumor_size_px": np.count_nonzero(filled)})
df_volume = pd.DataFrame(volume_records,
                         columns=["id", "tumor_presence", "tumor_size_px"])

# Size level per slice: 0 for slices without tumor, otherwise the equal-width
# bin of the pixel count (edges span the non-empty tumor areas).
has_tumor = (df_volume["tumor_presence"] == 1).to_numpy()
res = np.zeros(len(df_volume), dtype=int)
if has_tumor.any() and (df_volume["tumor_size_px"] >= 1).any():
    bins = np.linspace(np.min(df_volume[df_volume["tumor_size_px"] >= 1]["tumor_size_px"]),
                       np.nextafter(np.max(df_volume[df_volume["tumor_presence"] >= 1]["tumor_size_px"]), np.inf),
                       4)
    # Boolean-mask assignment keeps `res` aligned with the rows even when the
    # tumor slices are not one contiguous run (slice assignment on a list
    # would silently change its length in that case).
    res[has_tumor] = np.digitize(df_volume[df_volume["tumor_presence"] == 1]["tumor_size_px"], bins=bins)
df_volume["tumor_size"] = res
df_volume.head()

In [None]:
df_volume.tail()

In [None]:
np.unique(df_volume["tumor_size"])

## Merge

In [None]:
# Left-join performance levels and signature onto the per-slice volume table;
# slices without a match keep NaN in the joined columns.
df_new = pd.merge(
    df_volume,
    pd.merge(df_performance, df_sign, on="id", how="left"),
    on="id",
    how="left",
)
df_new.head()

In [None]:
df_new.tail()

In [None]:
df_new[df_new["tumor_size_px"] != 0]

In [None]:
list(df_new.columns)

## Parallel coordinate plot - details

In [None]:
features = "shape"

# Pick the columns shown next to the performance axis. An unknown `features`
# value leaves feat_list untouched.
if features == "firstorder":
    # "Entropy" and "Uniformity" are deliberately left out.
    feat_list = [
        "Energy",
        "Skewness",
        "Kurtosis",
        "Variance",
        "Range",
    ]
elif features == "shape":
    feat_list = [
        "Elongation",
        "MaximumDiameter",
        "MeshSurface",
        "MajorAxisLength",
        "Perimeter",
    ]
elif features == "performance":
    feat_list = [
        'dice_all', 'dice_only_tumor',
        'assd_all', 'assd_only_tumor',
        'acc_all', 'acc_only_tumor',
        'tnr_all',
        'tpr_all', 'tpr_only_tumor',
    ]
    # The selected performance column already has its own axis.
    feat_list.remove(metric + "_" + dataset)

feat_list

In [None]:
# Plot table: id, tumor presence, the selected performance column, then one
# column per feature.
df_plot = pd.DataFrame()
df_plot["id"] = df_new["id"]
df_plot["tumor_presence"] = df_new["tumor_presence"]
performance_col = metric + "_" + dataset
df_plot[performance_col] = df_new[performance_col].values
for fk in feat_list:
    # Prefer a column whose name (or the part after the last "-") equals the
    # feature name. A pure substring test can pick the wrong column, e.g.
    # "Perimeter" also occurs inside "PerimeterSurfaceRatio".
    matches = [c for c in df_new.columns if c == fk or c.split("-")[-1] == fk]
    if not matches:
        # Fall back to the previous substring lookup.
        matches = [c for c in df_new.columns if fk in c]
    df_plot[fk] = df_new[matches[0]].values
df_plot.head()

In [None]:
# All plotted columns, i.e. everything except the identifier.
plot_cols = list(df_plot.columns)
plot_cols.remove("id")
plot_cols

In [None]:
px.parallel_categories(df_plot)

In [None]:
# Replace missing values with the sentinel 10 so those rows form their own
# category in the plot (presumably the NaNs stem from the left merges, i.e.
# slices without a radiomics entry — confirm).
df_plot = df_plot.fillna(value=10)

In [None]:
# --- Dimensions -----------------------------------------------------------------
# Two leading axes: tumor presence and the selected performance column.
# NOTE(review): the performance axis is always labelled "DSC", which is only
# accurate while metric == "dice".
perf_dim = [
    go.parcats.Dimension(values=df_plot["tumor_presence"], label="Presence"),
    go.parcats.Dimension(values=df_plot[performance_col], label="DSC"),
]

# One axis per feature column. Columns are excluded by name, not by position:
# a positional [2:] slice would include the performance column (third column
# of df_plot) and draw it a second time.
already_plotted = {"id", "tumor_presence", performance_col}
feature_dim = [
    go.parcats.Dimension(values=df_plot[f], label=f)
    for f in df_plot.keys()
    if f not in already_plotted
]
print(len(feature_dim))

# --- Trace and figure -----------------------------------------------------------
# Ribbons are coloured by the performance value.
color = df_plot[performance_col]
colorscale = [[0, 'mediumseagreen'], [1, 'lightsteelblue']]

fig = go.Figure(data=[go.Parcats(
    dimensions=[*perf_dim, *feature_dim],
    line={'color': color, 'colorscale': colorscale},
    bundlecolors=True,
    hoveron='category',
    hoverinfo='count+probability',
    arrangement='freeform',
)])
fig.update_layout(margin=dict(l=5, r=5, b=5, t=20, pad=4))
fig.show()