In [1]:
## import the necessary packages
import numpy as np
import os
from scipy.optimize import linear_sum_assignment
import warnings
import csv
import pandas as pd
from tqdm import tqdm
import tifffile
from math import*
import json

# Suppress FutureWarning messages
warnings.simplefilter(action='ignore', category=FutureWarning)

In [2]:
# Label stored in the "Method" column of every per-match metrics row.
method = 'GNN'

# Input directories (the prediction directory itself is selected further down,
# in the evaluation cells).
gt_dir = r"../data/vectors"   # directory with the ground truth vectors
img_dir = r"../data/images"   # directory with images

# Distance thresholds that eval_results_batch compares against the
# nucleus / Golgi centroid distances; the three lists are zipped together.
# Previously tried values: nuclei 8.8, golgi 4.4.
nuclei_thresholds = [10]
golgi_thresholds = [6]
levels_ = [0]
lvl_ = 0

# Indices of the crops to evaluate (positions in `imgs`).
numbers_ = list(range(8))

## name of the images
imgs = ['Crop%d' % crop_id for crop_id in range(1, 5)] + \
       ['Crop%d_BC' % crop_id for crop_id in range(5, 9)]

# Voxel size (x, y, z) of every image, in um.
image_dimensions = [[0.333, 0.333, 0.270] for _ in range(6)] + \
                   [[0.333, 0.333, 0.400] for _ in range(2)]

# Value stored in the "Type" column, one entry per image.
info = ['test' for _ in range(8)]

In [3]:
''' Define the metrics ''' 
def square_rooted(x):
    """Return the Euclidean norm of the iterable ``x``, rounded to 3 decimals."""
    sum_of_squares = 0
    for component in x:
        sum_of_squares = sum_of_squares + component * component
    return round(np.sqrt(sum_of_squares), 3)
 
def cosine_similarity(x,y):
    """Return the cosine of the angle between ``x`` and ``y``, rounded to 3 decimals.

    The two norms come from ``square_rooted`` and are therefore already
    rounded to 3 decimals before the division takes place.
    """
    dot_product = 0
    for a, b in zip(x, y):
        dot_product = dot_product + a * b
    norm_product = square_rooted(x) * square_rooted(y)
    return round(dot_product / float(norm_product), 3)

#Euclidean distance computed in um
def distance_um(p, q, dimx, dimy, dimz):
    """Euclidean distance between ``p`` and ``q`` after scaling each axis.

    The first three components of ``p`` and ``q`` are subtracted and every
    difference is multiplied by the matching per-axis factor
    (``dimx``, ``dimy``, ``dimz``) before the norm is taken.
    """
    delta_x = (p[0] - q[0]) * dimx
    delta_y = (p[1] - q[1]) * dimy
    delta_z = (p[2] - q[2]) * dimz
    return np.sqrt(delta_x ** 2 + delta_y ** 2 + delta_z ** 2)

#ignore the borders of the image
def inside_img(coord,img_dim_x,img_dim_y,img_dim_z,x_y_lim,z_lim):
    """Tell whether ``coord`` lies strictly inside the image once the borders are excluded.

    A margin of ``x_y_lim`` is excluded on both sides of the first two axes.
    On the third axis ``z_lim`` is excluded at the upper end only; the lower
    bound is 0.
    """
    # NOTE(review): the z lower bound is 0 rather than z_lim, unlike x and y --
    # confirm this asymmetry is intended.
    if not (x_y_lim < coord[0] < img_dim_x - x_y_lim):
        return False
    if not (x_y_lim < coord[1] < img_dim_y - x_y_lim):
        return False
    return 0 < coord[2] < img_dim_z - z_lim

#modify Constraints Col
def transform_constraints_column(col):
    """
    Normalise one string value of the 'Constraints' column.

    Returns:
        - False when the value does not contain 'constraints'
        - otherwise 'constraints' followed by the text found between the first
          occurrence of 'constraints' and the next one (or the end of the string)
    """
    marker = 'constraints'
    if marker in col:
        pieces = col.split(marker)
        return marker + pieces[1]
    return False

In [4]:
def _read_centroid_pairs(csv_path, img_dims, x_y_lim, z_lim, spacing=None, max_pair_dist_um=None):
    """Read nucleus/Golgi centroid pairs from a vectors CSV, dropping pairs near the image border.

    Parameters
    ----------
    csv_path : str
        CSV file whose data lines each hold six comma-separated numbers.
    img_dims : tuple
        (img_dim_x, img_dim_y, img_dim_z) forwarded to ``inside_img``.
    x_y_lim, z_lim : int
        Border margins forwarded to ``inside_img``.
    spacing : tuple, optional
        (x, y, z) voxel size; only needed together with ``max_pair_dist_um``.
    max_pair_dist_um : float, optional
        When given, a pair is kept only if its nucleus-Golgi distance
        (``distance_um``) is strictly below this value.

    Returns
    -------
    (nuclei, golgi) : tuple of np.ndarray
        Arrays of (X, Y, Z) tuples in matching order; both are empty when no
        pair survives the filters.
    """
    img_dim_x, img_dim_y, img_dim_z = img_dims
    nuclei = []
    golgi = []

    # The former "rU" mode was removed in Python 3.11; plain text mode already
    # applies universal newlines. The context manager closes the handle.
    with open(csv_path, "r") as handle:
        for row in csv.reader(handle, delimiter=';'):
            # skip blank lines and the header line
            if not row or row[0] == 'YN,XN,ZN,YG,XG,ZG':
                continue

            aux = row[0].split(",")
            # NOTE(review): fields 0,1,4 are read as nucleus Y,X,Z and fields
            # 2,3,5 as Golgi Y,X,Z, which is not the order spelled by the header
            # string above -- confirm against the actual file layout.
            # The -1 presumably converts 1-based coordinates to 0-based -- confirm.
            YN = int(float(aux[0]))-1
            XN = int(float(aux[1]))-1
            ZN = int(float(aux[4]))-1
            YG = int(float(aux[2]))-1
            XG = int(float(aux[3]))-1
            ZG = int(float(aux[5]))-1

            nucleus_inside = inside_img(np.asarray([XN,YN,ZN]), img_dim_x, img_dim_y, img_dim_z, x_y_lim, z_lim)
            if not (nucleus_inside and inside_img(np.asarray([XG,YG,ZG]), img_dim_x, img_dim_y, img_dim_z, x_y_lim, z_lim)):
                continue

            if max_pair_dist_um is not None:
                pair_dist = distance_um([XN,YN,ZN], [XG,YG,ZG], spacing[0], spacing[1], spacing[2])
                if not (pair_dist < max_pair_dist_um):
                    continue

            nuclei.append((XN,YN,ZN))
            golgi.append((XG,YG,ZG))

    return np.asarray(nuclei), np.asarray(golgi)


def _safe_ratio(numerator, denominator):
    """Return numerator/denominator, or NaN when the denominator is zero."""
    if denominator == 0:
        return float("nan")
    return numerator / denominator


def eval_results_batch(pred_subfolder, image_nb, allmetrics, metrics_stats):
    """Evaluate the predicted nucleus-Golgi vectors of one image against the ground truth.

    Predicted and ground-truth pairs are read from ``<image name>.csv`` in
    ``pred_subfolder`` and ``gt_dir`` respectively, filtered at the image
    borders, and matched one-to-one with ``linear_sum_assignment``. A matched
    pair counts as a true positive when both its nucleus and its Golgi
    distance are within the current thresholds.

    Parameters
    ----------
    pred_subfolder : str
        Directory holding the prediction CSV of every image.
    image_nb : int
        Index into the module-level ``imgs``, ``image_dimensions`` and ``info`` lists.
    allmetrics : pd.DataFrame
        Modified in place: one row is appended per true positive
        (including the matched ground-truth index, ``index_tp_gt``).
    metrics_stats : pd.DataFrame
        Modified in place: one row is appended per true positive.

    Returns
    -------
    dict
        Summary for the image: mean/std of the per-match metrics, TP/FP/FN
        counts and the derived rates. Ratios whose denominator is zero are
        NaN; F1 is 0.0 when precision and recall are both zero. Returns None
        if the threshold lists are empty.
    """
    pred_vectors = os.path.join(pred_subfolder, imgs[image_nb] + '.csv')
    gt_vectors = os.path.join(gt_dir, imgs[image_nb] + '.csv')

    ## read the image and get its dimensions
    image = tifffile.imread(os.path.join(img_dir, imgs[image_nb] + '.tif'))
    # the image must be 4-D; the last axis (channels) is not used
    (img_dim_x, img_dim_y, img_dim_z, _channels) = np.shape(image)
    img_dims = (img_dim_x, img_dim_y, img_dim_z)

    #voxel's physical dimensions
    x_spacing = image_dimensions[image_nb][0]
    y_spacing = image_dimensions[image_nb][1]
    z_spacing = image_dimensions[image_nb][2]

    #limits to ignore vectors at the borders of the image (voxels)
    x_y_lim = int(7/x_spacing)
    z_lim = int(5/z_spacing)

    ## ground-truth nucleus and Golgi centroids
    nuclei_centroids_gt, golgi_centroids_gt = _read_centroid_pairs(
        gt_vectors, img_dims, x_y_lim, z_lim)

    ## predicted centroids; pairs whose nucleus-Golgi distance is not
    ## below 18 (distance_um units) are discarded as well
    nuclei_centroids, golgi_centroids = _read_centroid_pairs(
        pred_vectors, img_dims, x_y_lim, z_lim,
        spacing=(x_spacing, y_spacing, z_spacing), max_pair_dist_um=18)

    ''' Assignment nuclei centroids '''
    ## cost matrix: nucleus distance + Golgi distance between every
    ## predicted pair (rows) and every ground-truth pair (columns)
    matrix = np.zeros((len(nuclei_centroids),len(nuclei_centroids_gt)))
    for i in range(0,len(nuclei_centroids)):
        for j in range(0,len(nuclei_centroids_gt)):
            matrix[i,j] = distance_um(nuclei_centroids[i], nuclei_centroids_gt[j], x_spacing, y_spacing, z_spacing) + distance_um(golgi_centroids[i], golgi_centroids_gt[j], x_spacing, y_spacing, z_spacing)

    # costs above 10 are replaced by a large constant so such pairings are
    # only chosen when nothing cheaper is available
    matrix[matrix>10] = 2000

    ## method to solve the linear assignment problem
    row_ind, col_ind = linear_sum_assignment(matrix)

    ''' Compute the metrics for the vectors '''
    stat_cols = ["cosine similarity", "vec_error", "nuclei", "golgi"]
    for n_th, g_th, thlvl in zip(nuclei_thresholds, golgi_thresholds, levels_):
        # numeric per-match values of the true positives found at this threshold
        stat_rows = []

        for i in range(0, len(row_ind)):
            n_coord = nuclei_centroids[row_ind[i]]
            g_coord = golgi_centroids[row_ind[i]]
            vec = g_coord - n_coord

            n_coord_gt = nuclei_centroids_gt[col_ind[i]]
            g_coord_gt = golgi_centroids_gt[col_ind[i]]
            vec_gt = g_coord_gt - n_coord_gt

            dist_n_centroids = distance_um(n_coord, n_coord_gt, x_spacing, y_spacing, z_spacing)
            dist_g_centroids = distance_um(g_coord, g_coord_gt, x_spacing, y_spacing, z_spacing)
            vec_error = distance_um(vec, vec_gt, x_spacing, y_spacing, z_spacing)

            cos_sim = cosine_similarity(vec, vec_gt)

            if dist_n_centroids<=n_th and dist_g_centroids<=g_th:
                res = {"Image": imgs[image_nb], "Method": method, "Type": info[image_nb], "NucleusTh": n_th, "GolgiTh": g_th,
                       "Threshold_level": thlvl,
                       "cosine similarity": abs(cos_sim), "vec_error": vec_error,
                       "nuclei": dist_n_centroids, "golgi": dist_g_centroids}

                res_aux = {"Image": imgs[image_nb], "Method": method, "Type": info[image_nb], "NucleusTh": n_th, "GolgiTh": g_th,
                       "Threshold_level": thlvl, "index_tp_gt": col_ind[i],
                       "cosine similarity": abs(cos_sim), "vec_error": vec_error,
                       "nuclei": dist_n_centroids, "golgi": dist_g_centroids}

                # the caller-owned frames are extended in place
                allmetrics.loc[len(allmetrics)] = res_aux
                metrics_stats.loc[len(metrics_stats)] = res

                stat_rows.append([abs(cos_sim), vec_error, dist_n_centroids, dist_g_centroids])

        # Built once with an explicit float dtype so that an image without any
        # true positive yields NaN statistics instead of a KeyError.
        metrics = pd.DataFrame(stat_rows, columns=stat_cols, dtype=float)
        metrics_mean = metrics.mean()
        metrics_std = metrics.std()

        TP = len(stat_rows)
        FP = np.shape(golgi_centroids)[0] - TP
        FN = np.shape(golgi_centroids_gt)[0] - TP

        TPR = _safe_ratio(TP, TP+FN)
        FPR = _safe_ratio(FP, FP+TP)
        FNR = _safe_ratio(FN, FN+TP)

        PRECISION = _safe_ratio(TP, TP+FP)
        RECALL = TPR
        if PRECISION+RECALL == 0:
            # precision and recall are both defined and zero
            F1_SCORE = 0.0
        else:
            F1_SCORE = 2*PRECISION*RECALL/(PRECISION+RECALL)

        res = {"Image": imgs[image_nb], "Method": method, "Type": info[image_nb],
               "NucleusTh": n_th, "GolgiTh": g_th, "Threshold_level": thlvl,
               "CosineSimilarityM": metrics_mean['cosine similarity'],
               "CosineSimilaritySTD": metrics_std['cosine similarity'],
               "VecErrorM": metrics_mean['vec_error'],
               "VecErrorSTD": metrics_std['vec_error'],
               "DistanceNuM": metrics_mean['nuclei'],
               "DistanceNuSTD": metrics_std['nuclei'],
               "DistanceGoM": metrics_mean['golgi'],
               "DistanceGoSTD": metrics_std['golgi'],
               "TP": TP,
               "FP": FP,
               "FN": FN,
               "TPR": TPR,
               "FPR": FPR,
               "FNR": FNR,
               "PRECISION":PRECISION,
               "F1_SCORE":F1_SCORE}
        # NOTE: this return sits inside the threshold loop, so only the first
        # (n_th, g_th, thlvl) triple is evaluated. The notebook configures a
        # single triple; move the return if several levels must be reported.
        return res

# Not Grouped

In [7]:
#Uncomment here to use results from paper
#pred_dir = r"../results/results_submitted_paper/trial1/Results_2"#GNN w/ Angular Features  normalized = True K=7
#pred_dir = r"../results/results_submitted_paper/trial1/Results_3"#GNN w/o Angular Features normalized = True k=7
#pred_dir = r"../results/results_submitted_paper/trial1/Results_6"#GNN w/ Angular Features  normalized = False K=7
#pred_dir = r"../results/results_submitted_paper/trial1/Results_7"#GNN w/o Angular Features normalized = False k=7
#pred_dir = r"../results/results_submitted_paper/trial1/Results_10"#GNN w/ Angular Features  normalized = True K=10
#pred_dir = r"../results/results_submitted_paper/trial1/Results_11"#GNN w/o Angular Features normalized = True k=10
#pred_dir = r"../results/results_submitted_paper/trial1/Results_14"#GNN w/ Angular Features  normalized = False K=10
#pred_dir = r"../results/results_submitted_paper/trial1/Results_15"#GNN w/o Angular Features normalized = False k=10

#pred_dir = r"../results/results_submitted_paper/trial1/Results_18"#MLP w/ Angular Features normalized=True K=7
#pred_dir = r"../results/results_submitted_paper/trial1/Results_19"#MLP w/o Angular Features normalized=True K=7
#pred_dir = r"../results/results_submitted_paper/trial1/Results_22"#MLP w/ Angular Features normalized=False K=7
#pred_dir = r"../results/results_submitted_paper/trial1/Results_23"#MLP w/o Angular Features normalized=False K=7
#pred_dir = r"../results/results_submitted_paper/trial1/Results_26"#MLP w/ Angular Features normalized=True K=10
#pred_dir = r"../results/results_submitted_paper/trial1/Results_27"#MLP w/o Angular Features normalized=True K=10
#pred_dir = r"../results/results_submitted_paper/trial1/Results_30"#MLP w/ Angular Features normalized=False K=10
#pred_dir = r"../results/results_submitted_paper/trial1/Results_31"#MLP w/o Angular Features normalized=False K=10


#Uncomment here to use LATEST results with data WITH normalization
#pred_dir = r"../results/results_real_automatic/trial1/Results_0_constraints/"#GNN w/ Angular Features constraints Greedy w/o Threshold
#pred_dir = r"../results/results_real_automatic/trial1/Results_1_constraints/"#GNN w/o Angular Features constraints Greedy w/o Threshold
#pred_dir = r"../results/results_real_automatic/trial1/Results_2_constraints/"#MLP w/ Angular Features constraints Greedy w/o Threshold
#pred_dir = r"../results/results_real_automatic/trial1/Results_3_constraints/"#MLP w/o Angular Features constraints Greedy w/o Threshold

#pred_dir = r"../results/results_real_automatic/trial1/Results_0_constraints_threshold/"#GNN w/ Angular Features constraints Greedy w/ Threshold
#pred_dir = r"../results/results_real_automatic/trial1/Results_1_constraints_threshold/"#GNN w/o Angular Features constraints Greedy w/ Threshold
#pred_dir = r"../results/results_real_automatic/trial1/Results_2_constraints_threshold/"#MLP w/ Angular Features constraints Greedy w/ Threshold
#pred_dir = r"../results/results_real_automatic/trial1/Results_3_constraints_threshold/"#MLP w/o Angular Features constraints Greedy w/ Threshold

#Uncomment here to use LATEST results with data WITHOUT normalization
#pred_dir = r"../results/results_real_automatic_not_normalized/trial1/Results_0_constraints/" #GNN w/ Angular Features
#pred_dir = r"../results/results_real_automatic_not_normalized/trial1/Results_1_constraints/" #GNN w/o Angular Features
#pred_dir = r"../results/results_real_automatic_not_normalized/trial1/Results_2_constraints/" #MLP w/ Angular Features
#pred_dir = r"../results/results_real_automatic_not_normalized/trial1/Results_3_constraints/" #MLP w/o Angular Features

#Uncomment here to use results for classical algorithms
#pred_dir = r"../results/Hopcroft_Karp_Real_Automatic_k10/" #Hopcroft-Karp
pred_dir = r"../results/Minimum Weight_Real_Automatic_k10/" #Jonker-Volgenant

# Per-match frames that eval_results_batch extends in place.
metrics_stats = pd.DataFrame(columns = ["Image", "Method", "Type", "NucleusTh", "GolgiTh", "Threshold_level",
                                        "cosine similarity", "vec_error", "nuclei", "golgi"])

allmetrics = pd.DataFrame(columns = ["Image", "Method", "Type", "NucleusTh", "GolgiTh", "Threshold_level",
                                      "index_tp_gt", "cosine similarity", "vec_error", "nuclei", "golgi"])

# Evaluate every crop, collecting the per-image summaries in a list so the
# frame is built once instead of being grown row by row.
per_image_results = []
for image_nb in tqdm(numbers_):
    res = eval_results_batch(pred_dir, image_nb, allmetrics, metrics_stats)
    per_image_results.append(res)

performance_metrics = pd.DataFrame(per_image_results,
                                   columns = ["Image", "Method", "Type", "NucleusTh", "GolgiTh", "Threshold_level",
                                              "CosineSimilarityM",
                                              "CosineSimilaritySTD", "VecErrorM","VecErrorSTD",
                                              "DistanceNuM", "DistanceNuSTD", "DistanceGoM",
                                              "DistanceGoSTD", "TP", "FP", "FN", "TPR", "FPR", "FNR", "PRECISION", "F1_SCORE"])

# Aggregate over the images: counts are summed, everything else is averaged.
# String aggregator names are used because passing np.mean / np.sum to
# groupby().agg is deprecated in recent pandas releases.
final_metrics = performance_metrics.groupby(["Threshold_level"], as_index=False).agg({
    "CosineSimilarityM": "mean",
    "CosineSimilaritySTD": "mean",
    "VecErrorM": "mean",
    "VecErrorSTD": "mean",
    "DistanceNuM": "mean",
    "DistanceNuSTD": "mean",
    "DistanceGoM": "mean",
    "DistanceGoSTD": "mean",
    "TP": "sum",
    "FP": "sum",
    "FN": "sum",
    "TPR": "mean",
    "FPR": "mean",
    "FNR": "mean",
    "PRECISION": "mean",
    "F1_SCORE": "mean",
})
final_metrics

100%|██████████| 8/8 [00:01<00:00,  5.35it/s]


Unnamed: 0,Threshold_level,CosineSimilarityM,CosineSimilaritySTD,VecErrorM,VecErrorSTD,DistanceNuM,DistanceNuSTD,DistanceGoM,DistanceGoSTD,TP,FP,FN,TPR,FPR,FNR,PRECISION,F1_SCORE
0,0,0.88863,0.184118,2.664233,1.764795,2.200037,1.519015,1.533332,1.051984,237,103,171,0.586048,0.283786,0.413952,0.716214,0.638681


In [7]:
# Print the aggregated metrics table as LaTeX source (plain text, so it can be copied as is).
print(final_metrics.to_latex())

\begin{tabular}{lrrrrrrrrrrrrrrrrr}
\toprule
 & Threshold_level & CosineSimilarityM & CosineSimilaritySTD & VecErrorM & VecErrorSTD & DistanceNuM & DistanceNuSTD & DistanceGoM & DistanceGoSTD & TP & FP & FN & TPR & FPR & FNR & PRECISION & F1_SCORE \\
\midrule
0 & 0 & 0.850968 & 0.225321 & 2.729815 & 1.894963 & 2.271397 & 1.639082 & 1.559974 & 0.979717 & 243 & 65 & 165 & 0.581506 & 0.215662 & 0.418494 & 0.784338 & 0.662857 \\
\bottomrule
\end{tabular}



# Grouped

In [9]:
# One tidy DataFrame per trial directory; each row summarises one result sub-folder.
trials_dfs = []


list_pred_dirs = [r"../results/results_real_automatic/trial1/",
                  r"../results/results_real_automatic/trial2/",
                  r"../results/results_real_automatic/trial3/",
                  ]

#Uncomment here to choose different set of images
#list_pred_dirs = [r"../results/results_real_automatic_not_normalized/trial1/",
#                  r"../results/results_real_automatic_not_normalized/trial2/",
#                 r"../results/results_real_automatic_not_normalized/trial3/",]

# Loop-invariant definitions, built once.
performance_columns = ["Image", "Method", "Type", "NucleusTh", "GolgiTh", "Threshold_level",
                       "CosineSimilarityM",
                       "CosineSimilaritySTD", "VecErrorM","VecErrorSTD",
                       "DistanceNuM", "DistanceNuSTD", "DistanceGoM",
                       "DistanceGoSTD", "TP", "FP", "FN", "TPR", "FPR", "FNR", "PRECISION", "F1_SCORE"]

# Counts are summed over the images, everything else is averaged. String
# names are used because passing np.mean / np.sum to groupby().agg is
# deprecated in recent pandas releases.
metric_aggregations = {"CosineSimilarityM": "mean",
                       "CosineSimilaritySTD": "mean",
                       "VecErrorM": "mean",
                       "VecErrorSTD": "mean",
                       "DistanceNuM": "mean",
                       "DistanceNuSTD": "mean",
                       "DistanceGoM": "mean",
                       "DistanceGoSTD": "mean",
                       "TP": "sum",
                       "FP": "sum",
                       "FN": "sum",
                       "TPR": "mean",
                       "FPR": "mean",
                       "FNR": "mean",
                       "PRECISION": "mean",
                       "F1_SCORE": "mean"}

model_type_transform = {"GNN_Classifier":"GNN"}
constraints_transform = {"constraints":"Greedy", "constraints_threshold":"Greedy w/ Threshold", "constraints_opt": "Optimization"}

for pred_dir in tqdm(list_pred_dirs):
    records = []
    for entry_name in os.listdir(pred_dir):

        pred_subfolder = os.path.join(pred_dir, entry_name)
        # stray files next to the result folders would otherwise crash the loop
        if not os.path.isdir(pred_subfolder):
            continue

        # Per-match frames that eval_results_batch extends in place.
        metrics_stats = pd.DataFrame(columns = ["Image", "Method", "Type", "NucleusTh", "GolgiTh", "Threshold_level",
                                                "cosine similarity", "vec_error", "nuclei", "golgi"])

        allmetrics = pd.DataFrame(columns = ["Image", "Method", "Type", "NucleusTh", "GolgiTh", "Threshold_level",
                                            "index_tp_gt", "cosine similarity", "vec_error", "nuclei", "golgi"])

        # Collect the per-image summaries, then build the frame once.
        per_image_results = []
        for image_nb in numbers_:
            res = eval_results_batch(pred_subfolder, image_nb, allmetrics, metrics_stats)
            per_image_results.append(res)
        performance_metrics = pd.DataFrame(per_image_results, columns = performance_columns)

        #Aggregate Metrics
        final_metrics = performance_metrics.groupby(["Threshold_level"], as_index=False).agg(metric_aggregations)

        # One record per sub-folder: job parameters from params.json, the
        # aggregated metrics (first group only) and the folder name.
        record = {}
        params_path = os.path.join(pred_subfolder, "params.json")
        with open(params_path, 'r') as file:
            params_data = json.load(file)
        record.update(params_data["job_parameters"])
        record.update(final_metrics.to_dict('records')[0])
        record["Constraints"] = os.path.basename(pred_subfolder)
        records.append(record)

    #Get the dataframe of the records
    records_df = pd.DataFrame(records)
    records_df = records_df[["edge_feats", "model_type",  "Constraints",
                            'Threshold_level',
                            'CosineSimilarityM', 'CosineSimilaritySTD', 'VecErrorM', 'VecErrorSTD',
                            'DistanceNuM', 'DistanceNuSTD', 'DistanceGoM', 'DistanceGoSTD', 'TP',
                            'FP', 'FN', 'TPR', 'FPR', 'FNR', 'PRECISION', 'F1_SCORE']]

    #modify angular features col: True when any edge feature name contains "angle"
    records_df = records_df.rename(columns={'edge_feats': 'Angular Features'})
    records_df["Angular Features"] = records_df["Angular Features"].apply(lambda x : any("angle" in item for item in x))

    #modify model_type col (unknown names are kept unchanged)
    records_df["model_type"] = records_df["model_type"].apply(lambda x: model_type_transform.get(x, x))

    # Reduce the folder name to its 'constraints...' suffix (False when absent),
    # then map the known suffixes to readable labels.
    records_df['Constraints'] = records_df['Constraints'].apply(transform_constraints_column)
    records_df['Constraints'] = records_df['Constraints'].apply(lambda x : constraints_transform.get(x,x))
    trials_dfs.append(records_df)

100%|██████████| 3/3 [01:33<00:00, 31.22s/it]


In [14]:
# Show every trial table, ordered by feature set, constraint type and model.
trial_sort_keys = ['Angular Features', 'Constraints','model_type']
for _trial_df in trials_dfs:
    _trial_sorted = _trial_df.sort_values(by=trial_sort_keys)
    display(_trial_sorted)

Unnamed: 0,Angular Features,model_type,Constraints,Threshold_level,CosineSimilarityM,CosineSimilaritySTD,VecErrorM,VecErrorSTD,DistanceNuM,DistanceNuSTD,DistanceGoM,DistanceGoSTD,TP,FP,FN,TPR,FPR,FNR,PRECISION,F1_SCORE
4,False,GNN,False,0,0.911178,0.153308,2.520939,1.634886,2.137216,1.438427,1.404005,0.812231,314,264,94,0.769603,0.43131,0.230397,0.56869,0.641538
12,False,MLP,False,0,0.908506,0.152357,2.468459,1.598312,2.152821,1.389008,1.382037,0.783699,300,282,108,0.711877,0.455976,0.288123,0.544024,0.596207
5,False,GNN,Greedy,0,0.889721,0.174598,2.631559,1.658817,2.158351,1.468158,1.573499,1.065216,262,75,146,0.641144,0.222469,0.358856,0.777531,0.698692
13,False,MLP,Greedy,0,0.866989,0.214504,2.764887,1.885462,2.269348,1.649315,1.573304,1.033933,261,78,147,0.642416,0.228225,0.357584,0.771775,0.697303
7,False,GNN,Greedy w/ Threshold,0,0.887588,0.17732,2.639208,1.675893,2.155843,1.464619,1.58267,1.066903,262,64,146,0.641144,0.201039,0.358856,0.798961,0.70743
15,False,MLP,Greedy w/ Threshold,0,0.850968,0.225321,2.729815,1.894963,2.271397,1.639082,1.559974,0.979717,243,65,165,0.581506,0.215662,0.418494,0.784338,0.662857
6,False,GNN,Optimization,0,0.902923,0.163363,2.773189,1.707598,2.310813,1.637089,1.538087,0.947327,201,112,207,0.496133,0.343005,0.503867,0.656995,0.554972
14,False,MLP,Optimization,0,0.887322,0.189488,2.9973,1.987836,2.474296,1.811356,1.57644,1.016789,198,114,210,0.486593,0.34603,0.513407,0.65397,0.546719
0,True,GNN,False,0,0.909547,0.154956,2.578322,1.71039,2.164726,1.485535,1.445781,0.885372,318,235,90,0.783106,0.403895,0.216894,0.596105,0.66652
8,True,MLP,False,0,0.904539,0.156951,2.510372,1.641067,2.163124,1.411706,1.399233,0.79121,298,262,110,0.708257,0.438186,0.291743,0.561814,0.606905


Unnamed: 0,Angular Features,model_type,Constraints,Threshold_level,CosineSimilarityM,CosineSimilaritySTD,VecErrorM,VecErrorSTD,DistanceNuM,DistanceNuSTD,DistanceGoM,DistanceGoSTD,TP,FP,FN,TPR,FPR,FNR,PRECISION,F1_SCORE
4,False,GNN,False,0,0.902928,0.165415,2.585858,1.69075,2.159122,1.455606,1.469386,0.878806,308,203,100,0.753514,0.390249,0.246486,0.609751,0.668763
12,False,MLP,False,0,0.905662,0.156516,2.486718,1.630487,2.157139,1.407135,1.366546,0.756696,301,287,107,0.715155,0.451609,0.284845,0.548391,0.600357
5,False,GNN,Greedy,0,0.896889,0.175868,2.584723,1.663637,2.166502,1.514505,1.512062,0.957568,264,81,144,0.659432,0.223799,0.340568,0.776201,0.709252
13,False,MLP,Greedy,0,0.87159,0.209582,2.71538,1.814721,2.238143,1.628838,1.55616,1.032251,259,81,149,0.636781,0.235581,0.363219,0.764419,0.691049
7,False,GNN,Greedy w/ Threshold,0,0.893347,0.179609,2.626802,1.714797,2.195195,1.574805,1.518321,0.951345,259,68,149,0.648239,0.200668,0.351761,0.799332,0.712802
15,False,MLP,Greedy w/ Threshold,0,0.85855,0.218947,2.738879,1.903825,2.296055,1.698665,1.534795,0.966582,245,64,163,0.587553,0.207076,0.412447,0.792924,0.669243
6,False,GNN,Optimization,0,0.895552,0.173916,2.945182,1.950204,2.348726,1.605815,1.556077,1.012694,201,117,207,0.494501,0.355609,0.505499,0.644391,0.550276
14,False,MLP,Optimization,0,0.885343,0.188445,2.913725,1.89395,2.382405,1.737445,1.634064,1.113146,196,111,212,0.484917,0.345955,0.515083,0.654045,0.546774
0,True,GNN,False,0,0.903767,0.160497,2.625413,1.702907,2.156227,1.45756,1.531606,0.97568,295,222,113,0.727204,0.406073,0.272796,0.593927,0.644779
8,True,MLP,False,0,0.902495,0.15848,2.519054,1.65127,2.185323,1.447303,1.37081,0.772099,296,266,112,0.701229,0.442768,0.298771,0.557232,0.602196


Unnamed: 0,Angular Features,model_type,Constraints,Threshold_level,CosineSimilarityM,CosineSimilaritySTD,VecErrorM,VecErrorSTD,DistanceNuM,DistanceNuSTD,DistanceGoM,DistanceGoSTD,TP,FP,FN,TPR,FPR,FNR,PRECISION,F1_SCORE
4,False,GNN,False,0,0.906034,0.154514,2.636295,1.728406,2.195023,1.519362,1.442262,0.869065,312,222,96,0.768458,0.397502,0.231542,0.602498,0.667392
12,False,MLP,False,0,0.908105,0.15259,2.472856,1.601299,2.164441,1.41869,1.376351,0.781359,301,300,107,0.715962,0.471044,0.284038,0.528956,0.588193
5,False,GNN,Greedy,0,0.914654,0.145669,2.552436,1.673352,2.1946,1.604967,1.520563,0.931936,268,78,140,0.659615,0.216769,0.340385,0.783231,0.711614
13,False,MLP,Greedy,0,0.866603,0.215522,2.745261,1.878755,2.252505,1.635737,1.578285,1.052473,262,77,146,0.644018,0.225911,0.355982,0.774089,0.699197
7,False,GNN,Greedy w/ Threshold,0,0.91294,0.145195,2.591067,1.717085,2.254555,1.680924,1.505771,0.922669,267,64,141,0.659047,0.191683,0.340953,0.808317,0.722889
15,False,MLP,Greedy w/ Threshold,0,0.852114,0.225913,2.728903,1.901446,2.266793,1.652327,1.571788,1.02532,247,64,161,0.590311,0.211897,0.409689,0.788103,0.670086
6,False,GNN,Optimization,0,0.912108,0.153167,2.831312,1.904924,2.369003,1.676513,1.487656,0.900149,198,113,210,0.486643,0.35152,0.513357,0.64848,0.544477
14,False,MLP,Optimization,0,0.882413,0.184241,2.857526,1.860889,2.347923,1.654785,1.580158,1.072042,194,117,214,0.477951,0.362664,0.522049,0.637336,0.53627
0,True,GNN,False,0,0.899807,0.164998,2.698548,1.815077,2.241704,1.599468,1.506369,0.955027,303,166,105,0.737433,0.337997,0.262567,0.662003,0.692694
8,True,MLP,False,0,0.902832,0.157579,2.512813,1.665266,2.151948,1.411666,1.375662,0.786234,298,287,110,0.703496,0.461276,0.296504,0.538724,0.584794


In [15]:
# Stack the per-trial tables and summarise every metric across trials with
# mean / std / min / max for each (features, model, constraints) combination.
combined_df = pd.concat(trials_dfs)
grouped = combined_df.groupby(['Angular Features', 'model_type', 'Constraints']).agg({
    metric_name: ['mean', 'std', 'min', 'max']
    for metric_name in [
        'Threshold_level',
        'CosineSimilarityM', 'CosineSimilaritySTD',
        'VecErrorM', 'VecErrorSTD',
        'DistanceNuM', 'DistanceNuSTD',
        'DistanceGoM', 'DistanceGoSTD',
        'TP', 'FP', 'FN',
        'TPR', 'FPR', 'FNR',
        'PRECISION', 'F1_SCORE',
    ]
})

# Turn the three grouping keys (the whole index) back into ordinary columns.
grouped = grouped.reset_index()

def format_column(_mean, _min, _max, _std):
    """Format a metric as ``"<mean>±<spread>"`` with both numbers rounded to 3 decimals.

    The spread is ``|_mean - reference|`` where ``reference`` is whichever of
    ``|_min|`` and ``|_max|`` is larger (``|_max|`` on ties).
    """
    # NOTE(review): `_std` is accepted but never used, and the spread is the
    # distance to the larger-magnitude extreme only -- confirm this is the
    # quantity meant to follow the "±".
    reference = max(abs(_max), abs(_min))
    spread = abs(_mean - reference)
    return "±".join([str(round(_mean, 3)), str(round(spread, 3))])

numeric_cols = [
    'Threshold_level',
    'CosineSimilarityM', 'CosineSimilaritySTD',
    'VecErrorM', 'VecErrorSTD',
    'DistanceNuM', 'DistanceNuSTD',
    'DistanceGoM', 'DistanceGoSTD',
    'TP', 'FP', 'FN',
    'TPR', 'FPR', 'FNR',
    'PRECISION', 'F1_SCORE',
]

# Collapse the four statistics of every metric into one formatted string
# column, then drop the statistic columns that fed it.
for col in numeric_cols:
    cols = [(col, stat) for stat in ('mean', 'min', 'max', 'std')]
    formatted = grouped.apply(
        lambda row: format_column(row[cols[0]], row[cols[1]], row[cols[2]], row[cols[3]]),
        axis=1,
    )
    grouped[(col, "")] = formatted
    grouped = grouped.drop(columns=cols)

# Flatten the two-level column labels into plain strings.
grouped.columns = grouped.columns.map(''.join)
display(grouped.sort_values(by=['Angular Features', 'Constraints','model_type']))

Unnamed: 0,Angular Features,model_type,Constraints,Threshold_level,CosineSimilarityM,CosineSimilaritySTD,VecErrorM,VecErrorSTD,DistanceNuM,DistanceNuSTD,DistanceGoM,DistanceGoSTD,TP,FP,FN,TPR,FPR,FNR,PRECISION,F1_SCORE
0,False,GNN,False,0.0±0.0,0.907±0.004,0.158±0.008,2.581±0.055,1.685±0.044,2.164±0.031,1.471±0.048,1.439±0.031,0.853±0.025,311.333±2.667,229.667±34.333,96.667±3.333,0.764±0.006,0.406±0.025,0.236±0.01,0.594±0.016,0.659±0.01
4,False,MLP,False,0.0±0.0,0.907±0.001,0.154±0.003,2.476±0.011,1.61±0.02,2.158±0.006,1.405±0.014,1.375±0.007,0.774±0.01,300.667±0.333,289.667±10.333,107.333±0.667,0.714±0.002,0.46±0.012,0.286±0.002,0.54±0.008,0.595±0.005
1,False,GNN,Greedy,0.0±0.0,0.9±0.014,0.165±0.01,2.59±0.042,1.665±0.008,2.173±0.021,1.529±0.076,1.535±0.038,0.985±0.08,264.667±3.333,78.0±3.0,143.333±2.667,0.653±0.006,0.221±0.003,0.347±0.012,0.779±0.004,0.707±0.005
5,False,MLP,Greedy,0.0±0.0,0.868±0.003,0.213±0.002,2.742±0.023,1.86±0.026,2.253±0.016,1.638±0.011,1.569±0.009,1.04±0.013,260.667±1.333,78.667±2.333,147.333±1.667,0.641±0.003,0.23±0.006,0.359±0.004,0.77±0.004,0.696±0.003
2,False,GNN,Greedy w/ Threshold,0.0±0.0,0.898±0.015,0.167±0.012,2.619±0.02,1.703±0.014,2.202±0.053,1.573±0.107,1.536±0.047,0.98±0.087,262.667±4.333,65.333±2.667,145.333±3.667,0.649±0.01,0.198±0.003,0.351±0.008,0.802±0.006,0.714±0.009
6,False,MLP,Greedy w/ Threshold,0.0±0.0,0.854±0.005,0.223±0.003,2.733±0.006,1.9±0.004,2.278±0.018,1.663±0.035,1.556±0.016,0.991±0.035,245.0±2.0,64.333±0.667,163.0±2.0,0.586±0.004,0.212±0.004,0.414±0.005,0.788±0.004,0.667±0.003
3,False,GNN,Optimization,0.0±0.0,0.904±0.009,0.163±0.01,2.85±0.095,1.854±0.096,2.343±0.026,1.64±0.037,1.527±0.029,0.953±0.059,200.0±1.0,114.0±3.0,208.0±2.0,0.492±0.004,0.35±0.006,0.508±0.006,0.65±0.007,0.55±0.005
7,False,MLP,Optimization,0.0±0.0,0.885±0.002,0.187±0.002,2.923±0.074,1.914±0.074,2.402±0.073,1.735±0.077,1.597±0.037,1.067±0.046,196.0±2.0,114.0±3.0,212.0±2.0,0.483±0.003,0.352±0.011,0.517±0.005,0.648±0.006,0.543±0.004
8,True,GNN,False,0.0±0.0,0.904±0.005,0.16±0.005,2.634±0.064,1.743±0.072,2.188±0.054,1.514±0.085,1.495±0.037,0.939±0.037,305.333±12.667,207.667±27.333,102.667±10.333,0.749±0.034,0.383±0.023,0.251±0.022,0.617±0.045,0.668±0.025
12,True,MLP,False,0.0±0.0,0.903±0.001,0.158±0.001,2.514±0.005,1.653±0.013,2.167±0.019,1.424±0.024,1.382±0.017,0.783±0.008,297.333±0.667,271.667±15.333,110.667±1.333,0.704±0.004,0.447±0.014,0.296±0.003,0.553±0.009,0.598±0.009


In [None]:
# Keep only the columns wanted in the exported table and print it as LaTeX source.
latex_columns = [
    "Angular Features", "model_type", "Constraints",
    "CosineSimilarityM", "VecErrorM", "DistanceNuM", "DistanceGoM",
    "TPR", "FPR", "FNR", "PRECISION", "F1_SCORE",
]
grouped = grouped[latex_columns]
print(grouped.to_latex())