# Classification

In [73]:
import pandas as pd
import glob
from analysis.utils import resolve_split_csv_path
import os
import matplotlib.pyplot as plt
import matplotlib.image as mpimg

# Project root. Defaults to the original author's local checkout; set the
# ANM_VERONA_ROOT environment variable to run the notebook on another machine.
PROJECT_ROOT = os.environ.get(
    "ANM_VERONA_ROOT", "/Users/emmatosato/Documents/PhD/ANM_Verona"
)
SUMMARY_FILENAME = "summary_all_seeds.csv"
VOXEL_CLASS_DIR = os.path.join(PROJECT_ROOT, "output", "voxel", "umap_classification")
NETWORK_CLASS_DIR = os.path.join(PROJECT_ROOT, "output", "networks", "classification")

# Paths to the voxel (UMAP) classification summaries, one per group pair
path_class_adni_psp = os.path.join(VOXEL_CLASS_DIR, "adni_psp", SUMMARY_FILENAME)
path_class_adni_cbs = os.path.join(VOXEL_CLASS_DIR, "adni_cbs", SUMMARY_FILENAME)
path_class_cbs_psp = os.path.join(VOXEL_CLASS_DIR, "cbs_psp", SUMMARY_FILENAME)

# Paths to the network classification summaries, one per group pair.
# Note the PSP/CBS folder is named "psp_cbs" here but "cbs_psp" for the voxel results.
path_class_net_adni_psp = os.path.join(NETWORK_CLASS_DIR, "adni_psp", SUMMARY_FILENAME)
path_class_net_adni_cbs = os.path.join(NETWORK_CLASS_DIR, "adni_cbs", SUMMARY_FILENAME)
path_class_net_psp_cbs = os.path.join(NETWORK_CLASS_DIR, "psp_cbs", SUMMARY_FILENAME)

# Directory holding the train/test split CSVs (passed to resolve_split_csv_path)
dir_split = os.path.join(PROJECT_ROOT, "data", "dataframes", "split")

# Dataframes voxel classification
df_classs_adni_psp = pd.read_csv(path_class_adni_psp)
df_classs_adni_cbs = pd.read_csv(path_class_adni_cbs)
df_classs_cbs_psp = pd.read_csv(path_class_cbs_psp)

# Dataframes network classification
df_classs_net_adni_psp = pd.read_csv(path_class_net_adni_psp)
df_classs_net_adni_cbs = pd.read_csv(path_class_net_adni_cbs)
df_classs_net_cbs_psp = pd.read_csv(path_class_net_psp_cbs)


# Column names of the cross-validation and hold-out test metrics,
# used by print_result_cv / print_result_test to select what to summarise
cv_metrics = ["cv_accuracy", "cv_precision", "cv_recall", "cv_f1", "cv_auc_roc"]
test_metrics = ["test_accuracy", "test_precision", "test_recall", "test_f1", "test_auc_roc"]

In [74]:
def get_variables(df):
    """Group a classification summary by model.

    Parameters
    ----------
    df : pandas.DataFrame
        Table that contains a "model" column.

    Returns
    -------
    pandas.core.groupby.DataFrameGroupBy
        The lazy groupby object keyed on "model"; iterate it to get
        ``(model_name, sub_frame)`` pairs.
    """
    return df.groupby(by="model")

def print_info(df_split, df_class):
    """Print which seeds/models a results table covers and the split sizes.

    Parameters
    ----------
    df_split : pandas.DataFrame
        Table with a "split" column; rows labelled 'train' and 'test' are counted.
    df_class : pandas.DataFrame
        Results table with "seed" and "model" columns.

    Returns
    -------
    None
        Everything is written to standard output.
    """
    # Columns are accessed right before each print so that a missing column
    # fails at the same point (after the same partial output) as before.
    seed_col = df_class["seed"]
    print(f"Number of seeds: {seed_col.nunique()} and seeds: {seed_col.unique()}")

    model_col = df_class["model"]
    print(f"Number of models: {model_col.nunique()} and types: {model_col.unique()}\n")

    n_train = (df_split["split"] == "train").sum()
    print(f"Training set size: {n_train}")

    n_test = (df_split["split"] == "test").sum()
    print(f"Test set size: {n_test}")

def _print_metric_summary(grouped, group1, group2, header, metrics):
    """Print a per-model mean/std table for the given metric columns.

    Shared implementation behind print_result_cv and print_result_test.

    Parameters
    ----------
    grouped : iterable of (model_name, DataFrame)
        Typically the groupby object returned by get_variables.
    group1, group2 : str
        Labels of the two compared groups; only used in the printed title.
    header : str
        Section banner printed once before the per-model tables.
    metrics : list of str
        Columns of each sub-frame to aggregate.
    """
    print(f"\nGroup {group1} vs {group2}\n")
    print(header)
    for model_name, group in grouped:
        print(f"\nModel: {model_name}")
        # mean/std are taken over the rows of the group (one row per seed is
        # assumed) and rounded to 3 decimals. `display` is the rich-output
        # function IPython makes available in the notebook namespace.
        display(group[metrics].agg(["mean", "std"]).round(3))
    print("\n")


def print_result_cv(grouped, group1, group2):
    """Show mean/std of the cross-validation metrics (`cv_metrics`) per model.

    Parameters
    ----------
    grouped : iterable of (model_name, DataFrame)
        Results grouped by model, e.g. from get_variables.
    group1, group2 : str
        Labels of the two compared groups, used in the printed title.
    """
    _print_metric_summary(
        grouped, group1, group2,
        "=== Cross-validation across seeds ===",
        cv_metrics,
    )


def print_result_test(grouped, group1, group2):
    """Show mean/std of the hold-out test metrics (`test_metrics`) per model.

    Parameters
    ----------
    grouped : iterable of (model_name, DataFrame)
        Results grouped by model, e.g. from get_variables.
    group1, group2 : str
        Labels of the two compared groups, used in the printed title.
    """
    _print_metric_summary(
        grouped, group1, group2,
        "=== Test performance on holdout set across seeds=== ",
        test_metrics,
    )

# Voxel classification

## ADNI vs PSP

In [75]:
# Select the ADNI vs PSP comparison. group1/group2 are notebook-level
# variables that the next cells read when printing results.
group1 = "ADNI"
group2 = "PSP"
# Resolve the split CSV for this pair inside dir_split and load it
split_path = resolve_split_csv_path(dir_split, group1 , group2)
df_split = pd.read_csv(split_path)

# Report seeds, models and train/test set sizes for this comparison
print_info(df_split, df_classs_adni_psp)


Using split file: /Users/emmatosato/Documents/PhD/ANM_Verona/data/dataframes/split/ADNI_PSP_splitted.csv

Number of seeds: 5 and seeds: [   42   123  2023 31415 98765]
Number of models: 2 and types: ['SVM' 'RandomForest']

Training set size: 105
Test set size: 27


In [76]:
# Group the voxel ADNI vs PSP results by model and show the cross-validation summary.
# group1/group2 are not set here: they must still hold the values assigned in the ADNI vs PSP cell.
grouped_adni_psp = get_variables(df_classs_adni_psp)
print_result_cv(grouped_adni_psp, group1, group2)


Group ADNI vs PSP

=== Cross-validation across seeds ===

Model: RandomForest


Unnamed: 0,cv_accuracy,cv_precision,cv_recall,cv_f1,cv_auc_roc
mean,0.612,0.613,0.61,0.603,0.637
std,0.025,0.024,0.024,0.029,0.02



Model: SVM


Unnamed: 0,cv_accuracy,cv_precision,cv_recall,cv_f1,cv_auc_roc
mean,0.653,0.663,0.657,0.65,0.603
std,0.005,0.004,0.006,0.007,0.056






In [77]:
# Hold-out test summary for the same grouped results (reuses grouped_adni_psp, group1, group2).
# NOTE(review): the saved SVM output shows std 0.0 on every test metric — confirm the seed actually influences the SVM runs.
print_result_test(grouped_adni_psp, group1, group2)


Group ADNI vs PSP

=== Test performance on holdout set across seeds=== 

Model: RandomForest


Unnamed: 0,test_accuracy,test_precision,test_recall,test_f1,test_auc_roc
mean,0.637,0.636,0.616,0.612,0.643
std,0.017,0.017,0.019,0.021,0.027



Model: SVM


Unnamed: 0,test_accuracy,test_precision,test_recall,test_f1,test_auc_roc
mean,0.667,0.675,0.675,0.667,0.728
std,0.0,0.0,0.0,0.0,0.0






## ADNI vs CBS

In [78]:
# Switch the notebook-level group labels to the ADNI vs CBS comparison
group1 = "ADNI"
group2 = "CBS"
# Resolve the split CSV for this pair inside dir_split and load it
# (overwrites split_path/df_split from the previous comparison)
split_path = resolve_split_csv_path(dir_split, group1 , group2)
df_split = pd.read_csv(split_path)

# Report seeds, models and train/test set sizes for this comparison
print_info(df_split, df_classs_adni_cbs)


Using split file: /Users/emmatosato/Documents/PhD/ANM_Verona/data/dataframes/split/ADNI_CBS_splitted.csv

Number of seeds: 5 and seeds: [   42   123  2023 31415 98765]
Number of models: 2 and types: ['SVM' 'RandomForest']

Training set size: 92
Test set size: 24


In [79]:
# Group the voxel ADNI vs CBS results by model and show the cross-validation summary.
# group1/group2 must still hold the values assigned in the ADNI vs CBS cell.
grouped_adni_cbs = get_variables(df_classs_adni_cbs)
print_result_cv(grouped_adni_cbs, group1, group2)


Group ADNI vs CBS

=== Cross-validation across seeds ===

Model: RandomForest


Unnamed: 0,cv_accuracy,cv_precision,cv_recall,cv_f1,cv_auc_roc
mean,0.552,0.526,0.524,0.517,0.608
std,0.016,0.023,0.022,0.023,0.021



Model: SVM


Unnamed: 0,cv_accuracy,cv_precision,cv_recall,cv_f1,cv_auc_roc
mean,0.576,0.346,0.474,0.386,0.416
std,0.019,0.033,0.011,0.006,0.024






In [80]:
# Hold-out test summary for the same grouped results (reuses grouped_adni_cbs, group1, group2)
print_result_test(grouped_adni_cbs, group1, group2)


Group ADNI vs CBS

=== Test performance on holdout set across seeds=== 

Model: RandomForest


Unnamed: 0,test_accuracy,test_precision,test_recall,test_f1,test_auc_roc
mean,0.625,0.583,0.567,0.564,0.404
std,0.0,0.0,0.0,0.0,0.017



Model: SVM


Unnamed: 0,test_accuracy,test_precision,test_recall,test_f1,test_auc_roc
mean,0.625,0.312,0.5,0.385,0.549
std,0.0,0.0,0.0,0.0,0.002






## PSP vs CBS

In [81]:
# PSP vs CBS: switch the notebook-level group labels to this comparison
group1 = "PSP"
group2 = "CBS"
# Resolve the split CSV for this pair inside dir_split and load it
# (overwrites split_path/df_split from the previous comparison)
split_path = resolve_split_csv_path(dir_split, group1, group2)
df_split = pd.read_csv(split_path)

# Report seeds, models and train/test set sizes.
# The results frame is named cbs_psp although the labels are ordered PSP, CBS.
print_info(df_split, df_classs_cbs_psp)


Using split file: /Users/emmatosato/Documents/PhD/ANM_Verona/data/dataframes/split/PSP_CBS_splitted.csv

Number of seeds: 5 and seeds: [   42   123  2023 31415 98765]
Number of models: 2 and types: ['SVM' 'RandomForest']

Training set size: 83
Test set size: 21


In [82]:
# Group the voxel PSP vs CBS results by model and show the cross-validation summary.
# group1/group2 must still hold the values assigned in the PSP vs CBS cell.
grouped_psp_cbs = get_variables(df_classs_cbs_psp)
print_result_cv(grouped_psp_cbs, group1, group2)


Group PSP vs CBS

=== Cross-validation across seeds ===

Model: RandomForest


Unnamed: 0,cv_accuracy,cv_precision,cv_recall,cv_f1,cv_auc_roc
mean,0.605,0.589,0.58,0.575,0.604
std,0.064,0.069,0.067,0.07,0.056



Model: SVM


Unnamed: 0,cv_accuracy,cv_precision,cv_recall,cv_f1,cv_auc_roc
mean,0.578,0.289,0.5,0.366,0.431
std,0.0,0.0,0.0,0.0,0.047






In [83]:
# Hold-out test summary for the same grouped results (reuses grouped_psp_cbs, group1, group2)
print_result_test(grouped_psp_cbs, group1, group2)


Group PSP vs CBS

=== Test performance on holdout set across seeds=== 

Model: RandomForest


Unnamed: 0,test_accuracy,test_precision,test_recall,test_f1,test_auc_roc
mean,0.457,0.491,0.492,0.44,0.555
std,0.026,0.038,0.031,0.021,0.061



Model: SVM


Unnamed: 0,test_accuracy,test_precision,test_recall,test_f1,test_auc_roc
mean,0.571,0.286,0.5,0.364,0.472
std,0.0,0.0,0.0,0.0,0.0






# Networks Classification

## ADNI vs PSP

In [84]:
# Load the networks table (presumably the un-thresholded version, judging by the file name).
# NOTE(review): df_net_noThr is not referenced by any other cell visible in this notebook — confirm it is still needed.
df_net_noThr = pd.read_csv('/Users/emmatosato/Documents/PhD/ANM_Verona/data/dataframes/networks/networks_noTHR.csv')

In [85]:
# Network classification, ADNI vs PSP: set the labels used in the printed titles
group1 = "ADNI"
group2 = "PSP"

# Group the network results by model
grouped_net_adni_psp = get_variables(df_classs_net_adni_psp)

# Cross-validation summary per model
print_result_cv(grouped_net_adni_psp, group1, group2)


Group ADNI vs PSP

=== Cross-validation across seeds ===

Model: RandomForest


Unnamed: 0,cv_accuracy,cv_precision,cv_recall,cv_f1,cv_auc_roc
mean,0.659,0.667,0.654,0.65,0.692
std,0.04,0.044,0.038,0.039,0.02



Model: SVM


Unnamed: 0,cv_accuracy,cv_precision,cv_recall,cv_f1,cv_auc_roc
mean,0.674,0.695,0.66,0.654,0.693
std,0.017,0.024,0.017,0.019,0.014






In [86]:
# Hold-out test summary for the same grouped results (reuses grouped_net_adni_psp, group1, group2)
print_result_test(grouped_net_adni_psp, group1, group2)


Group ADNI vs PSP

=== Test performance on holdout set across seeds=== 

Model: RandomForest


Unnamed: 0,test_accuracy,test_precision,test_recall,test_f1,test_auc_roc
mean,0.593,0.59,0.59,0.589,0.639
std,0.026,0.027,0.027,0.027,0.034



Model: SVM


Unnamed: 0,test_accuracy,test_precision,test_recall,test_f1,test_auc_roc
mean,0.519,0.5,0.5,0.494,0.639
std,0.0,0.0,0.0,0.0,0.0






## ADNI vs CBS

In [87]:
# Network classification, ADNI vs CBS: set the labels used in the printed titles
group1 = "ADNI"
group2 = "CBS"

# Group the network results by model
grouped_net_adni_cbs = get_variables(df_classs_net_adni_cbs)

# Cross-validation summary per model
print_result_cv(grouped_net_adni_cbs, group1, group2)


Group ADNI vs CBS

=== Cross-validation across seeds ===

Model: RandomForest


Unnamed: 0,cv_accuracy,cv_precision,cv_recall,cv_f1,cv_auc_roc
mean,0.533,0.461,0.478,0.46,0.551
std,0.022,0.037,0.021,0.029,0.037



Model: SVM


Unnamed: 0,cv_accuracy,cv_precision,cv_recall,cv_f1,cv_auc_roc
mean,0.629,0.609,0.535,0.487,0.559
std,0.012,0.08,0.009,0.008,0.057






In [88]:
# Hold-out test summary for the same grouped results (reuses grouped_net_adni_cbs, group1, group2)
print_result_test(grouped_net_adni_cbs, group1, group2)


Group ADNI vs CBS

=== Test performance on holdout set across seeds=== 

Model: RandomForest


Unnamed: 0,test_accuracy,test_precision,test_recall,test_f1,test_auc_roc
mean,0.633,0.582,0.556,0.534,0.507
std,0.035,0.068,0.049,0.073,0.025



Model: SVM


Unnamed: 0,test_accuracy,test_precision,test_recall,test_f1,test_auc_roc
mean,0.667,0.667,0.578,0.556,0.481
std,0.0,0.0,0.0,0.0,0.0






## PSP vs CBS

In [89]:
# Network classification, PSP vs CBS: set the labels used in the printed titles
group1 = "PSP"
group2 = "CBS"

# Group the network results by model.
# The frame is named cbs_psp but was read from path_class_net_psp_cbs in the setup cell.
grouped_net_cbs_psp = get_variables(df_classs_net_cbs_psp)

# Cross-validation summary per model
print_result_cv(grouped_net_cbs_psp, group1, group2)


Group PSP vs CBS

=== Cross-validation across seeds ===

Model: RandomForest


Unnamed: 0,cv_accuracy,cv_precision,cv_recall,cv_f1,cv_auc_roc
mean,0.601,0.595,0.58,0.571,0.647
std,0.063,0.066,0.062,0.067,0.061



Model: SVM


Unnamed: 0,cv_accuracy,cv_precision,cv_recall,cv_f1,cv_auc_roc
mean,0.579,0.59,0.533,0.495,0.482
std,0.011,0.048,0.009,0.011,0.097






In [90]:
# Hold-out test summary for the same grouped results (reuses grouped_net_cbs_psp, group1, group2)
print_result_test(grouped_net_cbs_psp, group1, group2)


Group PSP vs CBS

=== Test performance on holdout set across seeds=== 

Model: RandomForest


Unnamed: 0,test_accuracy,test_precision,test_recall,test_f1,test_auc_roc
mean,0.667,0.662,0.65,0.651,0.657
std,0.034,0.037,0.03,0.031,0.022



Model: SVM


Unnamed: 0,test_accuracy,test_precision,test_recall,test_f1,test_auc_roc
mean,0.619,0.612,0.583,0.571,0.602
std,0.0,0.0,0.0,0.0,0.0




