### Compare final results with MLP

In [3]:
#imports
import os
import numpy as np
import h5py
import matplotlib.pyplot as plt
import seaborn as sns
from meld_graph.training import tp_fp_fn_tn, dice_coeff
from meld_classifier.meld_cohort import MeldSubject,MeldCohort
from meld_graph.icospheres import IcoSpheres
from meld_graph.graph_tools import GraphTools
import itertools
import pandas as pd
from meld_graph.evaluation import load_prediction
from meld_graph import experiment

Setting EXPERIMENT_PATH to /rds/project/kw350/rds-kw350-meld/experiments_graph/kw350


### load per-vertex results

In [4]:
#On test dataset
ref = '/rds/project/kw350/rds-kw350-meld/experiments/co-ripa1/iteration_21-09-15/ensemble_21-09-15/fold_all/results'

with h5py.File(os.path.join(ref, 'predictions_ensemble_iteration.hdf5'), "r") as f:
    subjects = list(f.keys())
subjects.remove('MELD_H4_3T_FCD_0011') # because does not exist in graph model
cohort = MeldCohort(hdf5_file_root='{site_code}_{group}_featurematrix_combat_6.hdf5',
               dataset='MELD_dataset_V6.csv')
print(len(subjects))


# # On wittheld sites
# ref = '/rds/project/kw350/rds-kw350-meld/experiments/co-ripa1/predict_NewSiteH27H28_21-09-20/fold_all/results'

# with h5py.File(os.path.join(ref, 'predictions_ensemble_iteration.hdf5'), "r") as f:
#     subjects = list(f.keys())
# cohort= MeldCohort(hdf5_file_root='{site_code}_{group}_featurematrix_combat_NewSite.hdf5', dataset='MELD_dataset_NewSiteH27H28.csv')
# print(len(subjects))

453


In [5]:
df_old=pd.DataFrame()
subjects_dictionary={}
values={}
for si,subj in enumerate(subjects):
    if si%100==0:
        print(si)
    values['ID']=subj
    s = MeldSubject(subj,cohort=cohort)
    values['group']= True if s.group=='patient' else False
    labels_hemis = {}
    dists={}
    labels = np.zeros(len(cohort.cortex_label)*2)
    for hemi in ['lh','rh']:
        dists[hemi], labels_hemis[hemi] = s.load_feature_lesion_data(
                    features=['.on_lh.boundary_zone.mgh'], hemi=hemi, features_to_ignore=[]
                )
        if np.sum(dists[hemi])==0:
            dists[hemi] +=200
    labels = np.hstack([labels_hemis['lh'][cohort.cortex_mask],labels_hemis['rh'][cohort.cortex_mask]])
    borderzones = np.vstack([dists['lh'][cohort.cortex_mask,:],dists['rh'][cohort.cortex_mask,:]]).ravel()<20
    
    #load pred from old classifier
    pred_file_old = os.path.join(ref, 'predictions_ensemble_iteration.hdf5')
    result_hemis_old = load_prediction(subj,pred_file_old, dset='prediction')
    result_old = np.hstack([result_hemis_old['lh'],result_hemis_old['rh']])
    values['model']='per vertex'
    
    #add detection with borderzone
    if labels.sum()>0:
        values['detected'] = np.logical_and(result_old, borderzones).any()
        values['number TP clusters'] = len(set(result_old[borderzones.astype('bool')]))-1
    else:
        values['number TP clusters'] = 0
    # add number of FP clusters : total clusters - TP clusters
    values['number FP clusters']=len(set(result_hemis_old['lh']))+len(set(result_hemis_old['rh']))-2-values['number TP clusters']
    df_old=pd.concat([df_old,pd.DataFrame([values])])
df_old = df_old.reset_index()
df_old.head()

0
100
200
300
400


Unnamed: 0,index,ID,group,model,detected,number TP clusters,number FP clusters
0,0,MELD2_H7_3T_FCD_001,True,per vertex,False,0,3
1,0,MELD2_H7_3T_FCD_004,True,per vertex,False,0,1
2,0,MELD2_H7_3T_FCD_005,True,per vertex,False,0,0
3,0,MELD2_H7_3T_FCD_009,True,per vertex,True,2,2
4,0,MELD_H10_3T_C_0005,False,per vertex,True,0,0


In [6]:
df_old.groupby('group')['ID'].count()

group
False    193
True     260
Name: ID, dtype: int64

In [7]:
sensitivity = np.mean(df_old['detected'][df_old['group']])
specificity = (df_old['number FP clusters'][df_old['group']==0]>0).mean()
total_detected = np.sum(df_old['number TP clusters'][df_old['group']])
total_predicted = np.sum(df_old['number FP clusters'][df_old['group']])
ppv = total_detected / (total_predicted + total_detected)
print(np.round(sensitivity,2),np.round(1-specificity,2),np.round(ppv,2))

0.67 0.54 0.39


### load results for meld graph model

In [9]:
# for test dataset
experiment_dir = '/rds/project/kw350/rds-kw350-meld/experiments_graph/kw350'
model = '23-10-30_FOPF_dcop'
# model = '23-10-30_MSBS_dcop_with_combat'
df_model = pd.read_csv(os.path.join(experiment_dir,model,
                                          's_0','fold_all','results_best_model','test_results.csv'))

# # for withheld sites
# experiment_dir = '/rds/project/kw350/rds-kw350-meld/experiments_graph/kw350'
# # model = '23-10-30_FOPF_dcop'
# model = '23-10-30_MSBS_dcop_with_combat'

# df_model_h27 = pd.read_csv(os.path.join(experiment_dir,model,'s_0','fold_all', 'test_H27','results_best_model','test_results.csv'))
# df_model_h28 = pd.read_csv(os.path.join(experiment_dir,model,'s_0','fold_all', 'test_H28','results_best_model','test_results.csv'))
# df_model = pd.concat([df_model_h27,df_model_h28])

In [10]:
df_model.groupby('group')['ID'].count()

group
False    193
True     260
Name: ID, dtype: int64

In [11]:
df_model['model'] = ['graph' for x in df_model.iterrows()]
df_model = df_model[['ID','group','detected','number FP clusters','number TP clusters', 'model']]
df_model.head()

Unnamed: 0,ID,group,detected,number FP clusters,number TP clusters,model
0,MELD_H2_15T_FCD_0001,True,False,0,0,graph
1,MELD_H2_15T_FCD_0003,True,True,0,1,graph
2,MELD_H2_15T_FCD_0005,True,False,1,0,graph
3,MELD_H2_15T_FCD_0007,True,True,0,1,graph
4,MELD_H2_15T_FCD_0008,True,False,1,0,graph


In [12]:
sensitivity = np.mean(df_model['detected'][df_model['group']])
specificity = (df_model['number FP clusters'][df_model['group']==0]>0).mean()
total_detected = np.sum(df_model['number TP clusters'][df_model['group']])
total_predicted = np.sum(df_model['number FP clusters'][df_model['group']])
ppv = total_detected / (total_predicted + total_detected)
print(np.round(sensitivity,2),np.round(1-specificity,2),np.round(ppv,2))

0.68 0.7 0.66


In [10]:
# ## Concat results

# df = pd.concat([df_old, df_model], ignore_index=True)
# df.head()

Unnamed: 0,index,ID,group,model,detected,number TP clusters,number FP clusters
0,0.0,MELD2_H7_3T_FCD_001,True,per vertex,False,0,3
1,0.0,MELD2_H7_3T_FCD_004,True,per vertex,False,0,1
2,0.0,MELD2_H7_3T_FCD_005,True,per vertex,False,0,0
3,0.0,MELD2_H7_3T_FCD_009,True,per vertex,True,2,2
4,0.0,MELD_H10_3T_C_0005,False,per vertex,True,0,0


### add breakdown

In [27]:
# add demographic
age_array=[]
sex_array=[]
histo_array=[]
site_array=[]
scanner_array=[]
flair_array=[]
group_array=[]
sf_array=[]
mri_negative_array=[]
for subject in df_model['ID']:
    subj = MeldSubject(subject, cohort)
    age, sex, histo, site, sf, mri_negative = subj.get_demographic_features(["Age at preoperative", "Sex", "Histology", "Site", "Seizure free", "Ever reported MRI negative"])
    scanner_array.append(subj.scanner)
    age_array.append(age)
    sex_array.append(sex)
    histo_array.append(histo)
    site_array.append(site)
    sf_array.append(sf)
    flair_array.append(subj.has_flair)
    mri_negative_array.append(mri_negative)

    
df_model['Age at preoperative']=age_array
df_model['Sex']=sex_array
df_model['Histology']=histo_array
df_model['Site']=site_array
df_model['Scanner']=scanner_array
df_model['FLAIR']=flair_array
df_model['Seizure free']=sf_array
df_model['Ever reported MRI negative'] = mri_negative_array

df_model.head()

Unnamed: 0,ID,group,detected,number FP clusters,number TP clusters,model,age,sex,histo,site,seizure_freedom,scanner,FLAIR,Histology,Scanner,Seizure free,Ever reported MRI negative,Age at preoperative,Sex,Site
0,MELD_H2_15T_FCD_0001,True,False,0,0,graph,20.0,1.0,,H2,1.0,15T,False,,15T,1.0,0.0,20.0,1.0,H2
1,MELD_H2_15T_FCD_0003,True,True,0,1,graph,10.0,0.0,,H2,,15T,False,,15T,,0.0,10.0,0.0,H2
2,MELD_H2_15T_FCD_0005,True,False,1,0,graph,20.0,1.0,,H2,,15T,False,,15T,,0.0,20.0,1.0,H2
3,MELD_H2_15T_FCD_0007,True,True,0,1,graph,4.0,1.0,FCD_2B,H2,1.0,15T,False,FCD_2B,15T,1.0,0.0,4.0,1.0,H2
4,MELD_H2_15T_FCD_0008,True,False,1,0,graph,10.0,1.0,,H2,,15T,False,,15T,,0.0,10.0,1.0,H2


In [21]:
pat = df_model[df_model['group']==True]

In [22]:
disp_df=pd.DataFrame(100*pat.groupby('Scanner').mean()['detected']).round(1)
disp_df['count'] = pat.groupby('Scanner').count()['detected']
disp_df

Unnamed: 0_level_0,detected,count
Scanner,Unnamed: 1_level_1,Unnamed: 2_level_1
15T,64.3,56
3T,69.6,204


In [23]:
disp_df=pd.DataFrame(100*pat.groupby(['Scanner','FLAIR']).mean()['detected']).round(1)
disp_df['count'] = pat.groupby(['Scanner','FLAIR']).count()['detected']
disp_df

Unnamed: 0_level_0,Unnamed: 1_level_0,detected,count
Scanner,FLAIR,Unnamed: 2_level_1,Unnamed: 3_level_1
15T,False,58.3,36
15T,True,75.0,20
3T,False,71.1,114
3T,True,67.8,90


In [24]:
disp_df=pd.DataFrame(100*pat.groupby(['Seizure free']).mean()['detected']).round(1)
disp_df['count'] = pat.groupby(['Seizure free']).count()['detected']
disp_df

Unnamed: 0_level_0,detected,count
Seizure free,Unnamed: 1_level_1,Unnamed: 2_level_1
0.0,66.7,51
1.0,78.3,106


In [25]:
disp_df=pd.DataFrame(100*pat.groupby(['Histology']).mean()['detected']).round(1)
disp_df['n patients'] = pat.groupby(['Histology']).count()['detected']
disp_df.rename(columns={'detected':'% Detected'})

Unnamed: 0_level_0,% Detected,n patients
Histology,Unnamed: 1_level_1,Unnamed: 2_level_1
FCD_1,69.2,13
FCD_2A,73.7,57
FCD_2B,79.6,93
FCD_3,75.0,8


In [26]:
disp_df=pd.DataFrame(100*pat.groupby(['Ever reported MRI negative']).mean()['detected']).round(1)
disp_df['count'] = pat.groupby(['Ever reported MRI negative']).count()['detected']
disp_df

Unnamed: 0_level_0,detected,count
Ever reported MRI negative,Unnamed: 1_level_1,Unnamed: 2_level_1
0.0,71.7,180
1.0,61.3,80


In [None]:
pat['paediatric'] = pat['Age at preoperative']<18
disp_df=pd.DataFrame(100*pat.groupby(['paediatric']).mean()['detected']).round(1)
disp_df['count'] = pat.groupby(['paediatric']).count()['detected']
disp_df