## Notebook to compare final results MELD graph vs MELD per vertex
## Results on test cohort and withheld sites H27 H28 H101

In [1]:
#imports
import os
import numpy as np
import h5py
import matplotlib.pyplot as plt
import seaborn as sns
import sys
sys.path.append('/home/co-ripa1/rds/hpc-work/scripts/meld_classifier')
from meld_classifier.meld_cohort import MeldSubject,MeldCohort
from meld_graph.training import tp_fp_fn_tn, dice_coeff
from meld_graph.icospheres import IcoSpheres
from meld_graph.graph_tools import GraphTools
import itertools
import pandas as pd
from meld_graph.evaluation import load_prediction
from meld_graph import experiment

Setting MELD_DATA_PATH to /rds/user/kw350/rds-kw350-meld/meld_data/Data/
Setting BASE_PATH to /rds/user/kw350/rds-kw350-meld/meld_data/Data/
Setting EXPERIMENT_PATH to /rds/user/kw350/rds-kw350-meld/experiments/kw350/
No fs_subjects_path defined in /home/kw350/software/gdl/meld_classifier/meld_config.ini!
Setting EXPERIMENT_PATH to /rds/project/kw350/rds-kw350-meld/experiments_graph/kw350
NOTE: captum not found. You will not be able to compute saliency.


### load per-vertex results

In [2]:
#dataset one of ['test','indi_test']
dataset = 'indi_test'
#for H101 boundaries
cohort2= MeldCohort(hdf5_file_root='{site_code}_{group}_featurematrix_combat_fastsurfer_harmonised_NewSite.hdf5', 
                           dataset='MELD_dataset_NewSiteH27H28H101_nc.csv')

In [3]:
def load_cohort_mlp(dataset):
    # Load the MELD cohort
    if dataset == 'test':
        ref = '/rds/project/kw350/rds-kw350-meld/experiments/co-ripa1/iteration_21-09-15/ensemble_21-09-15/fold_all/results'
        with h5py.File(os.path.join(ref, 'predictions_ensemble_iteration.hdf5'), "r") as f:
            subjects = list(f.keys())
        subjects.remove('MELD_H4_3T_FCD_0011') # because does not exist in graph model
        cohort = MeldCohort(hdf5_file_root='{site_code}_{group}_featurematrix_combat_6.hdf5',
                dataset='MELD_dataset_V6.csv')
    elif dataset == 'indi_test':
        refh27h28 = '/rds/project/kw350/rds-kw350-meld/experiments/co-ripa1/predict_NewSiteH27H28_21-09-20/fold_all/results'
        refh101 = '/rds/project/kw350/rds-kw350-meld/experiments/co-ripa1/predict_NewSiteH101_24-02-20/results'
        with h5py.File(os.path.join(refh27h28, 'predictions_ensemble_iteration.hdf5'), "r") as f:
            subjects = list(f.keys())
        with h5py.File(os.path.join(refh101, 'predictions_ensemble_iteration.hdf5'), "r") as f:
            subjects = subjects + list(f.keys())
        cohort= MeldCohort(hdf5_file_root='{site_code}_{group}_featurematrix_combat_6_kernels_robustCombat_NewSite.hdf5', dataset='MELD_dataset_NewSiteH27H28H101.csv')
    else:
        raise ValueError('Unknown dataset')
    return cohort, subjects

In [4]:
cohort, subjects =  load_cohort_mlp(dataset)
refh27h28 = '/rds/project/kw350/rds-kw350-meld/experiments/co-ripa1/predict_NewSiteH27H28_21-09-20/fold_all/results'
refh101 = '/rds/project/kw350/rds-kw350-meld/experiments/co-ripa1/predict_NewSiteH101_24-02-20/results'
ref = '/rds/project/kw350/rds-kw350-meld/experiments/co-ripa1/iteration_21-09-15/ensemble_21-09-15/fold_all/results'


In [5]:
#load the predictions
df_old=pd.DataFrame()
subjects_dictionary={}
#values becomes each row of the dataframe
values={}
for si,subj in enumerate(subjects):
    if si%100==0:
        print(si)
    values['ID']=subj
    #load the subject
    if "H101" in subj:
        s = MeldSubject(subj,cohort=cohort2)
    else:
        s = MeldSubject(subj,cohort=cohort)
    #add the group
    values['group']= True if s.group=='patient' else False
    labels_hemis = {}
    dists={}
    labels = np.zeros(len(cohort.cortex_label)*2)
    #load the borderzone
    for hemi in ['lh','rh']:
        dists[hemi], labels_hemis[hemi] = s.load_feature_lesion_data(
                    features=['.on_lh.boundary_zone.mgh'], hemi=hemi, features_to_ignore=[]
                )
        if np.sum(dists[hemi])==0:
            dists[hemi] +=200
    labels = np.hstack([labels_hemis['lh'][cohort.cortex_mask],labels_hemis['rh'][cohort.cortex_mask]])
    borderzones = np.vstack([dists['lh'][cohort.cortex_mask,:],dists['rh'][cohort.cortex_mask,:]]).ravel()<20
    #load pred from old classifier
    if 'H101' in subj:
        pred_file_old = os.path.join(refh101, 'predictions_ensemble_iteration.hdf5')
    elif ('H27' in subj) or ('H28' in subj):
        pred_file_old = os.path.join(refh27h28, 'predictions_ensemble_iteration.hdf5')
    else:
        pred_file_old = os.path.join(ref, 'predictions_ensemble_iteration.hdf5')

    result_hemis_old = load_prediction(subj,pred_file_old, dset='prediction')
    result_old = np.hstack([result_hemis_old['lh'],result_hemis_old['rh']])
    values['model']='per vertex'
    
    #add detection with borderzone
    if labels.sum()>0:
        values['detected'] = np.logical_and(result_old, borderzones).any()
        # add number of TP clusters - number of clusters that are in the borderzone
        clusters_in_borderzone = set(result_old[borderzones.astype('bool')])
        #remove the 0 cluster
        if 0 in clusters_in_borderzone:
            clusters_in_borderzone.remove(0)
        values['number TP clusters'] = len(clusters_in_borderzone)
        values['size_pred'] = len(result_old[borderzones.astype('bool')])
    else:
        values['number TP clusters'] = 0
    # add number of FP clusters : total clusters - TP clusters
    values['number FP clusters']=len(set(result_hemis_old['lh']))+len(set(result_hemis_old['rh']))-2-values['number TP clusters']
    df_old=pd.concat([df_old,pd.DataFrame([values])])
df_old = df_old.reset_index()
df_old.head()

0
100
200


Unnamed: 0,index,ID,group,model,number TP clusters,number FP clusters,detected,size_pred
0,0,MELD_H27_3T_C_0018,False,per vertex,0,1,,
1,0,MELD_H27_3T_C_0019,False,per vertex,0,1,,
2,0,MELD_H27_3T_C_0020,False,per vertex,0,0,,
3,0,MELD_H27_3T_C_0021,False,per vertex,0,5,,
4,0,MELD_H27_3T_C_0022,False,per vertex,0,0,,


In [6]:
def df_stats(df):
    sensitivity = np.mean(df['detected'][df['group']])
    specificity = (df['number FP clusters'][df['group']==0]>0).mean()
    total_detected = np.sum(df['number TP clusters'][df['group']])
    total_predicted = np.sum(df['number FP clusters'][df['group']])
    ppv = total_detected / (total_predicted + total_detected)
    return np.round(sensitivity,2),np.round(1-specificity,2),np.round(ppv,2)

# bootstrapped confidence intervals
def bootstrap_CI(df, n=10000, func=df_stats):
    """Calculate confidence intervals for a given function"""
    bootstrapped = []
    for i in range(n):
        bootstrapped.append(func(df.sample(len(df), replace=True)))
    return np.percentile(np.array(bootstrapped), [2.5, 97.5],axis=0)

In [7]:
print('all together')
dfsub = df_old.copy()
dfsub_mlp = dfsub.copy()
sensitivity, specificity, ppv = df_stats(dfsub)
print(np.round(sensitivity,2),np.round(1-specificity,2),np.round(ppv,2))
print(bootstrap_CI(dfsub))
try:
    for site in ['H27', 'H28', 'H101']:
        print(site)
        dfsub = df_old[df_old['ID'].str.contains(site)]
        sensitivity, specificity, ppv = df_stats(dfsub)
        print(np.round(sensitivity,2),np.round(1-specificity,2),np.round(ppv,2))
except:
    pass


all together
0.77 0.53 0.46
[[0.69 0.37 0.39]
 [0.84 0.56 0.53]]
H27
0.94 0.83 0.52
H28
0.62 nan 0.32
H101
0.76 0.47 0.49


### load results for meld graph model

In [92]:
def load_cohort_graph(dataset,model,no_combat=False):

    # Load the MELD cohort
    nc = ''
    if no_combat:
        nc = '_nc'
    if dataset == 'test':
        experiment_dir = '/rds/project/kw350/rds-kw350-meld/experiments_graph/kw350'
        
        pred_file = os.path.join(experiment_dir,model,'s_0','fold_all','results_best_model', 'predictions.hdf5')
        cohort= MeldCohort(hdf5_file_root='{site_code}_{group}_featurematrix_combat_6_kernels_noCombat.hdf5',
                            dataset='MELD_dataset_v6.csv')
    elif dataset == 'indi_test':
        experiment_dir = '/rds/project/kw350/rds-kw350-meld/experiments_graph/kw350'
        pred_file = os.path.join(experiment_dir,model,'s_0','fold_all',
                                  'test_H27H28H101{}'.format(nc),'results_best_model', 'predictions.hdf5')
        cohort= MeldCohort(hdf5_file_root='{site_code}_{group}_featurematrix_combat_6_kernels_robustCombat_NewSite.hdf5', 
                           dataset='MELD_dataset_NewSiteH27H28H101{}.csv'.format(nc))
            
    else:
        raise ValueError('Unknown dataset')
    with h5py.File(pred_file, "r") as f:
        subjects = list(f.keys())
    return cohort, subjects,pred_file

In [97]:
#  #for test dataset
model = '23-10-30_LVHZ_dcp'
#model='23-10-30_MSBS_dcop_with_combat'
# # model ='24-01-04_best_dcop_with_combat'

cohort, subjects, pred_file = load_cohort_graph(dataset,model,
no_combat=True)

In [100]:
df=pd.DataFrame()
subjects_dictionary={}
values={}
for si,subj in enumerate(subjects):
    if si%100==0:
        print(si)
    values['ID']=subj
    if "H101" in subj:
        s = MeldSubject(subj,cohort=cohort2)
    else:
        s = MeldSubject(subj,cohort=cohort)
    values['group']= True if s.group=='patient' else False
    labels_hemis = {}
    dists={}
    labels = np.zeros(len(cohort.cortex_label)*2)
    for hemi in ['lh','rh']:
        dists[hemi], labels_hemis[hemi] = s.load_feature_lesion_data(
                    features=['.on_lh.boundary_zone.mgh'], hemi=hemi, features_to_ignore=[]
                )
        if np.sum(dists[hemi])==0:
            dists[hemi] +=200
    labels = np.hstack([labels_hemis['lh'][cohort.cortex_mask],labels_hemis['rh'][cohort.cortex_mask]])
    borderzones = np.vstack([dists['lh'][cohort.cortex_mask,:],dists['rh'][cohort.cortex_mask,:]]).ravel()<20
    #load pred from graph classifier

    result_hemis = load_prediction(subj,pred_file, dset='prediction_clustered')
    result = np.hstack([result_hemis['lh'],result_hemis['rh']])
    values['model']='graph'
    
    #add detection with borderzone
    if labels.sum()>0:
        values['detected'] = np.logical_and(result, borderzones).any()
        clusters_in_borderzone = set(result[borderzones.astype('bool')])
        #remove the 0 cluster
        if 0 in clusters_in_borderzone:
            clusters_in_borderzone.remove(0)
        values['number TP clusters'] = len(clusters_in_borderzone)
        values['size_pred'] = len(result[borderzones.astype('bool')])
    else:
        values['number TP clusters'] = 0
    # add number of FP clusters : total clusters - TP clusters
    values['number FP clusters']=len(set(result_hemis['lh']))+len(set(result_hemis['rh']))-2-values['number TP clusters']
    df=pd.concat([df,pd.DataFrame([values])])
    
    
df = df.reset_index()
df.head()

0
100
200


Unnamed: 0,index,ID,group,model,number TP clusters,number FP clusters,detected,size_pred
0,0,MELD_H101_3T_C_00002,False,graph,0,1,,
1,0,MELD_H101_3T_C_00005,False,graph,0,1,,
2,0,MELD_H101_3T_C_00008,False,graph,0,1,,
3,0,MELD_H101_3T_C_00011,False,graph,0,1,,
4,0,MELD_H101_3T_C_00012,False,graph,0,0,,


In [101]:
df

Unnamed: 0,index,ID,group,model,number TP clusters,number FP clusters,detected,size_pred
0,0,MELD_H101_3T_C_00002,False,graph,0,1,,
1,0,MELD_H101_3T_C_00005,False,graph,0,1,,
2,0,MELD_H101_3T_C_00008,False,graph,0,1,,
3,0,MELD_H101_3T_C_00011,False,graph,0,1,,
4,0,MELD_H101_3T_C_00012,False,graph,0,0,,
...,...,...,...,...,...,...,...,...
212,0,MELD_H28_3T_FCD_0019,True,graph,0,1,False,11741.0
213,0,MELD_H28_3T_FCD_0020,True,graph,1,0,True,10750.0
214,0,MELD_H28_3T_FCD_0022,True,graph,1,0,True,29269.0
215,0,MELD_H28_3T_FCD_0023,True,graph,1,0,True,15185.0


In [102]:
print('all together')
dfsub = df.copy()
sensitivity, specificity, ppv = df_stats(dfsub)

print(np.round(sensitivity,2),np.round(specificity,2),np.round(ppv,2))
print(bootstrap_CI(dfsub))
try:
    for site in ['H27', 'H28', 'H101']:
        print(site)
        dfsub = df[df['ID'].str.contains(site)]
        sensitivity, specificity, ppv = df_stats(dfsub)
        print(np.round(sensitivity,2),np.round(specificity,2),np.round(ppv,2))
except:
    pass

all together
0.72 0.39 0.67
[[0.63 0.29 0.58]
 [0.79 0.48 0.76]]
H27
0.82 0.28 0.74
H28
0.69 nan 0.48
H101
0.7 0.41 0.7


In [28]:
dfsub = df[df['ID'].str.contains('H101')]
sensitivity, specificity, ppv = df_stats(dfsub)
sensitivity

0.99

In [32]:
dfsub[dfsub['group']==True]

Unnamed: 0,index,ID,group,model,number TP clusters,number FP clusters,detected,size_pred
85,0,MELD_H101_3T_FCD_00001,True,graph,1,0,True,14560.0
86,0,MELD_H101_3T_FCD_00003,True,graph,1,0,True,16062.0
87,0,MELD_H101_3T_FCD_00004,True,graph,1,0,True,6490.0
88,0,MELD_H101_3T_FCD_00006,True,graph,1,0,True,5556.0
89,0,MELD_H101_3T_FCD_00009,True,graph,1,0,True,10518.0
...,...,...,...,...,...,...,...,...
165,0,MELD_H101_3T_FCD_00141,True,graph,1,0,True,7987.0
166,0,MELD_H101_3T_FCD_00142,True,graph,1,0,True,8829.0
167,0,MELD_H101_3T_FCD_00144,True,graph,1,0,True,11467.0
168,0,MELD_H101_3T_FCD_00145,True,graph,1,0,True,9900.0


In [26]:
df


Unnamed: 0,index,ID,group,model,number TP clusters,number FP clusters,detected,size_pred
0,0,MELD_H101_3T_C_00002,False,graph,0,1,,
1,0,MELD_H101_3T_C_00005,False,graph,0,1,,
2,0,MELD_H101_3T_C_00008,False,graph,0,1,,
3,0,MELD_H101_3T_C_00011,False,graph,0,1,,
4,0,MELD_H101_3T_C_00012,False,graph,0,1,,
...,...,...,...,...,...,...,...,...
216,0,MELD_H28_3T_FCD_0019,True,graph,0,1,False,11741.0
217,0,MELD_H28_3T_FCD_0020,True,graph,1,0,True,10750.0
218,0,MELD_H28_3T_FCD_0022,True,graph,1,0,True,29269.0
219,0,MELD_H28_3T_FCD_0023,True,graph,1,0,True,15185.0


In [24]:
#number clusters

print('MLP')
for quantile in [.5,.25,.75,1]:
    print(quantile,dfsub_mlp.groupby('group').quantile(quantile)['number FP clusters'])


print('MELD graph')
for quantile in [.5,.25,.75,1]:
    print(quantile,df.groupby('group').quantile(quantile)['number FP clusters'])



MLP
0.5 group
False    1.0
True     1.0
Name: number FP clusters, dtype: float64
0.25 group
False    0.0
True     0.0
Name: number FP clusters, dtype: float64
0.75 group
False    1.0
True     2.0
Name: number FP clusters, dtype: float64
1 group
False    16.0
True      7.0
Name: number FP clusters, dtype: float64
MELD graph
0.5 group
False    1.0
True     0.0
Name: number FP clusters, dtype: float64
0.25 group
False    1.0
True     0.0
Name: number FP clusters, dtype: float64
0.75 group
False    1.0
True     0.0
Name: number FP clusters, dtype: float64
1 group
False    2.0
True     5.0
Name: number FP clusters, dtype: float64


  print(quantile,dfsub_mlp.groupby('group').quantile(quantile)['number FP clusters'])
  print(quantile,dfsub_mlp.groupby('group').quantile(quantile)['number FP clusters'])
  print(quantile,dfsub_mlp.groupby('group').quantile(quantile)['number FP clusters'])
  print(quantile,dfsub_mlp.groupby('group').quantile(quantile)['number FP clusters'])
  print(quantile,df.groupby('group').quantile(quantile)['number FP clusters'])
  print(quantile,df.groupby('group').quantile(quantile)['number FP clusters'])
  print(quantile,df.groupby('group').quantile(quantile)['number FP clusters'])
  print(quantile,df.groupby('group').quantile(quantile)['number FP clusters'])


### with the csv results file

In [115]:
# # # for test dataset
# experiment_dir = '/rds/project/kw350/rds-kw350-meld/experiments_graph/kw350'
# model = '23-10-30_FOPF_dcop'
# # model = '23-10-30_MSBS_dcop_with_combat'
# # model='24-01-04_best_dcop_with_combat'
# # model='24-01-04_best_dcop'
# df_model = pd.read_csv(os.path.join(experiment_dir,model,
#                                           's_0','fold_all','results_best_model','test_results.csv'))

# # # for withheld sites
# # experiment_dir = '/rds/project/kw350/rds-kw350-meld/experiments_graph/kw350'
# # # model = '23-10-30_FOPF_dcop'
# # # model = '23-10-30_MSBS_dcop_with_combat'
# # model = '24-01-04_best_dcop_with_combat'

# # # df_model_h27 = pd.read_csv(os.path.join(experiment_dir,model,'s_0','fold_all', 'test_H27','results_best_model','test_results.csv'))
# # # df_model_h28 = pd.read_csv(os.path.join(experiment_dir,model,'s_0','fold_all', 'test_H28','results_best_model','test_results.csv'))
# # # df_model_h101 = pd.read_csv(os.path.join(experiment_dir,model,'s_0','fold_all', 'test_H101','results_best_model','test_results.csv'))
# # # df_model = pd.concat([df_model_h27, df_model_h28, df_model_h101])

# # df_model = pd.read_csv(os.path.join(experiment_dir,model,'s_0','fold_all', 'test_H27H28H101','results_best_model','test_results.csv'))

In [116]:
# df_model.groupby('group')['ID'].count()

group
False    193
True     260
Name: ID, dtype: int64

In [117]:
# df_model['model'] = ['graph' for x in df_model.iterrows()]
# df_model = df_model[['ID','group','detected','number FP clusters','number TP clusters', 'model']]
# df_model.head()


Unnamed: 0,ID,group,detected,number FP clusters,number TP clusters,model
0,MELD_H2_15T_FCD_0001,True,False,0,0,graph
1,MELD_H2_15T_FCD_0003,True,True,0,1,graph
2,MELD_H2_15T_FCD_0005,True,False,1,0,graph
3,MELD_H2_15T_FCD_0007,True,True,0,1,graph
4,MELD_H2_15T_FCD_0008,True,False,1,0,graph


In [118]:
# print('all together')
# dfsub = df_model.copy()
# sensitivity = np.mean(dfsub['detected'][dfsub['group']])
# specificity = (dfsub['number FP clusters'][dfsub['group']==0]>0).mean()
# total_detected = np.sum(dfsub['number TP clusters'][dfsub['group']])
# total_predicted = np.sum(dfsub['number FP clusters'][dfsub['group']])
# ppv = total_detected / (total_predicted + total_detected)
# print(np.round(sensitivity,2),np.round(1-specificity,2),np.round(ppv,2))

# try:
#     for site in ['H27','H28', 'H101']:
#         print(site)
#         dfsub = df_model[df_model['ID'].str.contains(site)]
#         sensitivity = np.mean(dfsub['detected'][dfsub['group']])
#         specificity = (dfsub['number FP clusters'][dfsub['group']==0]>0).mean()
#         total_detected = np.sum(dfsub['number TP clusters'][dfsub['group']])
#         total_predicted = np.sum(dfsub['number FP clusters'][dfsub['group']])
#         ppv = total_detected / (total_predicted + total_detected)
#         print(np.round(sensitivity,2),np.round(1-specificity,2),np.round(ppv,2))
# except:
#     pass

all together
0.68 0.7 0.66
H27
nan nan nan
H28
nan nan nan
H101
nan nan nan


  ppv = total_detected / (total_predicted + total_detected)


### add breakdown on test dataset

In [16]:
# for test dataset
experiment_dir = '/rds/project/kw350/rds-kw350-meld/experiments_graph/kw350'
model = '23-10-30_LVHZ_dcp'
df_model = pd.read_csv(os.path.join(experiment_dir,model,
                                          's_0','fold_all','results_best_model','test_results.csv'))

cohort= MeldCohort(hdf5_file_root='{site_code}_{group}_featurematrix_combat_6_kernels.hdf5', dataset='MELD_dataset_NewSiteH27H28H101.csv')


In [17]:
# add demographic
age_array=[]
sex_array=[]
histo_array=[]
site_array=[]
scanner_array=[]
flair_array=[]
group_array=[]
sf_array=[]
mri_negative_array=[]
for subject in df_model['ID']:
    subj = MeldSubject(subject, cohort)
    age, sex, histo, site, sf, mri_negative = subj.get_demographic_features(["Age at preoperative", "Sex", "Histology", "Site", "Seizure free", "Ever reported MRI negative"])
    scanner_array.append(subj.scanner)
    age_array.append(age)
    sex_array.append(sex)
    histo_array.append(histo)
    site_array.append(site)
    sf_array.append(sf)
    flair_array.append(subj.has_flair)
    mri_negative_array.append(mri_negative)

    
df_model['Age at preoperative']=age_array
df_model['Sex']=sex_array
df_model['Histology']=histo_array
df_model['Site']=site_array
df_model['Scanner']=scanner_array
df_model['FLAIR']=flair_array
df_model['Seizure free']=sf_array
df_model['Ever reported MRI negative'] = mri_negative_array

df_model.head()

Unnamed: 0,ID,group,detected,number FP clusters,number TP clusters,tp,fp,fn,tn,dice lesional,dice non-lesional,Age at preoperative,Sex,Histology,Site,Scanner,FLAIR,Seizure free,Ever reported MRI negative
0,MELD_H2_15T_FCD_0001,True,False,0,0,0,0,271,293533,3.690037e-18,0.999539,20.0,1.0,,H2,15T,False,1.0,0.0
1,MELD_H2_15T_FCD_0003,True,True,1,1,3826,24434,74,265470,0.2379353,0.955877,10.0,0.0,,H2,15T,False,,0.0
2,MELD_H2_15T_FCD_0005,True,False,2,0,0,20027,262,273515,4.928779e-20,0.964237,20.0,1.0,,H2,15T,False,,0.0
3,MELD_H2_15T_FCD_0007,True,True,0,1,2058,3928,205,287613,0.4989696,0.992866,4.0,1.0,FCD_2B,H2,15T,False,1.0,0.0
4,MELD_H2_15T_FCD_0008,True,True,0,1,103,52023,867,240811,0.003879765,0.90105,10.0,1.0,,H2,15T,False,,0.0


In [18]:
pat = df_model[df_model['group']==True]

In [19]:
disp_df=pd.DataFrame(100*pat.groupby('Scanner').mean()['detected']).round(1)
disp_df['count'] = pat.groupby('Scanner').count()['detected']
disp_df

Unnamed: 0_level_0,detected,count
Scanner,Unnamed: 1_level_1,Unnamed: 2_level_1
15T,62.5,56
3T,72.5,204


In [20]:
disp_df=pd.DataFrame(100*pat.groupby(['Scanner','FLAIR']).mean()['detected']).round(1)
disp_df['count'] = pat.groupby(['Scanner','FLAIR']).count()['detected']
disp_df

Unnamed: 0_level_0,Unnamed: 1_level_0,detected,count
Scanner,FLAIR,Unnamed: 2_level_1,Unnamed: 3_level_1
15T,False,58.3,36
15T,True,70.0,20
3T,False,71.9,114
3T,True,73.3,90


In [21]:
disp_df=pd.DataFrame(100*pat.groupby(['Seizure free']).mean()['detected']).round(1)
disp_df['count'] = pat.groupby(['Seizure free']).count()['detected']
disp_df

Unnamed: 0_level_0,detected,count
Seizure free,Unnamed: 1_level_1,Unnamed: 2_level_1
0.0,64.7,51
1.0,79.2,106


In [22]:
disp_df=pd.DataFrame(100*pat.groupby(['Sex']).mean()['detected']).round(1)
disp_df['count'] = pat.groupby(['Sex']).count()['detected']
disp_df

Unnamed: 0_level_0,detected,count
Sex,Unnamed: 1_level_1,Unnamed: 2_level_1
0.0,64.0,125
1.0,76.3,135


In [23]:
pat['Histology'] = pat['Histology'].fillna('not available')
disp_df=pd.DataFrame(100*pat.groupby(['Histology']).mean()['detected']).round(1)
disp_df['n patients'] = pat.groupby(['Histology']).count()['detected']
disp_df.rename(columns={'detected':'% Detected'})

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  pat['Histology'] = pat['Histology'].fillna('not available')


Unnamed: 0_level_0,% Detected,n patients
Histology,Unnamed: 1_level_1,Unnamed: 2_level_1
FCD_1,84.6,13
FCD_2A,75.4,57
FCD_2B,76.3,93
FCD_3,75.0,8
not available,58.4,89


In [24]:
pat['Histology']

0      not available
1      not available
2      not available
3             FCD_2B
4      not available
           ...      
448    not available
449    not available
450           FCD_2B
451           FCD_2A
452            FCD_1
Name: Histology, Length: 260, dtype: object

In [25]:
pat['Histology']

0      not available
1      not available
2      not available
3             FCD_2B
4      not available
           ...      
448    not available
449    not available
450           FCD_2B
451           FCD_2A
452            FCD_1
Name: Histology, Length: 260, dtype: object

In [26]:
disp_df=pd.DataFrame(100*pat.groupby(['Ever reported MRI negative']).mean()['detected']).round(1)
disp_df['count'] = pat.groupby(['Ever reported MRI negative']).count()['detected']
disp_df

Unnamed: 0_level_0,detected,count
Ever reported MRI negative,Unnamed: 1_level_1,Unnamed: 2_level_1
0.0,72.8,180
1.0,65.0,80


In [28]:
pat['paediatric'] = pat['Age at preoperative']<18
disp_df=pd.DataFrame(100*pat.groupby(['paediatric']).mean()['detected']).round(1)
disp_df['count'] = pat.groupby(['paediatric']).count()['detected']
disp_df

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  pat['paediatric'] = pat['Age at preoperative']<18


Unnamed: 0_level_0,detected,count
paediatric,Unnamed: 1_level_1,Unnamed: 2_level_1
False,69.5,131
True,71.3,129


In [29]:
pat['Histology'] = pat['Histology'].fillna('not available')
pat['Histology']= pat['Histology']=='not available'
disp_df=pd.DataFrame(100*pat.groupby(['Histology']).mean()['detected']).round(1)
disp_df['n patients'] = pat.groupby(['Histology']).count()['detected']
disp_df.rename(columns={'detected':'% Detected'})

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  pat['Histology'] = pat['Histology'].fillna('not available')
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  pat['Histology']= pat['Histology']=='not available'


Unnamed: 0_level_0,% Detected,n patients
Histology,Unnamed: 1_level_1,Unnamed: 2_level_1
False,76.6,171
True,58.4,89
