In [1]:
import pandas as pd
import numpy as np
import os
#import meld_classifier.old_hdf5_io as hio
import matplotlib.pyplot as plt
import meld_classifier.paths as paths
from statsmodels.stats.proportion import proportion_confint
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import confusion_matrix
from statsmodels.stats.multitest import multipletests
from meld_classifier.meld_cohort import MeldCohort, MeldSubject


Setting MELD_DATA_PATH to /home/kw350/rds/rds-kw350-meld/meld_data/Data
Setting BASE_PATH to /home/kw350/rds/rds-kw350-meld/meld_data/Data
Setting EXPERIMENT_PATH to /rds/user/kw350/rds-kw350-meld/experiments/kw350/
Setting FS_SUBJECTS_PATH to /home/kw350/rds/rds-kw350-meld/meld_data/Data/output/fs_outputs


In [2]:
# Absolute path to the demographics CSV (the file name suggests QC'd data for
# all groups — TODO confirm).
# NOTE(review): not referenced by any cell visible in this section; confirm it
# is still needed.
demographics_file = '/rds/project/kw350/rds-kw350-meld/meld_data/Data/demographics_qc_allgroups.csv'


In [3]:
def create_df(filename_template,folds = np.arange(10),border_threshold=100):
    """Load the per-fold test-result CSVs and stack them into one DataFrame.

    Parameters
    ----------
    filename_template : str
        Path template with positional ``{}`` placeholders; the fold number is
        passed to ``str.format`` twice, so up to two placeholders are filled.
    folds : iterable of int, optional
        Fold numbers to load. Defaults to folds 0-9.
    border_threshold : number, optional
        ``border`` is replaced by the boolean ``border > border_threshold``.
        Defaults to 100, the value that used to be hard-coded.

    Returns
    -------
    pandas.DataFrame
        The per-fold tables concatenated in fold order. Each fold has exact
        duplicate rows removed, gains a boolean ``any`` column
        (``n_clusters < 1``), has ``border`` thresholded to a boolean, has rows
        with any missing value dropped, and ``detected`` cast to int. Row
        labels are kept per fold (they are not renumbered across folds).

    Raises
    ------
    ValueError
        From ``pandas.concat`` when ``folds`` is empty.
    """
    fold_frames = []
    for fold in folds:
        fold_df = (
            pd.read_csv(filename_template.format(fold, fold), index_col=False)
            .drop_duplicates(ignore_index=True)
            # Thresholding happens before dropna, so a missing `border`
            # becomes False instead of causing the row to be dropped.
            .assign(
                any=lambda d: d['n_clusters'] < 1,
                border=lambda d: d['border'] > border_threshold,
            )
            .dropna()
            # `assign` returns a fresh frame, so no write lands on a slice of
            # the pre-dropna table.
            .assign(detected=lambda d: d['detected'].astype(int))
        )
        fold_frames.append(fold_df)
    return pd.concat(fold_frames)

#create results table
def _percent_cell(flags):
    """Format boolean flags as 'P% (k/n)'; return 'NA' when there are no rows."""
    flags = np.asarray(flags, dtype=bool)
    if flags.size == 0:
        return 'NA'
    percent = int(np.round(flags.mean() * 100))
    return f'{percent}% ({int(flags.sum())}/{flags.size})'


def _median_iqr_cell(values):
    """Format values as 'median (q25-q75)'; return 'NA' when there are no rows."""
    values = np.asarray(values)
    if values.size == 0:
        return 'NA'
    q25, q75 = np.percentile(values, [25, 75])
    return f'{np.median(values)} ({q25}-{q75})'


def results_row(df):
    """Summarise per-subject results as the five cells of one results column.

    Parameters
    ----------
    df : pandas.DataFrame
        One row per subject with columns ``group`` (truthy for patients, falsy
        for controls), ``detected``, ``border`` and ``n_clusters``.

    Returns
    -------
    list of str
        In order:
        1. percentage of patients with ``detected`` or ``border`` set,
        2. percentage of patients with ``detected`` set,
        3. median (IQR) of ``n_clusters`` in patients,
        4. percentage of controls with ``n_clusters == 0``,
        5. median (IQR) of ``n_clusters`` in controls.
        A cell is 'NA' when its group has no rows.
    """
    # Coerce to bool so that a 0/1 integer `group` column is used as a mask
    # and never as row labels.
    is_patient = df['group'].astype(bool).to_numpy()
    patients = df[is_patient]
    controls = df[~is_patient]

    detected = patients['detected'].to_numpy().astype(bool)
    # Logical OR: a patient flagged by both `detected` and `border` is counted
    # once, so the percentage cannot exceed 100%.
    detected_plus = detected | patients['border'].to_numpy().astype(bool)

    # Numerators and denominators come from the same rows (patients or
    # controls), so 'k/n' always agrees with the percentage beside it.
    return [
        _percent_cell(detected_plus),
        _percent_cell(detected),
        _median_iqr_cell(patients['n_clusters']),
        _percent_cell(controls['n_clusters'].to_numpy() == 0),
        _median_iqr_cell(controls['n_clusters']),
    ]

In [4]:
# Build one results column per experiment in the iteration_22-03-0N series.
# NOTE(review): the labels in `exps` are not used to build the paths; only the
# position (1-based) selects the iteration folder. The table built afterwards
# maps rows_of_results[2] to "Basic features" — confirm that the order of
# `exps` matches the iteration numbering.
exps = ["c","c+a","c+a+n"]
rows_of_results=[]
for ei,exp in enumerate(exps):
    iteration_dir = f'experiments/kw350/iteration_22-03-0{ei+1}'
    # The two '{}' placeholders are left unformatted here; create_df fills
    # them with the fold number.
    filename_template = os.path.join(
        '/rds/project/kw350/rds-kw350-meld',
        iteration_dir,
        'ensemble_22-03-31/fold_{}/',
        'results/test_results_{}'+'_ensemble_0.csv',
    )
    all_folds = create_df(filename_template)
    rows_of_results.append(results_row(all_folds))

In [5]:
# Assemble the comparison table: one column per feature set, one row per
# metric, then display it transposed (feature sets as rows).
# NOTE(review): the 'Specificity ...' label is missing its closing
# parenthesis; left untouched here because it is a displayed string.
row_labels = [
    'Sensitivity+ (Percentage of patients detected)',
    'Sensitivity (Percentage of patients detected)',
    'Number of clusters in patients (Median (IQR))',
    'Specificity (Percentage of controls with zero clusters',
    'Number of clusters in controls (Median (IQR))',
]
# Results are taken in reverse order of rows_of_results (index 2 first).
data = {
    '': row_labels,
    "Basic features": rows_of_results[2],
    "Basic features + asymmetry": rows_of_results[1],
    "Basic features + asymmetry + intersubject normalisation": rows_of_results[0],
}
df = pd.DataFrame(data).set_index('')
df.T

Unnamed: 0,Sensitivity+ (Percentage of patients detected),Sensitivity (Percentage of patients detected),Number of clusters in patients (Median (IQR)),Specificity (Percentage of controls with zero clusters,Number of clusters in controls (Median (IQR))
Basic features,54% (150/278),44% (121/278),3.0 (2.0-6.0),17% (30/180),2.0 (1.0-3.0)
Basic features + asymmetry,64% (177/278),55% (153/278),2.0 (1.0-4.0),41% (74/180),1.0 (0.0-2.0)
Basic features + asymmetry + intersubject normalisation,65% (181/278),59% (164/278),2.0 (1.0-5.0),44% (79/180),1.0 (0.0-2.0)


In [8]:
# Build one results column per entry of `exps`, read from the
# iteration_22-04-0N series (N is the 1-based position in `exps`).
exps = [
    "fwhm0", "fwhm3", "fwhm5", "fwhmref",
    "fwhm15", "fwhm20", "fwhm25",
]
rows_of_results=[]
for ei,exp in enumerate(exps):
    iteration_dir = f'experiments/kw350/iteration_22-04-0{ei+1}'
    # The two '{}' placeholders are left unformatted here; create_df fills
    # them with the fold number.
    filename_template = os.path.join(
        '/rds/project/kw350/rds-kw350-meld',
        iteration_dir,
        'ensemble_22-03-31/fold_{}/',
        'results/test_results_{}'+'_ensemble_0.csv',
    )
    all_folds = create_df(filename_template, folds = np.arange(10))
    rows_of_results.append(results_row(all_folds))
    

In [9]:
# Assemble the comparison table: one column per entry of `exps`, one row per
# metric, then display it transposed (experiments as rows).
# NOTE(review): the 'Specificity ...' label is missing its closing
# parenthesis; left untouched here because it is a displayed string.
row_labels = [
    'Sensitivity+ (Percentage of patients detected)',
    'Sensitivity (Percentage of patients detected)',
    'Number of clusters in patients (Median (IQR))',
    'Specificity (Percentage of controls with zero clusters',
    'Number of clusters in controls (Median (IQR))',
]
data = {'': row_labels}
# Pair each experiment label with the results row at the same position.
data.update({e: rows_of_results[ei] for ei,e in enumerate(exps)})
df = pd.DataFrame(data).set_index('')
df.T

Unnamed: 0,Sensitivity+ (Percentage of patients detected),Sensitivity (Percentage of patients detected),Number of clusters in patients (Median (IQR)),Specificity (Percentage of controls with zero clusters,Number of clusters in controls (Median (IQR))
fwhm0,64% (177/278),58% (161/278),2.0 (1.0-3.0),55% (99/180),0.0 (0.0-1.0)
fwhm3,59% (164/278),51% (142/278),2.0 (1.0-4.0),57% (103/180),0.0 (0.0-1.0)
fwhm5,63% (176/278),58% (162/278),2.0 (1.0-5.0),43% (78/180),1.0 (0.0-2.0)
fwhmref,66% (183/278),59% (165/278),2.0 (1.0-4.0),46% (83/180),1.0 (0.0-2.0)
fwhm15,63% (176/278),56% (156/278),2.0 (1.0-3.0),50% (90/180),0.5 (0.0-2.0)
fwhm20,64% (178/278),56% (156/278),2.0 (1.0-3.0),46% (83/180),1.0 (0.0-2.0)
fwhm25,58% (161/278),53% (147/278),2.0 (1.0-3.0),56% (100/180),0.0 (0.0-1.25)
