In [2]:
import os
import pickle
from glob import glob
import numpy as np
import torch
import pandas as pd
from fsl.data.image import Image
from fsl.utils.image.resample import resampleToPixdims
import matplotlib.pyplot as plt
import seaborn as sns

from pathlib import Path

sns.set()

In [3]:
def get_array(df, col):
    """Return the values stored under ``col`` in ``df`` as an array.

    ``col`` may be a single column label (1-D result) or a list of labels
    (2-D result with one column per label).
    """
    selected = df[col]
    return selected.values

def stats_calc(df, col):
    """Summarise column ``col`` of ``df``.

    Returns a 5-tuple ``(min, max, mean, median, std)`` computed with NumPy
    over the array produced by ``get_array``.
    """
    values = get_array(df, col)
    reducers = (np.min, np.max, np.mean, np.median, np.std)
    return tuple(reduce_fn(values) for reduce_fn in reducers)

def plot_hist(df, col, title=None):
    """Plot a 50-bin histogram of ``df[col]`` with a dashed red reference line.

    The reference line is drawn at ``mean - 2*std`` when ``col == 'min'``, at
    ``mean + 2*std`` when ``col == 'max'``, and at the mean for any other column.

    Parameters
    ----------
    df : pandas.DataFrame
        Table holding the column to plot.
    col : str
        Label of the column to histogram.
    title : str, optional
        Currently unused; kept so existing calls keep working.
    """
    array = get_array(df, col)
    fig, ax = plt.subplots(1, 1, figsize=(8, 6))
    # Pass the on/off flag positionally: the ``b=`` keyword was renamed to
    # ``visible=`` in Matplotlib 3.5 and later removed, so ``b=True`` fails on
    # current releases while the positional form works on old and new ones.
    ax.grid(True, axis='y', alpha=0.75, which='major', linestyle='-')
    ax.hist(array, bins=50, color='blue', alpha=0.7, rwidth=0.85)
    ax.set_xlabel('Value')
    ax.set_ylabel('Frequency')
    fig.patch.set_facecolor('xkcd:white')

    _, _, mean, _, std = stats_calc(df, col)
    # Choose where the reference line goes; every branch assigns ``marker`` so
    # there is no path on which it is left undefined.
    if col == 'min':
        marker = mean - 2 * std
    elif col == 'max':
        marker = mean + 2 * std
    else:
        marker = mean
    ax.axvline(x=marker, color='r', linestyle='dashed', linewidth=2)

def plot_bars(df, labels_list, title=None):
    """Draw one box plot per column listed in ``labels_list`` on a single axes.

    Despite the name, this uses ``ax.boxplot`` rather than a bar chart.

    Parameters
    ----------
    df : pandas.DataFrame
        Table holding the columns to plot.
    labels_list : list
        Column labels to plot; also used as the x tick labels.
    title : str, optional
        Currently unused; kept so existing calls keep working.
    """
    array_list = get_array(df, labels_list)
    fig, ax = plt.subplots(figsize=(8, 6))
    ax.boxplot(array_list)
    ax.set_xticklabels(labels_list)
    # Use the explicit Axes API throughout instead of mixing in plt.xlabel /
    # plt.ylabel / plt.grid, which act on whatever the "current" axes is.
    ax.set_xlabel('Percentile')
    ax.set_ylabel('Values')
    # A single grid call is enough; it is placed after the plotting calls, where
    # the original's last (duplicate) grid call was.
    ax.grid(axis='y', alpha=0.75)
    fig.patch.set_facecolor('xkcd:white')

In [5]:
# Statistics pickles to process, one per modality (57 entries).
# `path_meaning` and `data_files` are looked up by position in this list, so the
# order here must match theirs.
interesting_data_paths = [
    "T1_brain_to_MNI.pkl",
    "T1_brain_to_MNI_linear.pkl",
    "tractsNormSummed.pkl",
    "T1_GM_to_template_GM_mod.pkl",
    "T2_FLAIR_brain_to_MNI.pkl",
    "T2star_to_MNI.pkl",
    # dr_stage2_0.pkl ... dr_stage2_24.pkl
    *[f"dr_stage2_{component}.pkl" for component in range(25)],
    *[f"zstat{contrast}.pkl" for contrast in (1, 2, 5)],
    *[f"cope{contrast}.pkl" for contrast in (1, 2, 5)],
    *[
        f"all_{metric}_skeletonised.pkl"
        for metric in ("FA", "ICVF", "ISOVF", "L1", "L2", "L3", "MD", "MO", "OD")
    ],
    *[
        f"all_{metric}.pkl"
        for metric in ("FA", "ICVF", "ISOVF", "L1", "L2", "L3", "MD", "MO", "OD")
    ],
    "T1_to_MNI_warp_jac.pkl",
    "final_mask_to_MNI.pkl",
]

# Short modality name for each entry of `interesting_data_paths`, in the same
# order (57 entries); the scaling loops pair the two lists by position.
path_meaning = [
    "T1_nonlinear",
    "T1_linear",
    "tracts",
    "vbm",
    "T2_nonlinear",
    "swi",
    # rsfmri_0 ... rsfmri_24
    *[f"rsfmri_{component}" for component in range(25)],
    *[f"tfmri_{contrast}" for contrast in (1, 2, 5)],
    *[f"tfmri_c_{contrast}" for contrast in (1, 2, 5)],
    *[
        f"tbss_{metric}_s"
        for metric in ("FA", "ICVF", "ISOVF", "L1", "L2", "L3", "MD", "MO", "OD")
    ],
    *[
        f"tbss_{metric}"
        for metric in ("FA", "ICVF", "ISOVF", "L1", "L2", "L3", "MD", "MO", "OD")
    ],
    "jacobian",
    "T2_lesions",
]



# Pickles for which the scaling loops pick `norm99p` instead of `data_scale`
# (see the `path in deterministic` checks).
# TODO: "T2star_to_MNI.pkl" is deliberately left out for now -- check whether it
# belongs in this list.
deterministic = [
    stem + ".pkl"
    for stem in (
        "T1_brain_to_MNI",
        "T1_brain_to_MNI_linear",
        "T2_FLAIR_brain_to_MNI",
    )
]

# Pickles that the scaling loops tag with resolution '2mm'; every other path is
# tagged '1mm'.
data_paths_2mm = [
    "T1_GM_to_template_GM_mod.pkl",
    # dr_stage2_0.pkl ... dr_stage2_24.pkl
    *[f"dr_stage2_{component}.pkl" for component in range(25)],
    # zstat1/2/5 followed by cope1/2/5
    *[
        f"{stat}{contrast}.pkl"
        for stat in ("zstat", "cope")
        for contrast in (1, 2, 5)
    ],
]

# Relative NIfTI path for each entry of `interesting_data_paths`, in the same
# order (57 entries); the scaling loops pair the two lists by position.
data_files = [
    "T1/T1_brain_to_MNI.nii.gz",
    "T1/T1_brain_to_MNI_linear.nii.gz",
    "dMRI/autoptx_preproc/tractsNormSummed.nii.gz",
    "T1/T1_vbm/T1_GM_to_template_GM_mod.nii.gz",
    "T2_FLAIR/T2_FLAIR_brain_to_MNI.nii.gz",
    "SWI/T2star_to_MNI.nii.gz",
    # All 25 dr_stage2_* entries point at the same file.
    *(["fMRI/rfMRI_25.dr/dr_stage2.nii.gz"] * 25),
    # zstat1/2/5 followed by cope1/2/5
    *[
        f"fMRI/tfMRI.feat/reg_standard/stats/{stat}{contrast}.nii.gz"
        for stat in ("zstat", "cope")
        for contrast in (1, 2, 5)
    ],
    *[
        f"dMRI/TBSS/stats/all_{metric}_skeletonised.nii.gz"
        for metric in ("FA", "ICVF", "ISOVF", "L1", "L2", "L3", "MD", "MO", "OD")
    ],
    *[
        f"dMRI/TBSS/stats/all_{metric}.nii.gz"
        for metric in ("FA", "ICVF", "ISOVF", "L1", "L2", "L3", "MD", "MO", "OD")
    ],
    "T1/transforms/T1_to_MNI_warp_jac.nii.gz",
    "T2_FLAIR/lesions/final_mask_to_MNI.nii.gz",
]

In [6]:
# The scaling loops look up `path_meaning` and `data_files` by position in
# `interesting_data_paths`, so the three lists must stay the same length.
assert len(interesting_data_paths) == len(path_meaning) == len(data_files), \
    "interesting_data_paths, path_meaning and data_files are out of sync"
print(len(path_meaning))

57


In [45]:
# Build the detailed per-modality scaling table (one row per statistics pickle,
# indexed by the pickle's file stem) and save it to 'scaling_values.pkl'.

# The three lists are paired by position. Fail loudly if they have drifted
# apart, because zip() below would otherwise silently stop at the shortest one.
if not (len(interesting_data_paths) == len(path_meaning) == len(data_files)):
    raise ValueError(
        "interesting_data_paths, path_meaning and data_files must have the same length"
    )

# Number of standard deviations added around the mean of the 'min'/'max' columns.
n_std = 2

scaling_values = {}

for path, name, data_file in zip(interesting_data_paths, path_meaning, data_files):
    key = Path(path).stem

    # In this table `scale_factor` holds the *name* of the column to scale by,
    # not the numeric value.
    scale_factor = 'norm99p' if path in deterministic else 'data_scale'
    resolution = '2mm' if path in data_paths_2mm else '1mm'

    # NOTE(review): unpickling can execute arbitrary code; only load trusted files.
    data = pd.read_pickle(path)
    min_scale = data['min'].mean() - n_std * data['min'].std()
    max_scale = data['max'].mean() + n_std * data['max'].std()
    # Symmetric scale: the larger magnitude of the two bounds.
    data_scale = max(abs(min_scale), abs(max_scale))
    norm99p = np.mean(data['norm99p'])
    scaling_values[key] = [name, min_scale, max_scale, data_scale, norm99p, scale_factor, resolution, data_file]

df_keys = ['name', 'min_scale', 'max_scale', 'data_scale', 'norm99p', 'scale_factor', 'resolution', 'data_file']
df = pd.DataFrame.from_dict(scaling_values, orient="index", columns=df_keys)
df.to_pickle('scaling_values.pkl')

In [46]:
# Load the detailed scaling table back from 'scaling_values.pkl'.
scaling_values_df = pd.read_pickle('scaling_values.pkl')

In [47]:
# Look up a single cell by row label and column name in one .loc call.
scaling_values_df.loc['T1_brain_to_MNI', 'min_scale']

-89.20484050850203

In [48]:
# Preview the first 10 rows of the detailed scaling table.
scaling_values_df.head(10)

Unnamed: 0,name,min_scale,max_scale,data_scale,norm99p,scale_factor,resolution,data_file
T1_brain_to_MNI,T1_nonlinear,-89.204841,4087.455555,4087.455555,5.550106,norm99p,1mm,T1/T1_brain_to_MNI.nii.gz
T1_brain_to_MNI_linear,T1_linear,-431.418693,4092.229716,4092.229716,5.678574,norm99p,1mm,T1/T1_brain_to_MNI_linear.nii.gz
tractsNormSummed,tracts,0.0,0.412435,0.412435,25.47699,data_scale,1mm,dMRI/autoptx_preproc/tractsNormSummed.nii.gz
T1_GM_to_template_GM_mod,vbm,-0.000299,3.555727,3.555727,11.219166,data_scale,2mm,T1/T1_vbm/T1_GM_to_template_GM_mod.nii.gz
T2_FLAIR_brain_to_MNI,T2_nonlinear,-144.648843,1702.766756,1702.766756,5.238671,norm99p,1mm,T2_FLAIR/T2_FLAIR_brain_to_MNI.nii.gz
T2star_to_MNI,swi,0.0,76.58003,76.58003,5.023419,data_scale,1mm,SWI/T2star_to_MNI.nii.gz
dr_stage2_0,rsfmri_0,-13.733222,20.728238,20.728238,54.536475,data_scale,2mm,fMRI/rfMRI_25.dr/dr_stage2.nii.gz
dr_stage2_1,rsfmri_1,-14.317963,24.217185,24.217185,42.863123,data_scale,2mm,fMRI/rfMRI_25.dr/dr_stage2.nii.gz
dr_stage2_2,rsfmri_2,-13.529909,22.798476,22.798476,45.866659,data_scale,2mm,fMRI/rfMRI_25.dr/dr_stage2.nii.gz
dr_stage2_3,rsfmri_3,-10.426248,13.635602,13.635602,530.537149,data_scale,2mm,fMRI/rfMRI_25.dr/dr_stage2.nii.gz


In [8]:
# Scale-factor entries of every row whose `name` is 'T1_nonlinear', as an array.
is_t1_nonlinear = scaling_values_df['name'] == 'T1_nonlinear'
sf = scaling_values_df.loc[is_t1_nonlinear, 'scale_factor'].to_numpy()

In [9]:
# First (positional) element of the `sf` array.
sf[0]

'norm99p'

In [10]:
# Take the first matching row with .iloc[0]. The frame is indexed by string
# labels, so a bare [0] relies on the deprecated positional fallback of
# Series.__getitem__.
scaling_values_df.loc[scaling_values_df.name == 'T1_nonlinear', 'scale_factor'].iloc[0]

'norm99p'

In [11]:
# Modality to look up; compared against the `name` column of scaling_values_df.
# mod = 'T1_nonlinear'
mod = 'rsfmri_0'

In [12]:
# Select the rows for `mod` once, read which column that modality is scaled by
# (stored as a column name in `scale_factor`), then take that column's value.
# .iloc[0] replaces the bare [0] lookups, which relied on the deprecated
# positional fallback for a string-labelled Series.
mod_rows = scaling_values_df.loc[scaling_values_df.name == mod]
scale_column = mod_rows['scale_factor'].iloc[0]
sv = mod_rows[scale_column].iloc[0]

In [13]:
# Display the scaling value selected for `mod`.
sv

20.728237795548115

In [49]:
# Display the detailed scaling table.
scaling_values_df

Unnamed: 0,name,min_scale,max_scale,data_scale,norm99p,scale_factor,resolution,data_file
T1_brain_to_MNI,T1_nonlinear,-89.204841,4087.455555,4087.455555,5.550106,norm99p,1mm,T1/T1_brain_to_MNI.nii.gz
T1_brain_to_MNI_linear,T1_linear,-431.418693,4092.229716,4092.229716,5.678574,norm99p,1mm,T1/T1_brain_to_MNI_linear.nii.gz
tractsNormSummed,tracts,0.0,0.412435,0.412435,25.47699,data_scale,1mm,dMRI/autoptx_preproc/tractsNormSummed.nii.gz
T1_GM_to_template_GM_mod,vbm,-0.000299,3.555727,3.555727,11.219166,data_scale,2mm,T1/T1_vbm/T1_GM_to_template_GM_mod.nii.gz
T2_FLAIR_brain_to_MNI,T2_nonlinear,-144.648843,1702.766756,1702.766756,5.238671,norm99p,1mm,T2_FLAIR/T2_FLAIR_brain_to_MNI.nii.gz
T2star_to_MNI,swi,0.0,76.58003,76.58003,5.023419,data_scale,1mm,SWI/T2star_to_MNI.nii.gz
dr_stage2_0,rsfmri_0,-13.733222,20.728238,20.728238,54.536475,data_scale,2mm,fMRI/rfMRI_25.dr/dr_stage2.nii.gz
dr_stage2_1,rsfmri_1,-14.317963,24.217185,24.217185,42.863123,data_scale,2mm,fMRI/rfMRI_25.dr/dr_stage2.nii.gz
dr_stage2_2,rsfmri_2,-13.529909,22.798476,22.798476,45.866659,data_scale,2mm,fMRI/rfMRI_25.dr/dr_stage2.nii.gz
dr_stage2_3,rsfmri_3,-10.426248,13.635602,13.635602,530.537149,data_scale,2mm,fMRI/rfMRI_25.dr/dr_stage2.nii.gz


In [50]:
# Build the simplified scaling table (one row per modality name, holding the
# numeric scale factor, the resolution tag and the data file) and save it to
# 'scaling_values_simple.pkl'.

# The three lists are paired by position. Fail loudly if they have drifted
# apart, because zip() below would otherwise silently stop at the shortest one.
if not (len(interesting_data_paths) == len(path_meaning) == len(data_files)):
    raise ValueError(
        "interesting_data_paths, path_meaning and data_files must have the same length"
    )

# Number of standard deviations added around the mean of the 'min'/'max' columns.
n_std = 2

scaling_values = {}

for path, name, data_file in zip(interesting_data_paths, path_meaning, data_files):
    # NOTE(review): unpickling can execute arbitrary code; only load trusted files.
    data = pd.read_pickle(path)
    min_scale = data['min'].mean() - n_std * data['min'].std()
    max_scale = data['max'].mean() + n_std * data['max'].std()
    # Symmetric scale: the larger magnitude of the two bounds.
    data_scale = max(abs(min_scale), abs(max_scale))
    norm99p = np.mean(data['norm99p'])

    # Here `scale_factor` is the numeric value itself (not a column name).
    scale_factor = norm99p if path in deterministic else data_scale
    resolution = '2mm' if path in data_paths_2mm else '1mm'

    scaling_values[name] = [scale_factor, resolution, data_file]

df_keys = ['scale_factor', 'resolution', 'data_file']
df = pd.DataFrame.from_dict(scaling_values, orient="index", columns=df_keys)
df.to_pickle('scaling_values_simple.pkl')

In [51]:
# Load the simplified scaling table back from 'scaling_values_simple.pkl'.
scaling_values_df_small = pd.read_pickle('scaling_values_simple.pkl')

In [52]:
# Preview the first 7 rows of the simplified table.
scaling_values_df_small.head(7)

Unnamed: 0,scale_factor,resolution,data_file
T1_nonlinear,5.550106,1mm,T1/T1_brain_to_MNI.nii.gz
T1_linear,5.678574,1mm,T1/T1_brain_to_MNI_linear.nii.gz
tracts,0.412435,1mm,dMRI/autoptx_preproc/tractsNormSummed.nii.gz
vbm,3.555727,2mm,T1/T1_vbm/T1_GM_to_template_GM_mod.nii.gz
T2_nonlinear,5.238671,1mm,T2_FLAIR/T2_FLAIR_brain_to_MNI.nii.gz
swi,76.58003,1mm,SWI/T2star_to_MNI.nii.gz
rsfmri_0,20.728238,2mm,fMRI/rfMRI_25.dr/dr_stage2.nii.gz


In [53]:
# Preview the first 7 rows of the detailed table.
scaling_values_df.head(7)

Unnamed: 0,name,min_scale,max_scale,data_scale,norm99p,scale_factor,resolution,data_file
T1_brain_to_MNI,T1_nonlinear,-89.204841,4087.455555,4087.455555,5.550106,norm99p,1mm,T1/T1_brain_to_MNI.nii.gz
T1_brain_to_MNI_linear,T1_linear,-431.418693,4092.229716,4092.229716,5.678574,norm99p,1mm,T1/T1_brain_to_MNI_linear.nii.gz
tractsNormSummed,tracts,0.0,0.412435,0.412435,25.47699,data_scale,1mm,dMRI/autoptx_preproc/tractsNormSummed.nii.gz
T1_GM_to_template_GM_mod,vbm,-0.000299,3.555727,3.555727,11.219166,data_scale,2mm,T1/T1_vbm/T1_GM_to_template_GM_mod.nii.gz
T2_FLAIR_brain_to_MNI,T2_nonlinear,-144.648843,1702.766756,1702.766756,5.238671,norm99p,1mm,T2_FLAIR/T2_FLAIR_brain_to_MNI.nii.gz
T2star_to_MNI,swi,0.0,76.58003,76.58003,5.023419,data_scale,1mm,SWI/T2star_to_MNI.nii.gz
dr_stage2_0,rsfmri_0,-13.733222,20.728238,20.728238,54.536475,data_scale,2mm,fMRI/rfMRI_25.dr/dr_stage2.nii.gz


In [54]:
# Modality to look up in the simplified table (indexed by modality name).
mod = 'T1_nonlinear'
# mod = 'rsfmri_0'
# Read the numeric scale factor with a single row/column .loc lookup.
sv = scaling_values_df_small.loc[mod, 'scale_factor']

In [64]:
# Check the Python type of the looked-up scale factor.
type(sv)

numpy.float64

In [56]:
# Resolution tag for `mod`, read with a single row/column .loc lookup.
# NOTE(review): the name `re` would shadow the standard-library `re` module if
# that were ever imported in this notebook.
re = scaling_values_df_small.loc[mod, 'resolution']

In [57]:
# Display the resolution tag looked up for `mod`.
re

'1mm'

In [58]:
# Display the simplified scaling table.
scaling_values_df_small

Unnamed: 0,scale_factor,resolution,data_file
T1_nonlinear,5.550106,1mm,T1/T1_brain_to_MNI.nii.gz
T1_linear,5.678574,1mm,T1/T1_brain_to_MNI_linear.nii.gz
tracts,0.412435,1mm,dMRI/autoptx_preproc/tractsNormSummed.nii.gz
vbm,3.555727,2mm,T1/T1_vbm/T1_GM_to_template_GM_mod.nii.gz
T2_nonlinear,5.238671,1mm,T2_FLAIR/T2_FLAIR_brain_to_MNI.nii.gz
swi,76.58003,1mm,SWI/T2star_to_MNI.nii.gz
rsfmri_0,20.728238,2mm,fMRI/rfMRI_25.dr/dr_stage2.nii.gz
rsfmri_1,24.217185,2mm,fMRI/rfMRI_25.dr/dr_stage2.nii.gz
rsfmri_2,22.798476,2mm,fMRI/rfMRI_25.dr/dr_stage2.nii.gz
rsfmri_3,13.635602,2mm,fMRI/rfMRI_25.dr/dr_stage2.nii.gz


In [59]:
# Number of rows in the simplified table.
len(scaling_values_df_small)

57

In [60]:
# Also write the simplified table as CSV; the index is written as the first column.
scaling_values_df_small.to_csv('scaling_values_simple.csv')

In [61]:
# Drop the in-memory table so the next cell's value can only come from the CSV file.
del scaling_values_df_small

In [62]:
# Read the CSV back, using its first column as the index.
scaling_values_df_small = pd.read_csv('scaling_values_simple.csv', index_col=0)

In [63]:
# Display the table as loaded from the CSV file.
scaling_values_df_small

Unnamed: 0,scale_factor,resolution,data_file
T1_nonlinear,5.550106,1mm,T1/T1_brain_to_MNI.nii.gz
T1_linear,5.678574,1mm,T1/T1_brain_to_MNI_linear.nii.gz
tracts,0.412435,1mm,dMRI/autoptx_preproc/tractsNormSummed.nii.gz
vbm,3.555727,2mm,T1/T1_vbm/T1_GM_to_template_GM_mod.nii.gz
T2_nonlinear,5.238671,1mm,T2_FLAIR/T2_FLAIR_brain_to_MNI.nii.gz
swi,76.58003,1mm,SWI/T2star_to_MNI.nii.gz
rsfmri_0,20.728238,2mm,fMRI/rfMRI_25.dr/dr_stage2.nii.gz
rsfmri_1,24.217185,2mm,fMRI/rfMRI_25.dr/dr_stage2.nii.gz
rsfmri_2,22.798476,2mm,fMRI/rfMRI_25.dr/dr_stage2.nii.gz
rsfmri_3,13.635602,2mm,fMRI/rfMRI_25.dr/dr_stage2.nii.gz


In [69]:
# Example modality name ending in '_<integer>'.
modality_flag = 'rsfmri_0'

In [75]:
# Parse the integer after the last underscore (e.g. 'rsfmri_0' -> 0).
# Raises IndexError for a name without an underscore, and ValueError when the
# part after the last underscore is not an integer.
int(modality_flag.rsplit('_', 1)[1])

0

In [68]:
# A name with no underscore: rsplit returns a single-element list, so there is
# no element at index 1.
a = 'jacobian'
a.rsplit('_', 1)

['jacobian']