In [7]:
import matplotlib.pyplot as plt
import nibabel
import numpy as np
import pandas as pd
from pathlib import Path
from skimage.transform import rescale
from tqdm import tqdm
import os

In [31]:
import scipy
def compute_kl(img, brain, mask, _bin_heuristics = 'sturges'):
    size, _bins = np.histogram(img[brain.astype(bool)], bins = _bin_heuristics)
    bins = [(_bins[i]+_bins[i+1])/2 for i in range(len(_bins)-1)]

    size_healthy, bin_edges = np.histogram(img[brain.astype(bool)^mask.astype(bool)].reshape(-1), bins=_bins)
    size_tumor, _ = np.histogram(img[mask.astype(bool)].reshape(-1), bins = _bins)

    size_healthy = np.round(size_healthy/size_healthy.sum(), 5)
    size_tumor = np.round(size_tumor/size_tumor.sum(), 5)
    
    size_healthy = np.where(size_healthy>10e-6, size_healthy, 10e-6)
    size_tumor = np.where(size_tumor>10e-6, size_tumor, 10e-6)

    kl_dist = 0
    for h,t in zip(size_healthy, size_tumor):
        kl_dist += h * np.log2(h/t)

#     kl_dist = scipy.spatial.distance.jensenshannon(size_healthy, size_tumor)
        
            
    return kl_dist, size_healthy, size_tumor, _bins

In [32]:
def compute_emd(img, brain, mask):
    size, _bins = np.histogram(img[brain.astype(bool)], bins = 100)
    bins = [(_bins[i]+_bins[i+1])/2 for i in range(len(_bins)-1)]

    size_healthy, _ = np.histogram(img[brain.astype(bool)^mask.astype(bool)].reshape(-1), bins=_bins)
    size_tumor, _ = np.histogram(img[mask.astype(bool)].reshape(-1), bins=_bins -1 )

#     size_healthy = np.round(size_healthy/size_healthy.sum(), 5)
#     size_tumor = np.round(size_tumor/size_tumor.sum(), 5)
    
#     size_healthy = np.where(size_healthy>10e-6, size_healthy, 10e-6)
#     size_tumor = np.where(size_tumor>10e-6, size_tumor, 10e-6)

    emd = scipy.stats.wasserstein_distance(size_healthy, size_tumor)
            
    return emd, size_healthy, size_tumor, _bins

In [33]:
dataset = 'bgpd'
main_img = 'FLAIR.nii.gz'
label_name = 'mask_GTV_FLAIR.nii.gz'
mask_name = 'FLAIR_mask.nii.gz'

In [34]:
os.listdir('/anvar/public_datasets/preproc_study/schw/5_ss_shared/VS-SEG-239')

['T2.nii.gz', 'T1_mask.nii.gz', 'T1.nii.gz', 'T1_SEG.nii.gz']

In [35]:
root = Path('/anvar/public_datasets/preproc_study/{}/4a_resamp/'.format(dataset)) 

all_kl = []

for patient in tqdm(root.glob('*')):
    if patient.is_dir():
        img = nibabel.load(patient / main_img).get_fdata()
        mask = nibabel.load(patient / label_name).get_fdata()
        brain = nibabel.load('/anvar/public_datasets/preproc_study/{}/5_ss_shared/'.format(dataset) +
                             str(patient).split('/')[-1] +'/' + mask_name).get_fdata()
        
        kl, _, _, _ = compute_kl(img, brain, mask)
        all_kl.append(kl)

180it [03:02,  1.01s/it]


In [36]:
root2 = Path('/anvar/public_datasets/preproc_study/{}/4d_susan'.format(dataset))

all_kl_susan = []

for patient in tqdm(root2.glob('*')):
    if patient.is_dir():
        try:
            img = nibabel.load(patient / main_img).get_fdata()
            mask = nibabel.load(patient / label_name).get_fdata()
            brain = nibabel.load('/anvar/public_datasets/preproc_study/{}/5_ss_shared/'.format(dataset) +
                                 str(patient).split('/')[-1] +'/' + mask_name).get_fdata()


            kl, _, _, _ = compute_kl(img, brain, mask)
            all_kl_susan.append(kl)
        except:
            pass

181it [02:27,  1.23it/s]


In [37]:
root2 = Path('/anvar/public_datasets/preproc_study/{}/6_hist/6_hist_fold_0/'.format(dataset))

all_kl_hist = []

for patient in tqdm(root2.glob('*')):
    if patient.is_dir():
        try:
            img = nibabel.load(patient / main_img).get_fdata()
            mask = nibabel.load(patient / label_name).get_fdata()
            brain = nibabel.load('/anvar/public_datasets/preproc_study/{}/5_ss_shared/'.format(dataset) +
                                 str(patient).split('/')[-1] +'/' + mask_name).get_fdata()

            kl, _, _, _ = compute_kl(img, brain, mask)
            all_kl_hist.append(kl)
        except:
            pass

180it [04:05,  1.36s/it]


In [38]:
root2 = Path('/anvar/public_datasets/preproc_study/{}/4b_n4/'.format(dataset))

all_kl_n4 = []

for patient in tqdm(root2.glob('*')):
    if patient.is_dir():
        try:
            img = nibabel.load(patient / main_img).get_fdata()
            mask = nibabel.load(patient / label_name).get_fdata()
            brain = nibabel.load('/anvar/public_datasets/preproc_study/{}/5_ss_shared/'.format(dataset) +
                                 str(patient).split('/')[-1] +'/' + mask_name).get_fdata()

            kl, _, _, _ = compute_kl(img, brain, mask)
            all_kl_n4.append(kl)
        except:
            pass

181it [03:12,  1.07s/it]


In [39]:
root2 = Path('/anvar/public_datasets/preproc_study/{}/5_ss_shared/'.format(dataset))

all_kl_ss = []

for patient in tqdm(root2.glob('*')):
    if patient.is_dir():
        try:
            img = nibabel.load(patient / main_img).get_fdata()
            mask = nibabel.load(patient / label_name).get_fdata()
            brain = nibabel.load('/anvar/public_datasets/preproc_study/{}/5_ss_shared/'.format(dataset) +
                                 str(patient).split('/')[-1] +'/' + mask_name).get_fdata()

            kl, _, _, _ = compute_kl(img, brain, mask)
            all_kl_ss.append(kl)
        except:
            pass

181it [02:54,  1.04it/s]


In [40]:
from researchpy import ttest

def ttest_pair(df_1, df_2, name_1 = 'one', name_2 = 'two', correction = None):
    return ttest(pd.Series(df_1), pd.Series(df_2), 
#                  group1_name = name_1,
#                  group2_name= name_2, 
                 equal_variances=False, paired=True, correction = 8).iloc[4].values[1]

In [41]:
pivot = pd.DataFrame(columns = ['Mean(STD)', 'P-value'])
pivot.at[0,'Mean(STD)'] =  str(np.round(np.mean(all_kl),3)) + ' (' + str(np.round(np.std(all_kl),3)) +')'
pivot.at[1,'Mean(STD)'] =  str(np.round(np.mean(all_kl_n4),3)) + ' (' + str(np.round(np.std(all_kl_n4),3)) +')'
pivot.at[1,'P-value'] =  str(np.round(ttest_pair(all_kl, all_kl_n4),3)) 
pivot.at[2,'Mean(STD)'] =  str(np.round(np.mean(all_kl_susan),3)) + ' (' + str(np.round(np.std(all_kl_susan),3)) +')'
pivot.at[2,'P-value'] =  str(np.round(ttest_pair(all_kl, all_kl_susan),3)) 
pivot.at[3,'Mean(STD)'] =  str(np.round(np.mean(all_kl_hist),3)) + ' (' + str(np.round(np.std(all_kl_hist),3)) +')'
pivot.at[3,'P-value'] =  str(np.round(ttest_pair(all_kl, all_kl_hist),3)) 
pivot.at[4,'Mean(STD)'] =  str(np.round(np.mean(all_kl_ss),3)) + ' (' + str(np.round(np.std(all_kl_ss),3)) +')'
pivot.at[4,'P-value'] =  str(np.round(ttest_pair(all_kl, all_kl_ss),3)) 

In [42]:
pivot

Unnamed: 0,Mean(STD),P-value
0,1.64 (1.015),
1,1.702 (1.045),0.0
2,1.702 (1.045),0.0
3,1.65 (1.029),0.001
4,1.643 (1.014),0.0


In [265]:
kl/emd/KS test

SyntaxError: invalid syntax (3956564256.py, line 1)

In [182]:
all_kl

[13570.210000000001,
 13209.220000000001,
 13187.890000000005,
 14531.640000000001,
 13168.18,
 14109.49,
 11549.510000000006,
 14619.429999999998,
 14776.499999999998,
 15403.82,
 15739.830000000002,
 15223.89,
 13538.099999999999,
 13746.699999999999,
 15460.8,
 11525.560000000001,
 16231.199999999999,
 14974.020000000002,
 13617.250000000002,
 14471.310000000003,
 14468.23,
 13586.669999999998,
 14090.940000000002,
 14435.500000000004,
 14834.03,
 13015.83,
 14710.0,
 13108.100000000002,
 15334.560000000003,
 15908.599999999997,
 13222.829999999998,
 13405.380000000003,
 15075.230000000003,
 15038.449999999997,
 16893.159999999996,
 14780.880000000001,
 14911.709999999997,
 14892.369999999999,
 14239.639999999998,
 13701.269999999997,
 15278.859999999999,
 14905.3,
 12999.409999999998,
 15848.04,
 14020.740000000003,
 11967.37,
 13562.099999999999,
 14714.670000000004,
 12697.37,
 12346.05,
 12925.609999999997,
 14891.299999999997,
 15728.060000000003,
 16083.080000000004,
 13791.76

In [None]:
np.histogram(orig_tumor, bins = 100, normed =True)