In [1]:
import matplotlib.pyplot as plt
import nibabel
import numpy as np
import pandas as pd
from pathlib import Path
from skimage.transform import rescale
from tqdm import tqdm
import os

In [16]:
import scipy
def _floored_probabilities(counts, floor=10e-6, decimals=5):
    """Convert histogram counts to probabilities that are never below ``floor``.

    Probabilities are rounded to ``decimals`` places first; every value that is
    not strictly greater than ``floor`` is then replaced by ``floor`` so that no
    bin has zero mass. If ``counts`` is all zeros (empty region) every bin is set
    to ``floor``.
    """
    total = counts.sum()
    if total == 0:
        # Nothing to normalise; avoids a 0/0 division and its RuntimeWarning.
        return np.full(counts.shape, floor, dtype=float)
    probabilities = np.round(counts / total, decimals)
    return np.where(probabilities > floor, probabilities, floor)


def compute_kl(img, brain, mask, _bin_heuristics = 'sturges'):
    """Jensen-Shannon distance between healthy and tumor intensity histograms.

    Despite the name, the returned value is
    ``scipy.spatial.distance.jensenshannon`` of the two histograms, not a
    KL divergence.

    Parameters
    ----------
    img : ndarray
        Image intensities.
    brain : ndarray
        Brain mask, same shape as ``img``; non-zero entries are brain.
    mask : ndarray
        Tumor mask, same shape as ``img``; non-zero entries are tumor.
    _bin_heuristics : str or int or sequence
        Passed as ``bins`` to ``np.histogram`` on the brain intensities; the
        resulting edges are shared by both region histograms.

    Returns
    -------
    kl_dist : float
        Jensen-Shannon distance between the two floored histograms.
    size_healthy, size_tumor : ndarray
        Per-bin probabilities (rounded to 5 decimals, floored at 10e-6).
    _bins : ndarray
        Bin edges derived from the brain intensities.
    """
    # Local import: a bare ``import scipy`` does not guarantee the submodule is loaded.
    from scipy.spatial.distance import jensenshannon

    brain_voxels = brain.astype(bool)
    tumor_voxels = mask.astype(bool)
    # Healthy tissue is brain that is NOT tumor. XOR would also pick up tumor
    # voxels that fall outside the brain mask and count them as healthy.
    healthy_voxels = brain_voxels & ~tumor_voxels

    # Bin edges come from the whole brain so both histograms are comparable.
    _, _bins = np.histogram(img[brain_voxels], bins=_bin_heuristics)

    healthy_counts, _ = np.histogram(img[healthy_voxels], bins=_bins)
    tumor_counts, _ = np.histogram(img[tumor_voxels], bins=_bins)

    size_healthy = _floored_probabilities(healthy_counts)
    size_tumor = _floored_probabilities(tumor_counts)

    kl_dist = jensenshannon(size_healthy, size_tumor)

    return kl_dist, size_healthy, size_tumor, _bins

In [17]:
def compute_emd(img, brain, mask, n_bins=100):
    """Earth mover's distance between healthy and tumor intensity histograms.

    Both regions are histogrammed on the same ``n_bins`` equal-width bins that
    span the brain intensities. The distance is computed over the bin centres,
    with the bin counts used as weights.

    Parameters
    ----------
    img : ndarray
        Image intensities.
    brain : ndarray
        Brain mask, same shape as ``img``; non-zero entries are brain.
    mask : ndarray
        Tumor mask, same shape as ``img``; non-zero entries are tumor.
    n_bins : int, optional
        Number of histogram bins (default 100).

    Returns
    -------
    emd : float
        Wasserstein-1 distance in intensity units; NaN if either region has no
        voxel inside the bin range.
    size_healthy, size_tumor : ndarray
        Raw per-bin voxel counts of each region.
    _bins : ndarray
        Bin edges derived from the brain intensities.
    """
    # Local import: a bare ``import scipy`` does not guarantee ``scipy.stats`` is loaded.
    from scipy.stats import wasserstein_distance

    brain_voxels = brain.astype(bool)
    tumor_voxels = mask.astype(bool)
    # Healthy tissue is brain that is NOT tumor (XOR would also include tumor
    # voxels lying outside the brain mask).
    healthy_voxels = brain_voxels & ~tumor_voxels

    _, _bins = np.histogram(img[brain_voxels], bins=n_bins)
    centers = (_bins[:-1] + _bins[1:]) / 2

    # Both histograms must share the very same edges to be comparable.
    size_healthy, _ = np.histogram(img[healthy_voxels], bins=_bins)
    size_tumor, _ = np.histogram(img[tumor_voxels], bins=_bins)

    if size_healthy.sum() == 0 or size_tumor.sum() == 0:
        # wasserstein_distance rejects all-zero weights; there is no distribution to compare.
        emd = float('nan')
    else:
        # The distributions live on the intensity axis: bin centres are the
        # values and the counts are their weights.
        emd = wasserstein_distance(centers, centers,
                                   u_weights=size_healthy, v_weights=size_tumor)

    return emd, size_healthy, size_tumor, _bins

In [18]:
# Dataset folder name under /anvar/public_datasets/preproc_study/.
dataset = 'bgpd'
# File names looked up inside every patient directory:
main_img = 'FLAIR.nii.gz'  # image volume whose intensities are analysed
label_name = 'mask_GTV_FLAIR.nii.gz'  # label volume, passed as `mask` to compute_kl
mask_name = 'FLAIR_mask.nii.gz'  # brain mask, always read from the 5_ss_shared step

In [19]:
# Ad-hoc look at the files of one patient folder of the 'schw' dataset;
# the result is only displayed and is not used by the cells below.
os.listdir('/anvar/public_datasets/preproc_study/schw/5_ss_shared/VS-SEG-239')

['T2.nii.gz', 'T1_mask.nii.gz', 'T1.nii.gz', 'T1_SEG.nii.gz']

In [20]:
# Baseline: healthy-vs-tumor distance on the resampled images (step 4a_resamp).
root = Path('/anvar/public_datasets/preproc_study/{}/4a_resamp/'.format(dataset))
# The brain mask is always taken from the 5_ss_shared step.
brain_root = Path('/anvar/public_datasets/preproc_study/{}/5_ss_shared/'.format(dataset))

all_kl = []

# Iterate in sorted order: glob order is filesystem-dependent, and this list is
# later compared position-by-position with the other steps in a paired t-test.
for patient in tqdm(sorted(root.glob('*'))):
    if not patient.is_dir():
        continue
    img = nibabel.load(patient / main_img).get_fdata()
    mask = nibabel.load(patient / label_name).get_fdata()
    brain = nibabel.load(brain_root / patient.name / mask_name).get_fdata()

    kl, _, _, _ = compute_kl(img, brain, mask)
    all_kl.append(kl)

180it [02:35,  1.16it/s]


In [21]:
# Healthy-vs-tumor distance after SUSAN denoising (step 4d_susan).
root2 = Path('/anvar/public_datasets/preproc_study/{}/4d_susan'.format(dataset))
# The brain mask is always taken from the 5_ss_shared step.
brain_root = Path('/anvar/public_datasets/preproc_study/{}/5_ss_shared/'.format(dataset))

all_kl_susan = []

# Iterate in sorted order: glob order is filesystem-dependent, and this list is
# later compared position-by-position with the baseline in a paired t-test.
for patient in tqdm(sorted(root2.glob('*'))):
    if not patient.is_dir():
        continue
    try:
        img = nibabel.load(patient / main_img).get_fdata()
        mask = nibabel.load(patient / label_name).get_fdata()
        brain = nibabel.load(brain_root / patient.name / mask_name).get_fdata()

        kl, _, _, _ = compute_kl(img, brain, mask)
    except Exception as err:
        # Still best-effort, but say which patient was dropped and why.
        # NOTE: a skipped patient shifts later entries, so this list no longer
        # lines up with the baseline.
        tqdm.write('skipping {}: {!r}'.format(patient.name, err))
        continue
    all_kl_susan.append(kl)

181it [02:12,  1.36it/s]


In [22]:
# Healthy-vs-tumor distance after histogram processing (step 6_hist, fold 0).
root2 = Path('/anvar/public_datasets/preproc_study/{}/6_hist/6_hist_fold_0/'.format(dataset))
# The brain mask is always taken from the 5_ss_shared step.
brain_root = Path('/anvar/public_datasets/preproc_study/{}/5_ss_shared/'.format(dataset))

all_kl_hist = []

# Iterate in sorted order: glob order is filesystem-dependent, and this list is
# later compared position-by-position with the baseline in a paired t-test.
for patient in tqdm(sorted(root2.glob('*'))):
    if not patient.is_dir():
        continue
    try:
        img = nibabel.load(patient / main_img).get_fdata()
        mask = nibabel.load(patient / label_name).get_fdata()
        brain = nibabel.load(brain_root / patient.name / mask_name).get_fdata()

        kl, _, _, _ = compute_kl(img, brain, mask)
    except Exception as err:
        # Still best-effort, but say which patient was dropped and why.
        # NOTE: a skipped patient shifts later entries, so this list no longer
        # lines up with the baseline.
        tqdm.write('skipping {}: {!r}'.format(patient.name, err))
        continue
    all_kl_hist.append(kl)

180it [03:12,  1.07s/it]


In [23]:
# Healthy-vs-tumor distance after N4 correction (step 4b_n4).
root2 = Path('/anvar/public_datasets/preproc_study/{}/4b_n4/'.format(dataset))
# The brain mask is always taken from the 5_ss_shared step.
brain_root = Path('/anvar/public_datasets/preproc_study/{}/5_ss_shared/'.format(dataset))

all_kl_n4 = []

# Iterate in sorted order: glob order is filesystem-dependent, and this list is
# later compared position-by-position with the baseline in a paired t-test.
for patient in tqdm(sorted(root2.glob('*'))):
    if not patient.is_dir():
        continue
    try:
        img = nibabel.load(patient / main_img).get_fdata()
        mask = nibabel.load(patient / label_name).get_fdata()
        brain = nibabel.load(brain_root / patient.name / mask_name).get_fdata()

        kl, _, _, _ = compute_kl(img, brain, mask)
    except Exception as err:
        # Still best-effort, but say which patient was dropped and why.
        # NOTE: a skipped patient shifts later entries, so this list no longer
        # lines up with the baseline.
        tqdm.write('skipping {}: {!r}'.format(patient.name, err))
        continue
    all_kl_n4.append(kl)

180it [02:32,  1.18it/s]


In [24]:
# Healthy-vs-tumor distance on the skull-stripped images (step 5_ss_shared).
root2 = Path('/anvar/public_datasets/preproc_study/{}/5_ss_shared/'.format(dataset))
# The brain mask comes from this same step, so both paths share one root.
brain_root = root2

all_kl_ss = []

# Iterate in sorted order: glob order is filesystem-dependent, and this list is
# later compared position-by-position with the baseline in a paired t-test.
for patient in tqdm(sorted(root2.glob('*'))):
    if not patient.is_dir():
        continue
    try:
        img = nibabel.load(patient / main_img).get_fdata()
        mask = nibabel.load(patient / label_name).get_fdata()
        brain = nibabel.load(brain_root / patient.name / mask_name).get_fdata()

        kl, _, _, _ = compute_kl(img, brain, mask)
    except Exception as err:
        # Still best-effort, but say which patient was dropped and why.
        # NOTE: a skipped patient shifts later entries, so this list no longer
        # lines up with the baseline.
        tqdm.write('skipping {}: {!r}'.format(patient.name, err))
        continue
    all_kl_ss.append(kl)

181it [02:11,  1.37it/s]


In [25]:
from researchpy import ttest

def ttest_pair(df_1, df_2, name_1 = 'one', name_2 = 'two', correction = None):
    """Run researchpy's paired t-test on two sequences and return one statistic.

    Both inputs are wrapped in ``pd.Series`` and passed to ``ttest`` with
    ``equal_variances=False`` and ``paired=True``. The returned value is the
    second cell of the row at position 4 of the second object that ``ttest``
    returns.

    NOTE(review): presumably that row holds a p-value; confirm which one
    (two-sided or one-sided) for the installed researchpy version.

    ``name_1``, ``name_2`` and ``correction`` are accepted but not used.
    """
    first_sample = pd.Series(df_1)
    second_sample = pd.Series(df_2)
    outcome = ttest(first_sample, second_sample,
                    equal_variances=False, paired=True)
    results_table = outcome[1]
    selected_row = results_table.iloc[4]
    return selected_row.values[1]

In [26]:
# Summary table, one row per preprocessing step:
# 0 = 4a_resamp (baseline), 1 = 4b_n4, 2 = 4d_susan, 3 = 6_hist, 4 = 5_ss_shared.
# Every row after the first is tested against the baseline with ttest_pair.
pivot = pd.DataFrame(columns = ['Mean(STD)', 'P-value'])
distances_by_row = [all_kl, all_kl_n4, all_kl_susan, all_kl_hist, all_kl_ss]
for row, distances in enumerate(distances_by_row):
    mean_txt = str(np.round(np.mean(distances), 3))
    std_txt = str(np.round(np.std(distances), 3))
    pivot.at[row, 'Mean(STD)'] = mean_txt + ' (' + std_txt + ')'
    if row > 0:
        # The baseline row has nothing to be compared with, so its P-value stays empty.
        pivot.at[row, 'P-value'] = str(np.round(ttest_pair(all_kl, distances), 3))

  groups = group1.append(group2, ignore_index= True)
  groups = group1.append(group2, ignore_index= True)
  groups = group1.append(group2, ignore_index= True)
  groups = group1.append(group2, ignore_index= True)


In [27]:
# Jensen-Shannon distance between healthy and tumor intensity histograms.
# Rows: 0 = 4a_resamp (baseline), 1 = 4b_n4, 2 = 4d_susan, 3 = 6_hist (fold 0), 4 = 5_ss_shared.
pivot

Unnamed: 0,Mean(STD),P-value
0,0.477 (0.129),
1,0.415 (0.12),0.0
2,0.484 (0.129),0.0
3,0.478 (0.13),0.0
4,0.478 (0.129),0.0


In [15]:
# KL divergence.
# NOTE(review): this cell's execution count (15) is lower than that of the cells
# above, so the table shown is presumably left over from an earlier run in which
# the metric was a KL divergence; re-running it now would show the current `pivot`.
pivot

Unnamed: 0,Mean(STD),P-value
0,1.64 (1.015),
1,1.145 (0.723),0.0
2,1.702 (1.045),0.0
3,1.65 (1.029),0.001
4,1.643 (1.014),0.0


In [30]:
# Collect the voxel-grid shape of every original (unprocessed) image.
root2 = Path('/anvar/public_datasets/preproc_study/{}/orig'.format(dataset))

shapes = []
for patient in tqdm(root2.glob('*')):
    if patient.is_dir():
        # The shape is available on the loaded image object itself, so the
        # voxel data does not have to be read with get_fdata().
        shapes.append(nibabel.load(patient / main_img).shape)

180it [01:04,  2.78it/s]


In [31]:
# How many original images have each distinct shape.
pd.Series(shapes).value_counts()

(512, 512, 240)    20
(512, 512, 27)     15
(512, 512, 24)     15
(512, 512, 25)      8
(512, 512, 272)     6
                   ..
(768, 768, 25)      1
(768, 768, 38)      1
(250, 384, 27)      1
(280, 320, 27)      1
(288, 320, 30)      1
Length: 73, dtype: int64