In [1]:
import matplotlib.pyplot as plt
import nibabel
import numpy as np
import pandas as pd
from pathlib import Path
from skimage.transform import rescale
from tqdm import tqdm
import os
import researchpy
from glob import glob
import shutil

# LGG

#### In this notebook we compare the true volumes of the tumor label before and after skull-stripping.

In [2]:
list_sub = os.listdir('/mnt/public_data/preproc_study/gbm/1_reg_individual/')

In [33]:
gt_volumes = []
t1_volumes = []
t2_volumes = []
flair_volumes = []
ct1_volumes = []

t1_mask_sums = []
t2_mask_sums = []
flair_mask_sums = []
ct1_mask_sums = []



def get_volume_gbm(zooms, label):
    """Calculates true volume of BraTS mask"""
    y_wt, y_tc, y_et = label > 0, ((label == 1) + (label == 3)) > 0, label == 3
    mask_sum = np.sum(y_wt)
    volume = mask_sum * zooms[0] * zooms[1] * zooms[2]
    return volume/1000


def get_volume_gbm_masked(zooms, label, mask):
    """Calculates true volume of BraTS mask"""
    y_wt, y_tc, y_et = label > 0, ((label == 1) + (label == 3)) > 0, label == 3
    mask = mask[y_wt]
    mask_sum = np.sum(mask)
    volume = mask_sum * zooms[0] * zooms[1] * zooms[2]
    return volume/1000


for patient in tqdm(glob('/mnt/public_data/preproc_study/gbm/1_reg_individual/TCGA*')):
    """ For all directories in the GBM dataset calculating the tumor volumes after skull stripping.
    In each modality separately."""
    if Path(patient).is_dir():

        label = nibabel.load(str(patient) + '/CT1_SEG.nii.gz').get_fdata()
        label_zooms = nibabel.load(str(patient) + '/CT1_SEG.nii.gz').header.get_zooms()
        gt_volumes.append(get_volume_gbm(label_zooms, label))
        
        t1_mask = nibabel.load(str(patient) + '/T1_mask.nii.gz').get_fdata()
        t1_mask_zooms = nibabel.load(str(patient) + '/T1_mask.nii.gz').header.get_zooms()
        assert t1_mask_zooms == label_zooms
        t1_volumes.append(get_volume_gbm_masked(label_zooms, label, t1_mask))
        t1_mask_sums.append(t1_mask.sum().sum())
        
        ct1_mask = nibabel.load(str(patient) + '/CT1_mask.nii.gz').get_fdata()
        ct1_mask_zooms = nibabel.load(str(patient) + '/CT1_mask.nii.gz').header.get_zooms()
        assert ct1_mask_zooms == label_zooms
        ct1_volumes.append(get_volume_gbm_masked(label_zooms, label, ct1_mask))
        ct1_mask_sums.append(ct1_mask.sum().sum())
        
        flair_mask = nibabel.load(str(patient) + '/FLAIR_mask.nii.gz').get_fdata()
        flair_mask_zooms = nibabel.load(str(patient) + '/FLAIR_mask.nii.gz').header.get_zooms()
        assert flair_mask_zooms == label_zooms
        flair_volumes.append(get_volume_gbm_masked(label_zooms, label, flair_mask))
        flair_mask_sums.append(flair_mask.sum().sum())
        
        t2_mask = nibabel.load(str(patient) + '/T2_mask.nii.gz').get_fdata()
        t2_mask_zooms = nibabel.load(str(patient) + '/T2_mask.nii.gz').header.get_zooms()
        assert t2_mask_zooms == label_zooms
        t2_volumes.append(get_volume_gbm_masked(label_zooms, label, t2_mask))
        t2_mask_sums.append(t2_mask.sum().sum())

100%|██████████| 102/102 [01:24<00:00,  1.21it/s]


In [4]:
gt_errors = abs(np.array(gt_volumes) - np.array(ct1_volumes))/np.array(gt_volumes)
flair_errors = abs(np.array(gt_volumes) - np.array(flair_volumes))/np.array(gt_volumes)
t1_errors = abs(np.array(gt_volumes) - np.array(t1_volumes))/np.array(gt_volumes)
t2_errors = abs(np.array(gt_volumes) - np.array(t2_volumes))/np.array(gt_volumes)

In [5]:
np.mean(gt_errors), np.mean(flair_errors), np.mean(t1_errors), np.mean(t2_errors)

(0.0035365294817691086,
 0.002749536803530542,
 0.005887912752872096,
 0.002644775665931629)

In [6]:
from researchpy import ttest

def ttest_pair(df_1, df_2, name_1 = 'one', name_2 = 'two', correction = None):
    return ttest(pd.Series(df_1), pd.Series(df_2), 
#                  group1_name = name_1,
#                  group2_name= name_2, 
                 equal_variances=False, paired=True,)[1].iloc[4].values[1]

In [7]:
ttest_pair(gt_errors, t1_errors), ttest_pair(gt_errors, t2_errors), ttest_pair(gt_errors, flair_errors)

  groups = group1.append(group2, ignore_index= True)
  groups = group1.append(group2, ignore_index= True)
  groups = group1.append(group2, ignore_index= True)


(0.114, 0.0007, 0.0)

In [17]:
# the maximum difference in the label cutting 1%
np.array(ct1_volumes)[t1_errors  == t1_errors.max()], t1_errors.max()

(array([159.7130227]), 0.15917026083384678)

In [10]:
# means of the label cutting
np.array(flair_volumes).mean(), np.array(flair_volumes).std(), np.array(ct1_volumes).mean(), np.array(ct1_volumes).std()

(107.55459191161943, 52.184958038968254, 107.46187568933186, 52.12487969799859)

##### The conclusion:

There is a statisticall difference on the errors with T1C and T1 and T2 image skull-stripping result, yet the difference is under 1%, so neglectable

In [27]:
t1_errors_mask_sums = abs(np.array(ct1_mask_sums) - np.array(t1_mask_sums))/np.array(ct1_mask_sums)*100
flair_errors_mask_sums = abs(np.array(ct1_mask_sums) - np.array(flair_mask_sums))/np.array(ct1_mask_sums)*100
t2_errors_mask_sums = abs(np.array(ct1_mask_sums) - np.array(t2_mask_sums))/np.array(ct1_mask_sums)*100

In [29]:
ttest_pair(ct1_mask_sums, t1_mask_sums), ttest_pair(ct1_mask_sums, t2_mask_sums), ttest_pair(ct1_mask_sums, flair_mask_sums)

  groups = group1.append(group2, ignore_index= True)
  groups = group1.append(group2, ignore_index= True)
  groups = group1.append(group2, ignore_index= True)


(0.0, 0.0, 0.0755)

In [30]:
t1_errors_mask_sums.mean(), flair_errors_mask_sums.mean(), t2_errors_mask_sums.mean()

(2.423496679548106, 2.3841133184755217, 3.248581323500852)

In [34]:
t1_errors_mask_sums.max()

25.987988881348368

In [31]:
t1_errors_mask_sums.std(), flair_errors_mask_sums.std(), t2_errors_mask_sums.std()

(3.3624669881894618, 1.9866390388664343, 2.528345812048424)

#### Conclusion:

Masks generated on aligned images differ in 3% in general, and maximum in 

# BGPD

In [86]:
list_sub = os.listdir('/mnt/public_data/preproc_study/bgpd/1_reg_individual/')

In [88]:
list(glob('/mnt/public_data/preproc_study/bgpd/1_reg_individual/1043_18_4/*'))

['/mnt/public_data/preproc_study/bgpd/1_reg_individual/1043_18_4/T1.nii.gz',
 '/mnt/public_data/preproc_study/bgpd/1_reg_individual/1043_18_4/T2_mask.nii.gz',
 '/mnt/public_data/preproc_study/bgpd/1_reg_individual/1043_18_4/CT1_mask.nii.gz',
 '/mnt/public_data/preproc_study/bgpd/1_reg_individual/1043_18_4/T1_mask.nii.gz',
 '/mnt/public_data/preproc_study/bgpd/1_reg_individual/1043_18_4/FLAIR_mask.nii.gz',
 '/mnt/public_data/preproc_study/bgpd/1_reg_individual/1043_18_4/CT1.nii.gz',
 '/mnt/public_data/preproc_study/bgpd/1_reg_individual/1043_18_4/mask_GTV_FLAIR.nii.gz',
 '/mnt/public_data/preproc_study/bgpd/1_reg_individual/1043_18_4/T2.nii.gz',
 '/mnt/public_data/preproc_study/bgpd/1_reg_individual/1043_18_4/FLAIR.nii.gz']

In [35]:
gt_volumes = []
t1_volumes = []
t2_volumes = []
flair_volumes = []
ct1_volumes = []


def get_volume(zooms, mask):
    """Calculates true volume of binary mask """
    mask = mask.astype(bool)*1
    mask_sum = mask.sum().sum()
    volume = mask_sum * zooms[0] * zooms[1] * zooms[2]
    return volume/1000

def get_volume_masked(zooms, label, mask):
    """Calculates true volume of GTV mask"""
    mask = mask[label.astype(bool)]
    mask_sum = np.sum(mask)
    volume = mask_sum * zooms[0] * zooms[1] * zooms[2]
    return volume/1000


for patient in tqdm(glob('/mnt/public_data/preproc_study/bgpd/1_reg_individual/*')):
    """ For all directories in the BGPD dataset calculating the tumor volumes after skull stripping.
    In each modality separately."""
    if Path(patient).is_dir():

        label = nibabel.load(str(patient) + '/mask_GTV_FLAIR.nii.gz').get_fdata()
        label_zooms = nibabel.load(str(patient) + '/mask_GTV_FLAIR.nii.gz').header.get_zooms()
        gt_volumes.append(get_volume(label_zooms, label))
        
        t1_mask = nibabel.load(str(patient) + '/T1_mask.nii.gz').get_fdata()
        t1_mask_zooms = nibabel.load(str(patient) + '/T1_mask.nii.gz').header.get_zooms()
        assert t1_mask_zooms == label_zooms
        t1_volumes.append(get_volume_masked(label_zooms, label, t1_mask))
        
        ct1_mask = nibabel.load(str(patient) + '/CT1_mask.nii.gz').get_fdata()
        ct1_mask_zooms = nibabel.load(str(patient) + '/CT1_mask.nii.gz').header.get_zooms()
        assert ct1_mask_zooms == label_zooms
        ct1_volumes.append(get_volume_masked(label_zooms, label, ct1_mask))
        
        flair_mask = nibabel.load(str(patient) + '/FLAIR_mask.nii.gz').get_fdata()
        flair_mask_zooms = nibabel.load(str(patient) + '/FLAIR_mask.nii.gz').header.get_zooms()
        assert flair_mask_zooms == label_zooms
        flair_volumes.append(get_volume_masked(label_zooms, label, flair_mask))
        
        t2_mask = nibabel.load(str(patient) + '/T2_mask.nii.gz').get_fdata()
        t2_mask_zooms = nibabel.load(str(patient) + '/T2_mask.nii.gz').header.get_zooms()
        assert t2_mask_zooms == label_zooms
        t2_volumes.append(get_volume_masked(label_zooms, label, t2_mask))

100%|██████████| 180/180 [05:51<00:00,  1.95s/it]


In [105]:
gt_errors = abs(np.array(gt_volumes[:90])/10 - np.array(ct1_volumes[:90]))/np.array(gt_volumes[:90])/10
flair_errors = abs(np.array(gt_volumes[:90])/10 - np.array(flair_volumes[:90]))/np.array(gt_volumes[:90])/10
t1_errors = abs(np.array(gt_volumes[:90])/10 - np.array(t1_volumes[:90]))/np.array(gt_volumes[:90])/10
t2_errors = abs(np.array(gt_volumes[:90])/10 - np.array(t2_volumes))/np.array(gt_volumes[:90])/10

In [106]:
np.mean(gt_errors), np.mean(flair_errors), np.mean(t1_errors), np.mean(t2_errors)

(5.051396260997581e-05,
 1.9232903004722573e-05,
 3.789241141235157e-05,
 2.876650289588212e-05)

In [99]:
ttest_pair(gt_errors[:90], t1_errors[:90]), ttest_pair(gt_errors[:90], t2_errors), ttest_pair(gt_errors, flair_errors)

  groups = group1.append(group2, ignore_index= True)
  groups = group1.append(group2, ignore_index= True)
  groups = group1.append(group2, ignore_index= True)


(0.0961, 0.1387, 0.0194)