In [1]:
from IPython.core.display import display, HTML
display(HTML("<style>.container { width:100% !important; }</style>"))

!nvidia-smi

Tue Feb  8 15:43:58 2022       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 470.42.01    Driver Version: 470.42.01    CUDA Version: 11.4     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla P100-PCIE...  Off  | 00000000:00:06.0 Off |                    0 |
| N/A   29C    P0    31W / 250W |   1834MiB / 12198MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
|   1  Tesla P100-PCIE...  Off  | 00000000:00:07.0 Off |                    0 |
| N/A   30C    P0    25W / 250W |      2MiB / 12198MiB |      0%      Default |
|       

# DataLoader

In [2]:
# import os, sys
# dir2 = os.path.abspath('')
# dir1 = os.path.dirname(dir2)
# if not dir1 in sys.path: sys.path.append(dir1)
    
import os
import h5py
import numpy as np
import pandas as pd
import nibabel as nib
import matplotlib.pyplot as plt

from glob import glob
from tqdm import tqdm

%matplotlib inline

# MONAI testing

In [5]:
from monai.data import CacheDataset, ZipDataset, DataLoader, Dataset, decollate_batch
from tqdm import tqdm
import monai.transforms as tf


# Dictionary-based preprocessing steps, applied in list order to each sample
# that carries an 'image' and a 'label' entry.
preprocessing_steps = [
    # Read both files through the nibabel-backed reader.
    tf.LoadImaged(reader="NibabelReader", keys=['image', 'label']),
    # tf.Transposed(keys=['image', 'label'], indices=(2, 0, 1)),
    # Threshold the label into discrete values.
    tf.AsDiscreted(keys=['label'], threshold_values=True),
    tf.ToNumpyd(keys=['image', 'label']),
    # Normalise the image intensities separately for every channel.
    tf.NormalizeIntensityd(keys=['image'], channel_wise=True),
    tf.ToTensord(keys=['image', 'label']),
    # Augmentation here
]

train_transforms = tf.Compose(preprocessing_steps)

# Root of the per-slice NIfTI export; slices are read from its image/ and label/ subfolders.
# save_dir = "/cluster/home/kimsa/data/BraTs2020/BraTS2020_training_data/content/data_monai"
root_dir = "/cluster/projects/mcintoshgroup/BraTs2020/data_monai/"

# Number of volumes assigned to each split.
num_train = 260  # 70%, 40300 slices (volume 1 ~ 260)
num_valid = 40 # 10% 6200 slices (volume 261 ~ 300)
num_test = 69 # 20% 10695 slices (volume 301 ~ 369)


def get_volume_name(x):
    """Return the part of the file name that precedes "_slice"."""
    return os.path.basename(x).split("_slice")[0]


def sort_key(x):
    """Return the integer after the last underscore of a volume name."""
    return int(x.split('_')[-1])


def image_sort_key(x):
    """Return the integer slice index encoded after "_slice_" in a ".nii.gz" path."""
    slice_token = os.path.basename(x).split("_slice_")[-1]
    return int(slice_token.replace(".nii.gz", ""))


image_list = sorted(glob(os.path.join(root_dir, "image", "*.nii.gz")))
label_list = sorted(glob(os.path.join(root_dir, "label", "*.nii.gz")))

# Volume names ordered numerically rather than lexicographically.
unique_volumes = sorted(np.unique([get_volume_name(x) for x in image_list]), key=sort_key)

# Consecutive, non-overlapping slices of the ordered volume list.
valid_end = num_train + num_valid
train_subjects = unique_volumes[:num_train]
valid_subjects = unique_volumes[num_train:valid_end]
test_subjects = unique_volumes[valid_end:]
# Per-volume intensity statistics for one split.
target_subjects = test_subjects

# Index the slice paths by volume name in a single pass so the full file list
# is not rescanned for every subject.
slices_by_volume = {}
for image_path in image_list:
    slices_by_volume.setdefault(get_volume_name(image_path), []).append(image_path)

# Reducing over axes (0, 2, 3) of a (slices, 4, 240, 240) array leaves one
# value per entry of axis 1 (the four channels).
reduce_axes = (0, 2, 3)

# total_stats[subject, channel, stat]; stat order is
# max, min, mean, std, non-zero mean, non-zero std.
total_stats = np.zeros((len(target_subjects), 4, 6))

# The context manager closes the progress bar even if a volume fails to load;
# unclosed bars pile up as nested bars in the cell output.
with tqdm(total=len(target_subjects)) as pbar:
    for target_sub_idx, target_subject in enumerate(target_subjects):
        subject_image_list = sorted(slices_by_volume.get(target_subject, []), key=image_sort_key)

        # Stack the slices of this volume in slice order.
        subject_volume = np.zeros((len(subject_image_list), 4, 240, 240))
        for img_idx, image in enumerate(subject_image_list):
            subject_volume[img_idx] = nib.load(image).get_fdata()

        true_max = subject_volume.max(reduce_axes)
        true_min = subject_volume.min(reduce_axes)
        true_mean = subject_volume.mean(reduce_axes)
        true_std = subject_volume.std(reduce_axes)

        # Replace exact zeros with NaN once so the nan-aware reductions ignore
        # them; the masked copy is shared by both statistics.
        nonzero_volume = np.where(subject_volume == 0, np.nan, subject_volume)
        nonzero_mean = np.nanmean(nonzero_volume, reduce_axes)
        nonzero_std = np.nanstd(nonzero_volume, reduce_axes)

        # Each statistic has shape (4,); stacking on axis 1 yields (4, 6).
        total_stats[target_sub_idx] = np.stack(
            [true_max, true_min, true_mean, true_std, nonzero_mean, nonzero_std],
            axis=1,
        )
        pbar.update(1)

# Stats: average every statistic over the subjects -> shape (4, 6).
total_mean = total_stats.mean(0)




## Min-Max Stats

In [4]:
# Export the per-subject max/min of every channel to one Excel sheet per modality.
excel_save_path = "./asset/min_max_stats_test.xlsx"
# NOTE(review): assumes channel i of the volumes is mod_names[i] -- confirm
# against the channel order of the source data.
mod_names = ["T1", "T1Gd", "T2", "T2-FLAIR"]
mod_columns = ["Max", "Min"]

# ExcelWriter does not create missing parent folders, so make sure the
# output directory exists before writing.
os.makedirs(os.path.dirname(excel_save_path), exist_ok=True)

# Columns 0 and 1 of the last axis of total_stats hold max and min.
mod_dfs = []
for mod_idx, mod_name in enumerate(mod_names):
    mod_min_max = total_stats[:, mod_idx, :2]
    mod_dfs.append(pd.DataFrame(data=mod_min_max, index=target_subjects, columns=mod_columns))

with pd.ExcelWriter(excel_save_path) as writer:
    for mod_name, mod_df in zip(mod_names, mod_dfs):
        mod_df.to_excel(writer, sheet_name=mod_name)

## Mean-Std Stats

In [79]:
# Export the subject-averaged statistics (one row per modality) to Excel.
# NOTE(review): assumes channel i of the volumes is mod_names[i] -- confirm
# against the channel order of the source data.
mod_names = ["T1", "T1Gd", "T2", "T2-FLAIR"]
stat_names = ["Average_of_Max", "Average_of_Min", "True_Mean", "True_Std", "Non_zero_Mean", "Non_zero_Std"]
mean_std_save_path = "./asset/mean_std_stats.xlsx"

# to_excel does not create missing parent folders, so make sure the output
# directory exists before writing.
os.makedirs(os.path.dirname(mean_std_save_path), exist_ok=True)

# total_mean is total_stats averaged over subjects, so every column here is the
# mean of a per-subject statistic, not a statistic pooled over all voxels.
stat_df = pd.DataFrame(data=total_mean, columns=stat_names, index=mod_names)
stat_df.to_excel(mean_std_save_path)

## Data conversion from HDF5 to NIfTI images (for MONAI)

In [51]:
import os
import h5py
import numpy as np
import pandas as pd
import nibabel as nib
import matplotlib.pyplot as plt

from glob import glob
from tqdm import tqdm

# Folder holding the source .h5 slices, and the destination root for the NIfTI export.
root_dir = "/cluster/home/kimsa/data/BraTs2020/BraTS2020_training_data/content/data"
save_dir = "/cluster/home/kimsa/data/BraTs2020/BraTS2020_training_data/content/data_monai"
# save_dir = "/cluster/projects/mcintoshgroup/BraTs2020/data_monai"

# Create the destination root together with its image/ and label/ subfolders.
for out_dir in (save_dir, os.path.join(save_dir, "image"), os.path.join(save_dir, "label")):
    os.makedirs(out_dir, exist_ok=True)

brats_files = glob(os.path.join(root_dir, "*.h5"))

# Bookkeeping filled in by the conversion loop: files with unexpected array
# shapes, and the per-file save results.
shape_outlier = []
status = {}
pbar = tqdm(total=len(brats_files))


def save_nii(nii_arr, path):
    """Write ``nii_arr`` to ``path`` as a NIfTI-1 image.

    The image is built with a 4x4 identity matrix as its affine.

    Args:
        nii_arr: Array holding the image data.
        path: Destination file path.

    Returns:
        bool: True if ``path`` exists on disk after the save call.
    """
    identity_affine = np.eye(4)
    nifti_image = nib.Nifti1Image(nii_arr, identity_affine)
    nib.save(nifti_image, path)
    file_written = os.path.exists(path)
    return file_written
    
if __name__ == "__main__":

    # Convert every .h5 file into one image NIfTI and one label NIfTI.
    try:
        for file_idx, file in enumerate(brats_files):
            # Open read-only and close the handle as soon as both datasets are
            # in memory; otherwise one open file leaks per iteration.
            with h5py.File(file, "r") as h5_obj:
                image_arr = h5_obj["image"][()]
                label_arr = h5_obj["mask"][()]

            # Move the last axis to the front, e.g. (240, 240, 4) -> (4, 240, 240).
            image_arr = np.transpose(image_arr, (2, 0, 1))
            label_arr = np.transpose(label_arr, (2, 0, 1))

            file_name = os.path.basename(file).replace(".h5", ".nii.gz")
            img_save_path = os.path.join(save_dir, "image", file_name)
            lbl_save_path = os.path.join(save_dir, "label", file_name)

            img_saved = save_nii(image_arr, img_save_path)
            lbl_saved = save_nii(label_arr, lbl_save_path)

            status[f'{file_name}'] = [img_saved, lbl_saved]

            # Record files whose arrays do not have the expected shapes.
            if (image_arr.shape != (4, 240, 240)) or (label_arr.shape != (3, 240, 240)):
                shape_outlier.append(dict(
                    name=os.path.basename(file),
                    image_shape=image_arr.shape,
                    label_shape=label_arr.shape
                ))
            pbar.update(1)
    finally:
        # Always release the progress bar so later bars do not render nested.
        pbar.close()

    print("\nShape Outlier")
    print(shape_outlier)

    # One row per file; passing the column names here also yields a valid
    # (empty) table when no file was processed.
    status_df = pd.DataFrame.from_dict(
        status, orient="index", columns=["Image Saved", "Label Saved"]
    )
    # to_csv does not create missing parent folders.
    os.makedirs("./tmp", exist_ok=True)
    status_df.to_csv("./tmp/transform.csv")









  0%|                                                                                                                                                                                               | 0/57195 [00:00<?, ?it/s][A[A[A[A[A[A[A[A







  0%|                                                                                                                                                                                       | 3/57195 [00:00<34:47, 27.39it/s][A[A[A[A[A[A[A[A







  0%|                                                                                                                                                                                       | 6/57195 [00:00<34:07, 27.93it/s][A[A[A[A[A[A[A[A







  0%|                                                                                                                                                                                       | 9/57195 [00:00<35:32, 26.82it/s][A[A[


Shape Outlier
[]










  0%|                                                                                                                                                                                      | 11/57195 [00:19<35:31, 26.82it/s][A[A[A[A[A[A[A[A