In [None]:
import glob
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import PIL.Image as Image
import pydicom
from pydicom.pixel_data_handlers import apply_voi_lut
from pydicom.pixel_data_handlers.util import apply_windowing
from sklearn.model_selection import train_test_split
from tqdm.notebook import tqdm
# Dataset root directory; find_aihub_img_label_dirs reads this module-level name.
common_dir = '/home/ncp/workspace/202002n050/050.신경계 질환 관련 임상 및 진료 데이터'

In [None]:
# Suffix groups passed to check_extension / load_file_path. Matching uses
# str.endswith, which is case-sensitive, hence the lower- and upper-case spellings.
FILE_EXTENSION = ['.png', '.PNG', '.jpg', '.JPG', '.dcm', '.DCM', '.raw', '.RAW', '.svs', '.SVS']
IMG_EXTENSION = ['.png', '.PNG', '.jpg', '.JPG', '.jpeg', '.JPEG']
DCM_EXTENSION = ['.dcm', '.DCM']
RAW_EXTENSION = ['.raw', '.RAW']
NIFTI_EXTENSION = ['.nii']
NP_EXTENSION = ['.npy']

# Dataset root directory; find_aihub_img_label_dirs reads this module-level name.
common_dir = '/home/ncp/workspace/202002n050/050.신경계 질환 관련 임상 및 진료 데이터'


def check_extension(filename, extension_ls=FILE_EXTENSION):
    """Tell whether ``filename`` ends with one of the suffixes in ``extension_ls``.

    The comparison is case-sensitive (plain ``str.endswith``).

    Return:
        bool -- True on the first matching suffix, False when none matches
    """
    for suffix in extension_ls:
        if filename.endswith(suffix):
            return True
    return False


def load_file_path(folder_path, extension_ls=FILE_EXTENSION, all_sub_folders=False):
    """Collect paths of files under ``folder_path`` whose names end with one of ``extension_ls``.

    Parameters:
        folder_path (str) -- directory to search
        extension_ls (list) -- accepted file-name suffixes (default: FILE_EXTENSION)
        all_sub_folders (bool) -- True: descend into every sub-folder;
            False: only files directly inside ``folder_path`` are considered

    Return:
        file_paths (list) -- matching paths, ordered by directory and then by file name

    Raises:
        AssertionError -- if ``folder_path`` is not an existing directory
    """
    assert os.path.isdir(folder_path), f'{folder_path} is not a valid directory'

    if all_sub_folders:
        # Roots are unique, so sorting the walk tuples orders the directories by path.
        walked = sorted(os.walk(folder_path))
    else:
        # os.walk is a lazy, top-down generator: taking only its first item
        # reads the top folder without traversing the whole tree.
        top_level = next(os.walk(folder_path), None)
        walked = [] if top_level is None else [top_level]

    file_paths = []
    for root, _, fnames in walked:
        # Sort names so the result does not depend on the OS listing order.
        for fname in sorted(fnames):
            if check_extension(fname, extension_ls):
                file_paths.append(os.path.join(root, fname))
    return file_paths


def gen_new_dir(new_dir):
    """Create ``new_dir`` (and any missing parents) if it does not exist yet.

    Best-effort helper: a failure is reported on stdout instead of being raised,
    so callers must not assume the directory exists afterwards.

    Parameters:
        new_dir (str) -- directory path to create

    Return:
        None
    """
    try:
        # exist_ok=True removes the check-then-create race of a separate exists() test.
        os.makedirs(new_dir, exist_ok=True)
    except OSError as err:
        print(f"Error: Failed to create the directory. ({new_dir}: {err})")


def find_aihub_img_label_dirs(fname, mod='train'):
    """Build the image and mask folder paths of one case below the global ``common_dir``.

    Parameters:
        fname (str) -- case folder name
        mod (str) -- 'train' selects the 1.Training tree, 'val' the 2.Validation tree

    Return:
        list -- ``[img_dir, mask_dir]``; None for any other ``mod``.
        The paths are only assembled, not checked for existence.
    """
    if mod not in ('train', 'val'):
        return None
    use_training = mod == 'train'
    img_parent = '01.데이터/1.Training/원천데이터' if use_training else '01.데이터/2.Validation/원천데이터'
    mask_parent = '01.데이터/1.Training/라벨링데이터' if use_training else '01.데이터/2.Validation/라벨링데이터'
    return [
        os.path.join(common_dir, img_parent, fname, 'init/image'),
        os.path.join(common_dir, mask_parent, fname, 'init/mask'),
    ]


def pair_img_mask_path(fname, mod='train'):
    """Pair every PNG image of a case with the mask PNG of the same base name.

    Parameters:
        fname (str) -- case folder name
        mod (str) -- forwarded to find_aihub_img_label_dirs

    Return:
        list -- ``[image_path, mask_path_or_None]`` entries in sorted image order;
        None when the image folder holds no ``*.png`` file
    """
    def _index_by_stem(paths):
        # base name without extension -> full path
        return {os.path.splitext(os.path.basename(path))[0]: path for path in paths}

    img_dir, mask_dir = find_aihub_img_label_dirs(fname, mod)
    image_paths = sorted(glob.glob(os.path.join(img_dir, '*.png')))
    if not image_paths:
        return None
    images = _index_by_stem(image_paths)

    masks = {}
    if os.path.isdir(mask_dir):
        masks = _index_by_stem(sorted(glob.glob(os.path.join(mask_dir, '*.png'))))

    # Images without a matching mask are kept, paired with None.
    return [[img_path, masks.get(stem)] for stem, img_path in images.items()]


def find_aihub_img_label_paths(common_dir, mod='train'):
    """List the image/mask path pairs of every case folder in one dataset split.

    Parameters:
        common_dir (str) -- dataset root used to enumerate the case folders
        mod (str) -- 'train' or 'val'

    Return:
        (img_list, mask_list) -- two tuples of equal length; a mask entry is None
        when the image has no mask. Both are empty when no image is found.

    Raises:
        ValueError -- if ``mod`` is neither 'train' nor 'val'
    """
    if mod == 'train':
        data_dir = os.path.join(common_dir, '01.데이터/1.Training/원천데이터')
    elif mod == 'val':
        data_dir = os.path.join(common_dir, '01.데이터/2.Validation/원천데이터')
    else:
        # Previously fell through and failed later on the unbound data_dir.
        raise ValueError(f"mod must be 'train' or 'val', got {mod!r}")

    # Sorted so the output order does not depend on the OS listing order.
    case_names = sorted(
        name for name in os.listdir(data_dir)
        if os.path.isdir(os.path.join(data_dir, name))
    )

    paths_list = []
    for fname in case_names:
        # NOTE(review): pair_img_mask_path resolves folders from the module-level
        # common_dir, not from this function's parameter -- confirm they always agree.
        pairs = pair_img_mask_path(fname, mod)
        if pairs:
            paths_list.extend(pairs)

    if not paths_list:
        # zip(*[]) yields nothing to unpack; return the empty result explicitly.
        return (), ()
    img_list, mask_list = zip(*paths_list)
    return img_list, mask_list


In [None]:
# Load a CSV into a DataFrame. NOTE(review): aihub_df is not used by any later visible cell.
aihub_df = pd.read_csv('/home/ncp/workspace/blocks1/aihub_df_define_dcm.csv')

In [None]:
import pickle

In [None]:
# Load the case -> DICOM paths mapping; later loops unpack each value as
# (dwi_path_ls, adc_path_ls).
# NOTE(review): pickle.load can execute arbitrary code -- only load a trusted file.
with open('fname_dicom.pickle', 'rb') as fr:
    fname_dicom_dict_load = pickle.load(fr)

In [None]:
def sample_stack(stack, rows=6, cols=6, start_with=0, show_every=1):
    """Show a ``rows`` x ``cols`` grid of slices taken from ``stack``.

    Parameters:
        stack -- indexable sequence of 2-D images (``stack[i]`` is drawn with imshow)
        rows, cols (int) -- grid size
        start_with (int) -- index of the first slice shown
        show_every (int) -- step between consecutive slices

    Panels whose slice index is past the end of ``stack`` are left blank.
    Images are drawn in gray with a fixed 0-255 display range.
    """
    # squeeze=False keeps `axes` 2-D even when rows == 1 or cols == 1.
    fig, axes = plt.subplots(rows, cols, figsize=[18, 20], squeeze=False)
    for i, ax in enumerate(axes.flat):
        ax.axis('off')
        ind = start_with + i * show_every
        if ind >= len(stack):
            # Ran out of slices: leave the panel empty instead of raising.
            continue
        ax.set_title(f'slice {ind}')
        ax.imshow(stack[ind], cmap='gray', vmin=0, vmax=255)
    plt.show()

In [None]:
def normalize(img_arr):
    """Min-max scale ``img_arr`` to 0..255 and return it as uint8.

    Parameters:
        img_arr (np.ndarray) -- image or volume; min/max are taken over the whole array

    Return:
        np.ndarray -- uint8 array of the same shape (fractions are truncated by the cast)
    """
    lowest = img_arr.min()
    span = img_arr.max() - lowest
    if span == 0:
        # Constant input: avoid the 0/0 division and return an all-zero image.
        return np.zeros(img_arr.shape, dtype=np.uint8)
    norm_arr = (img_arr - lowest) / span * 255
    return norm_arr.astype(np.uint8)

In [None]:
def normalize(img_arr):
    """Min-max scale ``img_arr`` to 0..255 and return it as uint8.

    The minimum is subtracted first; the division by the maximum is skipped
    when the shifted array is all zero (constant input), which yields zeros.
    """
    shifted = img_arr - img_arr.min()
    peak = shifted.max()
    scaled = shifted / peak if peak != 0 else shifted
    return (scaled * 255).astype(np.uint8)

In [None]:
def load_mr_scans(dcm_paths):
    """Read a list of DICOM files and stack their pixel data into one array.

    Parameters:
        dcm_paths (list) -- paths of the DICOM files to read

    Return:
        slices (list) -- datasets sorted by descending ``ImagePositionPatient[2]``
        images (np.ndarray) -- each dataset's ``pixel_array`` stacked along a new
            first axis, in the same order; values are left unnormalized
    """
    # dcmread is the current reader name; read_file is its deprecated alias.
    # force=True lets pydicom read files that lack the standard header.
    slices = [pydicom.dcmread(dcm_path, force=True) for dcm_path in dcm_paths]
    slices.sort(key=lambda ds: float(ds.ImagePositionPatient[2]), reverse=True)

    images = np.stack([ds.pixel_array for ds in slices])
    return slices, images

In [None]:
def save_arr_to_np(arr, savepoint, fname):
    """Write ``arr`` with ``np.save`` to ``<savepoint>/<fname>.npy``.

    Parameters:
        arr -- array to store
        savepoint (str) -- existing output directory
        fname (str) -- file name without the ``.npy`` extension
    """
    file_name = fname + '.npy'
    target = os.path.join(savepoint, file_name)
    np.save(target, arr)

In [None]:
import scipy.ndimage

In [None]:
def resample_3d(image_3d, dsize=(36,256,256)):
    """Resample a volume to the shape ``dsize`` with ``scipy.ndimage.zoom``.

    Parameters:
        image_3d (np.ndarray) -- input volume
        dsize (tuple) -- target shape, one entry per axis of ``image_3d``

    Return:
        np.ndarray -- resampled volume

    Note: ``mode='nearest'`` only selects how values beyond the border are
    filled; the interpolation itself uses zoom's default spline order.
    """
    zoom_factors = np.asarray(dsize) / np.asarray(image_3d.shape)

    # scipy.ndimage.zoom is the public entry point; the
    # scipy.ndimage.interpolation namespace is deprecated.
    return scipy.ndimage.zoom(image_3d, zoom_factors, mode='nearest')

In [None]:
# Convert each case's DWI / ADC DICOM series to an .npy volume (no resampling here).
# NOTE(review): the output depends on which load_mr_scans definition is live in the kernel.
savepoint = '/home/ncp/workspace/blocks1/dicom_to_np'
dwi_savepoint = os.path.join(savepoint, 'dwi')
adc_savepoint = os.path.join(savepoint, 'adc')
gen_new_dir(dwi_savepoint)
gen_new_dir(adc_savepoint)

for fname_, dwi_adc_ls_ in tqdm(fname_dicom_dict_load.items()):
    dwi_path_ls, adc_path_ls = dwi_adc_ls_
    # Truthiness skips both "missing" encodings (None and an empty list);
    # either one would otherwise fail inside load_mr_scans.
    if dwi_path_ls:
        _, dwi_3d_im = load_mr_scans(dwi_path_ls)
        save_arr_to_np(dwi_3d_im, dwi_savepoint, fname_)
    if adc_path_ls:
        _, adc_3d_im = load_mr_scans(adc_path_ls)
        save_arr_to_np(adc_3d_im, adc_savepoint, fname_)

In [None]:
import matplotlib.patches as patches
from matplotlib import cm, colors
from skimage import exposure, util

def ahe_3d(img_3d):
    """Clip, rescale to [0, 1] and apply ``exposure.equalize_adapthist`` to a volume.

    Steps: reverse the axis order, clip to the 5th..95th percentile, min-max
    scale, equalize with a kernel of one fifth of each axis length and
    clip_limit 0.8, then restore the original axis order.

    Parameters:
        img_3d (np.ndarray) -- 3-D volume

    Return:
        np.ndarray -- equalized volume with the same axis order as the input
    """
    img_3d = img_3d.transpose()
    img_3d = np.clip(img_3d,
                    np.percentile(img_3d, 5),
                    np.percentile(img_3d, 95))
    img_3d = img_3d - img_3d.min()
    peak = img_3d.max()
    if peak != 0:
        img_3d = img_3d / peak
    # The former second min-max rescale was dropped: it was a no-op after the
    # guarded scaling above and divided 0 by 0 on a constant volume.

    kernel_size = np.array([img_3d.shape[0] // 5,
                            img_3d.shape[1] // 5,
                            img_3d.shape[2] // 5])
    clip_limit = 0.8

    img_3d_ahe = exposure.equalize_adapthist(img_3d,
                                            kernel_size=kernel_size,
                                            clip_limit=clip_limit)
    return img_3d_ahe.transpose()

def he_3d(img_3d):
    """Clip to the 5th..95th percentile, scale to [0, 1] and equalize a volume.

    Despite the name, this calls ``exposure.equalize_adapthist`` with its
    default kernel size and clip limit. The axis order is reversed before
    the call and restored on return.

    Parameters:
        img_3d (np.ndarray) -- input volume

    Return:
        np.ndarray -- equalized volume with the same axis order as the input
    """
    img_3d = img_3d.transpose()
    img_3d = np.clip(img_3d,
                    np.percentile(img_3d, 5),
                    np.percentile(img_3d, 95))
    img_3d = img_3d - img_3d.min()
    peak = img_3d.max()
    if peak != 0:
        # Guarded so a constant volume stays all-zero instead of becoming NaN.
        img_3d = img_3d / peak

    img_3d_ahe = exposure.equalize_adapthist(img_3d)
    return img_3d_ahe.transpose()

In [None]:
def load_mr_scans(dcm_paths):
    """Read a list of DICOM files, stack their pixel data and equalize it with he_3d.

    Parameters:
        dcm_paths (list) -- paths of the DICOM files to read

    Return:
        slices (list) -- datasets sorted by descending ``ImagePositionPatient[2]``
        images (np.ndarray) -- stacked ``pixel_array`` data after ``he_3d``
    """
    # dcmread is the current reader name; read_file is its deprecated alias.
    slices = [pydicom.dcmread(dcm_path, force=True) for dcm_path in dcm_paths]
    slices.sort(key=lambda ds: float(ds.ImagePositionPatient[2]), reverse=True)

    images = np.stack([ds.pixel_array for ds in slices])
    images = he_3d(images)
    return slices, images

In [None]:
# Convert each case's DWI / ADC DICOM series to an .npy volume under dicom_to_np_3dhe.
# NOTE(review): the output depends on which load_mr_scans definition is live in the kernel.
savepoint = '/home/ncp/workspace/blocks1/dicom_to_np_3dhe'
dwi_savepoint = os.path.join(savepoint, 'dwi')
adc_savepoint = os.path.join(savepoint, 'adc')
gen_new_dir(dwi_savepoint)
gen_new_dir(adc_savepoint)

for fname_, dwi_adc_ls_ in tqdm(fname_dicom_dict_load.items()):
    dwi_path_ls, adc_path_ls = dwi_adc_ls_
    # Truthiness skips both "missing" encodings (None and an empty list);
    # either one would otherwise fail inside load_mr_scans.
    if dwi_path_ls:
        _, dwi_3d_im = load_mr_scans(dwi_path_ls)
        save_arr_to_np(dwi_3d_im, dwi_savepoint, fname_)
    if adc_path_ls:
        _, adc_3d_im = load_mr_scans(adc_path_ls)
        save_arr_to_np(adc_3d_im, adc_savepoint, fname_)

In [None]:
# Scratch check: dtype of the last DWI volume left over from the preceding loop.
dwi_3d_im.dtype

In [None]:
def ahe_3d(img_3d):
    """Clip, rescale to [0, 1] and apply ``exposure.equalize_adapthist`` to a volume.

    This variant clips to the 50th..97th percentile and equalizes with a
    kernel of half of each axis length and clip_limit 0.9. The axis order is
    reversed before processing and restored on return.
    """
    volume = img_3d.transpose()
    lower = np.percentile(volume, 50)
    upper = np.percentile(volume, 97)
    volume = np.clip(volume, lower, upper)

    # Shift to zero, then scale by the peak unless the volume is constant.
    volume = volume - volume.min()
    if volume.max() != 0:
        volume = volume / volume.max()

    tile = np.array([volume.shape[0] // 2, volume.shape[1] // 2, volume.shape[2] // 2])
    equalized = exposure.equalize_adapthist(volume, kernel_size=tile, clip_limit=.9)
    return equalized.transpose()

def he_3d(img_3d):
    """Clip the low end at the 5th percentile, scale to [0, 1] and equalize a volume.

    The upper clip bound is the 100th percentile, i.e. the maximum, so only
    low values are clipped. Despite the name, this calls
    ``exposure.equalize_adapthist`` with its defaults. The axis order is
    reversed before the call and restored on return.

    Parameters:
        img_3d (np.ndarray) -- input volume

    Return:
        np.ndarray -- equalized volume with the same axis order as the input
    """
    img_3d = img_3d.transpose()
    img_3d = np.clip(img_3d,
                    np.percentile(img_3d, 5),
                    np.percentile(img_3d, 100))
    img_3d = img_3d - img_3d.min()
    peak = img_3d.max()
    if peak != 0:
        # Guarded so a constant volume stays all-zero instead of becoming NaN.
        img_3d = img_3d / peak

    img_3d_ahe = exposure.equalize_adapthist(img_3d)
    return img_3d_ahe.transpose()

In [None]:
# Input: .npy volumes under dicom_to_np_2dnorm/{dwi,adc}.
loadpoint = '/home/ncp/workspace/blocks1/dicom_to_np_2dnorm'
dwi_loadpoint = os.path.join(loadpoint, 'dwi')
adc_loadpoint = os.path.join(loadpoint, 'adc')

# Output folders, created up front.
savepoint = '/home/ncp/workspace/blocks1/dicom_to_np_2dnorm_resample'
dwi_savepoint = os.path.join(savepoint, 'dwi')
adc_savepoint = os.path.join(savepoint, 'adc')
gen_new_dir(dwi_savepoint)
gen_new_dir(adc_savepoint)

# Only the top level of each input folder is listed (all_sub_folders defaults to False).
dwi_npy_paths = load_file_path(dwi_loadpoint, NP_EXTENSION)
adc_npy_paths = load_file_path(adc_loadpoint, NP_EXTENSION)



In [None]:
# Scratch check: shape of one stored DWI volume (index 1222 is an arbitrary pick).
np.load(dwi_npy_paths[1222]).shape

In [None]:
import cv2

def resize_padding(arr, depth=50, dsize=(256, 256)):
    """Resize every slice to ``dsize`` and zero-pad the stack to ``depth`` slices.

    Parameters:
        arr -- iterable of 2-D slices (first axis is the slice index)
        depth (int) -- number of slices in the result (default 50)
        dsize (tuple) -- per-slice target size passed to ``cv2.resize`` (default 256x256)

    Return:
        np.ndarray -- stack of ``depth`` slices; padding is split between the front
        and the back, with the extra slice going to the back when the amount is odd

    Raises:
        ValueError -- if ``arr`` has more than ``depth`` slices
    """
    arr_rsz = np.stack([cv2.resize(slice_2d, dsize=dsize, interpolation=cv2.INTER_CUBIC)
                        for slice_2d in arr])
    pad = depth - len(arr_rsz)
    if pad < 0:
        # np.pad would reject the negative width with a less helpful message.
        raise ValueError(f'volume has {len(arr_rsz)} slices, more than the target depth {depth}')
    front = pad // 2
    return np.pad(arr_rsz, ((front, pad - front), (0, 0), (0, 0)))

In [None]:
# Scratch check: evaluates to True if the padded volume does NOT have 50 slices.
len(resize_padding(np.load(dwi_npy_paths[90]))) != 50

In [None]:
# Resample every stored DWI volume with resample_3d (default target shape) and
# save it under dicom_to_np_2dnorm_resample/dwi.
loadpoint = '/home/ncp/workspace/blocks1/dicom_to_np_2dnorm'
dwi_loadpoint = os.path.join(loadpoint, 'dwi')
adc_loadpoint = os.path.join(loadpoint, 'adc')

savepoint = '/home/ncp/workspace/blocks1/dicom_to_np_2dnorm_resample'
dwi_savepoint = os.path.join(savepoint, 'dwi')
adc_savepoint = os.path.join(savepoint, 'adc')
gen_new_dir(dwi_savepoint)
gen_new_dir(adc_savepoint)

dwi_npy_paths = load_file_path(dwi_loadpoint, NP_EXTENSION)
adc_npy_paths = load_file_path(adc_loadpoint, NP_EXTENSION)

for p in tqdm(dwi_npy_paths):
    # Output keeps the input's base name.
    fname = os.path.splitext(os.path.basename(p))[0]
    dwi_3d_im = np.load(p)
    dwi_3d_im = resample_3d(dwi_3d_im)
    save_arr_to_np(dwi_3d_im, dwi_savepoint, fname)
    
# The ADC pass below is disabled, so the adc output folder is only created, not filled.
# for p in tqdm(adc_npy_paths):
#     fname = os.path.splitext(os.path.basename(p))[0]
#     adc_3d_im = np.load(p)
#     adc_3d_im = resize_padding(adc_3d_im)
#     save_arr_to_np(adc_3d_im, adc_savepoint, fname)

In [None]:
# Resize and pad every stored DWI volume with resize_padding and save it under
# dicom_to_np_2dnorm_pad/dwi.
loadpoint = '/home/ncp/workspace/blocks1/dicom_to_np_2dnorm'
dwi_loadpoint = os.path.join(loadpoint, 'dwi')
adc_loadpoint = os.path.join(loadpoint, 'adc')

savepoint = '/home/ncp/workspace/blocks1/dicom_to_np_2dnorm_pad'
dwi_savepoint = os.path.join(savepoint, 'dwi')
adc_savepoint = os.path.join(savepoint, 'adc')
gen_new_dir(dwi_savepoint)
gen_new_dir(adc_savepoint)

dwi_npy_paths = load_file_path(dwi_loadpoint, NP_EXTENSION)
adc_npy_paths = load_file_path(adc_loadpoint, NP_EXTENSION)

for p in tqdm(dwi_npy_paths):
    # Output keeps the input's base name.
    fname = os.path.splitext(os.path.basename(p))[0]
    dwi_3d_im = np.load(p)
    dwi_3d_im = resize_padding(dwi_3d_im)
    save_arr_to_np(dwi_3d_im, dwi_savepoint, fname)
    
# The ADC pass below is disabled, so the adc output folder is only created, not filled.
# for p in tqdm(adc_npy_paths):
#     fname = os.path.splitext(os.path.basename(p))[0]
#     adc_3d_im = np.load(p)
#     adc_3d_im = resize_padding(adc_3d_im)
#     save_arr_to_np(adc_3d_im, adc_savepoint, fname)

In [None]:
# Resize and pad every DWI volume from dicom_to_np_3dnorm with resize_padding and
# save it under dicom_to_np_3dnorm_pad/dwi.
loadpoint = '/home/ncp/workspace/blocks1/dicom_to_np_3dnorm'
dwi_loadpoint = os.path.join(loadpoint, 'dwi')
adc_loadpoint = os.path.join(loadpoint, 'adc')

savepoint = '/home/ncp/workspace/blocks1/dicom_to_np_3dnorm_pad'
dwi_savepoint = os.path.join(savepoint, 'dwi')
adc_savepoint = os.path.join(savepoint, 'adc')
gen_new_dir(dwi_savepoint)
gen_new_dir(adc_savepoint)

dwi_npy_paths = load_file_path(dwi_loadpoint, NP_EXTENSION)
adc_npy_paths = load_file_path(adc_loadpoint, NP_EXTENSION)

for p in tqdm(dwi_npy_paths):
    # Output keeps the input's base name.
    fname = os.path.splitext(os.path.basename(p))[0]
    dwi_3d_im = np.load(p)
    dwi_3d_im = resize_padding(dwi_3d_im)
    save_arr_to_np(dwi_3d_im, dwi_savepoint, fname)
    
# The ADC pass below is disabled, so the adc output folder is only created, not filled.
# for p in tqdm(adc_npy_paths):
#     fname = os.path.splitext(os.path.basename(p))[0]
#     adc_3d_im = np.load(p)
#     adc_3d_im = resize_padding(adc_3d_im)
#     save_arr_to_np(adc_3d_im, adc_savepoint, fname)

In [None]:
from tqdm.notebook import tqdm

In [None]:
# Equalize (he_3d) and resample (resample_3d) the raw ADC volumes, writing to
# dicom_to_np_he_resample_v2/adc. Volumes with fewer than 20 slices are skipped.
loadpoint = '/home/ncp/workspace/blocks1/dicom_to_np'
dwi_loadpoint = os.path.join(loadpoint, 'dwi')
adc_loadpoint = os.path.join(loadpoint, 'adc')

savepoint = '/home/ncp/workspace/blocks1/dicom_to_np_he_resample_v2'
dwi_savepoint = os.path.join(savepoint, 'dwi')
adc_savepoint = os.path.join(savepoint, 'adc')
gen_new_dir(dwi_savepoint)
gen_new_dir(adc_savepoint)

dwi_npy_paths = load_file_path(dwi_loadpoint, NP_EXTENSION)
adc_npy_paths = load_file_path(adc_loadpoint, NP_EXTENSION)
# Number of volumes skipped for having fewer than 20 slices.
cnt = 0
# The DWI pass is disabled in this version of the cell.
# for p in tqdm(dwi_npy_paths):
#     fname = os.path.splitext(os.path.basename(p))[0]
#     dwi_3d_im = np.load(p)
#     if len(dwi_3d_im) >= 20:
#         dwi_3d_im = he_3d(dwi_3d_im)
#         dwi_3d_im = resample_3d(dwi_3d_im)
#         save_arr_to_np(dwi_3d_im, dwi_savepoint, fname)
#     else:
#         cnt+=1
    
for p in tqdm(adc_npy_paths):
    fname = os.path.splitext(os.path.basename(p))[0]
    adc_3d_im = np.load(p)
    if len(adc_3d_im) >= 20:
        adc_3d_im = he_3d(adc_3d_im)
        adc_3d_im = resample_3d(adc_3d_im)
        save_arr_to_np(adc_3d_im, adc_savepoint, fname)
    else:
        cnt+=1

In [None]:
# List the .npy files written to the he_resample_v2 DWI folder.
sample_he = load_file_path('/home/ncp/workspace/blocks1/dicom_to_np_he_resample_v2/dwi', NP_EXTENSION)

In [None]:
# Scratch check: how many volumes the previous loop skipped.
cnt

In [None]:
# Scratch: reload the file at `p`, a loop variable left over from an earlier cell.
# NOTE(review): despite the name, `p` may be an ADC path if the ADC loop ran last.
dwi_3d_im = np.load(p)

In [None]:
# Scratch alias used by the next cells to step through he_3d's preprocessing by hand.
img_3d = dwi_3d_im

In [None]:
# Scratch: the same 5th..95th percentile clip and min-max scaling as he_3d,
# without the transpose. Yields NaN if the clipped volume is constant (0/0).
img_3d = np.clip(img_3d,
                np.percentile(img_3d, 5),
                np.percentile(img_3d, 95))
img_3d = (img_3d-img_3d.min()) / (img_3d.max()-img_3d.min())

In [None]:
# Scratch check: length of the first axis of the volume.
len(img_3d)

In [None]:
def he_3d(img_3d):
    """Clip to the 5th..95th percentile, scale to [0, 1] and equalize a volume.

    Despite the name, this calls ``exposure.equalize_adapthist`` with its
    default kernel size and clip limit. The axis order is reversed before
    the call and restored on return.

    Parameters:
        img_3d (np.ndarray) -- input volume

    Return:
        np.ndarray -- equalized volume with the same axis order as the input
    """
    img_3d = img_3d.transpose()
    img_3d = np.clip(img_3d,
                    np.percentile(img_3d, 5),
                    np.percentile(img_3d, 95))
    img_3d = img_3d - img_3d.min()
    peak = img_3d.max()
    if peak != 0:
        # Guarded so a constant volume stays all-zero instead of becoming NaN.
        img_3d = img_3d / peak

    img_3d_ahe = exposure.equalize_adapthist(img_3d)
    return img_3d_ahe.transpose()

In [None]:
# List the stored DWI / ADC .npy volumes (top level of each folder only).
loadpoint = '/home/ncp/workspace/blocks1/dicom_to_np_2dnorm'
dwi_loadpoint = os.path.join(loadpoint, 'dwi')
adc_loadpoint = os.path.join(loadpoint, 'adc')

dwi_npy_paths = load_file_path(dwi_loadpoint, NP_EXTENSION)
adc_npy_paths = load_file_path(adc_loadpoint, NP_EXTENSION)

In [None]:
# Index the volumes by base name so DWI and ADC files of the same case can be paired.
dwi_dict = {os.path.splitext(os.path.basename(p))[0]:p for p in dwi_npy_paths}
adc_dict = {os.path.splitext(os.path.basename(p))[0]:p for p in adc_npy_paths}

savepoint = '/home/ncp/workspace/blocks1/dicom_to_png_2d'

dwi_adc_pair = []

# Keep only cases that have both a DWI and an ADC volume.
for fname, p in dwi_dict.items():
    find_adc_path = adc_dict.get(fname)
    if find_adc_path:
        dwi_adc_pair.append([p, find_adc_path])

# Export each pair as per-slice PNGs under <savepoint>/<case>/{dwi,adc}.
# NOTE(review): resize_and_save_2d is defined in a cell further down the notebook,
# so this cell fails on a fresh top-to-bottom run.
for dwi_p, adc_p in tqdm(dwi_adc_pair):
    dwi_im = np.load(dwi_p)
    adc_im = np.load(adc_p)
    fname = os.path.splitext(os.path.basename(dwi_p))[0]
    # Pairs whose slice counts differ are skipped without a message.
    if len(dwi_im) == len(adc_im):
        dwi_savepoint = os.path.join(savepoint, fname, 'dwi')
        adc_savepoint = os.path.join(savepoint, fname, 'adc')
        gen_new_dir(dwi_savepoint)
        gen_new_dir(adc_savepoint)
        resize_and_save_2d(dwi_im, dwi_savepoint)
        resize_and_save_2d(adc_im, adc_savepoint)

In [None]:
import os
import numpy as np
import glob

# Suffix groups passed to check_extension / load_file_path. Matching uses
# str.endswith, which is case-sensitive, hence the lower- and upper-case spellings.
# NOTE(review): this cell repeats the constants and helpers of an earlier cell.
FILE_EXTENSION = ['.png', '.PNG', '.jpg', '.JPG', '.dcm', '.DCM', '.raw', '.RAW', '.svs', '.SVS']
IMG_EXTENSION = ['.png', '.PNG', '.jpg', '.JPG', '.jpeg', '.JPEG']
DCM_EXTENSION = ['.dcm', '.DCM']
RAW_EXTENSION = ['.raw', '.RAW']
NIFTI_EXTENSION = ['.nii']
NP_EXTENSION = ['.npy']

# Dataset root directory; find_aihub_img_label_dirs reads this module-level name.
common_dir = '/home/ncp/workspace/202002n050/050.신경계 질환 관련 임상 및 진료 데이터'


def check_extension(filename, extension_ls=FILE_EXTENSION):
    """Tell whether ``filename`` ends with one of the suffixes in ``extension_ls``.

    The comparison is case-sensitive (plain ``str.endswith``).

    Return:
        bool -- True on the first matching suffix, False when none matches
    """
    for suffix in extension_ls:
        if filename.endswith(suffix):
            return True
    return False


def load_file_path(folder_path, extension_ls=FILE_EXTENSION, all_sub_folders=False):
    """Collect paths of files under ``folder_path`` whose names end with one of ``extension_ls``.

    Parameters:
        folder_path (str) -- directory to search
        extension_ls (list) -- accepted file-name suffixes (default: FILE_EXTENSION)
        all_sub_folders (bool) -- True: descend into every sub-folder;
            False: only files directly inside ``folder_path`` are considered

    Return:
        file_paths (list) -- matching paths, ordered by directory and then by file name

    Raises:
        AssertionError -- if ``folder_path`` is not an existing directory
    """
    assert os.path.isdir(folder_path), f'{folder_path} is not a valid directory'

    if all_sub_folders:
        # Roots are unique, so sorting the walk tuples orders the directories by path.
        walked = sorted(os.walk(folder_path))
    else:
        # os.walk is a lazy, top-down generator: taking only its first item
        # reads the top folder without traversing the whole tree.
        top_level = next(os.walk(folder_path), None)
        walked = [] if top_level is None else [top_level]

    file_paths = []
    for root, _, fnames in walked:
        # Sort names so the result does not depend on the OS listing order.
        for fname in sorted(fnames):
            if check_extension(fname, extension_ls):
                file_paths.append(os.path.join(root, fname))
    return file_paths


def gen_new_dir(new_dir):
    """Create ``new_dir`` (and any missing parents) if it does not exist yet.

    Best-effort helper: a failure is reported on stdout instead of being raised,
    so callers must not assume the directory exists afterwards.

    Parameters:
        new_dir (str) -- directory path to create

    Return:
        None
    """
    try:
        # exist_ok=True removes the check-then-create race of a separate exists() test.
        os.makedirs(new_dir, exist_ok=True)
    except OSError as err:
        print(f"Error: Failed to create the directory. ({new_dir}: {err})")


def find_aihub_img_label_dirs(fname, mod='train'):
    """Build the image and mask folder paths of one case below the global ``common_dir``.

    Parameters:
        fname (str) -- case folder name
        mod (str) -- 'train' selects the 1.Training tree, 'val' the 2.Validation tree

    Return:
        list -- ``[img_dir, mask_dir]``; None for any other ``mod``.
        The paths are only assembled, not checked for existence.
    """
    if mod not in ('train', 'val'):
        return None
    use_training = mod == 'train'
    img_parent = '01.데이터/1.Training/원천데이터' if use_training else '01.데이터/2.Validation/원천데이터'
    mask_parent = '01.데이터/1.Training/라벨링데이터' if use_training else '01.데이터/2.Validation/라벨링데이터'
    return [
        os.path.join(common_dir, img_parent, fname, 'init/image'),
        os.path.join(common_dir, mask_parent, fname, 'init/mask'),
    ]


def pair_img_mask_path(fname, mod='train'):
    """Pair every PNG image of a case with the mask PNG of the same base name.

    Parameters:
        fname (str) -- case folder name
        mod (str) -- forwarded to find_aihub_img_label_dirs

    Return:
        list -- ``[image_path, mask_path_or_None]`` entries in sorted image order;
        None when the image folder holds no ``*.png`` file
    """
    def _index_by_stem(paths):
        # base name without extension -> full path
        return {os.path.splitext(os.path.basename(path))[0]: path for path in paths}

    img_dir, mask_dir = find_aihub_img_label_dirs(fname, mod)
    image_paths = sorted(glob.glob(os.path.join(img_dir, '*.png')))
    if not image_paths:
        return None
    images = _index_by_stem(image_paths)

    masks = {}
    if os.path.isdir(mask_dir):
        masks = _index_by_stem(sorted(glob.glob(os.path.join(mask_dir, '*.png'))))

    # Images without a matching mask are kept, paired with None.
    return [[img_path, masks.get(stem)] for stem, img_path in images.items()]


def find_aihub_img_label_paths(common_dir, mod='train'):
    """List the image/mask path pairs of every case folder in one dataset split.

    Parameters:
        common_dir (str) -- dataset root used to enumerate the case folders
        mod (str) -- 'train' or 'val'

    Return:
        (img_list, mask_list) -- two tuples of equal length; a mask entry is None
        when the image has no mask. Both are empty when no image is found.

    Raises:
        ValueError -- if ``mod`` is neither 'train' nor 'val'
    """
    if mod == 'train':
        data_dir = os.path.join(common_dir, '01.데이터/1.Training/원천데이터')
    elif mod == 'val':
        data_dir = os.path.join(common_dir, '01.데이터/2.Validation/원천데이터')
    else:
        # Previously fell through and failed later on the unbound data_dir.
        raise ValueError(f"mod must be 'train' or 'val', got {mod!r}")

    # Sorted so the output order does not depend on the OS listing order.
    case_names = sorted(
        name for name in os.listdir(data_dir)
        if os.path.isdir(os.path.join(data_dir, name))
    )

    paths_list = []
    for fname in case_names:
        # NOTE(review): pair_img_mask_path resolves folders from the module-level
        # common_dir, not from this function's parameter -- confirm they always agree.
        pairs = pair_img_mask_path(fname, mod)
        if pairs:
            paths_list.extend(pairs)

    if not paths_list:
        # zip(*[]) yields nothing to unpack; return the empty result explicitly.
        return (), ()
    img_list, mask_list = zip(*paths_list)
    return img_list, mask_list


In [None]:
import shutil

In [None]:
# Copy every case's mask folder from the dataset tree into one flat 'totalmask' folder.
# NOTE(review): case_name_folder is not defined in any visible cell. Each item must be
# a (case name, 'train' | 'val') pair, because `folder` is passed on as `mod`.
common_dir = '/home/ncp/workspace/202002n050/050.신경계 질환 관련 임상 및 진료 데이터'
savepoint =  '/home/ncp/workspace/blocks1/totalmask'
skipped_cases = []
for fname, folder in tqdm(case_name_folder):
    _, mask_dir = find_aihub_img_label_dirs(fname, folder)
    copy_dir = os.path.join(savepoint, fname)
    try:
        shutil.copytree(mask_dir, copy_dir)
    except OSError as err:
        # Missing source folder or already-copied target: keep going, but record
        # the case instead of discarding every error silently.
        skipped_cases.append((fname, err))
print(f'{len(skipped_cases)} mask folders were not copied')

In [None]:
def find_dwi_adc_dir(img_folder_dir, fname):
    """Return the 'dwi' and 'adc' sub-folders of a case when both exist.

    Parameters:
        img_folder_dir (str) -- folder that holds one sub-folder per case
        fname (str) -- case folder name

    Return:
        tuple -- ``(dwi_folder_dir, adc_folder_dir)``; None if either folder is missing
    """
    dwi_folder_dir = os.path.join(img_folder_dir, fname, 'dwi')
    adc_folder_dir = os.path.join(img_folder_dir, fname, 'adc')
    # Logical `and` (not bitwise `&`) so the second check is skipped when the first fails.
    if os.path.isdir(dwi_folder_dir) and os.path.isdir(adc_folder_dir):
        return dwi_folder_dir, adc_folder_dir
    return None

In [None]:
def find_mask_dir(mask_folder_dir, fname):
    """Return ``<mask_folder_dir>/<fname>`` if it is an existing directory, else None."""
    candidate = os.path.join(mask_folder_dir, fname)
    return candidate if os.path.isdir(candidate) else None

In [None]:
# Folders of the exported slice PNGs and of the copied masks; img_fname lists the
# entries of the image folder in sorted order.
img_common_dir = '/home/ncp/workspace/blocks1/dicom_to_png_2d/'
mask_common_dir = '/home/ncp/workspace/blocks1/totalmask'
img_fname = sorted(os.listdir(img_common_dir))

In [None]:
# Scratch: one case name picked for manual inspection.
fname = 'brain_mri_1000001'

In [None]:
def save_png(im_3d, save_point):
    """Save the slices of ``im_3d`` in reverse order as 256x256 PNG files.

    Files are named ``000.png``, ``001.png``, ... inside ``save_point``;
    ``000.png`` is the LAST slice of ``im_3d``.
    """
    for index, im_2d in enumerate(im_3d[::-1]):
        target = os.path.join(save_point, str(index).zfill(3) + '.png')
        Image.fromarray(im_2d).resize((256, 256)).save(target)

In [None]:
def gen_2d_slices_to_3d(img_2d_path_ls):
    """Stack 2-D image files into a volume, resample it and binarize it.

    Parameters:
        img_2d_path_ls (list) -- image paths, stacked in the given order

    Return:
        np.ndarray -- float32 volume holding 1.0 where the resampled value is
        above 128 and 0.0 elsewhere
    """
    slices = [np.array(Image.open(path)) for path in img_2d_path_ls]
    volume = resample_3d(np.stack(slices, axis=0))
    return (volume > 128).astype(np.float32)

In [None]:
import scipy.ndimage

In [None]:
# Turn each case's mask PNGs into one resampled, binarized .npy volume.
savepoint = '/home/ncp/workspace/blocks1/refined_mask_resample'
gen_new_dir(savepoint)
# Note: the input lives under 'blocks', not 'blocks1'.
mask_common_dir = '/home/ncp/workspace/blocks/refined_mask'
mask_fname_ls = os.listdir(mask_common_dir)
for fname_ in tqdm(mask_fname_ls):
    # reverse=True stacks the slices in descending file-name order.
    mask_path_ls = sorted(load_file_path(os.path.join(mask_common_dir, fname_), IMG_EXTENSION),reverse=True)
    img_3d = gen_2d_slices_to_3d(mask_path_ls)
    save_arr_to_np(img_3d, savepoint, fname_)

In [None]:
# Scratch: mask PNG paths of one case, in descending file-name order.
mask_path_ls = sorted(load_file_path(os.path.join(mask_common_dir, 'brain_mri_CC_0329'), IMG_EXTENSION),reverse=True)

In [None]:
# Scratch check: shape of that case's stacked mask slices before resampling.
np.stack([np.array(Image.open(p)) for p in mask_path_ls], axis=0).shape

In [None]:
def save_2d_from_3d(im_3d, save_point):
    """Save each slice of ``im_3d`` as a PNG file, without resizing.

    Files are named ``000.png``, ``001.png``, ... inside ``save_point`` and
    follow the slice order of ``im_3d``.
    """
    for index, im_2d in enumerate(im_3d):
        target = os.path.join(save_point, str(index).zfill(3) + '.png')
        Image.fromarray(im_2d).save(target)

In [None]:
# Export the resampled DWI / ADC volumes as per-slice PNGs under <savepoint>/<case>/{dwi,adc}.
# NOTE(review): this rebinds the module-level common_dir that find_aihub_img_label_dirs
# reads; reset it before calling the find_aihub_* helpers again.
common_dir = '/home/ncp/workspace/blocks1/dicom_to_np_2dnorm_resample/'
savepoint =  '/home/ncp/workspace/blocks2/dicom_to_png_2d_resample/'

# Cases are enumerated from the ADC folder; a case is exported only if the
# DWI file of the same name exists as well.
file_name_ls = os.listdir(os.path.join(common_dir, 'adc'))
dwi_common_dir = os.path.join(common_dir, 'dwi')
adc_common_dir = os.path.join(common_dir, 'adc')
for fname in tqdm(file_name_ls):
    dwi_file_path = os.path.join(dwi_common_dir, fname)
    adc_file_path = os.path.join(adc_common_dir, fname)
    if not os.path.isfile(dwi_file_path):
        # Skip before creating folders so no empty case folder is left behind.
        continue
    folder = os.path.splitext(fname)[0]
    dwi_save_path = os.path.join(savepoint, folder, 'dwi')
    adc_save_path = os.path.join(savepoint, folder, 'adc')
    gen_new_dir(dwi_save_path)
    gen_new_dir(adc_save_path)
    save_2d_from_3d(np.load(dwi_file_path),dwi_save_path)
    save_2d_from_3d(np.load(adc_file_path),adc_save_path)

In [None]:
# Scratch: point common_dir at the resampled mask volumes.
# NOTE(review): this rebinds the global that find_aihub_img_label_dirs reads.
common_dir = '/home/ncp/workspace/blocks1/refined_mask_resample'

In [None]:
# Scratch check: first .npy path found in common_dir.
load_file_path(common_dir, NP_EXTENSION)[0]

In [None]:
# Scratch check: contents of one exported case folder.
os.listdir('/home/ncp/workspace/blocks2/dicom_to_png_2d_resample/brain_mri_2013-2681')

In [None]:
# Scratch check: largest value of one mask volume after the uint8 cast.
# NOTE(review): mask_path is only assigned in other cells (hidden kernel state).
np.load(mask_path).astype(np.uint8).max()

In [None]:
# Export every resampled mask volume as per-slice PNGs under <savepoint>/<case>/.
# NOTE(review): this rebinds the global common_dir that find_aihub_img_label_dirs reads.
common_dir = '/home/ncp/workspace/blocks1/refined_mask_resample'
savepoint =  '/home/ncp/workspace/blocks2/refined_mask_resample_2d/'

for mask_path in tqdm(load_file_path(common_dir, NP_EXTENSION)):
    folder = os.path.splitext(os.path.basename(mask_path))[0]
    mask_save_path = os.path.join(savepoint, folder)
    gen_new_dir(mask_save_path)
    # Cast to uint8 before saving each slice as PNG.
    save_2d_from_3d(np.load(mask_path).astype(np.uint8),mask_save_path)

In [None]:
# Scratch check: does the resampled ADC volume of this case exist?
os.path.isfile('/home/ncp/workspace/blocks1/dicom_to_np_2dnorm_resample/adc/brain_mri_2013-3536.npy')

In [None]:
# Scratch check: shape of that resampled ADC volume.
np.load('/home/ncp/workspace/blocks1/dicom_to_np_2dnorm_resample/adc/brain_mri_2013-3536.npy').shape

In [None]:
# Scratch check. NOTE(review): if fname is a str here, fname[0] is only its first
# character -- confirm whether a list of names was intended.
os.path.splitext(fname[0])[0]

In [None]:
# Copy every case's mask folder from the dataset tree into one flat 'totalmask' folder.
# NOTE(review): case_name_folder is not defined in any visible cell. Each item must be
# a (case name, 'train' | 'val') pair, because `folder` is passed on as `mod`.
common_dir = '/home/ncp/workspace/202002n050/050.신경계 질환 관련 임상 및 진료 데이터'
savepoint =  '/home/ncp/workspace/blocks1/totalmask'
skipped_cases = []
for fname, folder in tqdm(case_name_folder):
    _, mask_dir = find_aihub_img_label_dirs(fname, folder)
    copy_dir = os.path.join(savepoint, fname)
    try:
        shutil.copytree(mask_dir, copy_dir)
    except OSError as err:
        # Missing source folder or already-copied target: keep going, but record
        # the case instead of discarding every error silently.
        skipped_cases.append((fname, err))
print(f'{len(skipped_cases)} mask folders were not copied')

In [None]:
# Scratch check: sorted file names of one copied mask folder.
sorted(os.listdir('/home/ncp/workspace/blocks1/totalmask/brain_mri_1000107'))

In [None]:
def save_arr_to_np(arr, savepoint, fname):
    """Write ``arr`` with ``np.save`` to ``<savepoint>/<fname>.npy``.

    Parameters:
        arr -- array to store
        savepoint (str) -- existing output directory
        fname (str) -- file name without the ``.npy`` extension
    """
    file_name = fname + '.npy'
    target = os.path.join(savepoint, file_name)
    np.save(target, arr)

In [None]:
def gen_2d_slices_to_3d(img_2d_path_ls):
    """Stack 2-D image files into a volume, resample it and binarize it.

    Parameters:
        img_2d_path_ls (list) -- image paths, stacked in the given order

    Return:
        np.ndarray -- float32 volume holding 1.0 where the resampled value is
        exactly 1 and 0.0 elsewhere

    NOTE(review): resample_3d interpolates, so voxels near a mask edge may not be
    exactly 1 after resampling -- confirm the exact-equality test is intended.
    """
    slices = [np.array(Image.open(path)) for path in img_2d_path_ls]
    volume = resample_3d(np.stack(slices, axis=0))
    return (volume == 1).astype(np.float32)

In [None]:
def find_dwi_adc_dir(img_folder_dir, fname):
    """Return the 'dwi' and 'adc' sub-folders of a case when both exist.

    Parameters:
        img_folder_dir (str) -- folder that holds one sub-folder per case
        fname (str) -- case folder name

    Return:
        tuple -- ``(dwi_folder_dir, adc_folder_dir)``; None if either folder is missing
    """
    dwi_folder_dir = os.path.join(img_folder_dir, fname, 'dwi')
    adc_folder_dir = os.path.join(img_folder_dir, fname, 'adc')
    # Logical `and` (not bitwise `&`) so the second check is skipped when the first fails.
    if os.path.isdir(dwi_folder_dir) and os.path.isdir(adc_folder_dir):
        return dwi_folder_dir, adc_folder_dir
    return None

In [None]:
def find_mask_dir(mask_folder_dir, fname):
    """Return ``<mask_folder_dir>/<fname>`` if it is an existing directory, else None."""
    candidate = os.path.join(mask_folder_dir, fname)
    return candidate if os.path.isdir(candidate) else None

In [None]:
# Build one mask PNG per DWI slice for every case: copy the existing mask when there
# is one, otherwise write an all-zero mask with the shape of the case's DWI image.
save_dir = '/home/ncp/workspace/blocks/refined_mask'

img_common_dir = '/home/ncp/workspace/blocks1/dicom_to_png_2d/'
mask_common_dir = '/home/ncp/workspace/blocks1/totalmask'

for fname in tqdm(sorted(os.listdir(img_common_dir))):
    dwi_adc_folder_dir = find_dwi_adc_dir(img_common_dir, fname)
    if not dwi_adc_folder_dir:
        # No dwi/adc pair: skip without leaving an empty output folder behind.
        continue
    dwi_folder_dir, adc_folder_dir = dwi_adc_folder_dir
    dwi_path_ls = load_file_path(dwi_folder_dir, IMG_EXTENSION)
    if not dwi_path_ls:
        # Empty DWI folder: nothing to align masks with.
        continue

    mask_folder_dir = find_mask_dir(mask_common_dir, fname)
    save_point = os.path.join(save_dir, fname)
    gen_new_dir(save_point)

    # Only the array shape and dtype of this image are used, as the blank-mask template.
    dwi_img = np.array(Image.open(dwi_path_ls[0]))
    for idx in range(len(dwi_path_ls)):
        slice_name = str(idx).zfill(3)+'.png'
        mask_path = os.path.join(mask_folder_dir, slice_name) if mask_folder_dir else None
        if mask_path and os.path.isfile(mask_path):
            mask_img = np.array(Image.open(mask_path))
        else:
            mask_img = np.zeros_like(dwi_img)
        Image.fromarray(mask_img).save(os.path.join(save_point, slice_name))

In [None]:
# Turn each case's mask PNGs into one resampled, binarized .npy volume.
savepoint = '/home/ncp/workspace/blocks1/refined_mask_resample'
gen_new_dir(savepoint)
# Note: the input lives under 'blocks', not 'blocks1'.
mask_common_dir = '/home/ncp/workspace/blocks/refined_mask'
mask_fname_ls = os.listdir(mask_common_dir)
for fname_ in tqdm(mask_fname_ls):
    # Slices are stacked in ascending file-name order.
    mask_path_ls = sorted(load_file_path(os.path.join(mask_common_dir, fname_), IMG_EXTENSION))
    img_3d = gen_2d_slices_to_3d(mask_path_ls)
    save_arr_to_np(img_3d, savepoint, fname_)

In [None]:
# Scratch check: maximum of the last mask volume produced by the loop above.
img_3d.max()

In [None]:
def gen_2d_slices_to_3d(img_2d_path_ls):
    """Stack 2-D image files into a volume, resample it and binarize it.

    Parameters:
        img_2d_path_ls (list) -- image paths, stacked in the given order

    Return:
        np.ndarray -- float32 volume holding 1.0 wherever the resampled value is
        non-zero and 0.0 elsewhere
    """
    slices = [np.array(Image.open(path)) for path in img_2d_path_ls]
    volume = resample_3d(np.stack(slices, axis=0))
    return (volume != 0).astype(np.float32)

In [None]:
# Scratch check: pair up the sorted DWI and ADC slice paths of the folders left
# over from an earlier loop (hidden kernel state).
dwi_path_ls = sorted(load_file_path(dwi_folder_dir, IMG_EXTENSION))
adc_path_ls = sorted(load_file_path(adc_folder_dir, IMG_EXTENSION))
list(zip(dwi_path_ls,adc_path_ls))

In [None]:
# Print the name of every case whose DWI and ADC folders hold a different number of images.
img_common_dir = '/home/ncp/workspace/blocks1/dicom_to_png_2d/'
mask_common_dir = '/home/ncp/workspace/blocks1/totalmask'

for fname in tqdm(sorted(os.listdir(img_common_dir))):
    dwi_adc_folder_dir = find_dwi_adc_dir(img_common_dir, fname)
    if not dwi_adc_folder_dir:
        continue
    dwi_folder_dir, adc_folder_dir = dwi_adc_folder_dir
    dwi_path_ls = load_file_path(dwi_folder_dir, IMG_EXTENSION)
    adc_path_ls = load_file_path(adc_folder_dir, IMG_EXTENSION)
    if len(dwi_path_ls) != len(adc_path_ls):
        print(fname)

In [None]:
# Turn each case's mask PNGs into one resampled, binarized .npy volume.
savepoint = '/home/ncp/workspace/blocks1/refined_mask_resample'
gen_new_dir(savepoint)
# Note: the input lives under 'blocks', not 'blocks1'.
mask_common_dir = '/home/ncp/workspace/blocks/refined_mask'
mask_fname_ls = os.listdir(mask_common_dir)
for fname_ in tqdm(mask_fname_ls):
    # Slices are stacked in ascending file-name order.
    mask_path_ls = sorted(load_file_path(os.path.join(mask_common_dir, fname_), IMG_EXTENSION))
    img_3d = gen_2d_slices_to_3d(mask_path_ls)
    save_arr_to_np(img_3d, savepoint, fname_)

In [None]:
# Scratch check: maximum of the last mask volume produced by the loop above.
img_3d.max()

In [None]:
def gen_2d_slices_to_3d(img_2d_path_ls):
    """Stack 2-D image files into a volume, resample it and binarize it.

    Parameters:
        img_2d_path_ls (list) -- image paths, stacked in the given order

    Return:
        np.ndarray -- float32 volume holding 1.0 wherever the resampled value is
        non-zero and 0.0 elsewhere
    """
    slices = [np.array(Image.open(path)) for path in img_2d_path_ls]
    volume = resample_3d(np.stack(slices, axis=0))
    return (volume != 0).astype(np.float32)

In [None]:
def resize_and_save_2d(im_3d, save_point):
    """Save the slices of ``im_3d`` in reverse order as 256x256 PNG files.

    Files are named ``000.png``, ``001.png``, ... inside ``save_point``;
    ``000.png`` is the LAST slice of ``im_3d``.
    """
    for index, im_2d in enumerate(im_3d[::-1]):
        target = os.path.join(save_point, str(index).zfill(3) + '.png')
        Image.fromarray(im_2d).resize((256, 256)).save(target)

In [None]:
# Scratch: load every DWI/ADC pair; only the last pair stays bound after the loop.
for dwi_p, adc_p in dwi_adc_pair:
    dwi_3d_im = np.load(dwi_p)
    adc_3d_im = np.load(adc_p)
    

In [None]:
# Resample every DWI and ADC volume from dicom_to_np_3dnorm with resample_3d
# (default target shape) and save it under dicom_to_np_3dnorm_resample.
loadpoint = '/home/ncp/workspace/blocks1/dicom_to_np_3dnorm'
dwi_loadpoint = os.path.join(loadpoint, 'dwi')
adc_loadpoint = os.path.join(loadpoint, 'adc')

savepoint = '/home/ncp/workspace/blocks1/dicom_to_np_3dnorm_resample'
dwi_savepoint = os.path.join(savepoint, 'dwi')
adc_savepoint = os.path.join(savepoint, 'adc')
gen_new_dir(dwi_savepoint)
gen_new_dir(adc_savepoint)

dwi_npy_paths = load_file_path(dwi_loadpoint, NP_EXTENSION)
adc_npy_paths = load_file_path(adc_loadpoint, NP_EXTENSION)

for p in tqdm(dwi_npy_paths):
    # Output keeps the input's base name.
    fname = os.path.splitext(os.path.basename(p))[0]
    dwi_3d_im = np.load(p)
    dwi_3d_im = resample_3d(dwi_3d_im)
    save_arr_to_np(dwi_3d_im, dwi_savepoint, fname)
    
for p in tqdm(adc_npy_paths):
    fname = os.path.splitext(os.path.basename(p))[0]
    adc_3d_im = np.load(p)
    adc_3d_im = resample_3d(adc_3d_im)
    save_arr_to_np(adc_3d_im, adc_savepoint, fname)

In [None]:
# Convert each case's DWI / ADC DICOM series to a resampled .npy volume.
# NOTE(review): the output depends on which load_mr_scans definition is live in the kernel.
savepoint = '/home/ncp/workspace/blocks1/dicom_to_np_2dnorm_resample'
dwi_savepoint = os.path.join(savepoint, 'dwi')
adc_savepoint = os.path.join(savepoint, 'adc')
gen_new_dir(dwi_savepoint)
gen_new_dir(adc_savepoint)

for fname_, dwi_adc_ls_ in tqdm(fname_dicom_dict_load.items()):
    dwi_path_ls, adc_path_ls = dwi_adc_ls_
    # Truthiness skips both "missing" encodings (None and an empty list);
    # either one would otherwise fail inside load_mr_scans.
    if dwi_path_ls:
        _, dwi_3d_im = load_mr_scans(dwi_path_ls)
        dwi_3d_im = resample_3d(dwi_3d_im)
        save_arr_to_np(dwi_3d_im, dwi_savepoint, fname_)
    if adc_path_ls:
        _, adc_3d_im = load_mr_scans(adc_path_ls)
        adc_3d_im = resample_3d(adc_3d_im)
        save_arr_to_np(adc_3d_im, adc_savepoint, fname_)

In [None]:
def load_mr_scans(dcm_paths):
    """Read a list of DICOM files, stack their pixel data and normalize it to uint8.

    Parameters:
        dcm_paths (list) -- paths of the DICOM files to read

    Return:
        slices (list) -- datasets sorted by descending ``ImagePositionPatient[2]``
        images (np.ndarray) -- stacked ``pixel_array`` data after ``normalize``,
            which is applied to the whole stack at once
    """
    # dcmread is the current reader name; read_file is its deprecated alias.
    slices = [pydicom.dcmread(dcm_path, force=True) for dcm_path in dcm_paths]
    slices.sort(key=lambda ds: float(ds.ImagePositionPatient[2]), reverse=True)

    images = np.stack([ds.pixel_array for ds in slices])
    images = normalize(images)
    return slices, images

In [None]:
# Convert each case's DWI / ADC DICOM series to a resampled .npy volume.
# NOTE(review): the output depends on which load_mr_scans definition is live in the kernel.
savepoint = '/home/ncp/workspace/blocks1/dicom_to_np_3dnorm_resample'
dwi_savepoint = os.path.join(savepoint, 'dwi')
adc_savepoint = os.path.join(savepoint, 'adc')
gen_new_dir(dwi_savepoint)
gen_new_dir(adc_savepoint)

for fname_, dwi_adc_ls_ in tqdm(fname_dicom_dict_load.items()):
    dwi_path_ls, adc_path_ls = dwi_adc_ls_
    # Truthiness skips both "missing" encodings (None and an empty list);
    # either one would otherwise fail inside load_mr_scans.
    if dwi_path_ls:
        _, dwi_3d_im = load_mr_scans(dwi_path_ls)
        dwi_3d_im = resample_3d(dwi_3d_im)
        save_arr_to_np(dwi_3d_im, dwi_savepoint, fname_)
    if adc_path_ls:
        _, adc_3d_im = load_mr_scans(adc_path_ls)
        adc_3d_im = resample_3d(adc_3d_im)
        save_arr_to_np(adc_3d_im, adc_savepoint, fname_)