In [None]:
# Mount Google Drive at /content/drive so the project folder used by the next
# cell is reachable. `google.colab` is only importable inside a Colab runtime.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# Make the project folder on the mounted Drive the working directory: the
# relative paths used by later cells (./train/..., ./csv) are resolved against it.
%cd /content/drive/MyDrive/Study/DL main project

/content/drive/MyDrive/Study/DL main project


source: https://www.kaggle.com/xhlulu/vinbigdata-process-and-resize-to-image

# **IMPORT LIBRARIES**

In [None]:
!pip install pydicom

Collecting pydicom
[?25l  Downloading https://files.pythonhosted.org/packages/f4/15/df16546bc59bfca390cf072d473fb2c8acd4231636f64356593a63137e55/pydicom-2.1.2-py3-none-any.whl (1.9MB)
[K     |████████████████████████████████| 1.9MB 6.5MB/s 
[?25hInstalling collected packages: pydicom
Successfully installed pydicom-2.1.2


In [None]:
import os

from PIL import Image
import pandas as pd
from tqdm.auto import tqdm
import numpy as np
from pydicom import dcmread
from pydicom.pixel_data_handlers.util import apply_voi_lut

In [None]:
def get_xray(path):
    """Read the DICOM file at ``path`` and return the dataset produced by ``dcmread``."""
    return dcmread(path)

def xray_to_nparray(dicom,voi_lut = True, fix_monochrome = True):
    """Convert a DICOM dataset's pixel data to an 8-bit array scaled to 0-255.

    Parameters
    ----------
    dicom : object exposing ``pixel_array`` (and ``PhotometricInterpretation``
        when ``fix_monochrome`` is true), e.g. the result of ``get_xray``.
    voi_lut : bool
        When true, pass the pixels through ``apply_voi_lut`` (VOI LUT, if
        provided by the DICOM device, gives the "human-friendly" view).
    fix_monochrome : bool
        When true, invert images whose ``PhotometricInterpretation`` is
        ``"MONOCHROME1"`` so they do not look inverted.

    Returns
    -------
    numpy.ndarray of dtype ``uint8`` with the same shape as the pixel data,
    min-max normalised to the range 0-255. A constant image yields all zeros.
    """
    # Original from: https://www.kaggle.com/raddar/convert-dicom-to-np-array-the-correct-way
    pixels = dicom.pixel_array
    if voi_lut:
        pixels = apply_voi_lut(pixels, dicom)

    # Do all arithmetic in float64: subtracting in the stored integer dtype
    # (e.g. int16) can wrap around when the value range exceeds that dtype.
    data = np.asarray(pixels, dtype=np.float64)

    # depending on this value, X-ray may look inverted - fix that:
    if fix_monochrome and dicom.PhotometricInterpretation == "MONOCHROME1":
        data = np.amax(data) - data

    data = data - np.min(data)
    peak = np.max(data)
    if peak == 0:
        # Flat image: dividing by zero would produce NaN and an undefined
        # uint8 cast, so return an all-black image instead.
        return np.zeros(data.shape, dtype=np.uint8)
    data = data / peak
    data = (data * 255).astype(np.uint8)

    return data

def nparray_to_img(array, size = None, keep_ratio=False, resample=Image.LANCZOS):
    """Turn a pixel array into a PIL image, optionally resized.

    Parameters
    ----------
    array : array accepted by ``Image.fromarray``.
    size : int or None
        Target edge length in pixels. ``None`` returns the image unresized.
    keep_ratio : bool
        When true, shrink the image in place with ``Image.thumbnail`` so it
        fits inside a ``size`` x ``size`` box; otherwise resize to exactly
        ``size`` x ``size``.
    resample : resampling filter passed to ``thumbnail`` / ``resize``.

    Returns
    -------
    The resulting PIL image.
    """
    # Original from: https://www.kaggle.com/xhlulu/vinbigdata-process-and-resize-to-image
    im = Image.fromarray(array)
    # Identity check: `== None` is non-idiomatic and misbehaves for objects
    # that overload `==` (e.g. array-like sizes).
    if size is None:
        return im
    if keep_ratio:
        # thumbnail() modifies `im` in place and returns None.
        im.thumbnail((size, size), resample)
    else:
        im = im.resize((size, size), resample)

    return im

def xray_to_img(path,size = None,keep_ratio=False,voi_lut = True, fix_monochrome = True, resample=Image.LANCZOS):
    """Load the DICOM file at ``path`` and return it as a PIL image.

    Chains ``get_xray`` -> ``xray_to_nparray`` -> ``nparray_to_img``;
    ``voi_lut`` and ``fix_monochrome`` are forwarded to the array conversion,
    ``size``, ``keep_ratio`` and ``resample`` to the image conversion.
    """
    pixels = xray_to_nparray(
        get_xray(path),
        voi_lut=voi_lut,
        fix_monochrome=fix_monochrome,
    )
    return nparray_to_img(
        pixels,
        size=size,
        keep_ratio=keep_ratio,
        resample=resample,
    )

In [None]:
# process csv
def split_train_csv(load_path,save_dir):
    """Split the CSV at ``load_path`` into one CSV per ``class_id`` value.

    Each distinct ``class_id`` (processed in ascending order) gets a file
    named ``<class_id, zero-padded to 2 digits>.csv`` inside ``save_dir``,
    which is created if missing. Rows are written after ``reset_index()``.

    Returns
    -------
    (save_dir, file_names) where ``file_names`` lists the written file names
    in the order they were produced.
    """
    frame = pd.read_csv(load_path)
    # One output file per distinct class, lowest id first.
    ordered_ids = np.sort(frame['class_id'].unique())
    os.makedirs(save_dir, exist_ok=True)

    written = []
    for class_id in tqdm(ordered_ids):
        csv_name = f'{class_id:02d}.csv'
        written.append(csv_name)
        rows = frame.loc[frame['class_id'] == class_id].reset_index()
        rows.to_csv(os.path.join(save_dir, csv_name))
    return save_dir, written

In [None]:
# Process dicom
def get_raw_data_by_class(df,class_id,load_dir,save_dir = None):
    """Load the raw pixel arrays of every image of one class.

    Parameters
    ----------
    df : DataFrame with ``class_id`` and ``image_id`` columns.
    class_id : class whose images are loaded.
    load_dir : directory containing ``<image_id>.dicom`` files.
    save_dir : optional directory; when given, each array is also stored as
        ``<image_id>.npy`` unless that file already exists.

    Returns
    -------
    list of ``[image_id, pixel_array]`` pairs, one per distinct image.
    """
    file_names = df[df['class_id']==class_id]['image_id'].unique()
    # Create the output directory once instead of on every iteration.
    if save_dir is not None:
        os.makedirs(save_dir,exist_ok=True)
    data = []
    for file_name in tqdm(file_names):
        path_dicom = os.path.join(load_dir,file_name+'.dicom')
        xray = get_xray(path_dicom).pixel_array
        if save_dir is not None:
            path_npy = os.path.join(save_dir,file_name+'.npy')
            # Never overwrite an array saved by a previous run.
            if not os.path.isfile(path_npy):
                np.save(path_npy,xray)
        data.append([file_name,xray])
    return data
def get_png_by_class(df,class_id,load_dir,save_dir = None):
    """Convert every image of one class to a PIL image.

    Parameters
    ----------
    df : DataFrame with ``class_id`` and ``image_id`` columns.
    class_id : class whose images are converted.
    load_dir : directory containing ``<image_id>.dicom`` files.
    save_dir : optional directory; when given, each image is also written as
        ``<image_id>.png`` unless that file already exists.

    Returns
    -------
    list of ``[image_id, image]`` pairs, one per distinct image.
    """
    file_names = df[df['class_id']==class_id]['image_id'].unique()
    # Create the output directory once instead of on every iteration.
    if save_dir is not None:
        os.makedirs(save_dir,exist_ok=True)
    data = []
    for file_name in tqdm(file_names):
        path_dicom = os.path.join(load_dir,file_name+'.dicom')
        img = xray_to_img(path_dicom)
        if save_dir is not None:
            path_png = os.path.join(save_dir,file_name+'.png')
            # Never overwrite a PNG written by a previous run.
            if not os.path.isfile(path_png):
                img.save(path_png)
        data.append([file_name,img])
    return data

In [None]:
# Project-relative locations (resolved against the current working directory).
dicom_dir = './train/dicom'  # source DICOM files
mask_dir = './train/mask'    # not referenced in the visible cells -- TODO confirm it is still needed
npy_dir = './train/npy'      # raw pixel arrays saved as .npy
png_dir = './train/png'      # converted PNG images
csv_path = './csv'           # directory holding train.csv and the per-class CSVs derived from it

# **Process**

## *split train.csv*

In [None]:
# Split train.csv into one CSV per class_id, written next to it in csv_path.
# csv_names holds the generated file names ordered by ascending class_id.
main_csv = os.path.join(csv_path,'train.csv')
csv_folder, csv_names= split_train_csv(main_csv,csv_path)

100%|██████████| 15/15 [00:00<00:00, 30.25it/s]


## *split class 14 into batches (size of the first batch = number of class 12 images)*

In [None]:
# csv_names is ordered by ascending class_id, so index 14 is the file for
# class 14 only if class ids are contiguous from 0 -- TODO confirm.
csv_14_path = os.path.join(csv_path,csv_names[14])
class_14 = pd.read_csv(csv_14_path)

In [None]:
# Same positional lookup as for class 14: index 12 is the class-12 file only
# if class ids are contiguous from 0 -- TODO confirm.
csv_12_path = os.path.join(csv_path,csv_names[12])
class_12 = pd.read_csv(csv_12_path)

In [None]:
# Number of distinct images in class 14 and class 12.
nb_14 = len(class_14['image_id'].unique())
nb_12 = len(class_12['image_id'].unique())
batch_size = 150
# Batch 0 holds the first nb_12 images; the remaining (nb_14 - nb_12) images
# are cut into batches of batch_size. Round up so that a final partial batch
# is kept -- truncating with int() silently dropped those images.
nb_batch = int(np.ceil((nb_14-nb_12)/batch_size))

In [None]:
# Write the class-14 rows to ./csv/14/<batch index>.csv, one file per batch.
csv_14_batch = os.path.join(csv_path,'14')
os.makedirs(csv_14_batch,exist_ok=True)
# Distinct image ids in first-appearance order; computed once because it does
# not change between iterations.
image_ids_14 = class_14['image_id'].unique()
for i in range(nb_batch+1):
    if i == 0:
        # Batch 0 is special: the first nb_12 images.
        s = 0; e = nb_12
    else:
        # Batch i >= 1 covers batch_size images following batch 0.
        s = (i-1)*batch_size+nb_12
        e = i*batch_size+nb_12
    save_path = os.path.join(csv_14_batch,'{:03d}.csv'.format(i))
    # Keep every row whose image belongs to this batch.
    class_14[class_14['image_id'].isin(image_ids_14[s:e])].to_csv(save_path)

## *Process 14 - 001*

In [None]:
# Rows of class-14 batch 001, as written by the batch-splitting cell.
class_14_001 = pd.read_csv('./csv/14/001.csv')

In [None]:
# Load the raw pixel arrays for batch 001 from ./train/dicom/14/001 and store
# them as .npy files under ./train/npy/14/001 (existing files are kept).
raw_data = get_raw_data_by_class(class_14_001,14,'./train/dicom/14/001','./train/npy/14/001')

In [None]:
# Convert the same batch to PIL images and write them as PNG files under
# ./train/png/14/001 (existing files are kept).
png_data = get_png_by_class(class_14_001,14,'./train/dicom/14/001','./train/png/14/001')

HBox(children=(FloatProgress(value=0.0, max=150.0), HTML(value='')))

  f"The (0028,0101) 'Bits Stored' value ({ds.BitsStored}-bit) "
  f"The (0028,0101) 'Bits Stored' value ({ds.BitsStored}-bit) "



