# Image preprocessing

Because the DICOM data is huge and high-resolution,  
I have converted it to JPG to speed up training.  
The Hounsfield units (HU) are also converted to three windowed images, which are stored as the three channels of a single image.  
Refer to: [radiopaedia_windowing-ct](https://radiopaedia.org/articles/windowing-ct)  

#### Three window types used (window center, window width):
1. brain(40,80) 
2. subdural(80,200)
3. soft(40,380)

In [None]:
import numpy as np
import tqdm
import pandas as pd
import pydicom
import cv2
import glob
import os
import multiprocessing


# Size, in pixels, that every slice is resized to before it is saved.
RESIZED_WIDTH = 256
RESIZED_HEIGHT = 256

# Build the path with os.path.join: plain concatenation with os.getcwd()
# (which has no trailing separator) would glue 'RSNATRAIN' onto the name of
# the current directory instead of creating a sub-directory inside it.
OUTPUT_DIR = os.path.join(os.getcwd(), 'RSNATRAIN')

print(OUTPUT_DIR)

# exist_ok avoids the check-then-create race when the cell is re-run or when
# several processes start at the same time.
os.makedirs(OUTPUT_DIR, exist_ok=True)


[reference](https://www.kaggle.com/jhoward/cleaning-the-data-for-rapid-prototyping-fastai)

In [None]:
def correct_dcm(img):
    """Rewrite the pixel data of ``img`` in place with a corrected offset.

    The pixel array is shifted up by 1000, values that reach 4096 are wrapped
    back by 4096, the result is stored as the new ``PixelData`` and
    ``RescaleIntercept`` is set to -1000.

    NOTE(review): presumably this repairs files whose stored values were
    written with the wrong offset -- confirm against the referenced Kaggle
    notebook.

    Args:
        img: object exposing ``pixel_array`` and accepting assignment to
            ``PixelData`` and ``RescaleIntercept`` (a pydicom dataset in the
            caller shown in this file).

    Returns:
        None. ``img`` is modified in place.
    """
    wrap_at = 4096
    shifted = img.pixel_array + 1000

    # Values at or above the wrap point are folded back into range.
    overflow = shifted >= wrap_at
    shifted[overflow] = shifted[overflow] - wrap_at

    img.PixelData = shifted.tobytes()
    img.RescaleIntercept = -1000

def _window_image(img, window_center, window_width):
    """Return the resized pixel data of ``img`` clipped to one window.

    Args:
        img: DICOM dataset providing ``BitsStored``, ``PixelRepresentation``,
            ``RescaleSlope``, ``RescaleIntercept`` and ``pixel_array``.
        window_center: centre of the window.
        window_width: full width of the window; half of it (integer
            division) is taken on each side of the centre.

    Returns:
        Array of size RESIZED_WIDTH x RESIZED_HEIGHT whose values are clipped
        to ``[center - width // 2, center + width // 2]``.
    """
    # 12-bit unsigned data with an intercept above -100 is passed through
    # correct_dcm first (which mutates ``img``).
    needs_correction = (
        img.BitsStored == 12
        and img.PixelRepresentation == 0
        and int(img.RescaleIntercept) > -100
    )
    if needs_correction:
        correct_dcm(img)

    # Apply the linear rescale stored in the DICOM header.
    rescaled = img.pixel_array * img.RescaleSlope + img.RescaleIntercept

    # cv2.resize takes the target size as (width, height).
    target_size = (RESIZED_WIDTH, RESIZED_HEIGHT)
    rescaled = cv2.resize(rescaled, target_size, interpolation=cv2.INTER_LINEAR)

    half_width = window_width // 2
    lower = window_center - half_width
    upper = window_center + half_width
    return np.clip(rescaled, lower, upper)

def bsb_window(img):
    """Build a 3-channel image from the brain, subdural and soft windows.

    Each window is produced by ``_window_image`` and then rescaled by
    subtracting the window's lower bound and dividing by its width, so every
    channel lies in ``[0, 1]``.

    Args:
        img: DICOM dataset accepted by ``_window_image``.

    Returns:
        Array with the channel axis last, in the order
        (brain, subdural, soft).
    """
    # (window center, window width) for brain, subdural and soft, in the
    # order the channels are stacked.
    windows = ((40, 80), (80, 200), (40, 380))

    channels = []
    for center, width in windows:
        clipped = _window_image(img, center, width)
        lower_bound = center - width // 2  # 0, -20 and -150 respectively
        channels.append((clipped - lower_bound) / width)

    # Stack as (channel, row, col), then move the channel axis to the end.
    return np.array(channels).transpose(1, 2, 0)


def save_to_jpg(path):
    """Convert one DICOM file to a 3-channel windowed JPEG in OUTPUT_DIR.

    The file is read with pydicom, turned into a brain/subdural/soft image by
    ``bsb_window``, scaled to 0-255 and written as ``<SOPInstanceUID>.jpg``.
    If the windowing step fails, the failure is reported and an all-black
    image is written instead, so that every input still yields an output file.

    Args:
        path: path of the DICOM file to convert.

    Raises:
        Whatever ``pydicom.dcmread`` raises for an unreadable file; without
        the dataset there is no SOPInstanceUID to name the output after.
    """
    dcm = pydicom.dcmread(path)

    try:
        img = bsb_window(dcm)
        img = np.round(img * 255).astype(np.uint8)
    except Exception as exc:
        # Catch Exception rather than using a bare except so that
        # KeyboardInterrupt/SystemExit still stop the run, and report which
        # file failed and why.
        print('exception!', path, repr(exc))
        # NumPy images are indexed (rows, cols, channels), i.e. height first.
        img = np.zeros((RESIZED_HEIGHT, RESIZED_WIDTH, 3), dtype=np.uint8)

    cv2.imwrite(os.path.join(OUTPUT_DIR, dcm.SOPInstanceUID + '.jpg'), img)

In [None]:

#dicom = glob.glob(os.path.join(img_dir, '*.dcm'))

#pool = multiprocessing.Pool(2) # multiprocessing.cpu_count() can be used instead of "2"
#for _ in tqdm.tqdm(pool.imap(save_to_jpg, dicom), total=len(dicom)):
#    pass 