Sigmoid Gradient (Brain + Subdural + Bone) Windowing and save as png images
https://www.kaggle.com/dcstang/see-like-a-radiologist-with-systematic-windowing/comments

In [1]:
import numpy as np
import pandas as pd 
import matplotlib.pyplot as plt
import pydicom
import os
from tqdm import tqdm
from joblib import delayed, Parallel
import glob
import cv2

In [2]:
def get_first_of_dicom_field_as_int(x):
    #get x[0] as in int is x is a 'pydicom.multival.MultiValue', otherwise get int(x)
    if type(x) == pydicom.multival.MultiValue:
        return int(x[0])
    else:
        return int(x)
    
def get_windowing(data):
    dicom_fields = [data[('0028','1050')].value, #window center
                    data[('0028','1051')].value, #window width
                    data[('0028','1052')].value, #intercept
                    data[('0028','1053')].value] #slope
    return [get_first_of_dicom_field_as_int(x) for x in dicom_fields]

def sigmoid_window(img, window_center, window_width, U=1.0, eps=(1.0 / 255.0)):
    _, _, intercept, slope = get_windowing(img)
    img = img.pixel_array * slope + intercept
    ue = np.log((U / eps) - 1.0)
    W = (2 / window_width) * ue
    b = ((-2 * window_center) / window_width) * ue
    z = W * img + b
    img = U / (1 + np.power(np.e, -1.0 * z))
    img = (img - np.min(img)) / (np.max(img) - np.min(img))
    return img

def sigmoid_bsb_window(img):
    brain_img = sigmoid_window(img, 40, 80)
    subdural_img = sigmoid_window(img, 80, 200)
    bone_img = sigmoid_window(img, 600, 2000)
    
    bsb_img = np.zeros((brain_img.shape[0], brain_img.shape[1], 3))
    bsb_img[:, :, 0] = brain_img
    bsb_img[:, :, 1] = subdural_img
    bsb_img[:, :, 2] = bone_img
    return bsb_img

def map_to_gradient_sig(grey_img):
    rainbow_img = np.zeros((grey_img.shape[0], grey_img.shape[1], 3))
    rainbow_img[:, :, 0] = np.clip(4*grey_img - 2, 0, 1.0) * (grey_img > 0.01) * (grey_img <= 1.0)
    rainbow_img[:, :, 1] =  np.clip(4*grey_img * (grey_img <=0.75), 0,1) + np.clip((-4*grey_img + 4) * (grey_img > 0.75), 0, 1)
    rainbow_img[:, :, 2] = np.clip(-4*grey_img + 2, 0, 1.0) * (grey_img > 0.01) * (grey_img <= 1.0)
    return rainbow_img

def sigmoid_rainbow_bsb_window(img):
    brain_img = sigmoid_window(img, 40, 80)
    subdural_img = sigmoid_window(img, 80, 200)
    bone_img = sigmoid_window(img, 600, 2000)
    combo = (brain_img*0.35 + subdural_img*0.5 + bone_img*0.15)
    combo_norm = (combo - np.min(combo)) / (np.max(combo) - np.min(combo))
    return map_to_gradient_sig(combo_norm)

In [3]:
def convert_dicom_to_jpg(dicomfile, outputdir):
    try:
        data = pydicom.read_file(dicomfile)
        img_processed = sigmoid_rainbow_bsb_window(data)
        img_processed = img_processed * 255
        id = dicomfile.split("/")[-1].split(".")[0]
        output_image = os.path.join(outputdir, id + ".jpg")
        cv2.imwrite(output_image, img_processed)
    except:
        print(dicomfile)

def extract_images(inputdir, outputdir):
    os.makedirs(outputdir, exist_ok=True)
    files = glob.glob(inputdir + "/*.dcm")
    Parallel(n_jobs=8)(delayed(convert_dicom_to_jpg)(file, outputdir) for file in tqdm(files, total=len(files)))

In [4]:
inputdir = '/home/jupyter/stage_1_train_images/'
outputdir = '/home/jupyter/train_images_bsb_jpg/'

In [None]:
extract_images(inputdir, outputdir)

 11%|█         | 71800/674258 [29:43<3:56:58, 42.37it/s] 

In [None]:
inputdir = '/home/jupyter/stage_1_test_images/'
outputdir = '/home/jupyter/test_images_bsb_jpg/'

In [None]:
extract_images(inputdir, outputdir)

In [None]:
# look at some to check
# check size
# also do test set

In [3]:
files = sorted(glob.glob("/home/jupyter/train_images_bsb_jpg/*.jpg")) # 674257
print(len(files)) # 1 corrupted file: ID_6431af929

674257


In [5]:
files = sorted(glob.glob("/home/jupyter/test_images_bsb_jpg/*.jpg"))
print(len(files)) # 1 corrupted file: ID_6431af929

78545


In [None]:
TRAIN_IMG_PATH = "/home/jupyter/train_images_bsb/"
TEST_IMG_PATH = "/home/jupyter/test_images_bsb/"

def view_images(images, title = '', aug = None):
    width = 5
    height = 2
    fig, axs = plt.subplots(height, width, figsize=(15,5))
    
    for im in range(0, height * width):
        data = pydicom.read_file(os.path.join(TRAIN_IMG_PATH, images[im]+ '.dcm'))
        image = data.pixel_array
        window_center , window_width, intercept, slope = get_windowing(data)
        image_windowed = window_image(image, window_center, window_width, intercept, slope)


        i = im // width
        j = im % width
        axs[i,j].imshow(image_windowed, cmap=plt.cm.bone) 
        axs[i,j].axis('off')
        
    plt.suptitle(title)
    plt.show()

In [7]:
train = pd.read_csv('train.csv', index_col=0)
val = pd.read_csv('val.csv', index_col=0)
test = pd.read_csv('test.csv', index_col=0)

In [8]:
train.head()

Unnamed: 0,filename,PatientID,any,epidural,intraparenchymal,intraventricular,subarachnoid,subdural
0,ID_231d901c1.jpg,ID_b81a287f,1,0,0,0,1,0
2,ID_127689cce.jpg,ID_42910d3d,0,0,0,0,0,0
3,ID_25457734a.jpg,ID_329aafa7,0,0,0,0,0,0
4,ID_81c9aa125.jpg,ID_6b544c3c,0,0,0,0,0,0
5,ID_87e8b2528.jpg,ID_d6e578fb,0,0,0,0,0,0


In [9]:
train['image'] = train['ID'].str.slice(stop=12) # separate image ID and put in a column
train_df['diagnosis'] = train_df['ID'].str.slice(start=13) # separate the condition and put in a column. They have the condition if there is a 1 next to the condition

view_images(train_df[(train_df['diagnosis'] == 'epidural') & (train_df['Label'] == 1)][:10].image.values, title = 'Images with epidural')

NameError: name 'train_df' is not defined

In [None]:
view_images(train_df[(train_df['diagnosis'] == 'intraparenchymal') & (train_df['Label'] == 1)][:10].image.values, title = 'Images with intraparenchymal')

In [None]:
view_images(train_df[(train_df['diagnosis'] == 'intraventricular')& (train_df['Label'] == 1)][:10].image.values, title = 'Images with intraventricular')

In [None]:
view_images(train_df[(train_df['diagnosis'] == 'subarachnoid')& (train_df['Label'] == 1)][:10].image.values, title = 'Images with subarachnoid')

In [None]:
view_images(train_df[(train_df['diagnosis'] == 'subdural') & (train_df['Label'] == 1)][:10].image.values, title = 'Images with subarachnoid')

In [1]:
!du -h train_images_bsb 

324K	train_images_bsb/.ipynb_checkpoints
69G	train_images_bsb


In [2]:
!du -h test_images_bsb 

8.2G	test_images_bsb


In [6]:
train_dcm = sorted(glob.glob("/home/jupyter/stage_1_train_images/*.dcm"))
test_dcm = sorted(glob.glob("/home/jupyter/stage_1_test_images/*.dcm"))
print("train .dcm files: ", len(train))
print("test .dcm files: ", len(test))

NameError: name 'train' is not defined

In [2]:
train_png = sorted(glob.glob("/home/jupyter/train_images_bsb/*.png"))
test_png = sorted(glob.glob("/home/jupyter/test_images_bsb/*.png"))
print("train .png files: ", len(train_png))
print("test .png files: ", len(test_png))

train .png files:  674257
test .png files:  78545


In [10]:
train_png[0:5]

['/home/jupyter/train_images_bsb/ID_000039fa0.png',
 '/home/jupyter/train_images_bsb/ID_00005679d.png',
 '/home/jupyter/train_images_bsb/ID_00008ce3c.png',
 '/home/jupyter/train_images_bsb/ID_0000950d7.png',
 '/home/jupyter/train_images_bsb/ID_0000aee4b.png']

In [3]:
# make 299 x 299 images

In [25]:
def save_and_resize(filenames, load_dir, save_dir):    
    if not os.path.exists(save_dir):
        os.makedirs(save_dir)

    for filename in tqdm(filenames):
        path = os.path.join(load_dir, filename)
        new_path = os.path.join(save_dir, filename)
        im = cv2.imread(path)
        resized = cv2.resize(im, (299, 299))
        res = cv2.imwrite(new_path, resized)    

In [26]:
train_load_dir = '/home/jupyter/train_images_bsb/'
test_load_dir = '/home/jupyter/test_images_bsb/'

train_save_dir = '/home/jupyter/train_images_bsb_299/'
test_save_dir = 'home/jupyter/test_images_bsb_299/'

In [27]:
train_filenames = os.listdir('/home/jupyter/train_images_bsb')
test_filenames = os.listdir('/home/jupyter/test_images_bsb')

In [28]:
#save_and_resize(filenames=train_filenames, load_dir=train_load_dir, save_dir=train_save_dir)

 57%|█████▋    | 382849/674258 [4:57:36<3:19:24, 24.36it/s] 

error: OpenCV(4.1.1) /io/opencv/modules/imgproc/src/resize.cpp:3720: error: (-215:Assertion failed) !ssize.empty() in function 'resize'


In [None]:
save_and_resize(filenames=test_filenames, load_dir=test_load_dir, save_dir=test_save_dir)

 57%|█████▋    | 382849/674258 [4:57:50<3:19:24, 24.36it/s]