In [1]:
import os
import os
import cv2
import glob
import torch
import pickle 
import pydicom
import zipfile
import numpy as np

import pandas as pd
import seaborn as sns
import torch.nn as nn

from tqdm import tqdm
from PIL import Image
import matplotlib.pyplot as plt
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader, Subset

from tqdm import tqdm
from joblib import Parallel, delayed
from pydicom.pixel_data_handlers.util import apply_voi_lut
from torchvision.transforms.v2 import Resize, Compose, RandomHorizontalFlip, ColorJitter, RandomAffine, RandomErasing, ToTensor



In [2]:
# Load the pickled per-patient lookup. The cell that consumes it treats each
# value as a path string ending in ".../<series>/<instance>.dcm".
# NOTE(review): pickle.load can execute arbitrary code; only use it on trusted files.
with open('/kaggle/input/highest-liver/highest_liver_dict.pkl', mode='rb') as pkl_in:
    hdi = pickle.load(pkl_in)

In [3]:
# For every patient in `hdi`, collect the paths of the 60 consecutive slices
# (30 before, 30 from the reference slice onwards) of the referenced series.
# Patients whose series cannot supply that window are left out of `conv_range`.
conv_range = {}
for patient in tqdm(hdi):
    liv_det = hdi[patient]
    # The reference path ends in ".../<series>/<instance>.dcm".
    series = int(liv_det.split("/")[-2])
    dcm_no = int(liv_det.split("/")[-1][:-4])

    series_dir = f'/kaggle/input/rsna-2023-abdominal-trauma-detection/train_images/{patient}/{series}'
    # Instance numbers present on disk, ascending; ignore anything that is not a .dcm file.
    list_dcms = sorted(int(x[:-4]) for x in os.listdir(series_dir) if x.endswith('.dcm'))

    # Look the reference slice up by value. Subtracting the first instance
    # number only yields a valid list position when numbering has no gaps.
    try:
        idx = list_dcms.index(dcm_no)
    except ValueError:
        # Reference slice is not in the series directory: no window can be built.
        continue

    # Same bounds as before: the window must fit, and (as in the original guard)
    # a window that would end exactly on the last slice is skipped as well.
    if idx - 30 < 0 or idx + 30 >= len(list_dcms):
        continue

    conv_range[patient] = [f'{series_dir}/{x}.dcm' for x in list_dcms[idx - 30:idx + 30]]
        

100%|██████████| 3147/3147 [04:40<00:00, 11.22it/s]


In [4]:
# Sanity check on one patient: each kept entry is built from a 60-element slice
# (idx-30 .. idx+29), so this should display 60. Raises KeyError if patient
# 10004 was skipped by the window bounds check.
len(conv_range[10004])

60

In [5]:
# Persist the patient -> slice-path mapping as a pickle in the current working directory.
with open('conv_range.pkl', mode='wb') as pkl_out:
    pickle.dump(conv_range, pkl_out)

In [6]:
def _first_window_value(value):
    """Return the first entry of a multi-valued window attribute, else ``value`` itself."""
    if isinstance(value, (str, bytes)):
        return value
    try:
        return value[0]
    except (TypeError, IndexError):
        # Scalars are not subscriptable; an empty sequence has no first entry.
        return value


def standardize_pixel_array(dcm: "pydicom.dataset.FileDataset") -> np.ndarray:
    """
    Rescale a DICOM slice and clip it to its display window.

    Source : https://www.kaggle.com/competitions/rsna-2023-abdominal-trauma-detection/discussion/427217

    Args:
        dcm: dataset exposing ``pixel_array``, ``PixelRepresentation``,
            ``BitsAllocated``, ``BitsStored``, ``RescaleIntercept``,
            ``RescaleSlope``, ``WindowCenter`` and ``WindowWidth``.

    Returns:
        ``pixel_array * RescaleSlope + RescaleIntercept`` clipped to
        ``[center - width / 2, center + width / 2]``.
    """
    pixel_array = dcm.pixel_array

    # Correct DICOM pixel_array if PixelRepresentation == 1: shift the stored
    # bits up to the top of the allocated word and back down again so the sign
    # bit of the stored value is extended.
    if dcm.PixelRepresentation == 1:
        bit_shift = dcm.BitsAllocated - dcm.BitsStored
        dtype = pixel_array.dtype
        pixel_array = (pixel_array << bit_shift).astype(dtype) >> bit_shift

    intercept = float(dcm.RescaleIntercept)
    slope = float(dcm.RescaleSlope)

    # WindowCenter / WindowWidth may hold several presets; int() on such a
    # multi-valued attribute raises TypeError, so use the first preset.
    center = int(_first_window_value(dcm.WindowCenter))
    width = int(_first_window_value(dcm.WindowWidth))
    low = center - width / 2
    high = center + width / 2

    pixel_array = (pixel_array * slope) + intercept
    pixel_array = np.clip(pixel_array, low, high)

    return pixel_array

# Directory intended for extracted liver images; created eagerly (no error if it already exists).
# NOTE(review): no active code visible in this notebook reads `out_dataset_root`;
# the export cell writes into a zip archive instead. Confirm whether it is still needed.
out_dataset_root = '/kaggle/working/liver_images/'
os.makedirs(out_dataset_root, exist_ok=True)

In [7]:
def process(patient, size=512, save_folder=""):
    """Convert one patient's selected DICOM slices to 8-bit PNG images.

    Every path in the global ``conv_range[patient]`` is read, windowed with
    ``standardize_pixel_array``, min-max scaled per slice, inverted when the
    photometric interpretation is MONOCHROME1, and finally written as
    ``{patient}/{i}.png`` where ``i`` is the slice's rank by ascending z position.

    Args:
        patient: key into ``conv_range``.
        size: if not None, each slice is resized to ``(size, size)`` before encoding.
        save_folder: either an object with a ``writestr(name, data)`` method
            (e.g. an open ``zipfile.ZipFile``) or a directory path. With a path,
            files go to ``<save_folder>/<patient>/<i>.png``; the default ``""``
            therefore writes relative to the current working directory
            (previously the default crashed on ``str.writestr``).

    Raises:
        RuntimeError: if OpenCV reports that PNG encoding failed.
    """
    # z position -> normalised slice. Slices sharing a z position overwrite each other.
    imgs = {}
    for dcm_path in conv_range[patient]:
        dicom = pydicom.dcmread(dcm_path)

        # Last component of tag (0020,0032), Image Position (Patient): the z coordinate.
        pos_z = dicom[(0x20, 0x32)].value[-1]

        img = standardize_pixel_array(dicom)
        # Per-slice min-max scaling; the epsilon avoids division by zero on flat slices.
        img = (img - img.min()) / (img.max() - img.min() + 1e-6)

        if dicom.PhotometricInterpretation == "MONOCHROME1":
            img = 1 - img

        imgs[pos_z] = img

    to_archive = hasattr(save_folder, "writestr")
    if not to_archive:
        patient_dir = os.path.join(save_folder, str(patient))
        os.makedirs(patient_dir, exist_ok=True)

    for i, k in enumerate(sorted(imgs.keys())):
        img = imgs[k]

        if size is not None:
            img = cv2.resize(img, (size, size))

        ok, encoded = cv2.imencode('.png', (img * 255).astype(np.uint8))
        if not ok:
            raise RuntimeError(f'PNG encoding failed for patient {patient}, slice {i}')
        png_bytes = encoded.tobytes()

        if to_archive:
            save_folder.writestr(f'{patient}/{i}.png', png_bytes)
        else:
            with open(os.path.join(patient_dir, f'{i}.png'), 'wb') as png_file:
                png_file.write(png_bytes)

In [8]:
# Export every patient in `conv_range` into one archive, at native resolution
# (size=None disables the resize step inside `process`).
with zipfile.ZipFile("output.zip", mode='w') as archive:
    for patient_id in tqdm(conv_range):
        process(patient_id, size=None, save_folder=archive)

100%|██████████| 2717/2717 [1:03:50<00:00,  1.41s/it]
