***For each patient, we run the organ-detection ResNet on a subsample of the patient's slices and record the slice with the highest predicted probability of containing the organ. The results are stored in the `highest_liver_dict` dictionary.***

In [None]:
import os
import os
import cv2
import glob
import torch
import pickle 
import pydicom
import zipfile
import numpy as np

import pandas as pd
import seaborn as sns
import torch.nn as nn

from tqdm import tqdm
from PIL import Image
import matplotlib.pyplot as plt
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader, Subset

from tqdm import tqdm
from joblib import Parallel, delayed
from pydicom.pixel_data_handlers.util import apply_voi_lut
from torchvision.transforms.v2 import Resize, Compose, RandomHorizontalFlip, ColorJitter, RandomAffine, RandomErasing, ToTensor

In [None]:
# Exploratory peek at one hard-coded directory (patient 10004; the second path level is
# presumably the series id). os.listdir returns names in arbitrary order, so the stride
# below runs over that order rather than over slice position.
list_dcms = list(os.listdir(f'/kaggle/input/rsna-2023-abdominal-trauma-detection/train_images/10004/21057'))
# Keep every 50th directory entry.
paths = list_dcms[::50]

In [None]:
# Build patient id -> list of DICOM paths, keeping every 10th slice in ascending
# numeric file-name order.
# NOTE(review): the dictionary is keyed by patient only, so when a patient directory
# holds several sub-directories, only the one listed last by os.listdir is kept.
look_liver = {}
for name in tqdm(os.listdir('/kaggle/input/rsna-2023-abdominal-trauma-detection/train_images/')):
    for name2 in os.listdir(f'/kaggle/input/rsna-2023-abdominal-trauma-detection/train_images/{name}'):
        # File names look like "<number>.dcm"; strip the 4-character suffix and sort numerically.
        list_dcms = sorted(
            int(x[:-4])
            for x in os.listdir(f'/kaggle/input/rsna-2023-abdominal-trauma-detection/train_images/{name}/{name2}')
        )
        fifty = [
            f'/kaggle/input/rsna-2023-abdominal-trauma-detection/train_images/{name}/{name2}/{x}.dcm'
            for x in list_dcms[::10]
        ]
        look_liver[int(name)] = fifty

In [None]:
# Sanity check: number of sampled slice paths recorded for patient 10004.
len(look_liver[10004])

In [None]:
def _first_window_value(value):
    """Return *value* as a float, using its first entry when it is multi-valued."""
    try:
        return float(value)
    except TypeError:
        # A list-like attribute (several window presets) cannot be converted directly;
        # use the first preset.
        return float(value[0])


def standardize_pixel_array(dcm: "pydicom.dataset.FileDataset") -> np.ndarray:
    """Rescale a DICOM slice and clip it to its display window.

    Source : https://www.kaggle.com/competitions/rsna-2023-abdominal-trauma-detection/discussion/427217

    Args:
        dcm: Dataset exposing ``pixel_array``, ``PixelRepresentation``,
            ``BitsAllocated``, ``BitsStored``, ``RescaleIntercept``,
            ``RescaleSlope``, ``WindowCenter`` and ``WindowWidth``.
            ``WindowCenter`` / ``WindowWidth`` may be single values or
            list-like; for list-like values the first entry is used.

    Returns:
        The pixel data after ``pixel * slope + intercept``, clipped to
        ``[center - width / 2, center + width / 2]``.
    """
    # Correct DICOM pixel_array if PixelRepresentation == 1.
    pixel_array = dcm.pixel_array
    if dcm.PixelRepresentation == 1:
        # Shift the stored bits to the top of the allocated word and back down so the
        # sign bit of the stored value is propagated through the unused high bits.
        bit_shift = dcm.BitsAllocated - dcm.BitsStored
        dtype = pixel_array.dtype
        pixel_array = (pixel_array << bit_shift).astype(dtype) >> bit_shift

    intercept = float(dcm.RescaleIntercept)
    slope = float(dcm.RescaleSlope)
    # Truncate to int exactly as before; only the multi-valued case is new.
    center = int(_first_window_value(dcm.WindowCenter))
    width = int(_first_window_value(dcm.WindowWidth))
    low = center - width / 2
    high = center + width / 2

    pixel_array = (pixel_array * slope) + intercept
    pixel_array = np.clip(pixel_array, low, high)

    return pixel_array

In [None]:
# Scratch directory that receives the per-patient PNG exports; created up front if missing.
out_dataset_root = '/tmp/fifty/'
os.makedirs(out_dataset_root, exist_ok=True)

In [None]:
# Scratch check of the file-name parsing: take the last path component and drop the
# 4-character ".dcm" suffix to obtain the numeric slice id.
f = '/kaggle/input/rsna-2023-abdominal-trauma-detection/train_images/14846/42425/640.dcm'
key = int(f.split("/")[-1][:-4])
key

In [None]:
def process(patient, size=512):
    """Export the sampled slices of one patient as 8-bit PNG files.

    Every DICOM path in ``look_liver[patient]`` is read, windowed with
    ``standardize_pixel_array``, min-max scaled to [0, 1] (MONOCHROME1 images
    are inverted) and written to ``<out_dataset_root>/<patient>/<slice>.png``.
    All slices are read before any file is written.

    Args:
        patient: Key into the module-level ``look_liver`` mapping.
        size: Side length of the square output image; ``None`` keeps the
            resolution of the source slice.
    """
    out_path = os.path.join(out_dataset_root, str(patient))
    os.makedirs(out_path, exist_ok=True)

    slices = {}
    for dcm_path in look_liver[patient]:
        dicom = pydicom.dcmread(dcm_path)

        # The slice id is the file name without its 4-character ".dcm" suffix.
        slice_id = int(dcm_path.split("/")[-1][:-4])

        scaled = standardize_pixel_array(dicom)
        lowest = scaled.min()
        # The small epsilon keeps the division finite for constant images.
        scaled = (scaled - lowest) / (scaled.max() - lowest + 1e-6)

        if dicom.PhotometricInterpretation == "MONOCHROME1":
            scaled = 1 - scaled

        slices[slice_id] = scaled

    for k in sorted(slices):
        frame = slices[k]
        if size is not None:
            frame = cv2.resize(frame, (size, size))
        cv2.imwrite(out_path + f"/{k}.png", (frame * 255).astype(np.uint8))

In [None]:
# Export the sampled slices of every patient in look_liver as 224x224 PNGs.
for patient in tqdm(look_liver):
    process(patient, size=224)

In [None]:
# Inference-time preprocessing: the only step is the conversion of the loaded image
# to a tensor.
test_transforms = Compose([ToTensor()])

In [None]:
# dataset
class LiverData(Dataset):
    """Dataset that serves RGB images read from a list of file paths."""

    def __init__(self, paths, transform=None):
        """Store the image paths and the optional per-image transform.

        Args:
            paths: Indexable collection of image file paths.
            transform: Callable applied to each loaded PIL image, or ``None``
                to return the PIL image unchanged.
        """
        super().__init__()
        self.transform = transform
        self.paths = paths

    def __len__(self):
        """Return the number of image paths."""
        return len(self.paths)

    def __getitem__(self, idx):
        """Load the image at position ``idx`` and return it under the ``'image'`` key."""
        img = Image.open(self.paths[idx]).convert('RGB')
        if self.transform is None:
            return {'image': img}
        return {'image': self.transform(img)}

In [None]:
class CNNModel(nn.Module):
    """ResNet-18d feature extractor followed by a single-logit linear layer.

    The backbone is created through ``timm`` without pretrained weights and
    its ``fc`` layer is replaced by ``Linear(512, 100) -> BatchNorm1d -> ReLU``;
    ``fin`` maps those 100 features to one logit.

    NOTE(review): ``timm`` is not imported in the visible import cell, so
    constructing this class requires it to be in scope from elsewhere.
    """

    def __init__(self):
        super().__init__()

        # Creation order (backbone, replacement head, final layer) is kept as-is because
        # it determines the order in which random initial weights are drawn.
        backbone = timm.create_model('resnet18d', pretrained=False)
        backbone.fc = nn.Sequential(
            nn.Linear(in_features=512, out_features=100, bias=True),
            nn.BatchNorm1d(100, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True),
            nn.ReLU(inplace=True),
        )
        self.model = backbone
        self.fin = nn.Linear(100, 1)

    def forward(self, x):
        """Return the raw logit produced for each input in ``x``."""
        features = self.model(x)
        return self.fin(features)

In [None]:
# Load the pickled detector. torch.load unpickles arbitrary objects, so only use
# checkpoints from a trusted source; the result is used directly as a module, so the
# checkpoint presumably stores the whole model object rather than a state dict.
# NOTE(review): recent PyTorch releases default torch.load to weights_only=True, which
# rejects fully pickled modules - confirm against the installed version.
model = torch.load('/kaggle/input/kidney-det/best_resnet_18d_kiddet.pth')
# Inference only: eval() makes BatchNorm layers use their stored running statistics
# instead of per-batch statistics, so a prediction does not depend on what else is in
# the batch. eval() returns the module itself.
model = model.to('cuda').eval()

In [None]:
# Sanity check: number of entries (one directory per exported patient) under /tmp/fifty/.
len(os.listdir('/tmp/fifty/'))

In [None]:
# For every exported patient, score all sampled slices in one batch and remember the
# DICOM path of the slice with the highest predicted probability.
highest_liver_dict = {}
for patient in tqdm(list(os.listdir('/tmp/fifty/'))):
    patient = int(patient)
    # Order the PNGs by numeric slice id so batch position i corresponds to
    # look_liver[patient][i], which was built in the same ascending order.
    path_l = sorted(int(x[:-4]) for x in os.listdir(f'/tmp/fifty/{str(patient)}'))
    paths = [f'/tmp/fifty/{str(patient)}/{x}.png' for x in path_l]

    data = LiverData(paths, test_transforms)
    # One batch sized to the patient's slice count.
    loader = DataLoader(data, batch_size=len(look_liver[patient]), shuffle=False)

    # Gradients are never used here; disabling autograd avoids building a graph and
    # keeping activations alive for every forward pass.
    with torch.no_grad():
        for batch_data in loader:
            inputs = batch_data['image'].to('cuda')
            out = model(inputs)

            probabilities = torch.sigmoid(out)
            probs = probabilities.detach().cpu().numpy()
            # argmax over the flattened scores gives the position of the best slice.
            idx = np.argmax(probs)
            highest_liver_dict[patient] = look_liver[patient][idx]

In [None]:
# Persist the patient -> selected DICOM path mapping to the working directory.
with open('highest_liver_dict.pkl', 'wb') as f:
    pickle.dump(highest_liver_dict, f)