<a href="https://www.kaggle.com/code/johnycoder/rsna-make-dataset?scriptVersionId=144779375" target="_blank"><img align="left" alt="Kaggle" title="Open in Kaggle" src="https://kaggle.com/static/images/open-in-kaggle.svg"></a>

In [None]:
import numpy as np, pandas as pd, SimpleITK as sitk, matplotlib.pyplot as plt, os
import tensorflow as tf, pydicom as dicom, cv2
from sklearn.model_selection import train_test_split
import tensorflow_datasets as tfds

In [None]:
# --- Notebook-wide configuration -------------------------------------------
# AUTOTUNE and EPOCHS are defined here but not referenced by the cells shown
# in this notebook.
AUTOTUNE = tf.data.experimental.AUTOTUNE
EPOCHS = 25

# Root folder that save_img joins with a patient id to find the DICOM files.
TRAIN_IMG_PATH = '/kaggle/input/rsna-2023-abdominal-trauma-detection/train_images'

# Number of slices save_img takes per patient (spelling kept: other cells use this name).
PICS_PER_PACIENT = 1

# Passed as `dsize` to cv2.resize in save_img.
IMG_SIZE = [224, 224]

# Label columns that save_img_to_numpy reads from every row.
TARGET_COLS = [
    "bowel_injury",
    "extravasation_injury",
    "kidney_healthy",
    "kidney_low",
    "kidney_high",
    "liver_healthy",
    "liver_low",
    "liver_high",
    "spleen_healthy",
    "spleen_low",
    "spleen_high",
]

In [None]:
def dataset_maker(full_df, part_to_use, random_state=None):
    """Split the first ``part_to_use`` fraction of ``full_df`` into train/valid frames.

    Parameters
    ----------
    full_df : pandas.DataFrame
        Table containing a ``patient_id`` column plus the label columns.
        It is not modified; the function works on a copy.
    part_to_use : float
        Fraction of the rows (taken from the top of ``full_df``) to keep
        before splitting, e.g. ``1`` for everything or ``0.1`` for 10%.
    random_state : int or None, optional
        Forwarded to ``train_test_split``. ``None`` (the default) keeps the
        previous unseeded behaviour; pass an int for a reproducible split.

    Returns
    -------
    (train_dataset, valid_dataset) : tuple of pandas.DataFrame
        80% / 20% split of the selected rows. ``patient_id`` is converted to
        ``str`` and placed as the last column.
    """
    how_many = int(len(full_df) * part_to_use)
    # Take the subset from the argument (not from a notebook global) and copy
    # it so the caller's frame keeps its original dtypes.
    subset = full_df.head(how_many).copy()
    subset['patient_id'] = subset['patient_id'].astype(str)

    # Keep patient_id as the trailing column, as callers received it before.
    ordered_cols = [col for col in subset.columns if col != 'patient_id'] + ['patient_id']
    train_dataset, valid_dataset = train_test_split(
        subset[ordered_cols], test_size=0.2, random_state=random_state
    )
    return train_dataset, valid_dataset

# Read the label table, then call dataset_maker with part_to_use = 1, 0.3 and
# 0.1 (in that order) to obtain the full, medium and small train/valid pairs.
train = pd.read_csv('/kaggle/input/rsna-2023-abdominal-trauma-detection/train.csv')
(
    (train_dataset, valid_dataset),
    (mediumtrain_dataset, mediumvalid_dataset),
    (smalltrain_dataset, smallvalid_dataset),
) = [dataset_maker(train, fraction) for fraction in (1, 0.3, 0.1)]

In [None]:

def _select_slice_indices(n_files, n_picks):
    """Return ``n_picks`` indices spread evenly over ``range(n_files)``.

    Index ``i`` is the midpoint of the i-th of ``n_picks`` equal segments, so
    ``n_picks == 1`` gives ``n_files // 2`` (the middle slice). When there are
    fewer files than picks, indices repeat so the count is still ``n_picks``.
    """
    return [((2 * i + 1) * n_files) // (2 * n_picks) for i in range(n_picks)]


def _load_normalized_slice(path):
    """Read one DICOM file, resize it to IMG_SIZE and min-max scale it to [0, 1]."""
    ds = dicom.dcmread(path)
    res = cv2.resize(ds.pixel_array, dsize=tuple(IMG_SIZE), interpolation=cv2.INTER_CUBIC)
    # Normalise in float64 so subtracting the minimum cannot wrap around in a
    # narrow integer pixel dtype.
    res = res.astype(np.float64)
    lowest, highest = res.min(), res.max()
    if highest == lowest:
        # Constant image: the plain formula would be 0/0 and fill the array with NaN.
        return np.zeros_like(res)
    return (res - lowest) / (highest - lowest)


def save_img(patient_id, labels, output_path):
    """Load PICS_PER_PACIENT normalised slices for one patient plus matching labels.

    Parameters
    ----------
    patient_id : str
        Name of the patient folder under TRAIN_IMG_PATH.
    labels : array-like
        Label values for this patient; converted to an ``int16`` array.
    output_path : str
        Unused; kept so existing callers keep working.

    Returns
    -------
    (img_3d, labels) : tuple
        ``img_3d`` is an array stacking the selected slices; ``labels`` is a
        list holding one copy of the label array per selected slice, so both
        always have the same length.

    Raises
    ------
    ValueError
        If PICS_PER_PACIENT is below 1, or the patient folder or its first
        sub-folder is empty.
    """
    if PICS_PER_PACIENT < 1:
        raise ValueError(f"PICS_PER_PACIENT must be >= 1, got {PICS_PER_PACIENT}")

    patient_dir = os.path.join(TRAIN_IMG_PATH, patient_id)
    # os.listdir returns entries in arbitrary order; sort so the same
    # sub-folder is chosen on every run. Only the first one is used.
    series_dirs = sorted(os.listdir(patient_dir))
    if not series_dirs:
        raise ValueError(f"No sub-folders found in {patient_dir}")
    series_dir = os.path.join(patient_dir, series_dirs[0])

    # File names are expected to start with an integer ("<number>.<ext>");
    # sort numerically so slices are in order.
    files = sorted(os.listdir(series_dir), key=lambda x: int(x.split('.')[0]))
    if not files:
        raise ValueError(f"No image files found in {series_dir}")

    picks = _select_slice_indices(len(files), PICS_PER_PACIENT)
    img_arrays = [_load_normalized_slice(os.path.join(series_dir, files[i])) for i in picks]

    img_3d = np.array(img_arrays)
    labels = np.array(labels).astype(np.int16)
    # One label copy per image actually loaded keeps images and labels aligned.
    labels = [labels.copy() for _ in img_arrays]
    return img_3d, labels

def save_img_to_numpy(dataset, output_path):
    """Collect images and labels for every row of ``dataset`` and save them as one .npz.

    Parameters
    ----------
    dataset : pandas.DataFrame
        Must contain a ``patient_id`` column and all TARGET_COLS columns.
    output_path : str
        Directory to write into; created (including parents) if missing.
        The archive is written to ``<output_path>/<last path component>.npz``
        with the arrays stored under the keys ``images`` and ``labels``.

    Notes
    -----
    The process working directory is left untouched.
    """
    os.makedirs(output_path, exist_ok=True)

    image_array = []
    label_array = []
    for _, row in dataset.iterrows():
        image, labels = save_img(row['patient_id'], row[TARGET_COLS].values, output_path)
        image_array.extend(image)
        label_array.extend(labels)

    # normpath drops a trailing separator so basename is never empty.
    name = os.path.basename(os.path.normpath(output_path))
    np.savez(
        os.path.join(output_path, name),
        images=np.array(image_array),
        labels=np.array(label_array),
    )

# Export every split, in this order, to its own directory under /kaggle/working.
for split_frame, split_dir in (
    (train_dataset, '/kaggle/working/train_images'),
    (valid_dataset, '/kaggle/working/valid_images'),
    (mediumtrain_dataset, '/kaggle/working/mediumtrain_images'),
    (mediumvalid_dataset, '/kaggle/working/mediumvalid_images'),
    (smalltrain_dataset, '/kaggle/working/smalltrain_images'),
    (smallvalid_dataset, '/kaggle/working/smallvalid_images'),
):
    save_img_to_numpy(split_frame, split_dir)

# Credits

This notebook was forked from https://www.kaggle.com/code/awsaf49/rsna-atd-cnn-tpu-train

The evaluation metric is taken from https://www.kaggle.com/code/metric/rsna-trauma-metric/notebook