In [88]:
import numpy as np
import pandas as pd
import os
from matplotlib import pyplot as plt
import glob

import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from sklearn.model_selection import train_test_split

In [89]:
### CONFIG CLASS ###

# From preprocessing
root = '/kaggle/input/rsna-2023-abdominal-trauma-detection'
test_size = 0.2
target_col = ['bowel_healthy', 'bowel_injury', 'extravasation_healthy', 'extravasation_injury', 'kidney_healthy', 'kidney_low', 'kidney_high', 'liver_healthy', 'liver_low', 'liver_high', 'spleen_healthy', 'spleen_low', 'spleen_high']
outliers_patients = ['31284', '10275']

# Specific to model
train_path = '/kaggle/input/fr-wagon-preprocess-dcm3-zoom/output/train' # 815 files
val_path = '/kaggle/input/fr-wagon-preprocess-dcm3-zoom/output/val' # 185 files

# tester_path = '/kaggle/input/fr-wagon-preprocess-dcm2/output/train/*.npy'
BATCH_SIZE = 1
EPOCHS = 10
AUTOTUNE = tf.data.AUTOTUNE

In [90]:
# From preprocessing

# GET TRAIN DATA FUNCTIONS

def y_patient() -> pd.DataFrame:
    y_patient = pd.read_csv(f"{root}/train.csv")
    y_patient['patient_id'] = y_patient['patient_id'].astype(str)
    y_patient.drop([y_patient[ y_patient['patient_id'] == out].index.values[0] for out in outliers_patients], axis=0, inplace=True)
    return y_patient

def serie_patient() -> pd.DataFrame:
    dcm_parquet=pd.read_parquet(f"{root}/train_dicom_tags.parquet", engine='pyarrow')
    series_patients=pd.DataFrame()
    series_patients["patient_id"] = dcm_parquet["PatientID"].astype(str)
    series_patients["serie_id"] = dcm_parquet["SeriesInstanceUID"].apply(lambda x: x.split(".")[-1])
    series_patients["SliceThickness"] = dcm_parquet["SliceThickness"]

    return series_patients

# SPLIT FUNCTIONS

def split_group(group, test_size=test_size):
    if len(group) == 1:
        return (group, pd.DataFrame()) if np.random.rand() < test_size else (pd.DataFrame(), group)
    else:
        return train_test_split(group, test_size=test_size, random_state=42)
    
def split_patients(y_patient):
    y_train_patient = pd.DataFrame()
    y_val_patient = pd.DataFrame()

    for i, group in y_patient.groupby(target_col):
        train_group, val_group = split_group(group, test_size=0.2)
        y_train_patient = pd.concat([y_train_patient, train_group], ignore_index=True)
        y_val_patient = pd.concat([y_val_patient, val_group], ignore_index=True)
        
    return y_train_patient, y_val_patient

# GET DATA
y_patients = y_patient()
series_patients = serie_patient()

# SPLIT AT PATIENT LEVEL
y_train_patient, y_val_patient = split_patients(y_patients)

# SPLIT AT SERIE LEVEL
y_train_serie = y_train_patient.merge(series_patients, how='left', on='patient_id')
y_val_serie = y_val_patient.merge(series_patients, how='left', on='patient_id')

In [91]:
xy_train_serie = y_train_serie.drop(['patient_id', 'SliceThickness'], axis=1).drop_duplicates()
xy_train_serie['path'] = xy_train_serie['serie_id'].apply(lambda row : f"{train_path}/{row}_dcm.npz" )
train_serie_id = [str(file.split('_')[0]) for file in os.listdir(train_path)]
xy_train_select = pd.DataFrame(index= range(len(train_serie_id)), columns=xy_train_serie.columns)

for i in range(len(train_serie_id)):
    xy_train_select.iloc[i, :] = xy_train_serie[ xy_train_serie['serie_id'] == train_serie_id[i]].iloc[0]
    
xy_val_serie = y_val_serie.drop(['patient_id', 'SliceThickness'], axis=1).drop_duplicates()
xy_val_serie['path'] = xy_val_serie['serie_id'].apply(lambda row : f"{val_path}/{row}_dcm.npz" )
val_serie_id = [str(file.split('_')[0]) for file in os.listdir(val_path)]
xy_val_select = pd.DataFrame(index= range(len(val_serie_id)), columns=xy_val_serie.columns)

for i in range(len(val_serie_id)):
    if val_serie_id[i] != '13774':
        xy_val_select.iloc[i, :] = xy_val_serie[ xy_val_serie['serie_id'] == val_serie_id[i]].iloc[0]
    else:
        xy_val_select.iloc[i, :] = xy_val_serie[ xy_val_serie['serie_id'] == val_serie_id[i+1]].iloc[0]
    
len(xy_train_select), len(xy_val_select)

(79, 21)

In [184]:
# tf.config.run_functions_eagerly(run_eagerly=True)

def decode_scan_and_label(path, label):
    
#     file_bytes = tf.io.read_file(image_path)
#     image = tf.io.decode_png(file_bytes, channels=3, dtype=tf.uint8)
#     image = tf.image.resize(image, config.IMAGE_SIZE, method="bilinear")
#     image = tf.cast(image, tf.float32) / 255.0

#     file_bytes = tf.io.read_file(path)
#     path = path.numpy()
#     path = bytes.decode(path)
    
#     file_bytes = tf.io.read_file(path)
#     tf.io.decode_raw(file_bytes)
#     path = bytes.decode(file_bytes)
    
    print(type(path))
    scan = np.load(path)['arr_0.npy']
    scan = np.expand_dims(scan, axis=-1)
    scan = tf.cast(scan, tf.uint8)    
    
    label = tf.cast(label, tf.uint8)
    labels = (label[0], labels[2], label[4:7], labels[7:10], labels[10:])
    
    return (scan, labels)

def mappable_func(path, label):
    result = tf.py_function(decode_scan_and_label, [path, label], (tf.uint8, tf.uint8))

    print(result[0].get_shape(), result[1].get_shape())   

    result[0].set_shape([255, 255, 255])
    result[1].set_shape(([1, 1, 3, 3, 3]))
    
    print(result[0].get_shape(), result[1].get_shape())   
    
    return result

def build_dataset(paths, labels):
    dataset = tf.data.Dataset.from_tensor_slices((paths, labels))
    dataset = dataset.map(mappable_func)
    dataset = dataset.shuffle(buffer_size=BATCH_SIZE * 10)
    dataset = dataset.batch(BATCH_SIZE)
    dataset = dataset.prefetch(AUTOTUNE)
    return dataset

In [185]:
train_x_paths = xy_train_select.path.tolist()
train_y_labels = np.asarray(xy_train_select[target_col].values).astype(np.float32)

val_x_paths = xy_val_select.path.tolist()
val_y_labels = np.asarray(xy_val_select[target_col].values).astype(np.float32)

len(train_x_paths), len(val_x_paths)

(79, 21)

In [195]:
dataset = tf.data.Dataset.from_tensor_slices((train_x_paths, train_y_labels))
list(dataset.as_numpy_iterator())

[(b'/kaggle/input/fr-wagon-preprocess-dcm3-zoom/output/train/61569_dcm.npz',
  array([1., 0., 1., 0., 1., 0., 0., 0., 1., 0., 0., 1., 0.], dtype=float32)),
 (b'/kaggle/input/fr-wagon-preprocess-dcm3-zoom/output/train/9181_dcm.npz',
  array([1., 0., 1., 0., 1., 0., 0., 1., 0., 0., 1., 0., 0.], dtype=float32)),
 (b'/kaggle/input/fr-wagon-preprocess-dcm3-zoom/output/train/62218_dcm.npz',
  array([1., 0., 1., 0., 1., 0., 0., 1., 0., 0., 1., 0., 0.], dtype=float32)),
 (b'/kaggle/input/fr-wagon-preprocess-dcm3-zoom/output/train/48623_dcm.npz',
  array([1., 0., 1., 0., 1., 0., 0., 1., 0., 0., 1., 0., 0.], dtype=float32)),
 (b'/kaggle/input/fr-wagon-preprocess-dcm3-zoom/output/train/9138_dcm.npz',
  array([1., 0., 1., 0., 1., 0., 0., 1., 0., 0., 1., 0., 0.], dtype=float32)),
 (b'/kaggle/input/fr-wagon-preprocess-dcm3-zoom/output/train/61936_dcm.npz',
  array([1., 0., 1., 0., 1., 0., 0., 1., 0., 0., 1., 0., 0.], dtype=float32)),
 (b'/kaggle/input/fr-wagon-preprocess-dcm3-zoom/output/train/20581

In [186]:
ds_train = build_dataset(train_x_paths, train_y_labels)
ds_val = build_dataset(val_x_paths, val_y_labels)
len(ds_train), len(ds_val)



(79, 21)

In [189]:
ds_train.as_numpy_iterator().next, ds_val.as_numpy_iterator().next

(<bound method _NumpyIterator.next of <tensorflow.python.data.ops.dataset_ops._NumpyIterator object at 0x78ee37fd1ea0>>,
 <bound method _NumpyIterator.next of <tensorflow.python.data.ops.dataset_ops._NumpyIterator object at 0x78ee36e44190>>)

In [190]:
def init_model():
    
    inputs = tf.keras.layers.Input(shape=(255, 255, 255, 1))
    
    x = tf.keras.layers.Conv3D(32, kernel_size=(5, 5, 5), strides=(2, 2, 2), padding='same', activation='relu')(inputs)   
    x = tf.keras.layers.MaxPool3D(pool_size=(2, 2, 2))(x)

    x = tf.keras.layers.Conv3D(64, kernel_size=(3, 3, 3), strides=(2, 2, 2), padding='same', activation='relu')(x)   
    x = tf.keras.layers.MaxPool3D(pool_size=(2, 2, 2))(x)

    x = tf.keras.layers.Conv3D(128, kernel_size=(3, 3, 3), activation='relu')(x)
    x = tf.keras.layers.MaxPool3D(pool_size=(2, 2, 2))(x)
    
    x = tf.keras.layers.BatchNormalization()(x)
    x = tf.keras.layers.GlobalMaxPooling3D()(x)
    
    x = tf.keras.layers.Flatten()(x)

    x = tf.keras.layers.Dense(1042, activation='relu')(x)
    x = tf.keras.layers.Dropout(0.4)(x)
    
    x = tf.keras.layers.Dense(521, activation='relu')(x)
    x = tf.keras.layers.Dropout(0.4)(x)
    
    x_extra = tf.keras.layers.Dense(20, activation='relu')(x)
    x_bowel = tf.keras.layers.Dense(20, activation='relu')(x)
    x_kidney = tf.keras.layers.Dense(20, activation='relu')(x)
    x_spleen = tf.keras.layers.Dense(20, activation='relu')(x)
    x_liver = tf.keras.layers.Dense(20, activation='relu')(x)
    
    extra = tf.keras.layers.Dense(1, name='extra', activation='sigmoid')(x_extra)
    bowel = tf.keras.layers.Dense(1, name='bowel', activation='sigmoid')(x_bowel)
    kidney = tf.keras.layers.Dense(3, name='kidney', activation='softmax')(x_kidney)
    spleen = tf.keras.layers.Dense(3, name='spleen', activation='softmax')(x_spleen)
    liver = tf.keras.layers.Dense(3, name='liver', activation='softmax')(x_liver)
    
    model = tf.keras.Model(inputs=inputs, outputs=(extra, bowel, kidney, spleen, liver))
    
    return model

In [191]:
optimizer = tf.keras.optimizers.Adam(learning_rate=0.001)
    
loss = {'extra': tf.keras.losses.BinaryCrossentropy(), 
        'bowel': tf.keras.losses.BinaryCrossentropy(), 
        'kidney': tf.keras.losses.CategoricalCrossentropy(), 
        'spleen': tf.keras.losses.CategoricalCrossentropy(), 
        'liver': tf.keras.losses.CategoricalCrossentropy()}

metrics = {'extra': ['accuracy'], 
           'bowel': ['accuracy'], 
           'kidney': ['accuracy'], 
           'spleen': ['accuracy'],
           'liver': ['accuracy']}

def compile(model):
    model.compile(optimizer=optimizer, loss=loss, metrics=metrics)
    return model

In [192]:
model = init_model()
model = compile(model)
model.summary()

Model: "model_11"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_13 (InputLayer)          [(None, 255, 255, 2  0           []                               
                                55, 1)]                                                           
                                                                                                  
 conv3d_34 (Conv3D)             (None, 128, 128, 12  4032        ['input_13[0][0]']               
                                8, 32)                                                            
                                                                                                  
 max_pooling3d_33 (MaxPooling3D  (None, 64, 64, 64,   0          ['conv3d_34[0][0]']              
 )                              32)                                                        

In [193]:
es = tf.keras.callbacks.EarlyStopping(monitor='val_accuracy', mode='max', 
                   patience=5, 
                   verbose=1, 
                   restore_best_weights=True)

history = model.fit(ds_train,
          callbacks = [es],
          batch_size = 32,
          epochs = EPOCHS, 
          validation_data = ds_val)

Epoch 1/10
<class 'tensorflow.python.framework.ops.EagerTensor'>
<class 'tensorflow.python.framework.ops.EagerTensor'>


InvalidArgumentError: {{function_node __wrapped__IteratorGetNext_output_types_2_device_/job:localhost/replica:0/task:0/device:CPU:0}} TypeError: expected str, bytes or os.PathLike object, not EagerTensor
Traceback (most recent call last):

  File "/opt/conda/lib/python3.10/site-packages/tensorflow/python/ops/script_ops.py", line 265, in __call__
    return func(device, token, args)

  File "/opt/conda/lib/python3.10/site-packages/tensorflow/python/ops/script_ops.py", line 143, in __call__
    outputs = self._call(device, args)

  File "/opt/conda/lib/python3.10/site-packages/tensorflow/python/ops/script_ops.py", line 150, in _call
    ret = self._func(*args)

  File "/opt/conda/lib/python3.10/site-packages/tensorflow/python/autograph/impl/api.py", line 642, in wrapper
    return func(*args, **kwargs)

  File "/tmp/ipykernel_32/3139056352.py", line 19, in decode_scan_and_label
    scan = np.load(path)['arr_0.npy']

  File "/opt/conda/lib/python3.10/site-packages/numpy/lib/npyio.py", line 405, in load
    fid = stack.enter_context(open(os_fspath(file), "rb"))

TypeError: expected str, bytes or os.PathLike object, not EagerTensor


	 [[{{node EagerPyFunc}}]] [Op:IteratorGetNext]

In [None]:
train_x = np.stack([np.expand_dims(np.load(path)['arr_0'], axis=-1) for path in train_x_paths], axis=0)

In [None]:
val_x = np.stack([np.expand_dims(np.load(path)['arr_0'], axis=-1) for path in val_x_paths], axis=0)

In [None]:
train_x.shape

In [None]:
train_y_labels_list = [train_y_labels[:,0], train_y_labels[:,2], train_y_labels[:,3:7], train_y_labels[:,7:10], train_y_labels[:,10:13]] 
val_y_labels_list = [val_y_labels[:,0], val_y_labels[:,2], val_y_labels[:,3:7], val_y_labels[:,7:10], val_y_labels[:,10:13]] 

In [None]:
len(train_y_labels_list)

In [None]:
train_x.shape

In [None]:
es = tf.keras.callbacks.EarlyStopping(monitor='val_accuracy', mode='max', 
                   patience=5, 
                   verbose=1, 
                   restore_best_weights=True)

model.fit(train_x, train_y_labels_list,
          callbacks = [es],
          batch_size = 32,
          epochs = EPOCHS, 
          validation_data = (val_x, val_y_labels_list))

In [None]:
def decode_image_and_label(path):
    path = tf.convert_to_tensor(path)
    path = bytes.decode(path.numpy())
    
    filename = f"{path}".split('/')[-1].split('_')[0]
    array_3D = np.load(path)['arr_0.npy']
    array_tf = tf.cast(array_3D, tf.int64)
    
    serie_id = f"{path}".split('/')[-1].split('_')[0]
    y_label = y_labels[ y_labels['serie_id'] == serie_id].drop('serie_id', axis=1)
    labels = tf.constant(y_label.to_numpy())
    #         bowel          fluid           kidney        liver          spleen
    labels = (labels[0:1], labels[1:2], labels[2:5], labels[5:8], labels[8:11])

    return (array_tf, labels)

# def mappable_func(path):
#     result = tf.py_function(decode_image_and_label, [path], (tf.int64, tf.uint8))
#     return result

def build_dataset(path):
    ds_size = len(list(tf.data.Dataset.list_files(f"{path}/*")))
    
    ds = (
        tf.data.Dataset.list_files(f"{path}/*")
        .map(decode_image_and_label, num_parallel_calls=AUTOTUNE)
        .shuffle(ds_size, reshuffle_each_iteration=False)
        .batch(BATCH_SIZE)
        .prefetch(AUTOTUNE)
    )
    return ds

In [None]:
# decode_image_and_label(f"{train_path}/10048_dcm.npz")
tf.data.Dataset.list_files(f"{train_path}/*.npz").map(decode_image_and_label).prefetch(AUTOTUNE)

In [None]:
len(list(tf.data.Dataset.list_files(f"{train_path}/*")))

In [None]:
ds_train = build_dataset(train_path)
ds_val = build_dataset(val_path)

In [None]:
len(ds_train), iter(ds_train)

In [None]:
images, labels = next(iter(ds_train))
images.shape, [label.shape for label in labels]

In [None]:
decode_image_and_label(f"{train_path}/10048_dcm.npz")

In [None]:
init_model().summary()