<a href="https://www.kaggle.com/code/johnycoder/rsna-make-dataset-segmentation?scriptVersionId=146250535" target="_blank"><img align="left" alt="Kaggle" title="Open in Kaggle" src="https://kaggle.com/static/images/open-in-kaggle.svg"></a>

In [1]:
# Imports
import numpy as np, pandas as pd, SimpleITK as sitk, matplotlib.pyplot as plt, os
import tensorflow as tf, pydicom as dicom, cv2, matplotlib as mpl, nibabel as nib
from sklearn.model_selection import train_test_split
from scipy import ndimage
from PIL import Image
import time

caused by: ['/opt/conda/lib/python3.10/site-packages/tensorflow_io/python/ops/libtensorflow_io_plugins.so: undefined symbol: _ZN3tsl6StatusC1EN10tensorflow5error4CodeESt17basic_string_viewIcSt11char_traitsIcEENS_14SourceLocationE']
caused by: ['/opt/conda/lib/python3.10/site-packages/tensorflow_io/python/ops/libtensorflow_io.so: undefined symbol: _ZTVN10tensorflow13GcsFileSystemE']


In [2]:
# Some constants intialisation
TRAIN_IMG_PATH = '/kaggle/input/rsna-2023-abdominal-trauma-detection/train_images'
PICS_PER_PACIENT = 20
IMG_SIZE = [256,256]
TARGET_COLS  = [
        "bowel_injury", "extravasation_injury",
        "kidney_healthy", "kidney_low", "kidney_high",
        "liver_healthy", "liver_low", "liver_high",
        "spleen_healthy", "spleen_low", "spleen_high",
    ]

In [3]:
# Getting dataframe linking series_id of segmented scans with respective patient_id
mask_ser = [x[:-4] for x in os.listdir("/kaggle/input/rsna-2023-abdominal-trauma-detection/segmentations")]
masks_img = pd.read_csv('/kaggle/input/rsna-2023-abdominal-trauma-detection/train_series_meta.csv')
masks_img['series_id'] = masks_img['series_id'].astype(str)
masks_img['patient_id'] = masks_img['patient_id'].astype(str)
#masks_red = masks_img[masks_img['series_id'].isin(masks)]
masks_red = masks_img.drop(['aortic_hu', 'incomplete_organ'], axis = 1)
display(masks_red) # there are in total 206 segmented images

Unnamed: 0,patient_id,series_id
0,10004,21057
1,10004,51033
2,10005,18667
3,10007,47578
4,10026,29700
...,...,...
4706,9961,2003
4707,9961,63032
4708,9980,40214
4709,9980,40466


In [4]:
def display_scan_with_mask(number):
    # plotting stuff
    image_shape = (512,512)
    num_images = 12
    
    #reading list of dicoms for respective patient_id and ordering them
    patient_id = str(masks_red.iloc[number][0])
    series_id = str(masks_red.iloc[number][1])
    should_predict=False if series_id in masks else True
    print(should_predict)
    if should_predict==False : img_data = nib.load('/kaggle/input/rsna-2023-abdominal-trauma-detection/segmentations/'+series_id+'.nii').get_fdata()
    dicoms = [int(x[:-4]) for x in os.listdir('/kaggle/input/rsna-2023-abdominal-trauma-detection/train_images/'+patient_id+'/'+series_id)]
    dicoms = [str(x)+'.dcm' for x in sorted(dicoms)]
    step = int((len(dicoms)/(PICS_PER_PACIENT)))
    #index_to_load = list(range(step,len(dicoms)-step,step))
    #print(index_to_load)
    step = int((len(dicoms)/(PICS_PER_PACIENT)))
    for i, img in enumerate(range(step,len(dicoms)-step,int((len(dicoms)/(PICS_PER_PACIENT))))):
        alpha = 0.7
        mask = []
        ds = dicom.dcmread('/kaggle/input/rsna-2023-abdominal-trauma-detection/train_images/'+patient_id+'/'+series_id+'/'+dicoms[img])
        scan = (np.stack((ds.pixel_array,) * 3, axis=-1) - np.min(ds.pixel_array))/(np.max(ds.pixel_array) - np.min(ds.pixel_array)) #normalising
        if should_predict:
            model = tf.keras.models.load_model('/kaggle/input/rsna-segmentation/rsna_segmentation.keras')
            tf_scan = tf.image.resize(scan, (128,128), method=tf.image.ResizeMethod.NEAREST_NEIGHBOR)
            tf_scan = tf.expand_dims(tf_scan, axis=0)
            mask = model.predict(tf_scan)
            mask = tf.argmax(mask, axis=-1)
            NORM = mpl.colors.Normalize(vmin=0, vmax=6)
            mask = mpl.colormaps['turbo'](mask[0]/5)[:, :, :3]
            mask = cv2.resize(mask, (512, 512))
            mask = mask.astype(np.float64)
            combined = cv2.addWeighted(scan, alpha, mask, 1 - alpha, 0)
            plt.imshow(combined)
            plt.show()
        else:
            alpha = 0.7
            ds = dicom.dcmread('/kaggle/input/rsna-2023-abdominal-trauma-detection/train_images/'+patient_id+'/'+series_id+'/'+dicoms[img])
            scan = (np.stack((ds.pixel_array,) * 3, axis=-1) - np.min(ds.pixel_array))/(np.max(ds.pixel_array) - np.min(ds.pixel_array)) #normalising
            mask = mpl.colormaps['turbo'](img_data[:, :, -img] /5)[:, :, :3]
            combined = cv2.addWeighted(scan, alpha, np.rot90(mask), 1 - alpha, 0)
            plt.imshow(combined)
            plt.show()
        
        #axes[i].imshow(combined)
        #axes[i].axis('off')
    #plt.tight_layout()
    #plt.show()

#for i in range(1,200,20):
 #   display_scan_with_mask(i) # enter any number up to 205

In [5]:
def dataset_maker(full_df, part_to_use):
    how_many = int(len(full_df)*part_to_use)
    smalltrain = train.head(how_many)
    full_df['patient_id'] = full_df['patient_id'].astype(str)
    X_train, X_valid, y_train, y_valid = train_test_split(full_df.patient_id, full_df.drop(['patient_id'], axis = 1), test_size = 0.2)
    y_train['patient_id'] = X_train
    y_valid['patient_id'] = X_valid
    train_dataset = y_train
    valid_dataset = y_valid
    return train_dataset, valid_dataset

train = pd.read_csv('/kaggle/input/rsna-2023-abdominal-trauma-detection/train.csv')
masks_patient = masks_red.patient_id.to_list()
train['patient_id'] = train['patient_id'].astype(str)
train = train[train['patient_id'].isin(masks_patient)]
#train = pd.merge(train, masks_red, on='patient_id', how='outer')
train_dataset, valid_dataset = dataset_maker(train, 0.3)  #cant use full dataset since I dont have enough gpu left

train_dataset = pd.merge(train_dataset, masks_red, on='patient_id', how='left')
valid_dataset = pd.merge(valid_dataset, masks_red, on='patient_id', how='left')
display(train_dataset)

Unnamed: 0,bowel_healthy,bowel_injury,extravasation_healthy,extravasation_injury,kidney_healthy,kidney_low,kidney_high,liver_healthy,liver_low,liver_high,spleen_healthy,spleen_low,spleen_high,any_injury,patient_id,series_id
0,1,0,1,0,1,0,0,1,0,0,1,0,0,0,43381,43057
1,1,0,0,1,1,0,0,1,0,0,0,1,0,1,41360,30952
2,1,0,0,1,1,0,0,1,0,0,0,1,0,1,41360,43088
3,1,0,1,0,1,0,0,1,0,0,1,0,0,0,39548,39049
4,1,0,1,0,1,0,0,1,0,0,1,0,0,0,63833,25377
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3757,1,0,1,0,1,0,0,1,0,0,1,0,0,0,37637,58774
3758,1,0,1,0,1,0,0,1,0,0,1,0,0,0,49274,41815
3759,1,0,1,0,1,0,0,1,0,0,1,0,0,0,49274,49616
3760,1,0,0,1,1,0,0,1,0,0,1,0,0,1,15472,14668


In [6]:
def save_img(row, output_path, new_df):
    patient_id = row['patient_id']
    series_id = row['series_id']
    masks = [x[:-4] for x in os.listdir("/kaggle/input/rsna-2023-abdominal-trauma-detection/segmentations")]
    should_predict=False if series_id in masks else True
    if should_predict==False : img_data = nib.load('/kaggle/input/rsna-2023-abdominal-trauma-detection/segmentations/'+series_id+'.nii').get_fdata()
    labels = row[TARGET_COLS]#.values
    dicom_files = [int(x[:-4]) for x in os.listdir('/kaggle/input/rsna-2023-abdominal-trauma-detection/train_images/'+patient_id+'/'+series_id)]
    dicoms = [str(x)+'.dcm' for x in sorted(dicom_files)]
    step = int((len(dicoms)/(PICS_PER_PACIENT)))
    dicoms = [dicom.dcmread('/kaggle/input/rsna-2023-abdominal-trauma-detection/train_images/'+patient_id+'/'+series_id+'/'+dicoms[i]) for i in range(step,len(dicoms)-step, step)]
    dicoms = [ds.pixel_array for ds in dicoms]
    scans = [(np.stack((dicom,) * 3, axis=-1) - np.min(dicom))/(np.max(dicom) - np.min(dicom)) for dicom in dicoms]
    masks = []
    if should_predict:
            model = tf.keras.models.load_model('/kaggle/input/rsna-segmentation/rsna_segmentation.keras')
            tf_scan = tf.image.resize(scans, (128,128), method=tf.image.ResizeMethod.NEAREST_NEIGHBOR)
            #tf_scan = tf.expand_dims(tf_scan, axis=0)
            masks = model.predict(tf_scan)
    for i, img in enumerate(range(step,len(dicom_files)-step,step)):
        alpha = 0.7
        combined = []
        scan = scans[i]  #normalising
        if should_predict:
            mask = tf.argmax(masks[i], axis=-1)
            NORM = mpl.colors.Normalize(vmin=0, vmax=6)
            mask = mpl.colormaps['turbo'](mask/5)[:, :, :3]
            mask = cv2.resize(mask, (512, 512))
            mask = mask.astype(np.float64)
        else:
            mask = np.rot90(mpl.colormaps['turbo'](img_data[:, :, -img] /5)[:, :, :3])
        try:
            combined = cv2.addWeighted(scan, alpha,mask, 1 - alpha, 0)
            res = (cv2.resize(combined, dsize=IMG_SIZE, interpolation=cv2.INTER_CUBIC)*255).astype(np.uint8)
            image = Image.fromarray(res)
            os.chdir(output_path)
            image.save(patient_id+'_'+series_id+'_'+str(i)+'.png')
            labels['file'] = patient_id+'_'+series_id+'_'+str(i)+'.png'
            new_df = pd.concat([new_df, labels.to_frame().T], ignore_index=True)
        except:
            print('UNSUCCESFUL')
            print('Series:'+series_id+', Patient:'+patient_id)
    return new_df

def save_img_to_numpy(dataset, output_dir, limit):
    new_df = pd.DataFrame(columns = TARGET_COLS)
    if not os.path.isdir('/kaggle/working'+output_dir): os.mkdir('/kaggle/working'+output_dir)
    image_array = []
    label_array = []
    os.chdir('/kaggle/working'+output_dir)
    for index, row in dataset.iterrows():
        new_df = save_img(row, '/kaggle/working'+output_dir, new_df)
        end_time = time.time()
        elapsed_time = end_time - start_time
        print(elapsed_time)
        if elapsed_time > limit:
            break
            print('/kaggle/working'+output_dir[:-7]+'.csv')
            new_df.to_csv('/kaggle/working'+output_dir[:-7]+'.csv')

#print(type(train_dataset))
#save_img_to_numpy(train_dataset.head(), '/smalltrain_images')
#save_img_to_numpy(valid_dataset.head(), '/smallvalid_images')
print('run')
start_time = time.time()
save_img_to_numpy(train_dataset, '/train_images', limit = 23000)
save_img_to_numpy(valid_dataset, '/valid_images', limit = 28800)
"""
save_img_to_numpy(smalltrain_dataset, '/kaggle/working/smalltrain_images')
save_img_to_numpy(smallvalid_dataset, '/kaggle/working/smallvalid_images')
"""

run
19.768857955932617
23.000232458114624
25.674214601516724
42.93218684196472
60.39053273200989
77.04690074920654
95.296058177948
113.302898645401
129.92293739318848
146.94647479057312
164.41048431396484
182.47192931175232
200.0274260044098
218.94724655151367
236.19263195991516
254.16880297660828
271.00065302848816
288.5684187412262
305.42281198501587
322.63574504852295
340.24506521224976
357.06695914268494
374.3885054588318
391.6547338962555
408.7414882183075
426.3402497768402
443.7823717594147
460.52440643310547
478.19722032546997
495.1347110271454
512.908371925354
530.9044671058655
547.6004378795624
566.5922772884369
583.2921860218048
600.2388327121735
617.706903219223
UNSUCCESFUL
Series:31294, Patient:19664
UNSUCCESFUL
Series:31294, Patient:19664
UNSUCCESFUL
Series:31294, Patient:19664
UNSUCCESFUL
Series:31294, Patient:19664
UNSUCCESFUL
Series:31294, Patient:19664
UNSUCCESFUL
Series:31294, Patient:19664
UNSUCCESFUL
Series:31294, Patient:19664
UNSUCCESFUL
Series:31294, Patient:1966

  scans = [(np.stack((dicom,) * 3, axis=-1) - np.min(dicom))/(np.max(dicom) - np.min(dicom)) for dicom in dicoms]


UNSUCCESFUL
Series:4344, Patient:49314
UNSUCCESFUL
Series:4344, Patient:49314
UNSUCCESFUL
Series:4344, Patient:49314
UNSUCCESFUL
Series:4344, Patient:49314
UNSUCCESFUL
Series:4344, Patient:49314
UNSUCCESFUL
Series:4344, Patient:49314
UNSUCCESFUL
Series:4344, Patient:49314
UNSUCCESFUL
Series:4344, Patient:49314
UNSUCCESFUL
Series:4344, Patient:49314
UNSUCCESFUL
Series:4344, Patient:49314
UNSUCCESFUL
Series:4344, Patient:49314
UNSUCCESFUL
Series:4344, Patient:49314
UNSUCCESFUL
Series:4344, Patient:49314
UNSUCCESFUL
Series:4344, Patient:49314
UNSUCCESFUL
Series:4344, Patient:49314
UNSUCCESFUL
Series:4344, Patient:49314
UNSUCCESFUL
Series:4344, Patient:49314
UNSUCCESFUL
Series:4344, Patient:49314
UNSUCCESFUL
Series:4344, Patient:49314
7318.802409410477
7335.807914495468
7353.539115428925
7372.205478429794
7389.2827932834625
7407.4236035346985
7424.113694429398
7442.991760492325
7460.130222558975
7478.131371498108
7497.059954881668
7514.119572162628
7534.6152780056
7551.75913977623
7569.246

  scans = [(np.stack((dicom,) * 3, axis=-1) - np.min(dicom))/(np.max(dicom) - np.min(dicom)) for dicom in dicoms]


16354.067237615585
16372.657076835632
16389.690574884415
16409.501664161682
16427.15496611595
16448.887865304947
16466.031003713608
16485.830109119415
16503.02318072319
16520.701412916183
16537.743363380432
16556.655080795288
16574.134290218353
16592.02592611313
16610.42317867279
16628.41911125183
16646.856288194656
16663.91722226143
16682.21062231064
16704.905146837234
16723.333758592606
16742.126076459885
16758.939910888672
16777.681535959244
16794.6726834774
16812.809282779694
16829.781126737595
16848.731292963028
16865.600614070892
16882.800362348557
16902.26500248909
16919.030173778534
16937.920391082764
16954.453686714172
16975.296550273895
16992.023970127106
17010.126080989838
17026.978353977203
17044.79328894615
17063.450890541077
17083.99610352516
17103.32583117485
17120.38990879059
17123.179866552353
17126.449507951736
17145.423782110214
17162.189507484436
17180.53251361847
17197.417382001877
17215.40554523468
17233.20260810852
17250.21940088272
17269.05038189888
UNSUCCESFUL


"\nsave_img_to_numpy(smalltrain_dataset, '/kaggle/working/smalltrain_images')\nsave_img_to_numpy(smallvalid_dataset, '/kaggle/working/smallvalid_images')\n"

# Credits

This notebook was forked from https://www.kaggle.com/code/awsaf49/rsna-atd-cnn-tpu-train

Evaluation metric from https://www.kaggle.com/code/metric/rsna-trauma-metric/notebook