#### Kaggle Setup

In [7]:
# the data root for kaggle submission
# root = '/kaggle/input/rsna-2023-abdominal-trauma-detection'

#### Local Setup

In [1]:
root = '/mnt/d/kaggle/rsna-2023-abdominal-trauma-detection'
%matplotlib notebook

In [None]:
import pandas as pd
from collections import OrderedDict
import pydicom
import os
import matplotlib.pyplot as plt
import numpy as np
import matplotlib.image
import matplotlib.animation as animation
from IPython.display import HTML
import dotenv
import torch
import torchvision as tv
from torchvision.transforms import v2
from torchvision.models import resnet50, ResNet50_Weights
from torch.nn.functional import silu, sigmoid, softmax
from torch.utils.data import Dataset, DataLoader
import torch.nn as nn

## Data Loading

In [None]:
train_image_root = f'{root}/train_images'
train_data_path = f'{root}/train.csv'
scan_path = f'{root}/train_series_meta.csv'
injuries_path = f'{root}/image_level_labels.csv'
tag_path = f'{root}/train_dicom_tags.parquet'

In [None]:
labels = pd.read_csv(train_data_path)
scans = pd.read_csv(scan_path)
injuries = pd.read_csv(injuries_path)
tags = pd.read_parquet(tag_path)

In [5]:
class Config:
    TRAIN_SPLIT = 0.95
    LABELS = [
        "bowel_injury", "extravasation_injury",
        "kidney_healthy", "kidney_low", "kidney_high",
        "liver_healthy", "liver_low", "liver_high",
        "spleen_healthy", "spleen_low", "spleen_high",
    ]
    BATCH_SIZE = 128
    IMAGE_SIZE = 512
    NUM_EPOCHS = 10
    VAL_BATCH_SIZE=128
    BATCHES_PER_EVALUATION = 32
torch.cuda.empty_cache()
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
config = Config()

## Choosing the Training & Validation Images

In [6]:
label_by_patient = labels.groupby(['patient_id'])
def id_to_labels(row):
    return label_by_patient.get_group(row['patient_id']).squeeze()
# The injuries give us "interesting" images to use
data = pd.concat((injuries[['patient_id']].apply(id_to_labels, axis=1), injuries[['series_id', 'instance_number']]), axis=1)
data['image_path'] = train_image_root + '/' + data['patient_id'].astype(str) + '/' + data['series_id'].astype(str) + '/' + data['instance_number'].astype(str) + ".dcm"

In [7]:
data.head()

Unnamed: 0,patient_id,bowel_healthy,bowel_injury,extravasation_healthy,extravasation_injury,kidney_healthy,kidney_low,kidney_high,liver_healthy,liver_low,liver_high,spleen_healthy,spleen_low,spleen_high,any_injury,series_id,instance_number,image_path
0,10004,1,0,0,1,0,1,0,1,0,0,0,0,1,1,21057,362,/kaggle/input/rsna-2023-abdominal-trauma-detec...
1,10004,1,0,0,1,0,1,0,1,0,0,0,0,1,1,21057,363,/kaggle/input/rsna-2023-abdominal-trauma-detec...
2,10004,1,0,0,1,0,1,0,1,0,0,0,0,1,1,21057,364,/kaggle/input/rsna-2023-abdominal-trauma-detec...
3,10004,1,0,0,1,0,1,0,1,0,0,0,0,1,1,21057,365,/kaggle/input/rsna-2023-abdominal-trauma-detec...
4,10004,1,0,0,1,0,1,0,1,0,0,0,0,1,1,21057,366,/kaggle/input/rsna-2023-abdominal-trauma-detec...


In [8]:
# TRAIN_SPLIT of the patients will be used for training, the rest for validation
train_people = pd.Series(data['patient_id'].unique()).sample(frac=config.TRAIN_SPLIT)
train = data.loc[data['patient_id'].isin(train_people)]
val = data.loc[~data['patient_id'].isin(train_people)]

train.set_index(np.asarray(range(len(train))), inplace=True)
val.set_index(np.asarray(range(len(val))), inplace=True)

In [9]:
train.head()

Unnamed: 0,patient_id,bowel_healthy,bowel_injury,extravasation_healthy,extravasation_injury,kidney_healthy,kidney_low,kidney_high,liver_healthy,liver_low,liver_high,spleen_healthy,spleen_low,spleen_high,any_injury,series_id,instance_number,image_path
0,10004,1,0,0,1,0,1,0,1,0,0,0,0,1,1,21057,362,/kaggle/input/rsna-2023-abdominal-trauma-detec...
1,10004,1,0,0,1,0,1,0,1,0,0,0,0,1,1,21057,363,/kaggle/input/rsna-2023-abdominal-trauma-detec...
2,10004,1,0,0,1,0,1,0,1,0,0,0,0,1,1,21057,364,/kaggle/input/rsna-2023-abdominal-trauma-detec...
3,10004,1,0,0,1,0,1,0,1,0,0,0,0,1,1,21057,365,/kaggle/input/rsna-2023-abdominal-trauma-detec...
4,10004,1,0,0,1,0,1,0,1,0,0,0,0,1,1,21057,366,/kaggle/input/rsna-2023-abdominal-trauma-detec...


## Declare Dataset

In [10]:
image_augment = v2.Compose([
    v2.RandomResizedCrop(size=(config.IMAGE_SIZE, config.IMAGE_SIZE), scale=(0.8, 1.0), antialias=True),
])

In [11]:
def read_image(path):
    ds = pydicom.dcmread(path)
    return ds.pixel_array

class MedicalDataset(Dataset):
    def __init__(self, source: pd.DataFrame):
        self.image_paths = source['image_path']
        # 0/1 kidney injury, 0/1 fluid injury, kidney health (3 vals), liver health (3 vals), spleen health (3 vals)
        self.labels = torch.from_numpy(source[config.LABELS].to_numpy().astype('float32'))
        self.source = source
    def __len__(self):
        return len(self.source)
    def __getitem__(self, idx):
        image = torch.from_numpy(np.expand_dims(read_image(self.image_paths[idx]).astype('float32'), 0))
        image -= image.min()
        image /= image.max()

        image = torch.repeat_interleave(image, 3, dim=0)
        image = image_augment(image)
        # print(image.min(), image.max(), image.shape)
        if image.shape != (3, config.IMAGE_SIZE, config.IMAGE_SIZE):
            raise Exception(f'Image at {self.image_paths[idx]} has shape {image.shape}')
        label = (self.labels[idx][0:1], self.labels[idx][1:2], self.labels[idx][2:5], self.labels[idx][5:8], self.labels[idx][8:11])
        return image, label

In [12]:
train_ds = MedicalDataset(train)
val_ds = MedicalDataset(val)
train_dl = DataLoader(train_ds, batch_size=config.BATCH_SIZE, shuffle=True)
val_dl = DataLoader(val_ds, batch_size=config.VAL_BATCH_SIZE)

## Declare Model

In [13]:
class Classifier(nn.Module):
    def __init__(self):
        super(Classifier, self).__init__()

        self.backbone = nn.Sequential(*(list(resnet50(weights=ResNet50_Weights.DEFAULT).children())[:-1]))
        for param in self.backbone.parameters():
            param.requires_grad = False
        self.necks = nn.ModuleList([nn.Linear(2048, 32) for _ in range(5)])
        self.bowel = nn.Linear(32, 1)
        self.fluid = nn.Linear(32, 1)
        self.kidney = nn.Linear(32, 3)
        self.liver = nn.Linear(32, 3)
        self.spleen = nn.Linear(32, 3)

    def forward(self, x):
        x = torch.flatten(self.backbone(x), start_dim=1)
        # bowel, fluid, kidney, liver, spleen
        necks = [silu(self.necks[i](x)) for i in range(5)]

        bowel = torch.flatten(sigmoid(self.bowel(necks[0])), start_dim=1)
        fluid = torch.flatten(sigmoid(self.fluid(necks[1])), start_dim=1)
        kidney = softmax(torch.flatten(self.kidney(necks[2]), start_dim=1), dim=1)
        liver = softmax(torch.flatten(self.liver(necks[3]), start_dim=1), dim=1)
        spleen = softmax(torch.flatten(self.spleen(necks[4]), start_dim=1), dim=1)
        return bowel, fluid, kidney, liver, spleen

In [14]:
classifier = Classifier().to(device)

In [15]:
def calc_loss(output, target): # tuples 4D batched tensors please!
    bce = nn.BCELoss()
    cross = nn.CrossEntropyLoss()
    loss = bce(output[0], target[0]) + bce(output[1], target[1]) + cross(output[2], target[2]) + cross(output[3], target[3]) + cross(output[4], target[4])
    return loss
optimizer = torch.optim.Adam(classifier.parameters())

In [16]:
def to_device(inputs, labels):
    return inputs.to(device), tuple(map(lambda i: i.to(device), labels))

for epoch in range(config.NUM_EPOCHS):
    running_loss = 0.0
    for i, data in enumerate(train_dl):
        inputs, labels = to_device(*data)

        optimizer.zero_grad()
        outputs = classifier(inputs)
        loss = calc_loss(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        print(f'Epoch {epoch} batch {i} loss: {running_loss / (i % config.BATCHES_PER_EVALUATION + 1)} (cur batch: {loss.item()})')
        if i % config.BATCHES_PER_EVALUATION == config.BATCHES_PER_EVALUATION-1:
            total_val_loss = 0.0
            for data in val_dl:
                inputs, labels = to_device(*data)
                outputs = classifier(inputs)
                loss = calc_loss(outputs, labels)
                total_val_loss += loss.item()
            print(f'\t Validation Loss: {total_val_loss / len(val_dl)}')
            running_loss = 0.0

Epoch 0 batch 0 loss: 4.661365985870361 (cur batch: 4.661365985870361)
Epoch 0 batch 1 loss: 4.55441951751709 (cur batch: 4.447473049163818)
Epoch 0 batch 2 loss: 4.452319145202637 (cur batch: 4.2481184005737305)
Epoch 0 batch 3 loss: 4.364541530609131 (cur batch: 4.101208686828613)
Epoch 0 batch 4 loss: 4.298175239562989 (cur batch: 4.032710075378418)
Epoch 0 batch 5 loss: 4.207996924718221 (cur batch: 3.7571053504943848)
Epoch 0 batch 6 loss: 4.137872219085693 (cur batch: 3.7171239852905273)
Epoch 0 batch 7 loss: 4.078884929418564 (cur batch: 3.6659739017486572)
Epoch 0 batch 8 loss: 4.03549697664049 (cur batch: 3.6883933544158936)
Epoch 0 batch 9 loss: 3.9884081363677977 (cur batch: 3.564608573913574)
Epoch 0 batch 10 loss: 3.9481254490939053 (cur batch: 3.5452985763549805)
Epoch 0 batch 11 loss: 3.9284846782684326 (cur batch: 3.7124361991882324)
Epoch 0 batch 12 loss: 3.8989008389986477 (cur batch: 3.5438947677612305)
Epoch 0 batch 13 loss: 3.867074183055333 (cur batch: 3.453327655

In [17]:
torch.save(classifier.state_dict(), 'classifier-v1.pth')