In [1]:
import pandas as pd
import numpy as np
import cv2
from matplotlib import pyplot as plt
import sklearn.metrics
import warnings
from sklearn.exceptions import UserWarning
warnings.filterwarnings('ignore', category=UserWarning)

import torch
from torchvision import transforms
from torch.utils.data import Dataset, DataLoader
from torch.optim.lr_scheduler import CosineAnnealingLR
from torch import nn
from resnet3d import generate_model
from timm.utils import AverageMeter

from tqdm import tqdm
import sys
import glob
import gc
import os

os.environ['CUDA_LAUNCH_BLOCKING']='1'

# Parameters

In [2]:
BASE_PATH = '/home/junseonglee/01_codes/input/rsna-2023-abdominal-trauma-detection'
RESOL = 256
BATCH_SIZE = 8
LR = 0.001
N_EPOCHS = 30
train_df = pd.read_csv(f'{BASE_PATH}/train.csv')
train_meta_df = pd.read_csv(f'{BASE_PATH}/train_meta.csv')
train_df = train_df.sort_values(by=['patient_id'])

DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
#DEVICE = 'cpu'

# Dataset

In [3]:
class AbdominalCTDataset(Dataset):
    def __init__(self, meta_df):
        self.meta_df = meta_df
    
    def __len__(self):
        return len(self.meta_df)
    
    def __getitem__(self, idx):
        row = self.meta_df.iloc[idx]
        label = row[['bowel_healthy','bowel_injury',
                    'extravasation_healthy','extravasation_injury',
                    'kidney_healthy','kidney_low','kidney_high',
                    'liver_healthy','liver_low','liver_high',
                    'spleen_healthy','spleen_low','spleen_high', 'any_injury']]
        data_3d = cv2.imread(row['path'], cv2.IMREAD_GRAYSCALE)
        data_3d = cv2.equalizeHist(data_3d)
        data_3d = data_3d.reshape(1, RESOL, RESOL, RESOL).astype(float)  # channel, 3D
        #avg std
        data_3d -=47.5739
        data_3d /=34.6175
        data_3d = torch.from_numpy(data_3d.astype(np.float32))
        label = label.to_numpy().astype(np.float32)
                
        #any_injury = label[-1]
        #nu_any_injury = np.zeros(2)
        #nu_any_injury[int(any_injury)]= 1
        
        #label = np.hstack([label[:-1], nu_any_injury])
        label = torch.from_numpy(label)
        return data_3d, label        

train_dataset = AbdominalCTDataset(train_meta_df)
data_3d, label = train_dataset[0]
print(label)

del train_dataset, data_3d, label
gc.collect()

tensor([1., 0., 1., 0., 1., 0., 0., 0., 1., 0., 1., 0., 0., 1.])


16

In [4]:
'''
#normalizatino parameter
train_dataset = AbdominalCTDataset(train_meta_df)
data_3d, label = train_dataset[0]

avgs = np.zeros(len(train_dataset))
stds = np.zeros(len(train_dataset))
for i in tqdm(range(0, len(train_dataset))):
    data_3d, label = train_dataset[i]
    data_3d = data_3d.numpy()
    avgs[i] = np.average(data_3d)
    stds[i] = np.std(data_3d)
print(np.average(avgs))
print(np.average(stds))    

del train_dataset, data_3d, label, avgs, stds
gc.collect()
'''

'\n#normalizatino parameter\ntrain_dataset = AbdominalCTDataset(train_meta_df)\ndata_3d, label = train_dataset[0]\n\navgs = np.zeros(len(train_dataset))\nstds = np.zeros(len(train_dataset))\nfor i in tqdm(range(0, len(train_dataset))):\n    data_3d, label = train_dataset[i]\n    data_3d = data_3d.numpy()\n    avgs[i] = np.average(data_3d)\n    stds[i] = np.std(data_3d)\nprint(np.average(avgs))\nprint(np.average(stds))    \n\ndel train_dataset, data_3d, label, avgs, stds\ngc.collect()\n'

# Model

In [5]:
class AbdominalClassifier(nn.Module):
    def __init__(self, model_depth, device = DEVICE):
        super().__init__()
        self.device = device
        self.resnet3d = generate_model(model_depth = model_depth, n_input_channels = 1)
        self.flatten  = nn.Flatten()
        self.dropout  = nn.Dropout(p=0.5)
        self.softmax  = nn.Softmax(dim=1)
        self.sigmoid  = nn.Sigmoid()
        size_res_out  = 56832
        self.fc_bowel = nn.Linear(size_res_out, 2)
        self.fc_extrav= nn.Linear(size_res_out, 2)
        self.fc_kidney= nn.Linear(size_res_out, 3)
        self.fc_liver = nn.Linear(size_res_out, 3)
        self.fc_spleen= nn.Linear(size_res_out, 3)
        
        self.maxpool  = nn.MaxPool1d(5, 1)

    def forward(self, x):
        x = self.resnet3d(x)
        for i in range(0, 4):
            x[i] = self.flatten(x[i])
        x = torch.cat(x, axis = 1)
        x     = self.dropout(x)
        bowel = self.fc_bowel(x)
        extrav= self.fc_extrav(x)
        kidney= self.fc_kidney(x)
        liver = self.fc_liver(x)
        spleen= self.fc_spleen(x)

        labels = torch.cat([bowel, extrav, kidney, liver, spleen], dim = 1)

        bowel_soft = self.softmax(bowel)
        extrav_soft = self.softmax(extrav)
        kidney_soft = self.softmax(kidney)
        liver_soft = self.softmax(liver)
        spleen_soft = self.softmax(spleen)

        any_in = torch.cat([1-bowel_soft[:,0:1], 1-extrav_soft[:,0:1], 
                            1-kidney_soft[:,0:1], 1-liver_soft[:,0:1], 1-spleen_soft[:,0:1]], dim = 1) 
        any_in = self.maxpool(any_in)
        any_not_in = 1-any_in
        any_in = torch.cat([any_not_in, any_in], dim = 1)

        return labels, any_in

In [6]:
model = AbdominalClassifier(10)

def get_n_params(model):
    pp=0
    for p in list(model.parameters()):
        nn=1
        for s in list(p.size()):
            nn = nn*s
        pp += nn
    return pp

print(get_n_params(model))
del model
gc.collect()

15094349


0

In [7]:
train_dataset = AbdominalCTDataset(train_meta_df[train_meta_df['fold']!=0])
valid_dataset = AbdominalCTDataset(train_meta_df[train_meta_df['fold']==0])

train_loader = DataLoader(dataset = train_dataset, shuffle = True, batch_size = BATCH_SIZE, pin_memory = True, 
                          num_workers = 8, drop_last = False)

valid_loader = DataLoader(dataset = valid_dataset, shuffle = False, batch_size = BATCH_SIZE, pin_memory = True, 
                          num_workers = 8, drop_last = False)                        

# Train

In [8]:
model = AbdominalClassifier(18)
model.to(DEVICE)

optimizer = torch.optim.AdamW(model.parameters(), lr = LR)
ttl_iters = N_EPOCHS * len(train_loader)
scheduler = torch.optim.lr_scheduler.OneCycleLR(optimizer, max_lr=LR, 
                                                steps_per_epoch=len(train_loader), epochs = N_EPOCHS)
#scheduler = CosineAnnealingLR(optimizer, T_max=ttl_iters, eta_min=1e-6)


weights = np.ones(2)
weights[1] = 2
crit_bowel  = nn.CrossEntropyLoss(weight = torch.from_numpy(weights).to(DEVICE))
weights[1] = 6
crit_extrav = nn.CrossEntropyLoss(weight = torch.from_numpy(weights).to(DEVICE))
crit_any = nn.CrossEntropyLoss(weight = torch.from_numpy(weights).to(DEVICE))

weights = np.ones((3))
weights[1] = 2
weights[2] = 4
crit_kidney = nn.CrossEntropyLoss(weight = torch.from_numpy(weights).to(DEVICE))
crit_liver  = nn.CrossEntropyLoss(weight = torch.from_numpy(weights).to(DEVICE))
crit_spleen = nn.CrossEntropyLoss(weight = torch.from_numpy(weights).to(DEVICE))


In [9]:
def normalize_to_one(tensor):
    norm = torch.sum(tensor, 1)
    for i in range(0, tensor.shape[1]):
        tensor[:,i]/=norm
    return tensor

def apply_softmax_to_labels(X_out):
    softmax = nn.Softmax(dim=1)

    X_out[:,:2]    = normalize_to_one(softmax(X_out[:,:2]))
    X_out[:,2:4]   = normalize_to_one(softmax(X_out[:,2:4]))
    X_out[:,4:7]   = normalize_to_one(softmax(X_out[:,4:7]))
    X_out[:,7:10]  = normalize_to_one(softmax(X_out[:,7:10]))
    X_out[:,10:13] = normalize_to_one(softmax(X_out[:,10:13]))

    return X_out

def calculate_score(X_outs, ys):
    bowel_weights =  ys[:,0] + 2*ys[:,1]
    extrav_weights = ys[:,2] + 6*ys[:,3]
    kidney_weights = ys[:,4] + 2*ys[:,5] + 4*ys[:,6]
    liver_weights  = ys[:,7] + 2*ys[:,8] + 4*ys[:,9]
    spleen_weights = ys[:,10] + 2*ys[:,11] + 4*ys[:,12]
    any_in_weights = ys[:,13] + 6*ys[:,14]

    loss = (
             sklearn.metrics.log_loss(ys[:,:2], X_outs[:,:2], sample_weight = bowel_weights)
           + sklearn.metrics.log_loss(ys[:,2:4], X_outs[:,2:4], sample_weight = extrav_weights)
           + sklearn.metrics.log_loss(ys[:,4:7], X_outs[:,4:7], sample_weight = kidney_weights)
           + sklearn.metrics.log_loss(ys[:,7:10], X_outs[:,7:10], sample_weight = liver_weights)
           + sklearn.metrics.log_loss(ys[:,10:13], X_outs[:,10:13], sample_weight = spleen_weights)
           + sklearn.metrics.log_loss(ys[:,13:15], X_outs[:,13:15], sample_weight =  any_in_weights)
           ) / 6
    return loss

In [10]:
if __name__ == '__main__':
    scaler = torch.cuda.amp.GradScaler(enabled=True)
    val_metrics = np.ones(N_EPOCHS)*100

    for epoch in range(0, N_EPOCHS):
        train_meters = {'loss': AverageMeter()}
        val_meters   = {'loss': AverageMeter()}
        
        model.train()
        pbar = tqdm(train_loader, leave=False)    
        for X, y in pbar:
            batch_size = X.shape[0]
            X, y = X.to(DEVICE), y.to(DEVICE)
            optimizer.zero_grad()
            
            with torch.cuda.amp.autocast(enabled=True):
                X_out, X_any  = model(X)
                loss  = crit_bowel(X_out[:,:2], y[:,:2])
                loss += crit_extrav(X_out[:,2:4], y[:,2:4])
                loss += crit_kidney(X_out[:,4:7], y[:,4:7])
                loss += crit_liver(X_out[:,7:10], y[:,7:10])
                loss += crit_spleen(X_out[:,10:13], y[:,10:13])

                loss += crit_any(X_any,  torch.cat([torch.ones(batch_size, 1).to(DEVICE)- y[:,13:14],y[:,13:14]], dim = 1))  
                loss /= 6
                scaler.scale(loss).backward()
                scaler.step(optimizer)
                scheduler.step()
                scaler.update()          

            trn_loss = loss.item()      
            train_meters['loss'].update(trn_loss, n=X.size(0))     
            pbar.set_description(f'Train loss: {trn_loss}')   
        print('Epoch {:d} / trn/loss={:.4f}'.format(epoch+1, train_meters['loss'].avg))    
        

        X_outs=[]
        ys=[]
        model.eval()
        for X, y in tqdm(valid_loader, leave=False):
            batch_size = X.shape[0]        
            X, y = X.to(DEVICE), y.to(DEVICE)
            with torch.cuda.amp.autocast(enabled=True):        
                with torch.no_grad():                 
                    X_out, X_any = model(X)                                           
                    y_any = torch.cat([torch.ones(batch_size, 1).to(DEVICE)- y[:,13:14],y[:,13:14]], dim = 1)              
                              
                    X_out = apply_softmax_to_labels(X_out).to('cpu').numpy()

                    X_any = X_any.to('cpu').numpy()
                    X_out = np.hstack([X_out, X_any])
                    X_outs.append(X_out)

                    y     = y.to('cpu').numpy()[:,:-1]
                    y_any = y_any.to('cpu').numpy()
                    y     = np.hstack([y, y_any])
                    ys.append(y)

        X_outs = np.vstack(X_outs) 
        ys     = np.vstack(ys)
        metric = calculate_score(X_outs, ys)                
        print('Epoch {:d} / val/loss={:.4f}'.format(epoch+1, metric))   
        
        #Save the best model    
        if(metric < np.min(val_metrics)):
            try:
                os.makedirs(f'{BASE_PATH}/weights')
            except:
                a = 1
            best_metric = metric
            print(f'Best val_metric {best_metric} at epoch {epoch+1}!')
            torch.save(model, f'{BASE_PATH}/weights/best.pt')    
        val_metrics[epoch] = metric
        

  return F.conv3d(
                                                                                  

Epoch 1 / trn/loss=0.9407




Epoch 1 / val/loss=0.8085
Best val_metric 0.8085478295159066 at epoch 1!


                                                                                  

Epoch 2 / trn/loss=0.9467




Epoch 2 / val/loss=0.6616
Best val_metric 0.6616173343306172 at epoch 2!


                                                                                  

Epoch 3 / trn/loss=0.9881




Epoch 3 / val/loss=0.7488


Train loss: 0.7419307654102643:  68%|██████▊   | 324/473 [02:10<00:58,  2.54it/s] 