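**About**: This notebook validates the inference pipeline end to end: it runs the level 1 (2D image) models and then the level 2 model on raw DICOM studies, and writes the resulting submission file.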

In [1]:
# Enable IPython's autoreload extension; mode 2 reloads all imported modules
# before each cell execution.
%load_ext autoreload
%autoreload 2

In [2]:
cd ../src/

/workspace/kaggle_rsna_abdominal/src


## Initialization

### Imports

In [4]:
import os
import sys
import glob
import json
import warnings
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from tqdm import tqdm

In [5]:
# !pip install -qU python-gdcm pydicom pylibjpeg

### Utils

In [6]:
import pydicom

def _first_if_multi(value):
    """Return the first item of a multi-valued DICOM element, or the value itself."""
    from collections.abc import Sequence

    # Strings are sequences too, but a string here is a single value, not a list of values.
    if isinstance(value, Sequence) and not isinstance(value, (str, bytes)):
        return value[0]
    return value


def standardize_pixel_array(dcm: "pydicom.dataset.FileDataset") -> np.ndarray:
    """
    Decode a DICOM slice into rescaled, window-clipped pixel values.

    Source : https://www.kaggle.com/competitions/rsna-2023-abdominal-trauma-detection/discussion/427217

    Steps:
        1. When PixelRepresentation == 1, shift the stored bits up to the top of
           the allocated word and back down, so the sign bit is propagated.
        2. Apply the linear rescale: pixel * RescaleSlope + RescaleIntercept.
        3. Clip to the window [center - width / 2, center + width / 2].

    Args:
        dcm: Dataset exposing pixel_array, PixelRepresentation, BitsAllocated,
            BitsStored, RescaleIntercept, RescaleSlope, WindowCenter and WindowWidth.
            WindowCenter / WindowWidth may be single values or multi-valued;
            in the latter case the first value is used.

    Returns:
        np.ndarray: Rescaled pixel values clipped to the window. No normalization
        to [0, 1] is done here.
    """
    # Correct DICOM pixel_array if PixelRepresentation == 1.
    pixel_array = dcm.pixel_array
    if dcm.PixelRepresentation == 1:
        bit_shift = dcm.BitsAllocated - dcm.BitsStored
        dtype = pixel_array.dtype
        pixel_array = (pixel_array << bit_shift).astype(dtype) >> bit_shift

    intercept = float(dcm.RescaleIntercept)
    slope = float(dcm.RescaleSlope)

    # A multi-valued element cannot be passed to int() directly; keep the first window.
    # int() truncation is kept on purpose so results match the original preprocessing.
    center = int(_first_if_multi(dcm.WindowCenter))
    width = int(_first_if_multi(dcm.WindowWidth))
    low = center - width / 2
    high = center + width / 2

    pixel_array = (pixel_array * slope) + intercept
    pixel_array = np.clip(pixel_array, low, high)

    return pixel_array

In [7]:
import cv2
import pydicom


def process(patient, study, size=None, save_folder="", data_path=""):
    """
    Load every DICOM slice of a study and return it as 8-bit images ordered along z.

    Args:
        patient: Patient folder name.
        study: Study folder name inside the patient folder.
        size (int or None): If given, every slice is resized to (size, size).
        save_folder (str): Prefix prepended to the keys of the returned dict.
        data_path (str): Prefix prepended to "<patient>/<study>/*.dcm"
            (plain concatenation, so it needs its own trailing separator).

    Returns:
        dict: Maps save_folder + "<patient>_<study>_<i>.png" to a uint8 array,
        where i is the rank of the slice once sorted by z position.
    """
    slices_by_z = {}
    dcm_paths = glob.glob(data_path + f"{patient}/{study}/*.dcm")
    dcm_paths.sort()

    for dcm_path in dcm_paths:
        ds = pydicom.dcmread(dcm_path)

        # Tag (0020,0032); its last component is used as the z position.
        z = ds[(0x20, 0x32)].value[-1]

        frame = standardize_pixel_array(ds)
        lo, hi = frame.min(), frame.max()
        frame = (frame - lo) / (hi - lo + 1e-6)  # min-max scaling

        if ds.PhotometricInterpretation == "MONOCHROME1":
            frame = 1 - frame

        # NOTE: slices sharing the same z overwrite each other (the last file read wins).
        slices_by_z[z] = frame

    out = {}
    for idx, z in enumerate(sorted(slices_by_z)):
        frame = slices_by_z[z]
        if size is not None:
            frame = cv2.resize(frame, (size, size))

        key = save_folder + f"{patient}_{study}_{idx}.png"
        out[key] = (frame * 255).astype(np.uint8)

    return out

### Main

In [8]:
# Root folder of the raw DICOM studies, read as <patient>/<study>/*.dcm.
# The trailing slash is required: paths are built by string concatenation.
DATA_PATH = "../input/train_images/"
# Folder receiving the level 1 predictions (.npy) and the final submission.csv.
SAVE_FOLDER = "../output/tmp/"

In [9]:
BATCH_SIZE = 64  # batch size passed to the level 1 `predict`
BATCH_SIZE_2 = 1024  # batch size passed to the level 2 `predict_2`
USE_FP16 = True  # forwarded as `use_fp16` to both predict functions
NUM_WORKERS = 2  # forwarded as `num_workers` to both predict functions
FOLDS = [0]  # folds whose weights ("<name>_<fold>.pt") are loaded at both levels

## Level 1

In [13]:
from data.dataset import Abdominal2DInfDataset
from data.transforms import get_transfos
from inference.extract_features import predict, Config

from util.torch import load_model_weights
from model_zoo.models import define_model

In [14]:
# Level 1 experiments as (log folder, mode) pairs. Each folder is expected to hold
# "config.json" and the fold weights "<name>_<fold>.pt".
# NOTE(review): the mode string is unpacked but never read in this notebook.
exp_folders = [
    ("../logs/2023-09-06/4/", "seg"),
    ("../logs/2023-09-15/22/", "probas"),
]

In [21]:
# Build the level 1 models: `models[k]` is the list of fold models of `exp_folders[k]`.
models = []

for exp_folder, mode in exp_folders:
    # Context manager so the config file handle is closed right after parsing.
    with open(exp_folder + "config.json", "r") as config_file:
        config = Config(json.load(config_file))

    models_ = []
    for fold in FOLDS:
        # A fresh network per fold. Reusing a single instance would make every entry
        # of `models_` alias the same object, which ends up holding only the last
        # fold's weights if `load_model_weights` loads in place (TODO confirm).
        model = define_model(
            config.name,
            drop_rate=config.drop_rate,
            drop_path_rate=config.drop_path_rate,
            use_gem=config.use_gem,
            replace_pad_conv=config.replace_pad_conv,
            num_classes=config.num_classes,
            num_classes_aux=config.num_classes_aux,
            n_channels=config.n_channels,
            reduce_stride=config.reduce_stride,
            pretrained=False
        )
        model = model.cuda().eval()

        weights = exp_folder + f"{config.name}_{fold}.pt"
        model = load_model_weights(model, weights, verbose=config.local_rank == 0)
        models_.append(model)

    models.append(models_)


 -> Loading encoder weights from ../logs/2023-09-06/4/tf_efficientnetv2_s_0.pt


 -> Loading encoder weights from ../logs/2023-09-15/22/tf_efficientnetv2_s_0.pt



In [22]:
# Level 1 inference: for every study, decode the slices, run each experiment's fold
# models on them, and save the fold-averaged predictions as
# SAVE_FOLDER + "<study>_<exp_name>.npy".

# np.save does not create missing folders.
os.makedirs(SAVE_FOLDER, exist_ok=True)

# Configs and experiment names do not depend on the study: read them once instead of
# re-opening config.json for every (study, experiment) pair.
level1_configs = []
level1_names = []
for exp_folder, _ in exp_folders:
    with open(exp_folder + "config.json", "r") as config_file:
        level1_configs.append(Config(json.load(config_file)))
    # "../logs/<date>/<run>/" -> "<date>_<run>"
    level1_names.append("_".join(exp_folder.split('/')[-3:-1]))

dfs = []
for patient in tqdm(sorted(os.listdir(DATA_PATH))):
    for study in sorted(os.listdir(DATA_PATH + patient)):
        imgs = process(patient, study, data_path=DATA_PATH)

        # Frame table; keys look like "<patient>_<study>_<i>.png" (no save_folder prefix here).
        df = pd.DataFrame({"path": list(imgs)})
        df['patient'] = df['path'].apply(lambda x: x.split('_')[0])
        df['series'] = df['path'].apply(lambda x: x.split('_')[1])
        df['frame'] = df['path'].apply(lambda x: x.split('_')[2][:-4])  # strip ".png"
        dfs.append(df)

        # `config` is deliberately the loop variable: it stays bound to the last
        # level 1 config after the loop, as before.
        for models_list, config, exp_name in zip(models, level1_configs, level1_names):
            dataset = Abdominal2DInfDataset(
                df,
                transforms=get_transfos(augment=False, resize=config.resize),
                frames_chanel=getattr(config, "frames_chanel", 0),
                imgs=imgs
            )

            preds = []
            for model in models_list:
                pred, _ = predict(
                    model,
                    dataset,
                    config.loss_config,
                    batch_size=BATCH_SIZE,
                    use_fp16=USE_FP16,
                    num_workers=NUM_WORKERS,
                )
                preds.append(pred)

            # Average over folds.
            np.save(SAVE_FOLDER + f"{study}_{exp_name}.npy", np.mean(preds, 0))

100%|██████████| 1/1 [00:07<00:00,  7.71s/it]


In [23]:
# ref = np.load(exp_folder + "pred_val_0.npy")
# ref = ref[:len(pred)]
# np.abs(ref - pred).max()

In [24]:
# plt.plot(preds[0])
# plt.plot(preds[1])

In [25]:
# Collapse the per-frame tables into one row per (patient, series).
# `path` and `frame` keep their per-group maximum; both hold strings, so this is a
# lexicographic maximum.
df = (
    pd.concat(dfs, ignore_index=True)
    .groupby(['patient', 'series'])
    .max()
    .reset_index()
)

## Level 2

In [26]:
from data.dataset import PatientFeatureInfDataset
from model_zoo.models_lvl2 import define_model as define_model_2
from inference.lvl2 import predict as predict_2
from inference.lvl2 import to_sub_format

In [27]:
# Level 2 experiment folders; each is expected to hold "config.json" and the fold
# weights "<name>_<fold>.pt".
EXP_FOLDERS_2 = [
    "../logs/2023-09-15/36/"
]

In [29]:
# Level 2 inference: one prediction array per (experiment, fold), collected in `all_preds`.
all_preds = []

for exp_folder in EXP_FOLDERS_2:
    # Context manager so the config file handle is closed right after parsing.
    with open(exp_folder + "config.json", "r") as config_file:
        config_2 = Config(json.load(config_file))

    # Built from the series ids and the level 1 outputs stored under SAVE_FOLDER.
    dataset = PatientFeatureInfDataset(
        df['series'], config_2.exp_folders, max_len=config_2.max_len, save_folder=SAVE_FOLDER
    )

    model = define_model_2(
        config_2.name,
        ft_dim=config_2.ft_dim,
        layer_dim=config_2.layer_dim,
        n_layers=config_2.n_layers,
        dense_dim=config_2.dense_dim,
        p=config_2.p,
        use_msd=config_2.use_msd,
        num_classes=config_2.num_classes,
        num_classes_aux=config_2.num_classes_aux,
        n_fts=config_2.n_fts,
    )
    model = model.eval().cuda()

    # The level 1 `models` list is intentionally left untouched here, so the level 1
    # cells can still be re-run afterwards.
    for fold in FOLDS:
        weights = exp_folder + f"{config_2.name}_{fold}.pt"
        # Verbosity comes from this experiment's own config, not from a level 1
        # `config` left over in the kernel; defaults to rank 0 when the key is absent.
        model = load_model_weights(
            model, weights, verbose=getattr(config_2, "local_rank", 0) == 0
        )

        # Predict right away: the same model instance is reloaded for the next fold.
        preds = predict_2(
            model,
            dataset,
            config_2.loss_config,
            batch_size=BATCH_SIZE_2,
            use_fp16=USE_FP16,
            num_workers=NUM_WORKERS,
        )
        all_preds.append(preds)


 -> Loading encoder weights from ../logs/2023-09-15/36/rnn_0.pt



In [38]:
# Average the level 2 predictions over experiments and folds.
preds = np.mean(all_preds, axis=0)

# One `pred_<i>` column per output, aligned row by row with `df`.
n_outputs = preds.shape[1]
for i in range(n_outputs):
    df[f'pred_{i}'] = preds[:, i]

# Average the series of each patient, then convert to the submission layout.
patient_level = df.drop(columns=['series', 'path', 'frame'])
dfg = patient_level.groupby('patient').mean().reset_index()
sub = to_sub_format(dfg)

In [39]:
# (np.load(exp_folder + "pred_oof.npy")[0] - preds).max()

In [40]:
# Write the submission (without the index column) into SAVE_FOLDER, then display it
# as the cell output.
sub.to_csv(SAVE_FOLDER + "submission.csv", index=False)
sub

Unnamed: 0,patient_id,bowel_healthy,bowel_injury,extravasation_healthy,extravasation_injury,kidney_healthy,kidney_low,kidney_high,liver_healthy,liver_low,liver_high,spleen_healthy,spleen_low,spleen_high
0,10082,0.970703,0.02948,0.510742,0.489014,0.978516,0.012634,0.009033,0.954102,0.042084,0.003614,0.705566,0.168213,0.126221


Done ! 