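**About**: This notebook validates the two-level inference pipeline. It runs the level-1 models on the test images, feeds their saved predictions to the level-2 models, and compares the resulting submission with the out-of-fold predictions saved during training.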

In [None]:
# Enable IPython's autoreload extension in mode 2: every imported module is
# reloaded before a cell runs, so source edits are picked up without a kernel restart.
%load_ext autoreload
%autoreload 2

In [None]:
cd ../src/

## Initialization

In [None]:
from util.logger import upload_to_kaggle

In [None]:
# Experiment folders handed to upload_to_kaggle (presumably published as the
# Kaggle dataset named below - confirm against util.logger).
weight_folders = [
    "../logs/2023-09-20/14/",
    "../logs/2023-09-20/36/",
    "../logs/2023-09-28/15/",
]
dataset_folder = "../output/dataset/"
dataset_title = "RSNA Abdomen Weights 1"

upload_to_kaggle(weight_folders, dataset_folder, dataset_title, update_folders=True)

In [None]:
# !pip install -qU python-gdcm pydicom pylibjpeg dicomsdl

### Imports

In [None]:
import os
import sys
import cv2
import glob
import json
import warnings
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from tqdm import tqdm

In [None]:
from inference.extract_features import Config  # predict, 
from inference.lvl2 import predict as predict_2
from inference.lvl2 import PatientFeatureInfDataset, to_sub_format

from util.torch import load_model_weights

from inference.processing import process, restrict_imgs
from inference.lvl1 import predict, AbdominalInfDataset
from data.transforms import get_transfos

from model_zoo.models import define_model
from model_zoo.models_lvl2 import define_model as define_model_2

### Main

In [None]:
# Root folder listed patient by patient (then study by study) by the level-1
# loop and passed to process() as data_path.
DATA_PATH = "../input/test_images/"
# Folder where the level-1 predictions are written as .npy files; it is also
# given to the level-2 dataset as save_folder.
SAVE_FOLDER = "../output/tmp/"

In [None]:
# Batch size for the level-1 predict() calls.
BATCH_SIZE = 32
# Batch size for the level-2 predict_2() calls.
BATCH_SIZE_2 = 512
# Forwarded as use_fp16 to both prediction functions.
USE_FP16 = True
# Forwarded as num_workers to both prediction functions.
NUM_WORKERS = 2

# Forwarded to process(restrict=...). When True, the level-1 predictions are
# zero-padded back to n_imgs rows before being saved.
RESTRICT = True

## Level 1

In [None]:
# Fold whose weights are loaded for both levels. A value of 0 also activates
# the single-patient filter in the level-1 inference loop.
FOLD = 0

# Level-1 experiments, unpacked as (experiment folder, mode, folds to load).
# Each folder must contain config.json and the "<config.name>_<fold>.pt" weights.
EXP_FOLDERS = [
    ("../logs/2023-09-20/14/", "seg", [FOLD]),
    ("../logs/2023-09-20/36/", "probas_2d", [FOLD]),
]

# Level-2 experiment folders (config.json, weights, and df_oof.csv used by the checks).
EXP_FOLDERS_2 = [
    "../logs/2023-09-28/15/"
]
# Folds loaded for every level-2 experiment.
# FOLDS_2 = [0, 1, 2, 3]
FOLDS_2 = [FOLD]

In [None]:
# Load the level-1 models: `models` holds one list per entry of EXP_FOLDERS,
# each containing one model per requested fold.
models = []

for exp_folder, mode, folds in EXP_FOLDERS:
    # Close the config file as soon as it has been parsed.
    with open(exp_folder + "config.json", "r") as f:
        config = Config(json.load(f))

    models_ = []
    for fold in folds:
        # Build a fresh model for every fold. Re-using a single instance would
        # make every entry of models_ point to the same object, which would
        # end up holding only the weights of the last fold loaded.
        model = define_model(
            config.name,
            drop_rate=config.drop_rate,
            drop_path_rate=config.drop_path_rate,
            use_gem=config.use_gem,
            # Older configs may not define the three optional keys below.
            head_3d=getattr(config, "head_3d", ""),
            n_frames=getattr(config, "n_frames", ""),
            replace_pad_conv=config.replace_pad_conv,
            num_classes=config.num_classes,
            num_classes_aux=config.num_classes_aux,
            n_channels=config.n_channels,
            reduce_stride=config.reduce_stride,
            increase_stride=getattr(config, "increase_stride", False),
            pretrained=False
        )
        model = model.cuda().eval()

        weights = exp_folder + f"{config.name}_{fold}.pt"
        model = load_model_weights(model, weights, verbose=config.local_rank == 0)
        models_.append(model)

    models.append(models_)

In [None]:
%%time

dfs = []
for patient in tqdm(sorted(os.listdir(DATA_PATH))):
    if FOLD == 0:
        if patient != "10082":
            continue

    for study in sorted(os.listdir(DATA_PATH + patient)):
        print("-> Patient", patient, '- Study', study)

        imgs, n_imgs = process(patient, study, data_path=DATA_PATH, on_gpu=True, crop_size=384, restrict=RESTRICT)

#         img_paths = sorted(glob.glob(f'../input/imgs/{patient}_{study}*'))
#         if RESTRICT:
#             img_paths, n_imgs = restrict_imgs(img_paths)
#         imgs = {k.split('/')[-1]: cv2.imread(k, 0).astype(np.float32) / 255 for k in img_paths}  # 

        df = pd.DataFrame({"path": imgs.keys()})
        df['patient_id'] = df['path'].apply(lambda x: x.split('_')[0])
        df['patient'] = df['path'].apply(lambda x: x.split('_')[0])
        df['series'] = df['path'].apply(lambda x: x.split('_')[1])
        df['frame'] = df['path'].apply(lambda x: int(x.split('_')[2][:-4]))
        dfs.append(df)
        
        for models_list, (exp_folder, _, _) in zip(models, EXP_FOLDERS):
            exp_name = "_".join(exp_folder.split('/')[-2:-1])
            if "2023" not in exp_name:  # locally
                exp_name = "_".join(exp_folder.split('/')[-3:-1])
            
            config = Config(json.load(open(exp_folder + "config.json", "r")))
            
            transfos = get_transfos(
                augment=False,
                resize=(384, 384),
                crop=True
            )
            dataset = AbdominalInfDataset(
                df,
                transforms=transfos,
                frames_chanel=config.frames_chanel if hasattr(config, "frames_chanel") else 0,
                n_frames=config.n_frames if hasattr(config, "n_frames") else 1,
                stride=config.stride if hasattr(config, "stride") else 1,
                imgs=imgs
            )

            preds = []
            for model in models_list:
                pred, _ = predict(
                    model,
                    dataset,
                    config.loss_config,
                    batch_size=BATCH_SIZE,
                    use_fp16=USE_FP16,
                    num_workers=NUM_WORKERS,
                )
                preds.append(pred)

            if RESTRICT:
                pred_padded = np.zeros((n_imgs, pred.shape[-1]))
                pred_padded[-len(pred):] = np.mean(preds, 0)
            else:
                pred_padded = np.mean(preds, 0)

            np.save(SAVE_FOLDER + f"{study}_{exp_name}.npy", pred_padded)
            
#             ref = np.load(exp_folder + "pred_val_0.npy")
#             ref = ref[:len(pred)]
#             print(np.abs(ref - np.mean(preds, 0)).max())
            
#             break
#     break

df = pd.concat(dfs, ignore_index=True)
df = df.groupby(['patient', 'series']).max().reset_index()

## Level 2

In [None]:
# Level-2 inference: run every (experiment, fold) level-2 model on the saved
# level-1 predictions and collect the outputs in all_preds.
all_preds = []

for exp_folder in EXP_FOLDERS_2:
    with open(exp_folder + "config.json", "r") as f:
        config_2 = Config(json.load(f))

    # Read the flag from the level-2 config rather than from the `config`
    # variable left over by the level-1 cells. Loading stays verbose when the
    # level-2 config has no local_rank entry.
    verbose = getattr(config_2, "local_rank", 0) == 0

    dataset = PatientFeatureInfDataset(
        df['series'],
        config_2.exp_folders,
        max_len=config_2.max_len,
        restrict=config_2.restrict,
        resize=config_2.resize,
        save_folder=SAVE_FOLDER
    )

    model = define_model_2(
        config_2.name,
        ft_dim=config_2.ft_dim,
        layer_dim=config_2.layer_dim,
        n_layers=config_2.n_layers,
        dense_dim=config_2.dense_dim,
        p=config_2.p,
        use_msd=config_2.use_msd,
        num_classes=config_2.num_classes,
        num_classes_aux=config_2.num_classes_aux,
        n_fts=config_2.n_fts,
    )
    model = model.eval().cuda()

    for fold in FOLDS_2:
        # The same model object is re-used across folds; this is safe because
        # predictions are computed right after each set of weights is loaded.
        weights = exp_folder + f"{config_2.name}_{fold}.pt"
        model = load_model_weights(model, weights, verbose=verbose)

        preds = predict_2(
            model,
            dataset,
            config_2.loss_config,
            batch_size=BATCH_SIZE_2,
            use_fp16=USE_FP16,
            num_workers=NUM_WORKERS,
        )
        all_preds.append(preds)

In [None]:
# Average the level-2 outputs over every (experiment, fold) run.
preds = np.mean(all_preds, 0).astype(np.float64)

# Store each output of the level-2 model in its own pred_<i> column.
n_outputs = preds.shape[1]
for idx in range(n_outputs):
    df[f'pred_{idx}'] = preds[:, idx]

# Drop the per-series bookkeeping columns, then average the predictions per patient.
non_pred_cols = ['series', 'path', 'frame', 'patient_id']
df_preds = df.drop(columns=non_pred_cols)
dfg = df_preds.groupby('patient').mean().reset_index()
sub = to_sub_format(dfg)

### Checks

In [None]:
# Compare the submission with the out-of-fold predictions saved by the last
# level-2 experiment. The folder is taken from EXP_FOLDERS_2 explicitly
# instead of the `exp_folder` loop variable, which is overwritten by both the
# level-1 and the level-2 loops.
oof_folder = EXP_FOLDERS_2[-1]
df_oof = pd.read_csv(oof_folder + 'df_oof.csv')
df_oof = df_oof[df_oof['patient_id'].isin(sub['patient_id'].values.astype(int))]

# Keep the identifiers plus the last 11 columns, renamed to the submission column names.
df_oof = df_oof[["patient_id", "fold"] + list(df_oof.columns[-11:])]
df_oof.columns = ["patient_id", "fold", "bowel_injury", "extravasation_injury"] + list(sub.columns[-9:])
df_oof["extravasation_healthy"] = 1 - df_oof["extravasation_injury"]
df_oof["bowel_healthy"] = 1 - df_oof["bowel_injury"]

df_oof = df_oof.sort_values('patient_id', ignore_index=True)

# Largest absolute difference per row over all prediction columns.
# NOTE(review): the subtraction aligns rows on the index, not on patient_id -
# confirm that `sub` is ordered like the sorted df_oof when several patients are checked.
pred_cols = sub.columns[1:]
df_oof['diff'] = (sub[pred_cols] - df_oof[pred_cols]).abs().max(axis=1)

df_oof = df_oof[list(sub.columns) + ['fold', 'diff']]
# df_oof[df_oof['fold'] == FOLD]
df_oof

In [None]:
# NOTE(review): this cell repeats the comparison done in the previous cell;
# consider keeping only one of the two.
# The out-of-fold file is located through EXP_FOLDERS_2 rather than through
# the `exp_folder` loop variable, whose value depends on which loop ran last.
oof_path = EXP_FOLDERS_2[-1] + 'df_oof.csv'
df_oof = pd.read_csv(oof_path)

checked_patients = sub['patient_id'].values.astype(int)
df_oof = df_oof[df_oof['patient_id'].isin(checked_patients)]

# Keep the identifiers plus the last 11 columns, renamed to the submission column names.
df_oof = df_oof[["patient_id", "fold"] + list(df_oof.columns[-11:])]
df_oof.columns = ["patient_id", "fold", "bowel_injury", "extravasation_injury"] + list(sub.columns[-9:])
df_oof["extravasation_healthy"] = 1 - df_oof["extravasation_injury"]
df_oof["bowel_healthy"] = 1 - df_oof["bowel_injury"]

df_oof = df_oof.sort_values('patient_id', ignore_index=True)

# Largest absolute gap per row between the submission and the out-of-fold values.
# NOTE(review): rows are matched on the index, not on patient_id - verify the
# ordering of `sub` before checking more than one patient.
compared_cols = sub.columns[1:]
abs_gap = (sub[compared_cols] - df_oof[compared_cols]).abs()
df_oof['diff'] = abs_gap.max(axis=1)

df_oof = df_oof[list(sub.columns) + ['fold', 'diff']]
# df_oof[df_oof['fold'] == FOLD]
df_oof

In [None]:
# sub.to_csv(SAVE_FOLDER + "submission.csv", index=False)
# sub

Done!