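**About**: This notebook validates the inference pipeline: it runs the level 1 and level 2 models on the data in `../input/train_images/` and writes the predictions as a submission-format CSV.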

In [None]:
%load_ext autoreload
%autoreload 2

In [None]:
cd ../src/

## Initialization

In [None]:
from util.logger import upload_to_kaggle

In [None]:
# Experiment folders passed to `upload_to_kaggle`.
# Earlier selections, kept for reference:
#     ["../logs/2023-09-06/4/", "../logs/2023-09-15/22/", "../logs/2023-09-15/36/"]
#     ["../logs/2023-09-18/81/", "../logs/2023-09-18/79/"]
folders_to_upload = [
    "../logs/2023-09-19/11/",
    "../logs/2023-09-19/10/",
    "../logs/2023-09-18/90/",
]

upload_to_kaggle(
    folders_to_upload,
    "../output/dataset/",
    "RSNA Abdomen Weights 1",
    update_folders=True,
)

In [None]:
# !pip install -qU python-gdcm pydicom pylibjpeg

### Imports

In [None]:
import os
import sys
import glob
import json
import warnings
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from tqdm import tqdm

In [None]:
from util.torch import load_model_weights

from data.dataset import Abdominal2DInfDataset
from data.transforms import get_transfos
from inference.extract_features import predict, Config
from model_zoo.models import define_model

from data.processing import process
from data.dataset import PatientFeatureInfDataset
from model_zoo.models_lvl2 import define_model as define_model_2
from inference.lvl2 import predict as predict_2
from inference.lvl2 import to_sub_format

### Main

In [None]:
# Root folder that is listed to enumerate patients and, below each patient, studies.
DATA_PATH = "../input/train_images/"
# Folder receiving the level-1 prediction arrays (.npy) and the final submission.csv.
SAVE_FOLDER = "../output/tmp/"

In [None]:
BATCH_SIZE = 64  # batch size passed to the level-1 `predict`
BATCH_SIZE_2 = 1024  # batch size passed to the level-2 `predict_2`
USE_FP16 = True  # forwarded as `use_fp16` to both prediction functions
NUM_WORKERS = 2  # forwarded as `num_workers` to both prediction functions
FOLDS = [0]  # fold indices; one `{name}_{fold}.pt` weight file is loaded per entry

## Level 1

In [None]:
# Level-1 experiments to run, as (experiment folder, mode) pairs.
# Each folder is read for a `config.json` and for `{name}_{fold}.pt` weight files.
# NOTE(review): the mode tag ("seg" / "probas") is unpacked but not used by the
# cells of this notebook — confirm whether it is still needed.
exp_folders = [
    ("../logs/2023-09-06/4/", "seg"),
    ("../logs/2023-09-15/22/", "probas"),
]

In [None]:
# Build the level-1 models: `models[i]` is the list of per-fold models for `exp_folders[i]`.
models = []

for exp_folder, mode in exp_folders:
    # Context manager so the config file handle is closed right away.
    with open(exp_folder + "config.json", "r") as f:
        config = Config(json.load(f))

    models_ = []
    for fold in FOLDS:
        # Instantiate a fresh model for every fold. Reusing a single instance would
        # make each `load_model_weights` call overwrite the previous fold's weights
        # (assuming weights are loaded in place — the usual PyTorch behaviour), so
        # `models_` would end up holding several references to the last fold only.
        model = define_model(
            config.name,
            drop_rate=config.drop_rate,
            drop_path_rate=config.drop_path_rate,
            use_gem=config.use_gem,
            replace_pad_conv=config.replace_pad_conv,
            num_classes=config.num_classes,
            num_classes_aux=config.num_classes_aux,
            n_channels=config.n_channels,
            reduce_stride=config.reduce_stride,
            pretrained=False
        )
        model = model.cuda().eval()

        weights = exp_folder + f"{config.name}_{fold}.pt"
        model = load_model_weights(model, weights, verbose=config.local_rank == 0)
        models_.append(model)

    models.append(models_)

In [None]:
# Make sure the output folder exists before any prediction array is saved into it.
os.makedirs(SAVE_FOLDER, exist_ok=True)

# The config and the output name of an experiment do not depend on the study:
# load them once instead of re-reading config.json for every study.
level1_runs = []
for exp_folder, _ in exp_folders:
    with open(exp_folder + "config.json", "r") as f:
        level1_runs.append((
            "_".join(exp_folder.split('/')[-3:-1]),  # "../logs/2023-09-06/4/" -> "2023-09-06_4"
            Config(json.load(f)),
        ))

# One metadata frame per study; they are concatenated in a later cell.
dfs = []
for patient in tqdm(sorted(os.listdir(DATA_PATH))):
    for study in sorted(os.listdir(DATA_PATH + patient)):
        imgs = process(patient, study, data_path=DATA_PATH)

        # The keys of `imgs` are split on "_" into patient / series / frame; the last
        # four characters of the frame part are dropped (presumably a file extension
        # — confirm against `process`).
        df = pd.DataFrame({"path": list(imgs.keys())})
        df['patient'] = df['path'].apply(lambda x: x.split('_')[0])
        df['series'] = df['path'].apply(lambda x: x.split('_')[1])
        df['frame'] = df['path'].apply(lambda x: x.split('_')[2][:-4])
        dfs.append(df)

        for models_list, (exp_name, config) in zip(models, level1_runs):
            dataset = Abdominal2DInfDataset(
                df,
                transforms=get_transfos(augment=False, resize=config.resize),
                # Configs without a `frames_chanel` entry fall back to 0.
                frames_chanel=getattr(config, "frames_chanel", 0),
                imgs=imgs
            )

            preds = []
            for model in models_list:
                pred, _ = predict(
                    model,
                    dataset,
                    config.loss_config,
                    batch_size=BATCH_SIZE,
                    use_fp16=USE_FP16,
                    num_workers=NUM_WORKERS,
                )
                preds.append(pred)

            # Average over the fold models and store one array per (study, experiment).
            np.save(SAVE_FOLDER + f"{study}_{exp_name}.npy", np.mean(preds, 0))

In [None]:
# ref = np.load(exp_folder + "pred_val_0.npy")
# ref = ref[:len(pred)]
# np.abs(ref - pred).max()

In [None]:
# plt.plot(preds[0])
# plt.plot(preds[1])

In [None]:
# Collapse the per-frame rows to one row per (patient, series); the remaining
# columns keep their per-group maximum.
df = (
    pd.concat(dfs, ignore_index=True)
    .groupby(['patient', 'series'])
    .max()
    .reset_index()
)

## Level 2

In [None]:
# Level-2 experiment folders; each one is read for a `config.json` and for
# `{name}_{fold}.pt` weight files.
EXP_FOLDERS_2 = [
    "../logs/2023-09-15/36/"
]

In [None]:
# Level-2 inference: one prediction array per (experiment, fold) is appended to `all_preds`.
all_preds = []

for exp_folder in EXP_FOLDERS_2:
    # Context manager so the config file handle is closed right away.
    with open(exp_folder + "config.json", "r") as f:
        config_2 = Config(json.load(f))

    # Built from the series ids in `df`; `save_folder` points at the folder the
    # level-1 cells wrote to (presumably where the features are read from — confirm
    # against `PatientFeatureInfDataset`).
    dataset = PatientFeatureInfDataset(
        df['series'], config_2.exp_folders, max_len=config_2.max_len, save_folder=SAVE_FOLDER
    )

    model = define_model_2(
        config_2.name,
        ft_dim=config_2.ft_dim,
        layer_dim=config_2.layer_dim,
        n_layers=config_2.n_layers,
        dense_dim=config_2.dense_dim,
        p=config_2.p,
        use_msd=config_2.use_msd,
        num_classes=config_2.num_classes,
        num_classes_aux=config_2.num_classes_aux,
        n_fts=config_2.n_fts,
    )
    model = model.eval().cuda()

    # The level-1 `models` list is intentionally left untouched here, so the level-1
    # cells can still be re-run after this one.
    for fold in FOLDS:
        weights = exp_folder + f"{config_2.name}_{fold}.pt"
        # Read the verbosity flag from this experiment's own config rather than from
        # the level-1 `config` variable left over by earlier cells.
        model = load_model_weights(
            model, weights, verbose=getattr(config_2, "local_rank", 0) == 0
        )

        # Predictions are computed right after loading, so reusing the same model
        # instance across folds is safe in this loop.
        preds = predict_2(
            model,
            dataset,
            config_2.loss_config,
            batch_size=BATCH_SIZE_2,
            use_fp16=USE_FP16,
            num_workers=NUM_WORKERS,
        )
        all_preds.append(preds)

In [None]:
# Ensemble: average the level-2 predictions over every (experiment, fold) run.
preds = np.mean(all_preds, axis=0)

# Attach one `pred_{i}` column per predicted target to the per-series frame.
for i in range(preds.shape[1]):
    df[f'pred_{i}'] = preds[:, i]

# Average the series-level predictions of each patient, then convert the result
# with `to_sub_format`.
dfg = (
    df.drop(columns=['series', 'path', 'frame'])
    .groupby('patient')
    .mean()
    .reset_index()
)
sub = to_sub_format(dfg)

In [None]:
# (np.load(exp_folder + "pred_oof.npy")[0] - preds).max()

In [None]:
# Write the submission into SAVE_FOLDER (without the index column), then display
# it as the cell output.
sub.to_csv(SAVE_FOLDER + "submission.csv", index=False)
sub

Done!