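**About**: This notebook runs the full inference pipeline (segmentation, level-1 models, crop models and level-2 model), writes `submission.csv`, and optionally computes the validation score.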

In [6]:
# Enable IPython's autoreload extension so that edits to imported modules are picked up
# without restarting the kernel.
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [7]:
# Switch the working directory to the project sources; every relative path used later
# in this notebook ("../input/", "../logs/", "../output/") is resolved from here.
# NOTE(review): presumably also what makes the project-local imports below resolvable — confirm.
cd ../src/

/home/tviel/work/kaggle_rsna_lumbar_spine/src


### Imports

In [8]:
import os
import gc
import re
import sys
import cv2
import glob
import json
import torch
import warnings
import numpy as np
import pandas as pd
import torch.nn.functional as F
import matplotlib.pyplot as plt

from tqdm.notebook import tqdm

In [48]:
from util.torch import load_model_weights
from util.plots import plot_mask, add_rect
from util.metrics import rsna_loss

from data.transforms import get_transfos
from data.dataset import CropDataset

from inference.seg import get_crops
from inference.processing import process
from inference.dataset import ImageInfDataset, FeatureInfDataset
from inference.lvl1 import predict, Config

from model_zoo.models import define_model
from model_zoo.models_lvl2 import define_model as define_model_2
from model_zoo.models_seg import define_model as define_model_seg
from model_zoo.models_seg import convert_3d

from params import CLASSES_SEG, MODES, LEVELS_, SEVERITIES

### Params

In [13]:
# --- Run-mode switches ---
# DEBUG: use the train series descriptions and local train/debug images instead of the test set.
DEBUG = True
# EVAL: together with DEBUG, run on the train images of the selected fold and score the result.
EVAL = True

# --- Locations ---
SAVE_FOLDER = "../output/tmp/"  # intermediate crops and per-series predictions are written here
DEBUG_DATA_DIR = ""  # Todo
ROOT_DATA_DIR = "../input/"

# ROOT_DATA_DIR = "/kaggle/??"
# SAVE_FOLDER = "/tmp/"

In [14]:
# Series metadata: the train descriptions are used in debug mode, the test ones otherwise.
meta_file = 'train_series_descriptions.csv' if DEBUG else 'test_series_descriptions.csv'
df_meta = pd.read_csv(ROOT_DATA_DIR + meta_file)
df_meta = df_meta.set_index(["study_id", "series_id"])

# "<orient> <weighting...>" -> first token is the orientation, the first two characters
# of the second token are the weighting.
descriptions = df_meta["series_description"]
df_meta['weighting'] = descriptions.apply(lambda x: x.split()[1][:2])
df_meta['orient'] = descriptions.apply(lambda x: x.split()[0])

# Image folder (and fold assignment when evaluating on the train set).
folds_dict = {}
if not DEBUG:
    DATA_PATH = ROOT_DATA_DIR + "test_images/"
elif EVAL:
    DATA_PATH = ROOT_DATA_DIR + "train_images/"
    FOLDS_FILE = '../input/folds_4.csv' 
    folds_dict = pd.read_csv(FOLDS_FILE).set_index('study_id').to_dict()['fold']
else:
    DATA_PATH = ROOT_DATA_DIR + "debug_images/"


In [15]:
# Data-loading / precision settings shared by every call to `predict`.
NUM_WORKERS = 2
USE_FP16 = True
BATCH_SIZE = 32     # image-based models (level 1 and crops)
BATCH_SIZE_2 = 512  # level-2 model

# Weights suffix: validation fold 0 when debugging, the full-fit checkpoint otherwise.
FOLD = "fullfit_0" if not DEBUG else 0
# Plots and reference comparisons are only produced in debug mode without evaluation.
PLOT = DEBUG and not EVAL

In [16]:
# Level-1 experiments: mode -> (experiment folder, folds whose weights are loaded).
EXP_FOLDERS = {
    "scs": ("../logs/2024-08-04/33/", [FOLD]),
    "nfn": ("../logs/2024-08-05/27/", [FOLD]),
    "ss": ("../logs/2024-08-06/17/", [FOLD]),
}

# Crop experiments, same layout as EXP_FOLDERS.
CROP_EXP_FOLDERS = {
    "scs_crop": ("../logs/2024-08-07/19/", [FOLD]),
    "nfn_crop": ("../logs/2024-08-07/32/", [FOLD]),
}

# Level-2 experiments and the folds to ensemble for each of them.
EXP_FOLDERS_2 = [
    "../logs/2024-08-08/1/",
]
FOLDS_2 = [0] if DEBUG else [0, 1, 2, 3]

# Segmentation experiment.
EXP_FOLDER_3D = "../logs/2024-07-31/25/"

# Sanity check: print, for every feature source a level-2 model was configured with, the folder
# stored in its config next to the folder configured above ('?' when the key is not configured here).
for exp_folder_2 in EXP_FOLDERS_2:
    # Context manager so the config file handle is closed right after parsing.
    with open(exp_folder_2 + "config.json", 'r') as config_file:
        folders = Config(json.load(config_file)).exp_folders
    print("-> Level 2 model:", exp_folder_2)
    for k in folders:
        print(k, folders[k], EXP_FOLDERS.get(k, CROP_EXP_FOLDERS.get(k, ['?']))[0])
    print()

-> Level 2 model: ../logs/2024-08-08/1/
nfn ../logs/2024-08-05/27/ ../logs/2024-08-05/27/
scs ../logs/2024-08-04/33/ ../logs/2024-08-04/33/
ss ../logs/2024-08-06/17/ ../logs/2024-08-06/17/
ss_aux ../logs/2024-08-06/17/ ?
scs_crop ../logs/2024-08-07/19/ ../logs/2024-08-07/19/
nfn_crop ../logs/2024-08-07/32/ ../logs/2024-08-07/32/



In [66]:
# Disabled: one-off upload of every experiment folder used by this notebook
# (level 1, crops, level 2 and segmentation) through `upload_to_kaggle`. Uncomment to run.
# from util.logger import upload_to_kaggle

# folders = [EXP_FOLDERS[k][0] for k in EXP_FOLDERS]
# folders += [CROP_EXP_FOLDERS[k][0] for k in CROP_EXP_FOLDERS]
# folders += EXP_FOLDERS_2 + [EXP_FOLDER_3D]

# upload_to_kaggle(folders, "../output/dataset_1/", "RSNA 2024 Weights 1")

## Seg & Level 1

In [17]:
# Segmentation model: architecture parameters and weights both come from EXP_FOLDER_3D.
# The config is read through a context manager so the file handle is closed after parsing.
with open(EXP_FOLDER_3D + "config.json", "r") as config_file:
    config_seg = Config(json.load(config_file))

model_seg = define_model_seg(
    config_seg.decoder_name,
    config_seg.name,
    num_classes=config_seg.num_classes,
    num_classes_aux=config_seg.num_classes_aux,
    increase_stride=config_seg.increase_stride,
    use_cls=config_seg.use_cls,
    n_channels=config_seg.n_channels,
    use_3d=config_seg.use_3d,
    pretrained=False,  # weights are loaded from the checkpoint right below
)

model_seg = load_model_weights(model_seg, EXP_FOLDER_3D + f"{config_seg.name}_{FOLD}.pt")
model_seg = model_seg.cuda()
# NOTE(review): `.eval()` is intentionally NOT called here; the original author observed
# that it hurts results. The cause is not visible in this notebook — TODO investigate.
# model_seg = model_seg.eval()  # Hurts results ??


 -> Loading encoder weights from ../logs/2024-07-31/25/resnet34d_0.pt



In [18]:
# Level-1 models: mode -> list of models (one per configured fold), all on GPU in eval mode.
models = {}
for mode, (exp_folder, folds) in EXP_FOLDERS.items():
    print(f'- Mode: {mode}')
    # Context manager so the config file handle is closed right after parsing.
    with open(exp_folder + "config.json", "r") as config_file:
        config = Config(json.load(config_file))

    models_ = []
    for fold in folds:
        model = define_model(
            config.name,
            drop_rate=config.drop_rate,
            drop_path_rate=config.drop_path_rate,
            use_gem=config.use_gem,
            num_classes=config.num_classes,
            num_classes_aux=config.num_classes_aux,
            n_channels=config.n_channels,
            reduce_stride=config.reduce_stride,
            # Configs without the attribute fall back to False.
            increase_stride=getattr(config, "increase_stride", False),
            pretrained=False,  # weights are loaded from the checkpoint right below
        )
        model = model.cuda().eval()

        weights = exp_folder + f"{config.name}_{fold}.pt"
        model = load_model_weights(model, weights, verbose=config.local_rank == 0)
        models_.append(model)

    models[mode] = models_

- Mode: scs

 -> Loading encoder weights from ../logs/2024-08-04/33/coat_lite_medium_384_0.pt

- Mode: nfn

 -> Loading encoder weights from ../logs/2024-08-05/27/coatnet_rmlp_2_rw_384_0.pt

- Mode: ss

 -> Loading encoder weights from ../logs/2024-08-06/17/coat_lite_medium_0.pt



In [19]:
def _load_config(exp_folder):
    """Read `config.json` from an experiment folder and wrap it in a `Config`."""
    with open(exp_folder + "config.json", "r") as config_file:
        return Config(json.load(config_file))


# np.save does not create missing directories.
os.makedirs(SAVE_FOLDER, exist_ok=True)

# One config per level-1 mode, read once instead of once per series.
configs = {m: _load_config(EXP_FOLDERS[m][0]) for m in EXP_FOLDERS}

# For every series: segment sagittal volumes and save one crop per disk, then run the
# level-1 model matching the series type and save its predictions.
# `dfs` collects one record per processed series for the later cells.
dfs = []
for study in tqdm(sorted(os.listdir(DATA_PATH))):
    # Only filter by fold when a fold assignment was actually loaded. Without this guard
    # every study is skipped when EVAL is left on outside debug mode (FOLD is then a string).
    if EVAL and folds_dict and folds_dict.get(int(study), 0) != FOLD:
        continue

    for series in sorted(os.listdir(DATA_PATH + study)):
        print("\n-> study", study, '- Series', series)

        imgs, orient, weighting = process(
            study,
            series,
            data_path=DATA_PATH,
            on_gpu=False,
        )

        # Prefer the orientation / weighting from the series descriptions when the series is
        # listed there; otherwise keep the values returned by `process`.
        try:
            weighting, orient = df_meta.loc[(int(study), int(series))].values[1:]
        except (KeyError, ValueError):
            pass

        dfs.append({
            "study_id": study,
            "series_id": series,
            "orient": orient,
            "weighting": weighting,
        })

        print(f'- Orient {orient} - Weighting {weighting}')

        # Segmentation
        if orient == "Sagittal":
            # The second axis is flipped before the model and flipped back on the mask.
            x = imgs[:, ::-1].copy().astype(np.float32)

            with torch.inference_mode():
                x = torch.from_numpy(x).cuda()
                x = F.interpolate(
                    x.unsqueeze(0).unsqueeze(0),
                    config_seg.img_size,
                    mode="trilinear",
                )
                x = (x - x.min()) / (x.max() - x.min())

                mask, _ = model_seg(x)
                # Back to the original volume shape, then per-voxel class index.
                mask = F.interpolate(
                    mask,
                    imgs.shape,
                    mode="trilinear",
                )[0].argmax(0)
            mask = mask.cpu().numpy()[:, ::-1].astype(np.uint8)

            if PLOT:
                # Compare against the reference volume / mask stored on disk.
                img_ref = np.load(f"../input/npy/{study}_{series}.npy")
                mask_ref = np.load(f"../input/train_segs/{study}_{series}.npy")
                delta = (np.abs(mask - mask_ref) > 0).mean()
                print('Mask delta:', delta)
                delta = (np.abs(imgs - img_ref) > 0).mean()
                print('Img delta:', delta)

                mid_frame = len(imgs) // 2
                plt.figure(figsize=(4, 4))
                plot_mask(imgs[mid_frame], mask[mid_frame])
                plt.show()

            # Cropping: one crop per disk class, saved as <study>_<series>_<disk>.npy
            disk_crops = {}
            for disk in CLASSES_SEG[5:]:
                x0, x1, y0, y1, z0, z1 = get_crops(mask, disk=disk)
                disk_crops[disk] = (x0, x1, y0, y1, z0, z1)

                img_crop = imgs[x0: x1, y0:y1, z0:z1]

                d = re.sub('/', '_', disk.lower())
                np.save(SAVE_FOLDER + f'{study}_{series}_{d}.npy', img_crop.copy())

            if PLOT:
                plt.figure(figsize=(8, 8))
                plot_mask(imgs[mid_frame], mask[mid_frame])

                for d, disk in enumerate(disk_crops):
                    x0, x1, y0, y1, z0, z1 = disk_crops[disk]
                    add_rect(x0, x1, y0, y1, z0, z1, mid_frame, col="skyblue")
                    plt.text(10, (d + 1) * 20, f'{disk} disk center frame: {int((x1 + x0) / 2)}', color="skyblue")
                plt.show()

        # Cls
        mode = MODES[weighting + "_" + orient]
        models_list = models[mode]
        config = configs[mode]

        # Rescale the volume to [0, 255] uint8 before building the dataset.
        imgs = (imgs - imgs.min()) / (imgs.max() - imgs.min()) * 255
        imgs = imgs.astype(np.uint8)

        transforms = get_transfos(augment=False, resize=config.resize, crop=config.crop)
        dataset = ImageInfDataset(
            imgs,
            transforms=transforms,
            frames_chanel=getattr(config, "frames_chanel", 0),
            n_frames=getattr(config, "n_frames", 1),
            stride=getattr(config, "stride", 1),
        )

        # Average the predictions of every model loaded for this mode.
        preds = []
        for model in models_list:
            pred, _ = predict(
                model,
                dataset,
                config.loss_config,
                batch_size=BATCH_SIZE,
                use_fp16=USE_FP16,
                num_workers=NUM_WORKERS,
            )
            preds.append(pred)
        preds = np.mean(preds, 0)

        if PLOT:
            plt.figure(figsize=(8, 5))
            plt.plot(preds[:, :, 0])
            plt.show()

        np.save(SAVE_FOLDER + f"{study}_{series}_{mode}.npy", preds)

# Release the models and large buffers. `pop` instead of `del` because `x` / `mask` only
# exist when at least one sagittal series was processed (and `imgs` / `dataset` only when
# at least one series was). `models_`, `models_list` and `model` still reference the
# level-1 models, so they are dropped too.
for _name in ("model_seg", "models", "models_", "models_list", "model", "imgs", "x", "mask", "dataset"):
    globals().pop(_name, None)
torch.cuda.empty_cache()
gc.collect()

  0%|          | 0/1975 [00:00<?, ?it/s]


-> study 1004726367 - Series 1709080005
- Orient Sagittal - Weighting T2

-> study 1004726367 - Series 2526352865
- Orient Sagittal - Weighting T1

-> study 1004726367 - Series 992525108
- Orient Axial - Weighting T2

-> study 1047914296 - Series 2118341625
- Orient Sagittal - Weighting T1

-> study 1047914296 - Series 2327425347
- Orient Sagittal - Weighting T2

-> study 1047914296 - Series 3561285461
- Orient Axial - Weighting T2

-> study 1084486898 - Series 2349278505
- Orient Sagittal - Weighting T1

-> study 1084486898 - Series 2387875902
- Orient Sagittal - Weighting T2

-> study 1084486898 - Series 3897408956
- Orient Axial - Weighting T2

-> study 1085426528 - Series 1518511736
- Orient Sagittal - Weighting T1

-> study 1085426528 - Series 211738165
- Orient Sagittal - Weighting T2

-> study 1085426528 - Series 2303722192
- Orient Axial - Weighting T2

-> study 1093392148 - Series 2145460681
- Orient Sagittal - Weighting T1

-> study 1093392148 - Series 2224428583
- Orient Ax

5871

In [21]:
# Debug only: compare the level-1 predictions saved by this notebook with the reference
# predictions stored in each experiment folder, when such a reference exists.
if DEBUG and not EVAL:
    for study in sorted(os.listdir(DATA_PATH)):
        for series in sorted(os.listdir(DATA_PATH + study)):
            print("-> study", study, '- Series', series)
            for mode in EXP_FOLDERS:
                exp_folder = EXP_FOLDERS[mode][0]

                # A series only has a reference for the mode(s) it was predicted with; check
                # explicitly instead of swallowing every exception raised by np.load.
                ref_path = exp_folder + f'preds/{study}_{series}.npy'
                if not os.path.exists(ref_path):
                    continue
                preds_ref = np.load(ref_path)

                preds = np.load(SAVE_FOLDER + f'{study}_{series}_{mode}.npy')

                assert preds.shape == preds_ref.shape

                # Largest squared difference over all entries.
                delta = ((preds - preds_ref) ** 2).max()
                print(f"{mode} delta :", delta)

## Crop models

In [22]:
# One row per (series, level): the per-series records are expanded over LEVELS_ and each
# row points to the crop file expected in SAVE_FOLDER.
df = pd.DataFrame(dfs)

series_type = df["weighting"] + "_" + df["orient"]
df = df.assign(
    mode=series_type.map(MODES) + "_crop",
    target=0,  # placeholder value
    coords=0,  # placeholder value
    level=[LEVELS_] * len(df),
)

df = df.explode('level').reset_index(drop=True)
df['img_path'] = SAVE_FOLDER + df['study_id'] + "_" + df['series_id'] + "_" + df['level'] + ".npy"

In [23]:
# Debug only: compare every crop written by this notebook with its reference crop,
# for the rows whose reference file exists.
if DEBUG and not EVAL:
    df['img_path_ref'] = "../input/crops_fix/" + df['study_id'] + "_" + df['series_id'] + "_" + df['level'] + ".npy"

    for path, path_ref in zip(df['img_path'], df['img_path_ref']):
        if not os.path.exists(path_ref):
            continue

        crop_ref = np.load(path_ref)
        crop = np.load(path)

        # File name without its ".npy" extension.
        crop_name = path.split('/')[-1][:-4]
        print(f"Crop {crop_name} delta:\t", ((crop_ref - crop) ** 2).max())

In [24]:
# Crop models: for each crop experiment, predict on every crop of the matching series type.
# `crop_fts[mode]` maps "<study>_<series>_<level>_<side>" to the fold-averaged prediction.
crop_fts = {}
for mode, (exp_folder, folds) in CROP_EXP_FOLDERS.items():
    print(f'- Model {mode} - {exp_folder}')

    # Context manager so the config file handle is closed right after parsing.
    with open(exp_folder + "config.json", "r") as config_file:
        config = Config(json.load(config_file))

    df_mode = df[df['mode'] == mode].reset_index(drop=True)
    df_mode['side'] = "Center"

    # Every mode except "scs" gets one row per side.
    if "scs" not in mode:
        df_mode['side'] = "Right"
        df_mode_left = df_mode.copy()
        df_mode_left['side'] = "Left"
        df_mode = pd.concat([df_mode, df_mode_left]).reset_index(drop=True)
        df_mode = df_mode.sort_values(['study_id', 'series_id', 'side', 'level'], ignore_index=True)

    transfos = get_transfos(augment=False, resize=config.resize, crop=config.crop)
    dataset = CropDataset(
        df_mode,
        targets="target",
        transforms=transfos,
        frames_chanel=config.frames_chanel,
        n_frames=config.n_frames,
        stride=config.stride,
        train=False,
        load_in_ram=False,
    )

    # The architecture is built once; weights are swapped in for each fold below.
    model = define_model(
        config.name,
        drop_rate=config.drop_rate,
        drop_path_rate=config.drop_path_rate,
        use_gem=config.use_gem,
        head_3d=config.head_3d,
        n_frames=config.n_frames,
        num_classes=config.num_classes,
        num_classes_aux=config.num_classes_aux,
        n_channels=config.n_channels,
        reduce_stride=config.reduce_stride,
        pretrained=False
    )
    model = model.cuda().eval()

    preds = []
    for fold in folds:
        weights = exp_folder + f"{config.name}_{fold}.pt"
        model = load_model_weights(model, weights, verbose=1)

        pred, _ = predict(
            model,
            dataset,
            config.loss_config,
            batch_size=BATCH_SIZE,
            use_fp16=USE_FP16,
            num_workers=NUM_WORKERS,
        )
        preds.append(pred)

    preds = np.mean(preds, 0)

    if PLOT:
        # Compare with the reference predictions of the experiment, reordered by (side, level)
        # to match the row order of `df_mode`.
        df_ref = pd.read_csv(exp_folder + "df_val_0.csv").head(len(preds))
        order_ref = df_ref.sort_values(['side', 'level']).index.values
        preds_ref = np.load(exp_folder + "pred_inf_0.npy")[:len(preds)][order_ref]

        # print(df_ref.iloc[order_ref]['img_path'].values)
        # print(df_mode['img_path'].values)

        plt.figure(figsize=(8, 4))
        plt.subplot(1, 2, 1)
        plt.plot(preds)
        plt.subplot(1, 2, 2)
        plt.plot(preds_ref)
        plt.show()

        delta = (np.abs(preds - preds_ref)).max()
        print(preds.shape, preds_ref.shape)
        print(f"{mode} delta:", delta)

    idx = df_mode[['study_id', 'series_id', 'level', 'side']].values.tolist()
    idx = ['_'.join(i) for i in idx]
    crop_fts[mode] = dict(zip(idx, preds))

# crop_fts = np.array(crop_fts) # n_models x 3*n_studies x n_classes
# np.save(SAVE_FOLDER + "crop_fts.npy", crop_fts)


 -> Loading encoder weights from ../logs/2024-08-07/19/coatnet_1_rw_224_0.pt


 -> Loading encoder weights from ../logs/2024-08-07/32/coatnet_1_rw_224_0.pt



## Level 2

In [25]:
# Level-2 input table: one row per series, labelled with its level-1 mode and ordered
# by (study_id, series_id).
df['series_description'] = (df['weighting'] + "_" + df['orient']).map(MODES)

series_columns = ["study_id", "series_id", "series_description"]
df_2 = (
    pd.DataFrame(df[series_columns].drop_duplicates(keep="first"))
    .reset_index(drop=True)
    .sort_values(["study_id", "series_id"], ignore_index=True)
)

In [32]:
# Level-2 inference: for every level-2 experiment and every fold in FOLDS_2, predict from
# the saved level-1 outputs and crop features. `all_preds` holds one array per (experiment, fold).
all_preds = []

for exp_folder in EXP_FOLDERS_2:
    # Context manager so the config file handle is closed right after parsing.
    with open(exp_folder + "config.json", "r") as config_file:
        config_2 = Config(json.load(config_file))

    dataset = FeatureInfDataset(
        df_2,
        config_2.exp_folders,
        crop_fts,
        resize=config_2.resize,
        save_folder=SAVE_FOLDER,
    )

    # The architecture is built once; weights are swapped in for each fold below.
    model = define_model_2(
        config_2.name,
        ft_dim=config_2.ft_dim,
        layer_dim=config_2.layer_dim,
        dense_dim=config_2.dense_dim,
        p=config_2.p,
        resize=config_2.resize,
        num_classes=config_2.num_classes,
        num_classes_aux=config_2.num_classes_aux,
    )
    model = model.eval().cuda()

    for fold in FOLDS_2:
        weights = exp_folder + f"{config_2.name}_{fold}.pt"
        # Use this experiment's own config; `config` is a leftover from the crop-model cell.
        verbose = getattr(config_2, "local_rank", 0) == 0
        model = load_model_weights(model, weights, verbose=verbose)

        preds, _ = predict(
            model,
            dataset,
            config_2.loss_config,
            batch_size=BATCH_SIZE_2,
            use_fp16=USE_FP16,
            num_workers=NUM_WORKERS,
        )

        if DEBUG and not EVAL:
            # Reference taken from the experiment being evaluated (not always the first one)
            # and truncated to the number of predictions, so that a longer `preds` cannot be
            # silently broadcast against a single reference row.
            preds_ref = np.load(exp_folder + f"pred_val_{fold}.npy")[:len(preds)]
            delta = np.abs(preds - preds_ref).max()
            print(f"Model {exp_folder} delta:", delta)

        all_preds.append(preds)


 -> Loading encoder weights from ../logs/2024-08-08/1/baseline_0.pt



In [33]:
# Ensemble: average over every (experiment, fold) prediction, then store as float64.
stacked_preds = np.asarray(all_preds)
preds = stacked_preds.mean(axis=0).astype(np.float64)

In [64]:
# Build the submission: one row per (study, target) with the three severity probabilities.
# Study ids are read positionally (`.values`) so the result does not depend on the index
# labels of `dataset.df`.
study_ids = dataset.df["study_id"].astype(int).values
assert len(study_ids) == len(preds), "predictions and studies are not aligned"

rows = [
    {
        "row_id": f'{study_id}_{injury}',
        "normal_mild": preds[i, c, 0],
        "moderate": preds[i, c, 1],
        "severe": preds[i, c, 2],
    }
    for i, study_id in enumerate(study_ids)
    for c, injury in enumerate(config_2.targets)
]

sub = pd.DataFrame(rows)
sub.to_csv("submission.csv", index=False)
sub.head(25)

Unnamed: 0,row_id,normal_mild,moderate,severe
0,1004726367_spinal_canal_stenosis_l1_l2,0.998535,0.000877,0.000726
1,1004726367_spinal_canal_stenosis_l2_l3,0.994629,0.00523,0.000186
2,1004726367_spinal_canal_stenosis_l3_l4,0.997559,0.002417,7.5e-05
3,1004726367_spinal_canal_stenosis_l4_l5,0.995117,0.00473,0.000192
4,1004726367_spinal_canal_stenosis_l5_s1,0.991699,0.007202,0.001082
5,1004726367_left_neural_foraminal_narrowing_l1_l2,0.99707,0.002972,0.000166
6,1004726367_left_neural_foraminal_narrowing_l2_l3,0.989258,0.010803,0.00013
7,1004726367_left_neural_foraminal_narrowing_l3_l4,0.920898,0.077576,0.001429
8,1004726367_left_neural_foraminal_narrowing_l4_l5,0.870605,0.124451,0.005127
9,1004726367_left_neural_foraminal_narrowing_l5_s1,0.845703,0.132812,0.021622


In [63]:
# Evaluation: score the averaged level-2 predictions against the training labels.
if EVAL:
    y = pd.read_csv(ROOT_DATA_DIR + "train.csv")

    # Encode severities as 0 / 1 / 2; missing or unknown labels become -1.
    severity_to_label = dict(zip(SEVERITIES, [0, 1, 2]))
    for c in y.columns[1:]:
        y[c] = y[c].map(severity_to_label).fillna(-1)
    y = y.astype(int)

    # Start from the study ids of the inference dataset (same row order as `preds`).
    # The submission frame `sub` has no `study_id` column, so merging it with the labels
    # fails when the notebook is run top to bottom.
    df_val = dataset.df[["study_id"]].astype(int).merge(y, how="left", on="study_id")

    avg_loss, losses = rsna_loss(df_val[config_2.targets].values, preds)

    for k, v in losses.items():
        print(f"- {k}_loss\t: {v:.3f}")

    print(f'\n -> CV Score : {avg_loss :.3f}')

- scs_loss	: 0.344
- nfn_loss	: 0.516
- ss_loss	: 0.639
- any_loss	: 0.375

 -> CV Score : 0.468


Done ! 