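**About**: This notebook validates trained models: it loads the saved per-fold validation predictions, reports AUC and `rsna_loss` scores, and plots predictions for individual series.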

In [None]:
# Enable IPython's autoreload extension in mode 2, so imported modules are
# reloaded before each cell execution and edits to the project code are picked up.
%load_ext autoreload
%autoreload 2

In [None]:
# Move into the source directory (presumably so the `params`, `util`, `data` and
# `inference` imports below resolve — verify against the repo layout).
# NOTE(review): relative `cd` is not idempotent; re-running this cell moves again.
cd ../src/

## Initialization

### Imports

In [None]:
import os
import torch

print(torch.__version__)

# Restrict this process to the first GPU; set before the first CUDA query below.
os.environ['CUDA_VISIBLE_DEVICES'] = "0"

# Report the accelerator name, falling back to "cpu" instead of raising when no
# CUDA device is available (the cells below only read saved predictions).
device = torch.cuda.get_device_name(0) if torch.cuda.is_available() else "cpu"
print(device)

In [None]:
import os
import re
import cv2
import sys
import glob
import json
import timm
import torch
import operator
import warnings
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

from tqdm import tqdm
from sklearn.metrics import *
from collections import Counter
from numerize.numerize import numerize

# Widen pandas' console rendering so wide prediction tables stay readable.
pd.options.display.width = 500
pd.options.display.max_colwidth = 100
pd.options.display.max_columns = 30

In [None]:
from params import *
from util.plots import *
from util.metrics import *

from data.dataset import AbdominalDataset
from data.transforms import get_transfos
from data.preparation import prepare_data
from util.logger import upload_to_kaggle

from inference.extract_features import kfold_inference, Config

### Data

In [None]:
# Load the patient-level and image-level tables.
# `with_seg=True` presumably adds the per-image segmentation predictions
# (pred_bowel, pred_liver, ...) that later cells multiply with — TODO confirm in data.preparation.
df_patient, df_img = prepare_data(DATA_PATH, with_seg=True)
# NOTE(review): df_seg is not referenced by any active cell visible in this notebook.
df_seg = pd.read_csv('../input/df_seg.csv')

In [None]:
# Attach the 4-fold split to both tables when prepare_data did not provide it.
# The merges use pandas' default join keys (all columns shared with `folds`).
if "fold" not in df_patient:
    folds = pd.read_csv(DATA_PATH + "folds_4.csv")
    df_img, df_patient = df_img.merge(folds), df_patient.merge(folds)

### Extract features

In [None]:
# Options consumed only by the kfold_inference call, which is currently commented out.
USE_FP16 = True
SAVE = True

# Experiment folder the per-fold `pred_val_{fold}.npy` files are read from.
EXP_FOLDER = "../logs/2023-09-19/7//"

In [None]:
# Disabled: run inference to obtain `preds` directly. The cells below load the
# saved `pred_val_{fold}.npy` files from EXP_FOLDER instead.
# preds = kfold_inference(df_patient, df_img, EXP_FOLDER, use_fp16=USE_FP16, save=SAVE)

In [None]:
# Fold ids to evaluate; also used to build the prediction file names.
FOLDS = [0, 1, 2, 3]

In [None]:
# One validation-prediction array per fold, in the same order as FOLDS.
preds = [np.load(EXP_FOLDER + f"pred_val_{fold}.npy") for fold in FOLDS]

### Eval

In [None]:
# Work on a copy so the prediction columns added below do not modify df_img.
df_val = df_img.copy()

In [None]:
# Write the out-of-fold predictions into df_val, one column per model output.
#
# Column layout of each prediction array (as implied by the indexing below):
#   - image-level targets are read from column `i` (their position in PATIENT_TARGETS);
#   - when an array has 11 columns, every other target takes three consecutive
#     columns (healthy / low / high) starting at column 2.
# NOTE(review): the `2 + (i - 2) * 3` offset assumes exactly two image-level
# targets listed first in PATIENT_TARGETS — confirm against params.

# Placeholder column; no active cell visible in this notebook reads it.
df_val['pred'] = -1

grades = ['healthy', 'low', "high"]

for i, tgt in enumerate(PATIENT_TARGETS):
    is_image_target = tgt in IMAGE_TARGETS

    # Create the destination columns as float (-1.0 marks "not filled") so the
    # float predictions assigned below do not hit an int column.
    if is_image_target:
        df_val[f"pred_{tgt}"] = -1.0
    elif preds[0].shape[-1] == 11:
        for grade in grades:
            df_val[f"pred_{tgt}_{grade}"] = -1.0

    # Pair every fold id with its own prediction array instead of indexing
    # `preds` by fold id, which is only correct when FOLDS == [0, 1, 2, ...].
    for fold, fold_preds in zip(FOLDS, preds):
        in_fold = df_val['fold'] == fold

        if is_image_target:
            df_val.loc[in_fold, f"pred_{tgt}"] = fold_preds[:, i]
        elif fold_preds.shape[-1] == 11:
            for j, grade in enumerate(grades):
                df_val.loc[in_fold, f"pred_{tgt}_{grade}"] = fold_preds[:, 2 + (i - 2) * 3 + j]

# df_val = df_val[df_val['fold'] == 0]

In [None]:
# Image-level ROC AUC per binary target; any label above 0 counts as positive.
for tgt in IMAGE_TARGETS:
    y_true = df_val[tgt] > 0
    y_score = df_val[f"pred_{tgt}"]
    auc = roc_auc_score(y_true, y_score)

    print(f'- {tgt} auc : {auc:.3f}')

In [None]:
USE_SEG = True

# Scale the injury probabilities by the matching organ score (`pred_<organ>`)
# and rebuild each "healthy" probability as the remainder.
# NOTE(review): the in-place `*=` is not idempotent — re-running this cell scales again.
if USE_SEG:
    df_val["pred_bowel_injury"] *= df_val["pred_bowel"]

    for organ in ("kidney", "liver", "spleen"):
        organ_score = df_val[f"pred_{organ}"]
        df_val[f"pred_{organ}_low"] *= organ_score
        df_val[f"pred_{organ}_high"] *= organ_score
        df_val[f"pred_{organ}_healthy"] = 1 - df_val[f"pred_{organ}_low"] - df_val[f"pred_{organ}_high"]

In [None]:
# Same image-level ROC AUC report, recomputed on the current df_val columns.
for tgt in IMAGE_TARGETS:
    y_true = df_val[tgt] > 0
    y_score = df_val[f"pred_{tgt}"]
    auc = roc_auc_score(y_true, y_score)

    print(f'- {tgt} auc : {auc:.3f}')

In [None]:
# Plot per-frame predictions for one randomly chosen series that has an injury.
# NOTE(review): the choice is unseeded, so each run shows a different series.
# series = np.random.choice(df_val[df_val['bowel_injury'] == 1]['series'].unique())
series = np.random.choice(df_val[df_val['any_injury'] == 1]['series'].unique())
# series = 41840
print(f'Series {series}')
df_series = df_val[df_val['series'] == series].reset_index(drop=True)

plt.figure(figsize=(15, 10))

# Top panel: injury probability per frame. For the "healthy" columns the curve
# shown is 1 - p(healthy).
plt.subplot(2, 1, 1)

# Same palette as the other plots in this notebook.
colors = ['#8ed3c7', '#fb8072', '#b3de68', '#bc80bd', '#ffed6f']
for i, col in enumerate(["pred_liver_healthy", "pred_spleen_healthy", 'pred_bowel_injury', "pred_kidney_healthy", 'pred_extravasation_injury']):
    curve = df_series[col] if "injury" in col else 1 - df_series[col]
    # Drop the 7-character suffix, then any underscore left over from "_healthy".
    plt.plot(curve, label=col[:-7].rstrip("_"), color=colors[i])

# Mark the frames labelled positive just above the probability range (y = 1.1).
# The y array is sized from the number of hits, so it works for any label dtype.
for col in ['bowel_injury', 'extravasation_injury']:
    frames = np.flatnonzero((df_series[col] == 1).values)
    plt.scatter(frames, np.full(len(frames), 1.1), label=col, marker=".")

# Title shows the patient-level labels of the patient owning this series.
tgts = df_patient[df_patient['patient_id'] == df_series['patient_id'][0]][PATIENT_TARGETS].values[0].tolist()
plt.title(f'{dict(zip(PATIENT_TARGETS, tgts))}')
plt.legend()

# Bottom panel: the per-frame organ scores used for the weighting.
plt.subplot(2, 1, 2)

colors = ['#8ed3c7', '#fb8072', '#b3de68', '#bc80bd', '#ffed6f']
for i, col in enumerate(["pred_liver", "pred_spleen", "pred_bowel", "pred_kidney"]):
    plt.plot(df_series[col], label=col, color=colors[i])
plt.title('Seg')

plt.legend()
plt.show()

In [None]:
# Separator cell: the adjacent string literals concatenate, so this only displays '###'.
"###" ""

In [None]:
AGG = "mean"

# Collapse image-level predictions to one row per patient, then attach labels.
# The prediction columns are taken to be the trailing N columns of df_val,
# with N the width of the prediction arrays.
pred_cols = list(df_val.columns)[-preds[0].shape[-1]:]
per_patient = df_val[['patient_id'] + pred_cols].groupby('patient_id')

# "max" takes the per-patient maximum; any other AGG value falls back to the mean.
df_val_patient = per_patient.max() if AGG == "max" else per_patient.mean()
df_val_patient = df_val_patient.merge(df_patient, on="patient_id", how="left")

if AGG == "max":
    # Rebuild "healthy" as the remainder of the two injury-grade probabilities.
    for tgt in ['kidney', 'liver', 'spleen']:
        df_val_patient[f"pred_{tgt}_healthy"] = 1 - df_val_patient[f"pred_{tgt}_low"] - df_val_patient[f"pred_{tgt}_high"]

In [None]:
# Patient-level ROC AUC.
#   - "*_injury" targets: positive when the label is > 0, scored with pred_<tgt>.
#   - other targets: "healthy" (label <= 0) is the positive class, scored with
#     pred_<tgt>_healthy.
for tgt in PATIENT_TARGETS:
    if "injury" in tgt:
        auc = roc_auc_score(df_val_patient[tgt] > 0, df_val_patient[f"pred_{tgt}"])
    else:
        try:
            auc = roc_auc_score(df_val_patient[tgt] <= 0, df_val_patient[f"pred_{tgt}_healthy"])
        except (KeyError, ValueError) as err:
            # Missing pred_<tgt>_healthy column or an undefined AUC: report it
            # and skip, rather than printing the previous target's value.
            print(f'- {tgt} auc : n/a ({type(err).__name__}: {err})')
            continue

    print(f'- {tgt} auc : {auc:.3f}')

In [None]:
# Greedy per-column rescaling of the patient-level predictions.
# For each prediction column, try every (factor, shift) pair on a grid, score the
# affine-transformed and clipped column with rsna_loss, and keep the pair with the
# lowest average loss. The chosen transform is applied in place to `pred_oof_`
# before moving on to the next column, so later columns see earlier changes.
# NOTE(review): the coefficients are fitted and scored on the same data, so the
# printed losses are presumably optimistic.
pred_oof = df_val_patient[[p for p in df_val_patient.columns if "pred" in p]].values
df_oof = df_val_patient.copy()

# Working copy that accumulates the selected transforms; pred_oof stays untouched.
pred_oof_ = pred_oof.copy()
losses, avg_loss = rsna_loss(pred_oof, df_oof)
# NOTE(review): best_score is written here and in the loop but never read in the visible cells.
best_score = avg_loss

# Two full passes over all columns.
for _ in range(2):
    # Coefficients selected during this pass, one (factor, shift) per column.
    factors = []
    for i in range(pred_oof.shape[1]):
        # Maps (factor, shift) -> average loss with only column i modified.
        scores = {}
        # Factors start at 0.5 with step 0.1 (stop value 1.5 excluded); shifts are -0.1, 0, 0.1.
        for factor in np.round(np.arange(0.5, 1.5, 0.1), 2):
            for shift in np.round(np.arange(-0.1, 0.11, 0.1), 2):
#             for shift in [-0.1, 0, 0.1]:
                pred_oof_r = pred_oof_.copy()
                pred_oof_r[:, i] = pred_oof_r[:, i] * factor + shift
                # Keep values strictly inside (0, 1).
                pred_oof_r[:, i] = np.clip(pred_oof_r[:, i], 0.00001, 0.99999)

                losses, avg_loss = rsna_loss(pred_oof_r, df_oof)
                scores[(factor, shift)] = avg_loss

    #     print(scores)
        # Lowest-loss pair; it is applied even if it does not beat the pre-search loss.
        best_coefs, best_loss = min(scores.items(), key=operator.itemgetter(1))
        pred_oof_[:, i] = np.clip(pred_oof_[:, i] * best_coefs[0] + best_coefs[1], 0.00001, 0.99999)
        best_score = best_loss
        print(f'{i} - {best_coefs}  -  {best_loss :.3f}')
        factors.append(best_coefs)
        
    print("\n", factors, "\n")


In [None]:
# Disabled debug output: per-column min / max of the rescaled predictions.
# print(pred_oof_.min(0))
# print(pred_oof_.max(0))

In [None]:
# try:
losses, avg_loss = rsna_loss(df_val_patient[[p for p in df_val_patient.columns if "pred" in p]].values, df_val_patient)
# except:
#     losses, avg_loss = rsna_loss(df_val_patient[["pred_bowel_injury", "pred_extravasation_injury"]].values, df_val_patient)

for k, v in losses.items():
    print(f"- {k.split('_')[0][:8]} loss\t: {v:.3f}")
    
print(f'\n -> CV Score : {avg_loss :.3f}')

In [None]:
# Constant-prediction baseline: every patient gets the same 11 probabilities,
# scored with the same loss for comparison with the model.
baseline_row = [
    0.04, 0.3,
    0.6, 0.05, 0.05,
    0.4, 0.07, 0.03,
    0.3, 0.04, 0.07,
]
# Shape (n_patients, 11): the row repeated once per patient.
dummy = np.tile(baseline_row, (len(df_val_patient), 1))
losses, avg_loss = rsna_loss(dummy, df_val_patient)

for loss_name, loss_value in losses.items():
    short_name = loss_name.split('_')[0][:8]
    print(f"- {short_name} loss\t: {loss_value:.3f}")

print(f'\n -> CV Score : {avg_loss :.3f}')

## Seg

In [None]:
# Reload the tables with `with_seg=False`; this overwrites the df_patient / df_img
# used by the cells above.
df_patient, df_img = prepare_data(DATA_PATH, with_seg=False)
# NOTE(review): df_seg is only referenced by a commented-out line further down.
df_seg = pd.read_csv('../input/df_seg.csv')

In [None]:
# Attach the 4-fold split to both tables when prepare_data did not provide it.
# The merges use pandas' default join keys (all columns shared with `folds`).
if "fold" not in df_patient:
    folds = pd.read_csv(DATA_PATH + "folds_4.csv")
    df_img, df_patient = df_img.merge(folds), df_patient.merge(folds)

In [None]:
# Switch to the segmentation experiment; this overwrites EXP_FOLDER, FOLDS and
# preds from the first part of the notebook.
EXP_FOLDER = "../logs/2023-09-06/4/"  # seg
FOLDS = [0, 1, 2, 3]
# One validation-prediction array per fold, in the same order as FOLDS.
preds = [np.load(EXP_FOLDER + f"pred_val_{fold}.npy") for fold in FOLDS]

In [None]:
# Fresh copy of the image table to receive the segmentation prediction columns.
df_val = df_img.copy()

In [None]:
# Write the out-of-fold segmentation scores into df_val: prediction column `i`
# goes to `pred_<last "_"-separated token of SEG_TARGETS[i]>`.
for i, tgt in enumerate(SEG_TARGETS):
    pred_col = f"pred_{tgt.split('_')[-1]}"

    # Float sentinel (-1.0 = "not filled") so the float scores assigned below
    # do not hit an int column.
    df_val[pred_col] = -1.0

    # Pair every fold id with its own array instead of indexing `preds` by fold
    # id, which is only correct when FOLDS == [0, 1, 2, ...].
    for fold, fold_preds in zip(FOLDS, preds):
        df_val.loc[df_val['fold'] == fold, pred_col] = fold_preds[:, i]

# df_val = df_val[df_val['fold'] == 0]

In [None]:
# Pick the series to inspect (hard-coded id; alternatives kept as comments).
# series = 21057
series = 51678
# series = np.random.choice(df_seg['series'])
# Rows of that series, re-indexed from 0 so positional and label lookups agree below.
df_series = df_val[df_val['series'] == series].reset_index(drop=True)

In [None]:
# Per-frame segmentation scores for the selected series.
colors = ['#8ed3c7', '#fb8072', '#b3de68', '#bc80bd', '#ffed6f']

plt.figure(figsize=(15, 5))

# NOTE(review): assumes the last five df_val columns are the prediction columns
# added by the fill cell above — holds only if nothing was appended since.
for i, col in enumerate(df_val.columns[-5:]):
    plt.plot(df_series[col], label=col, color=colors[i])

# Mark the frames labelled positive just above the score range (y = 1.1).
# The y array is sized from the number of hits, so it works for any label dtype.
for col in ['bowel_injury', 'extravasation_injury']:
    frames = np.flatnonzero((df_series[col] == 1).values)
    plt.scatter(frames, np.full(len(frames), 1.1), label=col, marker=".")

plt.legend()
plt.show()

In [None]:
# Display the patient-level labels of the patient that owns the selected series.
df_patient[df_patient['patient_id'] == df_series['patient_id'][0]][["bowel_injury", "extravasation_injury", "kidney", "liver", "spleen"]]

In [None]:
# Disabled variant of the frame grid (single row of n frames); kept for reference.
# n = 5
# plt.figure(figsize=(40, 8))

# for i, idx in enumerate(np.linspace(0, len(df_series), n + 1, dtype=int)[:-1]):
# # for i, idx in enumerate([12, 14, 16, 18, 20]):
#     img = cv2.imread(df_series['path'].values[idx], 0)
    
#     mask = None
#     mask_path = re.sub("imgs", "segs", df_series['path'].values[idx])
#     if os.path.exists(mask_path):
#         mask = cv2.imread(mask_path, 0)

#     pred = df_series.values[idx][-5:].astype(float)

#     plt.subplot(1, n, i + 1)
    
#     if mask is not None:
#         plot_mask(img, mask)
#     else:
#         plt.imshow(img, cmap="gray")
#         plt.axis(False)
        
#     for i, p in enumerate(pred):
#         plt.text(10, 10 + i * 20, f"{SEG_TARGETS[i].split('_')[-1]} - {p :.2f}", c=colors[i], verticalalignment='top')
        
#     extrav = int(df_series['extravasation_injury'][idx])
#     bowel = int(df_series['bowel_injury'][idx])
#     plt.title(f"Frame {idx} - extravasation: {extrav} - bowel injury: {bowel}")
    
# plt.show()

In [None]:
# Show n evenly spaced frames of the selected series in a 2-row grid. Each frame
# has its mask overlaid when a mask file exists, the five scores printed on it,
# and its image-level labels in the title.
n = 10
n_cols = -(-n // 2)  # ceil(n / 2), so an odd n still fits in the two rows
plt.figure(figsize=(40, 14))

# Materialise the row values once instead of on every iteration.
series_values = df_series.values

for i, idx in enumerate(np.linspace(0, len(df_series), n + 1, dtype=int)[:-1]):
    img = cv2.imread(df_series['path'].values[idx], 0)

    # The mask path is the image path with "imgs" replaced by "segs".
    mask = None
    mask_path = re.sub("imgs", "segs", df_series['path'].values[idx])
    if os.path.exists(mask_path):
        mask = cv2.imread(mask_path, 0)

    # NOTE(review): assumes the last five columns of df_series are the
    # segmentation predictions, in SEG_TARGETS order.
    pred = series_values[idx][-5:].astype(float)

    plt.subplot(2, n_cols, i + 1)

    if mask is not None:
        plot_mask(img, mask)
    else:
        plt.imshow(img, cmap="gray")
        plt.axis(False)

    # Separate index name so the subplot index `i` is not shadowed.
    for k, p in enumerate(pred):
        plt.text(10, 10 + k * 20, f"{SEG_TARGETS[k].split('_')[-1]} - {p :.2f}", c=colors[k], verticalalignment='top')

    extrav = int(df_series['extravasation_injury'][idx])
    bowel = int(df_series['bowel_injury'][idx])
    plt.title(f"Frame {idx} - extravasation: {extrav} - bowel injury: {bowel}")

plt.show()

In [None]:
# Disabled export: merge the left/right kidney scores into one `pred_kidney`
# column (row-wise max) and save the image table with the segmentation scores.
# df_val['pred_kidney'] = df_val[['pred_left-kidney', 'pred_right-kidney']].max(1)
# df_val_ = df_val.drop(['pred_left-kidney', 'pred_right-kidney'], axis=1)

# df_val_.to_csv('../input/df_images_train_with_seg.csv', index=False)
# df_val_.head()

Done ! 