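**About**: This notebook validates trained models: it runs inference on a validation fold, searches for the threshold that maximizes the dice score, and evaluates an ensemble of saved predictions.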

In [None]:
# Optional nb_black extension, left disabled:
# %load_ext nb_black
# Enable autoreload in mode 2 so that edited project modules are re-imported
# automatically before each cell is executed.
%load_ext autoreload
%autoreload 2

In [None]:
cd ../src/

## Initialization

### Imports

In [None]:
import os

# Select the GPU *before* importing torch: CUDA_VISIBLE_DEVICES is only taken
# into account when CUDA gets initialised, so setting it first guarantees the
# restriction is honoured regardless of what the import triggers.
os.environ['CUDA_VISIBLE_DEVICES'] = "1"

import torch

print(torch.__version__)
# Index 0 is relative to the visible devices, i.e. the GPU selected above.
# Note that `device` holds the device *name* (a string), not a torch.device.
device = torch.cuda.get_device_name(0)
print(device)

In [None]:
import os
import sys
import glob
import json
import timm
import torch
import operator
import warnings
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

from tqdm import tqdm
from sklearn.metrics import *
from numerize.numerize import numerize

# Display settings: wider console output and longer cell contents before truncation.
for option_name, option_value in (('display.width', 500), ('max_colwidth', 100)):
    pd.set_option(option_name, option_value)

In [None]:
from params import *
from util.plots import *
from util.metrics import *

from data.dataset import ContrailDataset
from data.transforms import get_transfos
from data.preparation import prepare_data
from util.plots import plot_sample
from util.logger import upload_to_kaggle

from inference_main import kfold_inference, Config

### Data

In [None]:
# Load the metadata through prepare_data, pointing it at the "false_color/"
# processed folder. The result is used as a pandas dataframe in the cells below.
df = prepare_data(DATA_PATH, processed_folder="false_color/")

In [None]:
# Attach the fold assignments stored in "folds_4.csv" unless the dataframe
# already carries a "fold" column (which also makes this cell safe to re-run).
# The merge uses pandas defaults: inner join on the columns shared by both frames.
has_fold_column = "fold" in df.columns
if not has_fold_column:
    folds = pd.read_csv(DATA_PATH + "folds_4.csv")
    df = df.merge(folds)

In [None]:
# Validation subset: the rows assigned to fold 0, re-indexed from zero.
is_fold_0 = df['fold'].eq(0)
df_val = df.loc[is_fold_0].reset_index(drop=True)

In [None]:
# When DEBUG is on, validation is restricted to the handful of records listed
# below so that the whole notebook runs quickly.
DEBUG = True

# record_id values kept in DEBUG mode.
folders = [
    3687499407028137410,
    6558861185867890815,
    7355354609194882312,
    7547747455642200110,
    5456834089979970017,
    1501528175573804219,
    5728069425727341010,
    8604370548989406919,
    4746167155668084215,
    6094972442472366517,
]

if DEBUG:
    in_debug_subset = df_val["record_id"].isin(folders)
    df_val = df_val.loc[in_debug_subset].reset_index(drop=True)

In [None]:
dataset = ContrailDataset(df_val, get_transfos())

# Collect the ground-truth masks of the validation set. Each sample is an
# (image, mask, extra) triple; only the first slice along the mask's leading
# axis is kept (presumably the channel axis - TODO confirm).
samples = (dataset[idx] for idx in tqdm(range(len(dataset))))
gts = np.array([mask[:1].numpy() for _, mask, _ in samples])

### Inference

In [None]:
# Forwarded to kfold_inference as `use_fp16` (presumably enables half-precision inference).
USE_FP16 = True
# Forwarded to kfold_inference as `save`; switched off whenever DEBUG is on.
SAVE = not DEBUG

In [None]:
# Experiment folder handed to kfold_inference. Swap in the commented line
# below to evaluate the other run instead.
EXP_FOLDER = "../logs/2023-07-03/35/"
# EXP_FOLDER = "../logs/2023-07-05/35/"

In [None]:
# Run inference on the validation dataframe with the experiment in EXP_FOLDER.
# The result is consumed as a numpy array of predictions by the next cell.
preds = kfold_inference(df_val, EXP_FOLDER, use_fp16=USE_FP16, save=SAVE)

In [None]:
# Predictions and ground truths must cover the same samples for the scores to
# be meaningful; fail early with a clear message otherwise.
assert len(preds) == len(gts), (
    f"preds ({len(preds)}) and gts ({len(gts)}) do not contain the same number of samples"
)

preds_gpu = torch.from_numpy(preds).cuda()
truths_gpu = torch.from_numpy(gts).cuda()

# Thresholds 0.20, 0.21, ..., 0.59. They are built from an integer range because
# np.arange with a float step can return an unexpected number of elements.
thresholds = np.arange(20, 60) / 100

# Dice score of the binarized predictions for every candidate threshold.
dices = {}
for th in tqdm(thresholds):
    # float() stores a plain host number whatever dice_score_torch returns
    # (Python float or 0-d tensor), so the dict stays usable by numpy/matplotlib.
    dices[th] = float(dice_score_torch(preds_gpu > th, truths_gpu))

# Keep the threshold that yields the highest dice.
th, dice = max(dices.items(), key=operator.itemgetter(1))
print(f'-> CV dice :  {dice:.4f}   (th={th:.2f})')

### Evaluate

In [None]:
# Experiment folders whose saved validation predictions are averaged below.
# The same list is passed to upload_to_kaggle at the end of the notebook.
EXP_FOLDERS = [
    "../logs/2023-07-03/35/",
    "../logs/2023-07-05/35/",
]

In [None]:
# Ensemble: element-wise mean, across experiments, of each folder's saved
# "pred_val_0.npy" predictions.
preds = np.asarray(
    [np.load(folder + "pred_val_0.npy") for folder in EXP_FOLDERS]
).mean(axis=0)

In [None]:
# `preds` was loaded from disk while `gts` was built from df_val, which is
# subsampled when DEBUG is on. Make sure both cover the same samples before
# scoring instead of failing later with an obscure shape error.
assert len(preds) == len(gts), (
    f"preds ({len(preds)}) and gts ({len(gts)}) do not contain the same number of samples"
)

preds_gpu = torch.from_numpy(preds).cuda()
truths_gpu = torch.from_numpy(gts).cuda()

# Thresholds 0.20, 0.21, ..., 0.59. They are built from an integer range because
# np.arange with a float step can return an unexpected number of elements.
thresholds = np.arange(20, 60) / 100

# Dice score of the binarized ensemble predictions for every candidate threshold.
dices = {}
for th in tqdm(thresholds):
    # float() stores a plain host number whatever dice_score_torch returns
    # (Python float or 0-d tensor), so the dict stays usable by numpy/matplotlib.
    dices[th] = float(dice_score_torch(preds_gpu > th, truths_gpu))

# Keep the threshold that yields the highest dice.
th, dice = max(dices.items(), key=operator.itemgetter(1))
print(f'-> CV dice :  {dice:.4f}   (th={th:.2f})')

In [None]:
# Coerce everything to plain floats first: the scores (and the optimum found in
# the previous cell) may be 0-d tensors, which matplotlib cannot always convert.
thresholds = [float(t) for t in dices]
scores = [float(s) for s in dices.values()]
best_th, best_dice = float(th), float(dice)

# Dice as a function of the threshold, with the best threshold highlighted.
plt.plot(thresholds, scores)
plt.axvline(best_th, c="salmon")
# Zoom in on the neighbourhood of the optimum.
plt.xlim(best_th - 0.1, best_th + 0.1)
plt.ylim(best_dice - 0.01, best_dice + 0.002)
plt.xlabel("threshold")
plt.ylabel("dice")
plt.title(f'dice={best_dice:.3f}, th={best_th:.2f}')
plt.show()

In [None]:
# Push the experiment folders to Kaggle via upload_to_kaggle. The second and
# third arguments look like the local staging directory and the dataset title
# - TODO confirm against util.logger.
# NOTE(review): this call runs unconditionally, including when DEBUG is True
# and on every "Run All" - confirm that this is intended.
upload_to_kaggle(EXP_FOLDERS, "../output/dataset_v1/", "Contrail Weights v1", update_folders=True)

### Plot

Done!