**About**: This notebook validates trained models on the fold-0 validation split and scores blends of their predictions.

In [None]:
# %load_ext nb_black
# Enable IPython's autoreload extension; mode 2 reloads imported modules
# before each cell executes, so edits to the project sources are picked up
# without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [None]:
# Switch the working directory to the project sources (IPython automagic `cd`).
# Presumably needed so the `params`, `util`, `data` and `inference_main`
# imports below resolve - verify against the repository layout.
cd ../src/

## Initialization

### Imports

In [None]:
import os

# Select the GPU *before* torch is imported so the restriction is guaranteed
# to be in place before any CUDA initialisation can happen.
os.environ['CUDA_VISIBLE_DEVICES'] = "1"

import torch

print(torch.__version__)

# With a single visible GPU, index 0 refers to the card selected above.
# Fall back to "cpu" instead of raising when CUDA is unavailable, so the
# CPU-only parts of the notebook remain runnable.
device = torch.cuda.get_device_name(0) if torch.cuda.is_available() else "cpu"
print(device)

In [None]:
import os
import sys
import glob
import json
import timm
import torch
import operator
import warnings
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

from tqdm import tqdm
from sklearn.metrics import *
from collections import Counter
from numerize.numerize import numerize

# Display settings for dataframes printed in this notebook.
# Options are spelled with their full names: shorthand such as 'max_colwidth'
# is resolved by pattern matching and can break if pandas later adds another
# option with a similar name.
pd.set_option('display.width', 500)
pd.set_option('display.max_colwidth', 100)

In [None]:
from params import *
from util.plots import *
from util.metrics import *

from data.dataset import ContrailDataset
from data.transforms import get_transfos
from data.preparation import prepare_data
from util.plots import plot_sample
from util.logger import upload_to_kaggle

from inference_main import kfold_inference, Config

### Data

In [None]:
# Build the metadata table from the "false_color/" processed folder.
# The result is used as a pandas DataFrame by the following cells.
df = prepare_data(
    DATA_PATH,
    processed_folder="false_color/",
    use_raw=True,
)

In [None]:
# Attach the fold assignment when the prepared dataframe does not carry one.
# The merge uses pandas' default keys, i.e. every column the two frames share.
if "fold" not in df:
    folds = pd.read_csv(DATA_PATH + "folds_4.csv")
    df = pd.merge(df, folds)

In [None]:
# Validation split: rows assigned to fold 0, re-indexed from zero.
is_fold_0 = df["fold"] == 0
df_val = df.loc[is_fold_0].reset_index(drop=True)

In [None]:
# df_val[['record_id']].to_csv('../output/df_val.csv', index=False)

In [None]:
# Debug switch: when enabled, the validation set is cut down to the records
# listed in `folders`.
DEBUG = False

# record_id values kept in debug mode.
folders = [
    3687499407028137410,
    6558861185867890815,
    7355354609194882312,
    7547747455642200110,
    5456834089979970017,
    1501528175573804219,
    5728069425727341010,
    8604370548989406919,
    4746167155668084215,
    6094972442472366517,
]

if DEBUG:
    in_debug_set = df_val["record_id"].isin(folders)
    df_val = df_val.loc[in_debug_set].reset_index(drop=True)

In [None]:
dataset = ContrailDataset(df_val, get_transfos())

# Ground truth for every validation sample: the second item returned by the
# dataset is the mask, of which only the first channel is kept.
# (To eyeball samples, call plot_sample on the image and mask[:1] of a few
# indices, e.g. every 1000th.)
gts = np.array(
    [dataset[idx][1][:1].numpy() for idx in tqdm(range(len(dataset)))]
)

### Inference

In [None]:
# Settings for the (currently commented-out) kfold_inference call in this
# section, where they are passed as `use_fp16` and `save`.
USE_FP16 = True
# Enabled only outside debug mode; presumably controls whether predictions
# are written to disk - confirm in inference_main.
SAVE = not DEBUG

In [None]:
# cvnxt-nano runs, grouped by model variant.
# These used to be a chain of `EXP_FOLDER = ...` lines in which every
# assignment except the last was silently overwritten; the runs are now kept
# as data and the selection is made explicitly below.
CVNXT_NANO_RUNS = {
    "2D": [
        "../logs/2023-07-17/10/",
        "../logs/2023-07-18/4/",
        "../logs/2023-07-18/7/",
        "../logs/2023-07-18/12/",
        "../logs/2023-07-19/1/",
    ],
    "CNN frames 1234": [
        "../logs/2023-07-18/0/",
        "../logs/2023-07-18/6/",
        "../logs/2023-07-18/10/",
        "../logs/2023-07-19/0/",
        "../logs/2023-07-19/2/",
    ],
    "LSTM frames 1234": [
        "../logs/2023-07-19/10/",
        "../logs/2023-07-19/12/",
        "../logs/2023-07-19/15/",
        "../logs/2023-07-19/17/",
    ],
    "cnn nocutmix": [
        "../logs/2023-07-31/21/",
        "../logs/2023-07-31/22/",
        "../logs/2023-08-01/6/",
        "../logs/2023-08-01/7/",
        "../logs/2023-08-01/14/",
        "../logs/2023-08-01/16/",
    ],
}

# Selected run: the same folder the original chain of assignments ended on.
# Change the group key / index to evaluate another run.
EXP_FOLDER = CVNXT_NANO_RUNS["cnn nocutmix"][-1]

In [None]:
# v2s runs, grouped by model variant.
# These used to be a chain of `EXP_FOLDER = ...` lines in which every
# assignment except the last was silently overwritten; the runs are now kept
# as data and the selection is made explicitly below.
# Disabled groups are kept as commented entries.
V2S_RUNS = {
    "2D": [
        "../logs/2023-07-19/22/",
        "../logs/2023-07-19/21/",
        "../logs/2023-07-19/20/",
        "../logs/2023-07-19/18/",
        "../logs/2023-07-20/1/",
    ],
    "LSTM frames 2345": [
        "../logs/2023-07-19/25/",
        "../logs/2023-07-19/24/",
        "../logs/2023-07-19/23/",
        "../logs/2023-07-19/19/",
        "../logs/2023-07-20/2/",
    ],
    "LSTM frames 01234567 stride 1": [
        "../logs/2023-07-22/8/",
        "../logs/2023-07-22/10/",
        "../logs/2023-07-22/16/",
        "../logs/2023-07-23/3/",
        "../logs/2023-07-23/7/",
    ],
    "2D v2s ext stride 2 100eps": [
        "../logs/2023-07-29/1/",
        "../logs/2023-07-28/9/",
        "../logs/2023-07-29/13/",
        "../logs/2023-07-30/0/",
        "../logs/2023-07-30/6/",
    ],
    "v2-s Finetune ext": [
        "../logs/2023-07-30/16/",
        "../logs/2023-07-30/19/",
        "../logs/2023-07-30/20/",
        "../logs/2023-08-02/0/",
        "../logs/2023-08-02/1/",
        "../logs/2023-08-04/23/",  # + 5 fullfit
    ],
    # "nocutmix": [
    #     "../logs/2023-07-30/26/",
    #     "../logs/2023-07-30/27/",
    #     "../logs/2023-07-31/3/",
    #     "../logs/2023-07-31/4/",
    #     "../logs/2023-07-31/5/",
    # ],
    # "v2-s june-july finetune cnn": [
    #     "../logs/2023-08-01/0/",
    #     "../logs/2023-08-01/1/",
    #     "../logs/2023-08-01/8/",
    #     "../logs/2023-08-01/9/",
    #     "../logs/2023-08-01/15/",
    # ],
    # "200 eps": [
    #     "../logs/2023-07-28/10/",
    #     "../logs/2023-08-01/27/",
    # ],
    # "june-july 2D": [
    #     # "../logs/2023-07-31/7/",
    #     # "../logs/2023-07-31/8/",
    #     "../logs/2023-08-01/24/",
    #     "../logs/2023-08-02/2/",
    #     "../logs/2023-08-02/3/",
    #     "../logs/2023-08-02/4/",
    #     "../logs/2023-08-02/5/",
    # ],
}

# Selected run: the same folder the original chain of assignments ended on.
# This overrides any EXP_FOLDER chosen in an earlier cell.
EXP_FOLDER = V2S_RUNS["v2-s Finetune ext"][-1]

In [None]:
# preds = kfold_inference(df_val if DEBUG else df, EXP_FOLDER, use_fp16=USE_FP16, save=SAVE)

In [None]:
# preds = []
# os.makedirs(EXP_FOLDER + "pl_masks/", exist_ok=True)

# for fold in [1, 2, 3]:
#     print(f"\n- Fold {fold + 1}")
#     df_ = df[df['fold'] == fold].reset_index(drop=True)
    
#     pred_val = np.load(EXP_FOLDER + f"pred_val_{fold}.npy")
    
#     for i in tqdm(range(len(df_))):
#         mask = pred_val[i][0].astype(np.float32)
#         record_id = df_['record_id'][i]
        
#         np.save(EXP_FOLDER + "pl_masks/" + str(record_id) + ".npy", mask)

In [None]:
# preds_gpu = torch.from_numpy(preds[0]).cuda()
# truths_gpu = torch.from_numpy(gts).cuda()

# dices = {}
# for th in tqdm(np.round(np.arange(0.2, 0.6, 0.01), 2)):
#     dices[th] = dice_score_torch((preds_gpu > th), truths_gpu)
    
# th, dice = max(dices.items(), key=operator.itemgetter(1))
# print(f'-> CV dice :  {dice:.4f}   (th={th:.2f})')

In [None]:
# preds_gpu = torch.from_numpy(preds[0]).cuda()
# truths_gpu = torch.from_numpy(gts).cuda()

# dices = {}
# for th in tqdm(np.round(np.arange(0.2, 0.6, 0.01), 2)):
#     dices[th] = dice_score_torch((preds_gpu > th), truths_gpu)
    
# th, dice = max(dices.items(), key=operator.itemgetter(1))
# print(f'-> CV dice :  {dice:.4f}   (th={th:.2f})')

### Blends

In [None]:
# Experiment folders whose fold-0 validation predictions are averaged in the
# blend. Commented entries are alternative groups; the score after each group
# title is the value recorded by the author for that group.
# 5 runs - cvnxt-nano cnn frames 1234 stride 2 - 0.6863
# NOTE(review): the line above does not describe the entries that are
# currently active (the two "2D Ext data" groups below) - confirm or remove.
EXP_FOLDERS = [
#     # cvnxt-nano 
#     # CNN 1234 - 0.6900
#     "../logs/2023-07-18/0/" ,
#     "../logs/2023-07-18/6/" ,
#     "../logs/2023-07-18/10/" ,
#     "../logs/2023-07-19/0/" ,
#     "../logs/2023-07-19/2/", 

#     # LSTM 1234 - 0.6890
#     "../logs/2023-07-19/10/",
#     "../logs/2023-07-19/12/",
#     "../logs/2023-07-19/15/",
#     "../logs/2023-07-19/17/",

    # v2s
#     # LSTM 2345  - 0.6905
#     "../logs/2023-07-19/19/",
#     "../logs/2023-07-19/23/",
#     "../logs/2023-07-19/24/",
#     "../logs/2023-07-19/25/",
#     "../logs/2023-07-20/2/",

    # 2D Ext data 100 epochs - 0.6973
    "../logs/2023-07-28/9/" ,
    "../logs/2023-07-29/1/",
    "../logs/2023-07-29/13/",
    "../logs/2023-07-30/0/",
    "../logs/2023-07-30/6/",
    "../logs/2023-08-04/23/",

#     # 2D Ext data 100 epochs + LSTM - 0.6968
#     "../logs/2023-07-30/16/",
#     "../logs/2023-07-30/19/",
#     "../logs/2023-07-30/20/",
#     "../logs/2023-08-01/9/",
#     "../logs/2023-08-01/15/",

    # 2D Ext data 200 eps - 0.6956
    "../logs/2023-07-28/10/",
    "../logs/2023-08-01/27/",
    "../logs/2023-08-02/2/",
    "../logs/2023-08-02/3/",
    "../logs/2023-08-02/4/",
    "../logs/2023-08-02/5/",
]

# Fold-0 validation predictions of every selected run, keyed by its folder.
# Tensors stay on CPU; append .cuda() to the value to keep them on the GPU.
PREDS = {
    folder: torch.from_numpy(np.load(folder + "pred_val_0.npy"))
    for folder in tqdm(EXP_FOLDERS)
}

In [None]:
# upload_to_kaggle(EXP_FOLDERS, "../output/dataset_v2/", "Contrail Weights 2D", update_folders=False)

In [None]:
# Ground truth as a tensor (on CPU here despite the name; append .cuda() to
# score on the GPU).
truths_gpu = torch.from_numpy(gts) # .cuda()
scores = []

# Per-run scoring, currently disabled:
# for exp_folder in EXP_FOLDERS:
#     preds = PREDS[exp_folder]

#     dices = {}
#     for th in np.round(np.arange(0.2, 0.6, 0.01), 2):
#         dices[th] = dice_score_torch((preds > th), truths_gpu)

#     th, dice = max(dices.items(), key=operator.itemgetter(1))
#     print(f'-> {exp_folder}\t-  CV dice :  {dice:.4f}   (th={th:.2f})')
#     scores.append(dice)
    
# print(f'\n  --> Avg of CV : {np.mean(scores):.4f} +/- {np.std(scores):.4f}')


# Blend: unweighted mean of the runs' predictions.
preds = torch.stack(tuple(PREDS[folder] for folder in EXP_FOLDERS), dim=0).mean(dim=0)

# Sweep the binarisation threshold and keep the one with the highest dice.
thresholds = np.round(np.arange(0.4, 0.6, 0.01), 2)
dices = {th: dice_score_torch((preds > th), truths_gpu) for th in thresholds}

th, dice = max(dices.items(), key=lambda item: item[1])
print(f'  --> CV of avg : {dice:.4f}   (th={th:.2f})')

In [None]:
# np.save("../output/v2s.npy", preds.numpy()[:, 0])

In [None]:
# Dice as a function of the threshold, zoomed in around the best threshold.
# The scores are converted to plain Python floats first: `dices.values()` is a
# dict view (not a sequence) and its items may be 0-dim tensors, neither of
# which is a reliable input for matplotlib.
thresholds = np.array(list(dices.keys())).astype(float)
dice_values = [float(score) for score in dices.values()]
best_dice = float(dice)

plt.plot(thresholds, dice_values)
plt.axvline(th, c="salmon")  # best threshold
plt.xlim(th - 0.1, th + 0.1)
plt.ylim(best_dice - 0.01, best_dice + 0.002)
plt.xlabel("threshold")
plt.ylabel("dice")
plt.title(f'dice={best_dice:.4f}, th={th:.2f}')
plt.show()

### With team

In [None]:
# probs = [cv2.imread(f'../output/drhb/{p}.png', cv2.IMREAD_GRAYSCALE) for p in tqdm(df_val['record_id'].values)]
# probs = np.array(probs).astype(np.float16) / 255
# np.save("../output/psam_single.npy", probs)

# for f in os.listdir('../output/iafoss/'):
#     if not os.path.isdir(f'../output/iafoss/{f}'):
#         continue

#     print(f'- {f}')
#     probs = [cv2.imread(f'../output/iafoss/{f}/{p}.png', cv2.IMREAD_GRAYSCALE) for p in tqdm(df_val['record_id'].values)]
#     probs = np.array(probs).astype(np.float16) / 255
#     np.save(f"../output/iafoss/{f}.npy", probs)
# #     break

In [None]:
# Saved validation predictions of each team model: display name -> .npy file.
# The trailing number is the CV dice recorded by the author for that model.
# Names are matched verbatim elsewhere (note the trailing spaces in "CoaT_UT  ").
FILES = {
    "Psam-Drhb": "../output/psam_single.npy",                       # 0.6933
    "Ex-v2s-Theo": "../output/v2s.npy",                             # 0.6992

    "CoaT_ULSTM": "../output/iafoss/Seq_CoaT_512_0.npy",            # 0.7038
    "CoaT_UT  ": "../output/iafoss/Seq_CoaT_512_1.npy",             # 0.7052
    "Ex-Coat_ULSTM": "../output/iafoss/Seq_ExCoaT_512_0.npy",       # 0.7063
    "Ex-Coat_UT": "../output/iafoss/Seq_ExCoaT_512_1.npy",          # 0.7040

    "NeXtViT_ULSTM": "../output/iafoss/Seq_NextViT_512_0.npy",      # 0.7010
    "Ex-NeXtViT_ULSTM": "../output/iafoss/Seq_ExNeXtViT_512_0.npy", # 0.7024
    "Ex-NeXtViT_UT": "../output/iafoss/Seq_ExNeXtViT_512_1.npy",    # 0.7002

#     "NeXtViT_512_1": '../output/iafoss/NeXtViT_512_1.npy',             # 0.6912 -
#     "CoaT_512_1    ": '../output/iafoss/CoaT_512_1.npy',               # 0.6978
}


# Load every prediction array onto the GPU, keyed by the same display name.
PREDS = {
    name: torch.from_numpy(np.load(path)).cuda()
    for name, path in tqdm(FILES.items())
}

In [None]:
# Score each loaded model on its own: sweep the threshold from 0.2 to 0.6 in
# steps of 0.01 and report the best dice together with its threshold.
truths_gpu = torch.from_numpy(gts).cuda()
thresholds = np.round(np.arange(0.2, 0.6, 0.01), 2)

for name, preds in PREDS.items():
    dices = {th: dice_score_torch((preds > th), truths_gpu) for th in thresholds}

    th, dice = max(dices.items(), key=lambda item: item[1])
    print(f'-> {name}    \t-  CV dice :  {dice:.4f}   (th={th:.2f})')

In [None]:
# Blend weight per model name. Keys must match the keys of PREDS exactly;
# models that are missing here (or commented out) get weight 0.
# The mapping has its own name so that WEIGHTS is only ever a list, instead of
# being rebound from a dict to a list within the same cell.
WEIGHTS_BY_MODEL = {
    'Psam-Drhb': 1,        # 0.6933
    'Ex-v2s-Theo': 1,      # 0.6992
    'CoaT_ULSTM': 1,       # 0.7038
    'CoaT_UT  ': 1,        # 0.7052
#     'Ex-Coat_ULSTM': 0.1,    # 0.7063
#     'Ex-Coat_UT': 1,       # 0.7040
    'NeXtViT_ULSTM': 0.5,    # 0.7010
    'Ex-NeXtViT_ULSTM': 0.5, # 0.7024
#     'Ex-NeXtViT_UT': 0.5,  # 0.7002
}

# A weight whose key matches no loaded prediction would be dropped without
# notice (keys are whitespace-sensitive, e.g. 'CoaT_UT  '), so surface it.
unmatched = [name for name in WEIGHTS_BY_MODEL if name not in PREDS]
if unmatched:
    warnings.warn(f"Blend weights without matching predictions are ignored: {unmatched}")

# One weight per entry of PREDS, in PREDS order.
WEIGHTS = [WEIGHTS_BY_MODEL.get(k, 0) for k in PREDS]

In [None]:
# Weighted average of all loaded predictions.
# The weights are given three trailing singleton axes so they broadcast over a
# 4-D stack (assumes every prediction array is 3-D - TODO confirm).
preds = torch.stack(list(PREDS.values()), dim=0)
weights = torch.tensor(WEIGHTS).to(preds.device)[:, None, None, None]
preds = (preds * weights).sum(dim=0) / weights.sum(dim=0)

# Sweep the binarisation threshold and keep the one with the highest dice.
thresholds = np.round(np.arange(0.4, 0.6, 0.01), 2)
dices = {th: dice_score_torch((preds > th), truths_gpu) for th in thresholds}

th, dice = max(dices.items(), key=lambda item: item[1])
# To force a given threshold instead:
# th = 0.48
# dice = dices[th]
print(f'\n  --> CV of avg : {dice:.4f}   (th={th:.2f})')

In [None]:
# Dice as a function of the threshold, zoomed in around the best threshold.
# The scores are converted to plain Python floats first: `dices.values()` is a
# dict view (not a sequence) and its items are computed from CUDA tensors, so
# they may themselves be CUDA tensors, which matplotlib cannot convert.
thresholds = np.array(list(dices.keys())).astype(float)
dice_values = [float(score) for score in dices.values()]
best_dice = float(dice)

plt.plot(thresholds, dice_values)
plt.axvline(th, c="salmon")  # best threshold
plt.xlim(th - 0.1, th + 0.1)
plt.ylim(best_dice - 0.01, best_dice + 0.002)
plt.xlabel("threshold")
plt.ylabel("dice")
plt.title(f'dice={best_dice:.4f}, th={th:.2f}')
plt.show()

### Plot errors
- TODO

Done ! 