In [None]:
# The nb_black extension is left disabled.
# %load_ext nb_black
# autoreload mode 2: re-import edited modules before each cell runs, so changes
# to the project code under ../code/ are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

## Initialization

### Imports

In [None]:
import os
import sys
import json
import torch
import warnings
import numpy as np
import pandas as pd

from tqdm.notebook import tqdm
from matplotlib import pyplot as plt

sys.path.append("../code/")
# warnings.simplefilter("ignore", UserWarning)

In [None]:
from params import *

from inference.main import k_fold_inf

from data.dataset import InferenceDataset
from data.transforms import HE_preprocess

### Load

In [None]:
# Two CSVs read from DATA_PATH: the dataset information sheet and train.csv
# (presumably the per-image mask encodings -- confirm against the data folder).
df_info = pd.read_csv(DATA_PATH + "HuBMAP-20-dataset_information.csv")
df_mask = pd.read_csv(DATA_PATH + "train.csv")

# Tile-level table read from OUT_PATH. The 256 and 4 in the file name presumably
# stand for tile size and reduce factor, matching the config-driven reload of
# `df` further down the notebook.
df = pd.read_csv(OUT_PATH + "df_images_256_4.csv")

In [None]:
# For every value of the "5fold" column, print the distinct image ids found in
# that fold's rows. The image id is the part of `tile_name` before the first "_".
folds = df["5fold"].unique()

for fold in folds:
    df_val = df[df["5fold"] == fold].reset_index()
    # Vectorized string split instead of a per-row Python lambda.
    val_images = df_val["tile_name"].str.split("_").str[0].unique()
    print(val_images)

### Data

In [None]:
# Settings for the single-image preview below.
# NOTE(review): TIFF_PATH_4 / train_4.csv look like the 4x-downscaled copies of
# the data, which would explain reduce_factor = 1 here -- confirm in params.py.
reduce_factor = 1
root = TIFF_PATH_4

# Encodings table for those images; filtered by image id when the dataset is built.
rle_path = DATA_PATH + "train_4.csv"
rles = pd.read_csv(rle_path)

In [None]:
# Id of the image to preview: used as the TIFF file stem under `root` and as the
# `id` value looked up in `rles` when the dataset is built.
img = "2f6ecfcdf" # alternative id tried earlier: "aaa6a05cc"

In [None]:
# Pick the "encoding" entries of the rows whose id matches the selected image,
# then build the tiled dataset over that image's TIFF.
image_rle = rles.loc[rles["id"] == img, "encoding"]

dataset = InferenceDataset(
    f"{root}/{img}.tiff",
    rle=image_rle,
    overlap_factor=2,
    reduce_factor=reduce_factor,
    tile_size=256,
    # augment=False / visualize=True are forwarded as-is to the project's
    # HE_preprocess; see data.transforms for what they enable.
    transforms=HE_preprocess(augment=False, visualize=True),
)

In [None]:
# Preview one tile, taken a third of the way through the dataset.
# The tile goes into its own name rather than `img`: `img` holds the image id
# string used to build the dataset, and overwriting it with a tensor would break
# a re-run of the dataset cell.
tile, pos = dataset[len(dataset) // 3]

# transpose(1, 2, 0) moves the first axis last -- presumably (C, H, W) -> (H, W, C),
# the layout plt.imshow expects for colour images.
plt.imshow(tile.numpy().transpose(1, 2, 0))
plt.show()

### Inference

In [None]:
# Folder of the training run to evaluate; its config.json is loaded below.
# Exactly one assignment is active. The commented lines record earlier runs,
# with the trailing note naming the experiment each folder holds.
# log_folder = "../logs/2021-03-18/0/"  # b5
# log_folder = "../logs/2021-03-26/3/"  # b5 512
# log_folder = "../logs/2021-03-27/1/"  # seresnext
# log_folder = "../logs/2021-03-28/1/"  # b6

# log_folder = "../logs/2021-03-29/7/"  # b5 + 10 ep
# log_folder = "../logs/2021-03-30/0/"  # b5 + 20 ep

# log_folder = "../logs/2021-03-31/0/"  # b4 512
# log_folder = "../logs/2021-04-01/2/"  # bot unext
# log_folder = "../logs/2021-04-02/3/"  # b6
# log_folder = "../logs/2021-04-04/1/"  # b0
log_folder = "../logs/2021-04-05/4/"  # b1

In [None]:
class Config:
    """Attribute-style view over a dictionary of settings.

    Every keyword argument becomes an instance attribute of the same name,
    so ``Config(tile_size=256).tile_size == 256``.
    """

    def __init__(self, **entries):
        # Write straight into the instance dict, one key at a time.
        for key, value in entries.items():
            self.__dict__[key] = value

# Read the settings saved with the selected run and expose them as attributes.
# The context manager closes the file handle as soon as the JSON is parsed.
with open(log_folder + "config.json", "r") as config_file:
    config = Config(**json.load(config_file))

In [None]:
# Fields not defined in old configs: fill in defaults so later cells can read them.
# hasattr() is used rather than a bare try/except so that only a genuinely
# missing attribute triggers the default; other errors are not silenced.
if not hasattr(config, "tile_size"):
    config.tile_size = 256
    config.reduce_factor = 4

if not hasattr(config, "use_bot"):
    config.use_bot = False
    config.use_fpn = False

if not hasattr(config, "double_model"):
    config.double_model = False

In [None]:
# Reload the tile table whose file name matches the tile size and reduce factor
# stored in the run's config. This replaces the `df` loaded near the top.
images_csv = f"df_images_{config.tile_size}_{config.reduce_factor}.csv"
df = pd.read_csv(OUT_PATH + images_csv)

In [None]:
# Inference-time knobs; the first two are passed to k_fold_inf in the next cell.
global_threshold = 0.4
use_tta = False

# Set (or overwrite) the overlap factor on the loaded config for this evaluation.
config.overlap_factor = 1.5

In [None]:
%%time
# Keyword options for the project's k_fold_inf helper, gathered in one place.
inference_options = dict(
    log_folder=log_folder,
    use_full_size=False,
    global_threshold=global_threshold,
    use_tta=use_tta,
    save=False,
)

# `scores` is averaged with np.mean in the following cells, so it is expected
# to be a sequence of numbers (presumably one per fold -- see inference.main).
scores = k_fold_inf(config, df, **inference_options)

In [None]:
# Original trailing note: "1.5 tta" -- presumably the setting this output was
# recorded with (overlap factor 1.5, TTA on). NOTE(review): the settings cell
# currently has use_tta = False, so confirm before trusting the label.
cv_score = np.mean(scores)
print(f'Local CV score is {cv_score:.4f} for threshold {global_threshold}')

In [None]:
# Original trailing note: "1.5" -- presumably overlap factor 1.5 without TTA.
# This cell prints the mean of whatever `scores` currently holds.
cv_score = np.mean(scores)
print(f'Local CV score is {cv_score:.4f} for threshold {global_threshold}')