**About**: This notebook previews the prepared data and trains models with k-fold cross-validation.

Ideas (reference discussions):
- https://www.kaggle.com/c/open-images-2019-instance-segmentation/discussion/110983

- https://www.kaggle.com/c/imaterialist-fashion-2019-FGVC6/discussion/95247#latest-567841
- https://www.kaggle.com/c/imaterialist-fashion-2019-FGVC6/discussion/95233#latest-551075
- https://www.kaggle.com/c/imaterialist-fashion-2019-FGVC6/discussion/95234#latest-555537

- https://www.kaggle.com/c/airbus-ship-detection/discussion/71595#latest-457550
- https://www.kaggle.com/c/airbus-ship-detection/discussion/71667#latest-558876
- https://www.kaggle.com/c/data-science-bowl-2018/discussion/56326

In [None]:
# %load_ext nb_black
%load_ext autoreload
%autoreload 2

In [None]:
cd ../src/

## Initialization

### Imports

In [None]:
import os
import gc
import ast
import sys
import cv2
import torch
import warnings
import numpy as np
import pandas as pd
import seaborn as sns
import plotly.express as px
import matplotlib.pyplot as plt

from tqdm.notebook import tqdm
from sklearn.model_selection import StratifiedKFold
warnings.simplefilter("ignore", UserWarning)

In [None]:
from params import *

from utils.plots import *
from utils.metrics import iou_map
from utils.rle import rles_to_mask_fix
from utils.logger import prepare_log_folder, create_logger, save_config

from data.preparation import prepare_data, prepare_extra_data
from data.dataset import SartoriusDataset
from data.transforms import define_pipelines, to_mosaic

from training.main import k_fold
from inference.post_process import *

In [None]:
import mmdet
import mmdet.models
from mmcv import Config

from mmcv.utils import build_from_cfg
from mmdet.datasets.builder import PIPELINES
from mmdet.datasets.pipelines import Compose

## Data

In [None]:
class Config:
    """
    Data settings used by the visualisation cells of this section.

    NOTE: this class reuses the name `Config` that was imported from mmcv
    earlier in the notebook, so that import is shadowed from here on.
    """
    # Augmentation switches
    use_mosaic = False
    use_tta = False  # TODO

    # Pipeline definition file, picked according to the mosaic switch
    if use_mosaic:
        data_config = "data/config_mosaic.py"
    else:
        data_config = "data/config.py"
    # data_config = "data/config_rescale.py"

In [None]:
# Load the main dataframe, plus an extra dataframe for the "livecell_shsy5y" source.
# NOTE(review): the exact columns of both frames are defined in data.preparation — confirm there.
df = prepare_data()
df_extra = prepare_extra_data("livecell_shsy5y")

In [None]:
# Build the pipelines from the config file selected in Config.data_config;
# they are looked up by name (e.g. 'val_viz', 'train_viz') in the cells below.
pipelines = define_pipelines(Config.data_config)

In [None]:
# Visualisation dataset: only the first row of df, run through the 'val_viz'
# pipeline, with df_extra passed as extra data. The commented-out lines are
# alternative setups over the full dataframe.
# dataset = SartoriusDataset(df, pipelines['test_viz'], precompute_masks=False)
dataset = SartoriusDataset(
    df.head(1), pipelines['val_viz'], precompute_masks=False, df_extra=df_extra
)
# dataset = SartoriusDataset(df, pipelines['train_viz'], precompute_masks=False)

In [None]:
# Show a single randomly drawn sample; nothing is drawn when the dataset is empty.
if len(dataset) > 0:
    idx = np.random.choice(len(dataset))
    data = dataset[idx]

    img, boxes = data['img'], data['gt_bboxes']

    plt.figure(figsize=(15, 15))
    plot_sample(img, data['gt_masks'], boxes, plotly=False)
    plt.axis(False)
    # plt.title(df['cell_type'][idx])
    plt.show()

In [None]:
# Disabled exploration cell: walks the dataset collecting per-image box sizes,
# aspect ratios and box counts, and plots any sample with more than 600 boxes.
# sizes_, ratios_, ns = [], [], []
# for i in tqdm(range(len(dataset))):
#     data = dataset[i]
# #     img = data['img']
#     boxes = data['gt_bboxes'].astype(float)

#     sizes = np.max([boxes[:, 2] - boxes[:, 0], boxes[:, 3] - boxes[:, 1]], 0)
#     ratios = (boxes[:, 2] - boxes[:, 0]) / (boxes[:, 3] - boxes[:, 1])
#     ratios = np.max([ratios, 1 / ratios], 0)
#     ns.append(len(boxes))

# #     if np.max(ratios) > 10:
#     if len(boxes) > 600:
# #     if np.max(sizes) > 256:
#         plt.figure(figsize=(15, 15))
#         plot_sample(data['img'], data['gt_masks'], boxes, plotly=False)
#         plt.axis(False)
#         plt.show()
        
    
#     sizes_.append(sizes)
#     ratios_.append(ratios)

In [None]:
# Rebuild the dataset over the full dataframe with the 'train_viz' pipeline.
dataset = SartoriusDataset(df, pipelines['train_viz'], precompute_masks=False)
# Optional mosaic wrapping, currently disabled.
# dataset = to_mosaic(Config, dataset, 'mosaic_viz')

In [None]:
# Draw one figure holding a 2x2 grid of randomly chosen samples.
for _ in range(1):
    plt.figure(figsize=(15, 15))

    for cell in range(1, 5):
        plt.subplot(2, 2, cell)

        idx = np.random.choice(len(dataset))
        # idx = 581
        data = dataset[idx]

        # Log the image shape of every drawn sample.
        print(data['img'].shape)
        plot_sample(data['img'], data['gt_masks'], data['gt_bboxes'], plotly=False)
        # print(data['img'].shape)

        plt.axis(False)

    plt.show()

## Training

In [None]:
class Config:
    """
    Parameters used for training.

    The class itself (not an instance) is handed to `save_config` and
    `k_fold`, so every setting is a plain class attribute. It replaces the
    `Config` class defined in the Data section of this notebook.
    """
    # General
    seed = 42
    verbose = 1
    first_epoch_eval = 1
    compute_val_loss = False
    verbose_eval = 5

    # Use the GPU when one is visible to torch, otherwise fall back to the CPU.
    device = "cuda" if torch.cuda.is_available() else "cpu"
    save_weights = True

    # Images
    fix = True
    extra_name = "livecell_no_shsy5y"
    use_extra_samples = False
    num_classes = 3

    use_mosaic = False
    # Pipeline definition file, picked according to the mosaic switch.
    data_config = "data/config_mosaic.py" if use_mosaic else "data/config.py"

    # k-fold
    k = 5
    random_state = 0
    selected_folds = [0]  # , 1, 2, 3, 4]

    # Model
    name = "maskrcnn"  # "cascade" "maskrcnn"
    encoder = "resnext101"
    # Model definition file derived from the architecture name.
    model_config = f"model_zoo/config_{name}.py"
    # Defined once here; it used to be assigned a second time under "Images".
    pretrained_livecell = True

    # Training
    optimizer = "Adam"
    scheduler = "linear"  # "plateau" "linear"
    # Weight decay is only applied when the optimizer is SGD.
    weight_decay = 0.0005 if optimizer == "SGD" else 0
    batch_size = 4
    val_bs = batch_size  # validation uses the same batch size as training

    epochs = 40

    lr = 3e-4
    warmup_prop = 0.05

    use_fp16 = False  # TODO

In [None]:
# With DEBUG = True the log-folder setup in the following cell is skipped,
# so log_folder stays None when it is passed to k_fold.
DEBUG = True
log_folder = None

In [None]:
# Outside of debug mode: create a log folder under LOG_PATH, store the config
# in it and set up a logger named "logs.txt" in that folder.
# NOTE(review): the order of these calls is kept as-is; create_logger may
# affect how later output is captured — confirm in utils.logger.
if not DEBUG:
    log_folder = prepare_log_folder(LOG_PATH)
    print(f"Logging results to {log_folder}")
    save_config(Config, log_folder)
    create_logger(directory=log_folder, name="logs.txt")

# Launch k-fold training with the Config class; log_folder is None in debug mode.
results = k_fold(Config, log_folder=log_folder)