**About**: This notebook is used to train chart-type classification models and to evaluate saved experiments on the validation split.

In [None]:
# %load_ext nb_black
# Reload imported modules before each cell executes, so edits to the project
# code are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [None]:
%matplotlib inline

In [None]:
cd ../src/

## Initialization

### Imports

In [None]:
import os
# Expose only GPU index 0 to CUDA. This is set before the project modules
# are imported in the cells below.
os.environ['CUDA_VISIBLE_DEVICES'] = "0"

In [None]:
import os
import cv2
import sys
import ast
import glob
import json
import yaml
import shutil
import warnings
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

from tqdm import tqdm

# Silence every UserWarning so cell outputs stay readable.
warnings.filterwarnings("ignore", category=UserWarning)

# Widen pandas' text rendering. Full option names are used for both keys:
# a partial key such as 'max_colwidth' is resolved by pattern matching and
# would raise if it ever matched more than one option.
pd.set_option('display.width', 500)
pd.set_option('display.max_colwidth', 100)

In [None]:
from params import *

from data.preparation import *
from data.transforms import *
from data.dataset import *

from model_zoo.models import define_model

from training.main import k_fold

from util.torch import init_distributed
from util.logger import (
    prepare_log_folder,
    save_config,
    create_logger,
    init_neptune
)


### Load data

In [None]:
# Load the main dataframe using prepare_data's default arguments.
df = prepare_data()

In [None]:
# Route every row whose source is "extracted" to the validation split.
extracted_mask = df["source"] == "extracted"
df.loc[extracted_mask, "split"] = "val"

In [None]:
# df_gen = prepare_gen_data(DATA_PATH)

In [None]:
# df_gen_a = prepare_gen_data(DATA_PATH, img_folder="gen_andrija/")

In [None]:
# df = pd.concat([df, df_gen, df_gen_a], ignore_index=True)

In [None]:
# Number of samples per chart type, one bar per split, on a log-scaled y axis.
ax = sns.countplot(data=df, x='chart-type', hue="split")
ax.set_yscale('log')
plt.show()

In [None]:
# Augmenting transforms for the visual check below. The strength and resize
# values duplicate Config.aug_strength / Config.resize defined further down.
transforms = get_transfos(augment=True, strength=3, resize=(256, 384))

In [None]:
# Dataset over the whole dataframe (no split filtering) with the augmenting
# transforms; it feeds the sample preview below.
dataset = ClsDataset(df, transforms)

In [None]:
# for i in tqdm(range(len(dataset))):
#     i = np.random.choice(len(dataset))
    
#     plt.figure(figsize=(15, 15))
#     for k in range(9):
#         plt.subplot(3, 3, k + 1)
#         img, y, _ = dataset[i]

#         plt.imshow(img.numpy().transpose(1, 2, 0))
#         plt.axis(False)
# #     plt.title(CLASSES[int(y)])
#     plt.tight_layout()
#     plt.show()
    
#     break

In [None]:
# Sanity check: draw one random sample from the dataset and display it with
# its class name. The former `for ... break` wrapper always stopped after one
# iteration and overwrote its own loop index, so it is written as straight code.
# An empty dataset now fails here (np.random.choice raises) instead of silently
# leaving `img` undefined for the model check further down.
idx = np.random.choice(len(dataset))
img, y, _ = dataset[idx]

# assumes img is a channel-first tensor (C, H, W), moved to (H, W, C) for imshow — TODO confirm
plt.imshow(img.numpy().transpose(1, 2, 0))
plt.title(CLASSES[int(y)])
plt.show()

### Model

In [None]:
# Build the model by architecture name, leaving define_model's other arguments
# at their defaults (same architecture name as Config.name below).
model = define_model("tf_efficientnetv2_b0")

In [None]:
# Smoke test: run the previewed image through the model as a batch of one.
# The call returns two outputs, bound here as `y` and `y_aux`.
# Note: this rebinds `y`, which previously held the previewed sample's label.
y, y_aux = model(img.unsqueeze(0))

### Main

In [None]:
class Config:
    """
    Parameters used for training.

    Used as a plain namespace of class-level attributes: the (currently
    commented-out) training cell passes the class itself to `save_config`,
    `init_distributed` and `k_fold`.
    """
    # General
    seed = 42
    verbose = 1
    device = "cuda"
    save_weights = True

    # Images
    img_folder = "train/images/"
    window = img_folder.endswith("_w/")  # True only when the folder name ends in "_w/"
    aug_strength = 3
    resize = (256, 384)

    # k-fold
    k = 4
    folds_file = None
    selected_folds = [0]

    # Model
    name = "tf_efficientnetv2_b0"  # "eca_nfnet_l2"  # "tf_efficientnetv2_s" "eca_nfnet_l1"
    pretrained_weights = None
    num_classes = len(CLASSES)  # one class per entry of CLASSES
    num_classes_aux = 0
    n_channels = 3
    reduce_stride = False
    drop_rate = 0.1
    drop_path_rate = 0.1
    use_gem = True
    syncbn = False

    # Training
    loss_config = {
        "name": "ce",
        "smoothing": 0.0,
        "activation": "softmax",
        "aux_loss_weight": 0.,
        "pos_weight": None,
        "activation_aux": "softmax",
    }

    data_config = {
        "batch_size": 16,
        "val_bs": 32,
        "mix": "cutmix",
        "mix_proba": 1,
        "mix_alpha": 4.0,
        "num_classes": num_classes,  # reuses the class-level value defined above
        "additive_mix": False,
    }

    optimizer_config = {
        "name": "Ranger",
        "lr": 5e-4,
        "warmup_prop": 0.0,
        "betas": (0.9, 0.999),
        "max_grad_norm": 10.0,
        "weight_decay": 0,  # 1e-2,
    }

    epochs = 1
    use_fp16 = True

    verbose_eval = 200

    fullfit = False
    n_fullfit = 1


In [None]:
# DEBUG is only read by the commented-out training cell below, which creates a
# log folder when it is False.
# log_folder and run are the values that cell passes to k_fold; they stay None
# unless that cell sets them.
DEBUG = True
log_folder = None
run = None

In [None]:
# if not DEBUG:
#     log_folder = prepare_log_folder(LOG_PATH)
#     print(f"Logging results to {log_folder}")
#     config_df = save_config(Config, log_folder + "config.json")
#     create_logger(directory=log_folder, name="logs.txt")
# #     run = init_neptune(Config, log_folder)

# df = prepare_data(DATA_PATH, DATA_PATH + Config.img_folder)
# # df_gen = prepare_gen_data(DATA_PATH)
# # df = pd.concat([df, df_gen], ignore_index=True)

# # df= df.sample(1000).reset_index(drop=True)

# # df['cancer'] = (df['BIRADS'] <= 0).astype(int)
# # df = df.dropna(axis=0).reset_index(drop=True)
# # df = df.head(10000) if DEBUG else df
# config = Config
# init_distributed(config)

# preds = k_fold(config, df, log_folder=log_folder, run=run)

### Validation

In [None]:
# Experiment log folders to evaluate. Their saved "pred_val.npy" files are
# averaged below, and the first entry is also used for the inference check at
# the end of the notebook. Each path must end with "/" because the file name is
# appended by plain string concatenation.
EXP_FOLDERS = [
    "../logs/2023-05-22/47/",
    "../logs/2023-05-22/44/",
#     "../logs/2023-05-22/45/",
]

In [None]:
# Keep only the rows assigned to the validation split, re-indexed from 0.
is_val = df['split'] == "val"
df_val = df.loc[is_val].reset_index(drop=True)

In [None]:
# Ensemble: element-wise mean of the saved validation predictions of every
# experiment listed in EXP_FOLDERS.
pred_val = np.mean(
    [np.load(folder + "pred_val.npy") for folder in EXP_FOLDERS],
    axis=0,
)

In [None]:
# Predicted class index = position of the largest averaged score on the last axis.
# assumes the rows of pred_val are in the same order as df_val — TODO confirm
df_val['pred'] = np.argmax(pred_val, axis=-1)

In [None]:
from util.plots import plot_confusion_matrix

In [None]:
# Confusion matrix of the averaged predictions against the targets, titled
# with the overall accuracy.
# NOTE(review): predictions are passed before targets — confirm this matches
# the argument order plot_confusion_matrix expects.
plot_confusion_matrix(
    df_val['pred'],
    df_val['target'],
    display_labels=CLASSES,
    normalize=None,
)
val_acc = (df_val['pred'] == df_val['target']).mean()
plt.xticks(rotation=45)
plt.title(f"Acc = {val_acc:.4f}")
plt.show()

In [None]:
# df_err = df_val[df_val['pred'] != df_val['target']].reset_index(drop=True)

# dataset = ClsDataset(df_err, get_transfos(augment=False, resize=(256, 384)))

# for i in tqdm(range(len(dataset))):
#     img, y, _ = dataset[i]
    
#     plt.imshow(img.numpy().transpose(1, 2, 0))
#     plt.title(f"GT  {CLASSES[int(y)]} - PRED {CLASSES[df_err.pred[i]]}")
#     plt.show()

# #     break

### Inference

In [None]:
from inference.main import kfold_inference

In [None]:
pred_val = kfold_inference(df_val, EXP_FOLDERS[0])

In [None]:
df_val['pred_inf'] = pred_val.argmax(-1)

In [None]:
# Confusion matrix of the re-run inference predictions against the targets,
# titled with the overall accuracy.
# NOTE(review): predictions are passed before targets — confirm this matches
# the argument order plot_confusion_matrix expects.
plot_confusion_matrix(
    df_val['pred_inf'],
    df_val['target'],
    display_labels=CLASSES,
    normalize=None,
)
inf_acc = (df_val['pred_inf'] == df_val['target']).mean()
plt.xticks(rotation=45)
plt.title(f"Acc = {inf_acc:.4f}")
plt.show()

Done!