**About**: This notebook preprocesses the landmark files and is used to train models.

In [167]:
# Optional nb_black extension, left disabled.
# %load_ext nb_black
# Enable autoreload in mode 2 so that imported modules are reloaded before
# each cell runs and edits to the project sources are picked up without
# restarting the kernel.
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [168]:
# Work from the source directory so that the project-local imports resolve.
# Uses the explicit %cd magic rather than the bare `cd`, which depends on
# automagic being enabled and on no variable named `cd` existing.
# NOTE(review): the path is relative, so re-running this cell moves again.
%cd ../src/

/workspace/kaggle_islr/src


## Initialization

### Imports

In [169]:
import os

# Restrict this process to the first GPU. This is set before torch is imported
# so that it is guaranteed to be in place before any CUDA initialisation.
os.environ['CUDA_VISIBLE_DEVICES'] = "0"

import torch

print(torch.__version__)
# Name of the visible CUDA device at index 0 (requires a CUDA-capable GPU).
device = torch.cuda.get_device_name(0)
print(device)

1.14.0a0+410ce96
Tesla V100-SXM2-32GB-LS


In [170]:
import os
import sys
import glob
import json
import cudf
import torch
import warnings
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

from tqdm import tqdm
from sklearn.metrics import *

# Display settings for dataframe previews. Full option names are used for both
# options: abbreviated names rely on pattern matching and can become ambiguous
# if pandas adds similarly named options.
pd.set_option('display.width', 500)
pd.set_option('display.max_colwidth', 100)

In [171]:
# from training.main import k_fold

# from utils.logger import (
#     prepare_log_folder,
#     save_config,
#     create_logger,
#     init_neptune
# )
# from utils.metrics import pfbeta

from params import *
from utils.plots import *
# from data.dataset import *
from data.preparation import *
# from data.transforms import get_transfos

# from model_zoo.models import define_model

## Data

In [172]:
# Build the metadata table (one row per sequence: path, participant, sign, target).
# NOTE(review): prepare_data and DATA_PATH presumably come from the star imports
# of data.preparation and params — confirm.
df = prepare_data(DATA_PATH)

In [173]:
# Preview the first rows to sanity-check the loaded table.
df.head()

Unnamed: 0,path,participant_id,sequence_id,sign,target
0,../input/train_landmark_files/26734/1000035562.parquet,26734,1000035562,blow,25
1,../input/train_landmark_files/28656/1000106739.parquet,28656,1000106739,wait,232
2,../input/train_landmark_files/16069/100015657.parquet,16069,100015657,cloud,48
3,../input/train_landmark_files/25571/1000210073.parquet,25571,1000210073,bird,23
4,../input/train_landmark_files/62590/1000240708.parquet,62590,1000240708,owie,164


In [196]:
def process_sign(path):
    """
    Loads one landmark parquet file and converts it to a per-frame numpy array.

    Landmark indices are offset per landmark type so that they do not collide
    across types, the type is encoded as an integer, and missing coordinates
    are replaced with -10 before the rows are grouped by frame.

    Args:
        path (str): Path to the parquet file of a single sequence.

    Returns:
        np.ndarray: One entry per frame, ordered by frame. Each entry holds the
            "type", "landmark_index", "x", "y" and "z" lists of that frame.

    Raises:
        AssertionError: If missing values remain after the fill step.
    """
    type_offsets = {"face": 0, "pose": 468, "left_hand": 501, "right_hand": 522}
    type_codes = {"face": 0, "pose": 1, "left_hand": 2, "right_hand": 3}

    sign = cudf.read_parquet(path, columns=['frame', 'type', 'landmark_index', 'x', 'y', 'z'])

    # The offset must be computed while "type" still holds the string labels.
    sign["landmark_index"] = sign["landmark_index"] + sign["type"].map(type_offsets)
    sign['type'] = sign['type'].map(type_codes)

    # Assign the filled columns back explicitly: fillna(inplace=True) on a
    # sign[[...]] selection acts on a temporary frame, not reliably on `sign`.
    for axis in ('x', 'y', 'z'):
        sign[axis] = sign[axis].fillna(-10)

    assert not sign.isna().sum().max(), f"Missing values left in {path}"

    # sort=True makes the frame order explicit instead of relying on the
    # library's default group ordering.
    dfg = (
        sign[["frame", "type", "landmark_index", "x", "y", "z"]]
        .astype('float32')
        .groupby('frame', sort=True)
        .agg(list)
    )
    return np.array(dfg.to_pandas().values.tolist())

In [None]:
save_folder = '../input/train_landmark_files_processed/'
# np.save does not create missing directories, so make sure the target exists.
os.makedirs(save_folder, exist_ok=True)

# Iterate over the path values directly instead of df['path'][i], which is a
# label lookup and only works while df keeps a default 0..n-1 index.
for path in tqdm(df['path'].values):
    x = process_sign(path)

    # "<parent folder>_<file stem>.npy", e.g. 26734_1000035562.npy
    parts = path.split('/')
    name = f"{parts[-2]}_{parts[-1].split('.')[0]}.npy"
    np.save(save_folder + name, x)

 14%|█▍        | 13157/94477 [25:55<4:54:56,  4.60it/s] 

### Loader

In [None]:
# NOTE(review): get_transfos and BreastCropDataset are not visibly imported
# (the data.transforms / data.dataset imports are commented out), so unless a
# star import provides them this cell presumably fails on a fresh kernel —
# confirm or remove.
transforms = get_transfos(augment=False, resize=None, strength=3)

dataset = BreastCropDataset(df, transforms=transforms)

In [None]:
# df.drop_duplicates(subset=['patient_id', 'laterality'], keep="first")[['site_id', 'cancer']].groupby("site_id").mean()

## Model

In [None]:
# Build the model.
# NOTE(review): define_model is only imported in a commented-out line of the
# visible cells — confirm it is in scope before running.
model = define_model(
    'eca_nfnet_l1',
    num_classes=1,
    num_classes_aux=3,
    n_channels=3,
    reduce_stride=False,
    drop_path_rate=0.,
    use_gem=True,
    crops=True,
)

In [None]:
# Duplicate `imgs` along a new leading dimension to form a batch of two,
# then run it through the model.
x = torch.cat((imgs[None], imgs[None]), dim=0)

pred, pred_aux = model(x)

In [None]:
# Evaluate the model's loss on the predictions against an all-ones target.
loss = model.loss(
    pred,
    torch.ones(2),
    torch.nn.CrossEntropyLoss(reduction="mean"),
    0.1,
)

## Training

In [None]:
# Backbone names, grouped by architecture family.
MODELS = (
    # ResNets
    ["resnet18", "resnext50_32x4d", "seresnext50_32x4d"]
    # EfficientNets
    + [
        "tf_efficientnetv2_s",
        "tf_efficientnetv2_m",
        "tf_efficientnetv2_b0",
        "tf_efficientnetv2_b1",
    ]
)

In [None]:
# import torch_performance_linter

In [None]:
class Config:
    """
    Parameters used for training.

    Plain namespace of class attributes, grouped by purpose: general run
    settings, image inputs, k-fold split, model definition, and the loss /
    data / optimizer dictionaries.
    """
    # General
    seed = 42
    verbose = 1
    device = "cuda"
    save_weights = True

    # Images
    img_folder = "crop_1024_w/"
    resize = None
    aug_strength = 1

    # k-fold
    k = 4
    folds_file = f"../input/folds_{k}.csv"  # file name depends on k
    selected_folds = [0, 1, 2, 3]

    # Model
    name = "tf_efficientnet_b0"
    pretrained_weights = None # PRETRAINED_WEIGHTS[name]  # None
    num_classes = 1
    num_classes_aux = 3
    n_channels = 3
    reduce_stride = False

    # Training
    loss_config = {
        "name": "bce",
        "smoothing": 0.,
        "activation": "sigmoid",
        "aux_loss_weight": 0.1,
        "pos_weight": None,
        "activation_aux": "softmax",
    }

    data_config = {
        "batch_size": 16,
        "val_bs": 16,
        "mix": "mixup",
        "mix_proba": 0.,
        "mix_alpha": 4.,
        "additive_mix": False,
        "use_len_sampler": False,
        "use_balanced_sampler": False,
        "use_weighted_sampler": False,
        "sampler_weights": [1, 1, 1, 1],  # pos, birads 0, 1, 2
        "use_custom_collate": False,
    }

    optimizer_config = {
        "name": "Ranger",
        "lr": 5e-4,
        "warmup_prop": 0.,
        "betas": (0.9, 0.999),
        "max_grad_norm": 10.,
    }

    epochs = 5

    use_fp16 = True

    # `verbose` is defined once, in the General section above.
    verbose_eval = 500

In [None]:
# Run switches: debug flag, plus the log folder and run handle, which both
# start out unset.
DEBUG = True
log_folder = run = None

In [None]:
# Only set up the log folder, saved config and logger for non-debug runs;
# in debug mode log_folder keeps whatever value it already had.
if not DEBUG:
    log_folder = prepare_log_folder(LOG_PATH)
    print(f"Logging results to {log_folder}")
    config_df = save_config(Config, log_folder + "config.json")
    create_logger(directory=log_folder, name="logs.txt")
#     run = init_neptune(Config, log_folder)

# NOTE(review): this rebinds `df` and calls prepare_data with two arguments,
# unlike the single-argument call earlier in the notebook — confirm the signature.
df = prepare_data(DATA_PATH, DATA_PATH + Config.img_folder)

# df['cancer'] = (df['BIRADS'] <= 0).astype(int)
# df = df.dropna(axis=0).reset_index(drop=True)
# df = df.head(10000) if DEBUG else df

# NOTE(review): k_fold (like prepare_log_folder / save_config / create_logger)
# is only imported in commented-out lines of the visible cells — confirm it is
# in scope before running.
preds = k_fold(Config, df, log_folder=log_folder, run=run)

Done ! 