In [None]:
# Enable the autoreload extension; mode 2 makes edits to imported modules
# take effect without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [None]:
cd ../src

In [None]:
import os
import ast
import json
import glob
import torch
import warnings
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

from tqdm import tqdm
from datetime import datetime
from collections import Counter

# Process-wide settings: restrict CUDA to device "0", set the tokenizers
# parallelism flag to "false", and silence FutureWarning messages.
os.environ.update(
    {
        "CUDA_VISIBLE_DEVICES": "0",
        "TOKENIZERS_PARALLELISM": "false",
    }
)
warnings.simplefilter("ignore", FutureWarning)

In [None]:
# from utils.tokenizers import update_tokenizers
# package_path = "/opt/conda/lib/python3.8/site-packages/transformers"
# input_dir = "../input/deberta_fast_tokenizer"

# update_tokenizers(package_path, input_dir)

In [None]:
from data.dataset import OttoDataset
from data.preparation import prepare_data
# from training.main import k_fold
from models import OttoTransformer

from utils.metrics import *
from utils.logger import prepare_log_folder, save_config, create_logger

from params import *

## Data

In [None]:
# prepare_data returns two objects for DATA_PATH: `df` (it carries the `fold`
# column used below) and `df_test`.
df, df_test = prepare_data(DATA_PATH)

In [None]:
# Keep only the rows assigned to fold 0 and renumber them from zero.
df_val = df.loc[df["fold"].eq(0)].reset_index(drop=True)

In [None]:
# Build one dataset per split under its own name. Previously both were bound
# to `dataset`, so the test-split dataset was constructed and thrown away
# immediately. `dataset` (first 10000 fold-0 rows) is what the cells below use.
dataset_kwargs = dict(max_len=410, max_trunc=100, train=False, pad=False)

dataset_test = OttoDataset(df_test.head(10000), test=True, **dataset_kwargs)
dataset = OttoDataset(df_val.head(10000), test=False, **dataset_kwargs)

In [None]:
# Measure the length (first dimension) of every sample's `ids` tensor.
# The scan is capped at the dataset size so that a split with fewer than
# 10000 samples does not raise IndexError.
# NOTE(review): assumes OttoDataset implements __len__ — confirm.
# `data` is deliberately left at notebook level: a later cell reuses the last
# sample fetched here.
n_samples = min(10000, len(dataset))

lens = []
for idx in tqdm(range(n_samples)):
    data = dataset[idx]
    lens.append(data['ids'].size(0))

# Only plot when enough samples were measured for the histogram to be useful.
if len(lens) > 100:
    fig, ax = plt.subplots(figsize=(15, 5))
    # Lengths above 70 are clipped into the last bin to keep the axis readable.
    sns.countplot(x=np.clip(lens, 0, 70), ax=ax)
    ax.set(xlabel="Sequence length (clipped to 70)", ylabel="Number of samples")
    plt.show()

In [None]:
# dataset = OttoTrainDataset(df_test, max_len=410, train=False)
# lens = []

# for idx in tqdm(range(10000)):
#     data = dataset[idx]
#     lens.append(data.shape[0])
    
# plt.figure(figsize=(15, 5))
# sns.countplot(x=np.clip(lens, 0, 70))
# plt.show()

In [None]:
# `copy` is not imported anywhere else in this notebook, so import it here
# rather than relying on a star import to leak it into the namespace.
import copy

# Targets ordered by key, then scored against themselves for the first 100
# entries. Each argument is an independent deep copy, so recall never sees the
# same object twice and `y` itself is not handed to it.
y = [dataset.targets[key] for key in sorted(dataset.targets)]
recall(copy.deepcopy(y[:100]), copy.deepcopy(y[:100]), k=20)

## Model

In [None]:
# model = NERTransformer("microsoft/deberta-v3-base", num_classes=3)
# Model used for the forward-pass check below: created from the "roberta-base"
# name with 3 classes and N_IDS ids.
model = OttoTransformer(
    "roberta-base",
    n_ids=N_IDS,
    num_classes=3,
)

In [None]:
# Turn the last sample fetched by the length scan into a batch of 16 identical
# rows. No device transfer happens here: the next cell moves the model and
# both inputs to the GPU together, so moving only `types` at this point was
# redundant and left the two tensors on different devices.
batch_size = 16

x = data['ids'].unsqueeze(0)
types = data['token_type_ids'].unsqueeze(0)

x = torch.cat([x] * batch_size, 0)
types = torch.cat([types] * batch_size, 0)

In [None]:
# Put the model and both input tensors on the GPU.
model = model.cuda()
x, types = (tensor.cuda() for tensor in (x, types))

In [None]:
# This forward pass is only used to inspect the output in the next cell, so
# run it under no_grad to avoid building an autograd graph that is never used.
with torch.no_grad():
    pred = model(x, types)

In [None]:
# Show the dimensions of the model output.
pred.shape

## Training

In [None]:
# Per-model lookup tables, keyed by model name. Both listed models currently
# share the same batch size and the same learning rate.
_DEBERTA_V3_MODELS = (
    "microsoft/deberta-v3-base",
    "microsoft/deberta-v3-large",
)

BATCH_SIZES = {model_name: 32 for model_name in _DEBERTA_V3_MODELS}

LRS = {model_name: 3e-5 for model_name in _DEBERTA_V3_MODELS}

In [None]:
# Experiment settings, stored as plain class attributes. The class object
# itself is what gets passed to save_config and k_fold further down.
class Config:
    # --- General ---
    seed = 2222
    device = "cuda"

    # --- Splits ---
    k = 4
    random_state = 2222
    selected_folds = [0, 1, 2, 3]
    folds_file = "/workspace/folds_kgd_4.csv"

    # --- Architecture ---
    # `name` also serves as the lookup key into BATCH_SIZES and LRS below.
    name = "microsoft/deberta-v3-base"

    pretrained_weights = None

    no_dropout = False
    use_conv = False
    use_lstm = False
    nb_layers = 1
    nb_ft = 128
    conv_kernel = 5
    drop_p = 0.1 if not no_dropout else 0
    multi_sample_dropout = False

    num_classes = 3
    n_ids = N_IDS

    # --- Texts ---
    max_len_train = 410
    max_len = 410

    # Alternatives kept from earlier edits of this cell:
    #   OUT_PATH + "pl_case5/"  or  OUT_PATH + "pl_6/df_pl.csv"
    extra_data_path = None

    # --- Training ---
    loss_config = dict(
        name="bce",  # ce, bce
        smoothing=0,  # 0.01
        activation="sigmoid",  # "sigmoid", "softmax"
    )

    data_config = dict(
        batch_size=BATCH_SIZES[name],
        val_bs=2 * BATCH_SIZES[name],  # validation batches are twice as large
        use_len_sampler=True,
        pad_token=1 if "roberta" in name else 0,
    )

    optimizer_config = dict(
        name="AdamW",
        lr=5e-5,
        lr_transfo=LRS[name],
        lr_decay=0.99,
        warmup_prop=0.1,
        weight_decay=1,
        betas=(0.5, 0.99),
        max_grad_norm=1.,
        # AWP
        use_awp=False,
        awp_start_step=1000,
        awp_lr=1,
        awp_eps=5e-5 if "xlarge" in name else 1e-3,
        awp_period=3,
        # SWA
        use_swa=False,
        swa_start=9400,
        swa_freq=500,
    )

    gradient_checkpointing = False
    acc_steps = 1
    epochs = 1

    use_fp16 = True

    # --- Logging verbosity ---
    verbose = 1
    verbose_eval = 1000

In [None]:
# No log folder by default; the training cell creates one only when DEBUG is
# switched off.
log_folder = None
DEBUG = True

In [None]:
# k_fold is called below, but its import is commented out in the imports cell,
# which raises NameError on a fresh kernel. Import it here, using the module
# path from that commented-out line.
from training.main import k_fold

# Outside of debug runs, create a log folder and persist the configuration
# and the logger output there. In debug runs log_folder keeps its prior value.
if not DEBUG:
    log_folder = prepare_log_folder(LOG_PATH)
    print(f"Logging results to {log_folder}")
    save_config(Config, log_folder + "config.json")
    create_logger(directory=log_folder, name="logs.txt")

# Launch training with the settings in Config; k_fold returns two objects,
# bound here as the validation and test predictions.
pred_val, pred_test = k_fold(
    Config,
    df,
    df_test=df_test,
    log_folder=log_folder,
)

Done