In [None]:
# Re-import edited modules automatically before each cell executes
# (autoreload mode 2 covers all imported modules).
%load_ext autoreload
%autoreload 2
# Register the `%tensorboard` magic that a later cell uses to open the dashboard.
%load_ext tensorboard

In [None]:
import os

import torch
from accelerate import Accelerator
from torch import nn

# Train on the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Disabled alternative: let Hugging Face Accelerate pick the device.
# accelerator = Accelerator(device_placement=True, fp16=True, mixed_precision='fp16')
# device = accelerator.device

EPOCHS = 10
BATCH_SIZE = 8
# Target number of samples per optimizer update, assuming the trainer steps
# the optimizer once every GRAD_ACCUM_STEPS batches (verify in the trainer).
EFFECTIVE_BATCH_SIZE = 32
# Clamp to 1: a BATCH_SIZE above EFFECTIVE_BATCH_SIZE would otherwise yield 0
# and cause a division by zero when the scheduler length is computed.
GRAD_ACCUM_STEPS = max(1, EFFECTIVE_BATCH_SIZE // BATCH_SIZE)

In [None]:
from torch.utils.data import DataLoader
from evaluation.DatasetLoaders import KlejDataset
from transformers import AutoTokenizer
# Load the tokenizer published under the `allegro/herbert-base-cased` model id;
# it is handed to the datasets and to the trainer in later cells.
tokenizer = AutoTokenizer.from_pretrained("allegro/herbert-base-cased")


def get_dataloaders(tokenizer, path_train, path_test):
    """Create the training and evaluation DataLoaders for a KLEJ dataset.

    Uses the notebook-level `device` and `BATCH_SIZE` globals.

    Args:
        tokenizer: tokenizer forwarded to both `KlejDataset` instances.
        path_train: path to the training split file.
        path_test: path to the evaluation split file.

    Returns:
        A `(train_loader, test_loader, labels_map)` tuple, where `labels_map`
        is the `labels_map` attribute of the training dataset.
    """
    training_data = KlejDataset(path_train, tokenizer, device)
    label_mapping = training_data.labels_map
    print(label_mapping)

    # The evaluation dataset receives the training label map as its fourth
    # argument, presumably so both splits share one label encoding.
    evaluation_data = KlejDataset(path_test, tokenizer, device, label_mapping)

    # Only the training split is shuffled.
    training_batches = DataLoader(training_data, batch_size=BATCH_SIZE, shuffle=True)
    evaluation_batches = DataLoader(evaluation_data, batch_size=BATCH_SIZE, shuffle=False)

    return training_batches, evaluation_batches, label_mapping

# PolEmo2.0-in splits from the KLEJ benchmark; note that the "test" loader is
# built from the dev split.
dataset_train_path, dataset_test_path = (
    "datasets/klej_polemo2.0-in/train.tsv",
    "datasets/klej_polemo2.0-in/dev.tsv",
)

train_loader, test_loader, labels_map = get_dataloaders(
    tokenizer=tokenizer,
    path_train=dataset_train_path,
    path_test=dataset_test_path,
)

In [None]:
# Checkpoint with the student weights used to initialise the classifier's encoder.
distil_path = 'weights/plain_distil/2022-06-26_03-19-38/checkpoints/student_orginal_training.pth'

from models.klej.bert_polemo import BertPolemo
from models.distil_student import creat_student

# Rebuild the student architecture and restore its weights from the checkpoint.
# NOTE(review): torch.load unpickles the file, so only load trusted checkpoints.
student = creat_student()
student.load_state_dict(torch.load(distil_path, map_location=device))

# Dropout used for fine-tuning. This mutates the student's config in place,
# and that same config object is then used to build the classifier.
student.config.hidden_dropout_prob = 0.2

polemo_model = BertPolemo(student.config, len(labels_map))

# strict=False tolerates key mismatches, which can hide the case where few or
# no weights are actually copied. Report the mismatches instead of dropping them.
load_report = polemo_model.bert.load_state_dict(student.state_dict(), strict=False)
if load_report.missing_keys or load_report.unexpected_keys:
    print(f"Encoder keys not initialised from the student: {load_report.missing_keys}")
    print(f"Student keys ignored by the encoder: {load_report.unexpected_keys}")

# Move the model to the target device now (previously a no-op self-assignment)
# so that the optimizer created afterwards is built from on-device parameters.
polemo_model = polemo_model.to(device)

In [None]:
# Optimizer and LR scheduler setup (the Accelerate wrapping below is disabled)
from transformers import AdamW, get_cosine_schedule_with_warmup
from trainers.utils import configure_optimizer

# Optimizer for the classifier, assembled by the project helper.
optim = configure_optimizer(polemo_model, AdamW, weight_decay=1e-3, lr=2e-5)

# Changed here: wrapping with Accelerate is switched off.
# train_loader, test_loader, polemo_model, optim = accelerator.prepare(
#     train_loader, test_loader, polemo_model, optim)

from torch.optim.lr_scheduler import CosineAnnealingLR

# Cosine annealing down to 0 over the whole run. The horizon is
# (batches per epoch // accumulation steps) * epochs, i.e. it is counted in
# optimizer updates on the assumption the trainer steps the scheduler once
# per update (verify in the trainer).
scheduler = CosineAnnealingLR(
    optim,
    T_max=len(train_loader) // GRAD_ACCUM_STEPS * EPOCHS,
    eta_min=0,
)

# Loaders keyed by split name, as passed to the trainer.
loaders = dict(train=train_loader, test=test_loader)

# Disabled alternative: cosine schedule with a 1% warmup.
# NUM_TRAINING_STEPS = len(train_loader) // GRAD_ACCUM_STEPS * EPOCHS
# scheduler = get_cosine_schedule_with_warmup(
#         optimizer=optim,
#         num_cycles=EPOCHS,
#         num_warmup_steps=int(0.01 * NUM_TRAINING_STEPS),
#         num_training_steps=NUM_TRAINING_STEPS)

In [None]:
from trainers.vanillaTrainerClassifier import VanillaTrainerClassifier

# Changed here: the model and the loss are placed on `device` directly
# instead of going through the accelerator.
params_trainer = dict(
    model=polemo_model.to(device),
    tokenizer=tokenizer,
    loaders=loaders,
    criterion=nn.CrossEntropyLoss().to(device),
    optim=optim,
    scheduler=scheduler,
    # accelerator=accelerator,
    device=device,
)
trainer = VanillaTrainerClassifier(**params_trainer)

In [None]:
# Open TensorBoard inline, reading logs from the `classification` directory.
# NOTE(review): the experiment below is named 'classification_polemo-in';
# confirm the trainer writes its logs under this directory.
%tensorboard --logdir=classification

In [None]:
import collections
# Per-epoch settings handed to the trainer. Fields are given by keyword so the
# numbers are self-describing.
# NOTE(review): the units of save_interval / running_step (steps vs. batches)
# are defined by the trainer; confirm there.
RunEpochConfig = collections.namedtuple('RE', ['save_interval', 'grad_accum_steps', 'running_step'])
config_run_epoch = RunEpochConfig(
    save_interval=20,
    grad_accum_steps=GRAD_ACCUM_STEPS,
    running_step=40,
)

# Changed here.
params_run = {
    'epoch_start': 0,
    'epoch_end': EPOCHS,
    # Plain string: the former f-string had no placeholders.
    'exp_name': 'classification_polemo-in',
    'config_run_epoch': config_run_epoch,
    'random_seed': 42
}

trainer.run_exp(**params_run)

In [None]:
# Stop the run held by the trainer's logger now that the experiment is over.
# NOTE(review): `n_logger` looks like a Neptune logger; confirm in the trainer.
trainer.n_logger.run.stop()

In [None]:
# Display the student's configuration. It reflects the hidden_dropout_prob=0.2
# override applied when the classifier was built earlier in this notebook.
student.config