In [None]:
# Auto-reload edited modules before each cell executes, so changes to the
# project's local packages are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2
# Register the `%tensorboard` magic that is used further down in this notebook.
%load_ext tensorboard

In [None]:
import os

import torch
from accelerate import Accelerator
from torch import nn
from torchvision.io import read_image

# Device selection is delegated to the Accelerator, so no manual
# `torch.device("cuda" if ... else "cpu")` lookup is needed here.
#
# Mixed precision is requested only through `mixed_precision`. The legacy
# `fp16=True` flag duplicated that setting; it is deprecated in Accelerate and
# rejected with a TypeError by releases that no longer accept it.
accelerator = Accelerator(device_placement=True, mixed_precision='fp16')
device = accelerator.device

EPOCHS = 2
BATCH_SIZE = 8
# Target number of samples contributing to a single optimizer update.
EFFECTIVE_BATCH_SIZE = 200
# Mini-batches to accumulate so that BATCH_SIZE * GRAD_ACCUM_STEPS reaches
# EFFECTIVE_BATCH_SIZE (integer division: any remainder is dropped).
GRAD_ACCUM_STEPS = EFFECTIVE_BATCH_SIZE // BATCH_SIZE

In [None]:
# CHANGED HERE
# Build the student network with the project's factory function.
from models.distil_student import creat_student

student = creat_student()

# Tokenizer loaded from the pretrained HerBERT (base, cased) checkpoint.
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained(
    "allegro/herbert-base-cased"
)

In [None]:
from torch.utils.data import DataLoader
from data.datasets import CustomImageDataset, collate_fn


def _make_split(tsv_path, shuffle):
    """Build the dataset for one TSV split together with its DataLoader.

    Returns a ``(dataset, loader)`` pair; the loader batches with
    ``BATCH_SIZE``, pins memory and collates with the project's ``collate_fn``.
    """
    dataset = CustomImageDataset(tsv_path, sep='\t', tokenizer=tokenizer)
    loader = DataLoader(
        dataset,
        batch_size=BATCH_SIZE,
        shuffle=shuffle,
        pin_memory=True,
        collate_fn=collate_fn,
    )
    return dataset, loader


# The training split is shuffled; the dev split (used as the "test" loader)
# is iterated in dataset order.
train_dataset, train_loader = _make_split('data/klej_polemo2.0-in/train.tsv', shuffle=True)
test_dataset, test_loader = _make_split('data/klej_polemo2.0-in/dev.tsv', shuffle=False)

# Debug aid: uncomment to inspect a single collated batch.
# batch = next(iter(test_loader))
# batch

In [None]:
# Build the optimizer, hand loaders/model/optimizer to the accelerator, then
# create the learning-rate schedule.
from transformers import AdamW, get_cosine_schedule_with_warmup
from trainers.utils import configure_optimizer

# NOTE(review): `transformers.AdamW` is deprecated upstream in favour of
# `torch.optim.AdamW`; left untouched here because the two differ in defaults.
optim = configure_optimizer(student, AdamW, lr=1e-4, weight_decay=1e-3)

# CHANGED HERE
prepared = accelerator.prepare(train_loader, test_loader, student, optim)
train_loader, test_loader, student, optim = prepared

loaders = dict(train=train_loader, test=test_loader)

# The step count is derived from the *prepared* train loader and divided by the
# accumulation factor.
# NOTE(review): assumes the trainer steps the scheduler once per optimizer
# update rather than once per mini-batch — confirm against VanillaTrainer.
updates_per_epoch = len(train_loader) // GRAD_ACCUM_STEPS
NUM_TRAINING_STEPS = updates_per_epoch * EPOCHS

# Warm-up covers 1% of the schedule.
# NOTE(review): `num_cycles=EPOCHS` requests EPOCHS full cosine periods, which
# brings the LR back to its peak at the end of training — confirm this is intended.
scheduler = get_cosine_schedule_with_warmup(
    optimizer=optim,
    num_warmup_steps=int(0.01 * NUM_TRAINING_STEPS),
    num_training_steps=NUM_TRAINING_STEPS,
    num_cycles=EPOCHS,
)

In [None]:
from trainers.vanillaTrainer import VanillaTrainer

# CHANGED HERE
# Everything the trainer needs, collected as keyword arguments. `student` is
# passed without an explicit `.to(device)`; it was already handed to
# `accelerator.prepare` in an earlier cell.
params_trainer = dict(
    model=student,
    tokenizer=tokenizer,
    loaders=loaders,
    criterion=nn.CrossEntropyLoss().to(device),
    optim=optim,
    scheduler=scheduler,
    accelerator=accelerator,
    device=device,
)
trainer = VanillaTrainer(**params_trainer)

In [None]:
# Launch TensorBoard inline, reading event files from the `exps` directory.
# NOTE(review): presumably the trainer writes its logs under `exps/` — confirm.
%tensorboard --logdir=exps

In [None]:
import collections

# Per-epoch run settings bundled in a lightweight namedtuple.
config_run_epoch = collections.namedtuple(
    'RE', 'save_interval grad_accum_steps running_step'
)(save_interval=20, grad_accum_steps=GRAD_ACCUM_STEPS, running_step=30)

# CHANGED HERE
# Arguments for the whole experiment: epoch range, experiment name,
# per-epoch settings and the random seed.
params_run = dict(
    epoch_start=0,
    epoch_end=EPOCHS,
    exp_name='vanilla_training_student',
    config_run_epoch=config_run_epoch,
    random_seed=42,
)

trainer.run_exp(**params_run)

In [None]:
# Stop the run object held by the trainer's logger once training is done.
# NOTE(review): presumably this closes a remote experiment-tracking run — confirm.
logger_run = trainer.n_logger.run
logger_run.stop()

In [None]:
# Display the student model's configuration as the cell output.
# NOTE(review): `student` is the object returned by `accelerator.prepare`;
# assumes it still exposes `.config` in every launch mode — confirm.
student.config