In [None]:
%load_ext autoreload
%autoreload 2
%load_ext tensorboard

In [None]:
import os

import torch
from accelerate import Accelerator
from torch import nn

# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# init accelerator
# accelerator = Accelerator(fp16=True)
# device = accelerator.device

# Training hyper-parameters.
EPOCHS = 20
BATCH_SIZE = 4
# Number of samples that should contribute to a single optimizer step.
EFFECTIVE_BATCH_SIZE = 32
# Guard against a silent floor-division that would shrink the effective batch.
assert EFFECTIVE_BATCH_SIZE % BATCH_SIZE == 0, "EFFECTIVE_BATCH_SIZE must be a multiple of BATCH_SIZE"
# Number of mini-batches whose gradients are accumulated per optimizer step.
GRAD_ACCUM_STEPS = EFFECTIVE_BATCH_SIZE // BATCH_SIZE

# Bare expression so the notebook cell displays the selected device.
device

## Get datasets: alternative – Hugging Face

In [None]:
from datasets import load_dataset
from torch.utils.data import DataLoader
from transformers.models.herbert.tokenization_herbert_fast import HerbertTokenizerFast
# Fast HerBERT tokenizer for the "allegro/herbert-base-cased" checkpoint.
tokenizer = HerbertTokenizerFast.from_pretrained("allegro/herbert-base-cased")

# KLEJ NKJP-NER dataset fetched through the `datasets` library.
raw_datasets = load_dataset("allegro/klej-nkjp-ner")

# Map every distinct training-split target to an integer id, numbered from 0
# in the order in which `unique` returns the labels.
target_mapper = dict(
    (label, index)
    for index, label in enumerate(raw_datasets['train'].unique('target'))
)

def tokenize_function(example):
    """Tokenize a batch of sentences and attach integer class labels.

    Intended for ``Dataset.map(..., batched=True)``: ``example['sentence']``
    and ``example['target']`` are iterated as parallel collections.

    Args:
        example: batch mapping with ``'sentence'`` and ``'target'`` entries.

    Returns:
        The tokenizer output (truncation enabled) extended with a ``'labels'``
        list holding the ``target_mapper`` id of every target.

    Raises:
        KeyError: if a target is missing from ``target_mapper``.
    """
    encoded = tokenizer(example['sentence'], truncation=True)
    label_ids = []
    for target in example['target']:
        label_ids.append(target_mapper[target])
    encoded['labels'] = label_ids
    return encoded

# Tokenize every split in batches, drop the listed columns,
# and have the datasets return torch tensors.
tokenized_datasets = (
    raw_datasets
    .map(tokenize_function, batched=True)
    .remove_columns(['sentence', 'token_type_ids', 'target'])
    .with_format('torch')
)

from transformers import DataCollatorWithPadding

def get_dataloaders(tokenizer, tokenized_datasets, batch_size=None):
    """Build the training and evaluation ``DataLoader`` objects.

    Args:
        tokenizer: tokenizer handed to ``DataCollatorWithPadding`` so that the
            examples of each batch are padded when they are collated.
        tokenized_datasets: mapping that provides the ``'train'`` and
            ``'validation'`` splits.
        batch_size: number of examples per batch for both loaders. ``None``
            (the default) falls back to the module-level ``BATCH_SIZE``, which
            keeps existing two-argument calls working unchanged.

    Returns:
        A ``(train, test)`` tuple: a shuffled loader over the ``'train'`` split
        and an unshuffled loader over the ``'validation'`` split. Note that the
        loader named "test" is backed by the validation split.
    """
    if batch_size is None:
        batch_size = BATCH_SIZE

    data_collator = DataCollatorWithPadding(tokenizer=tokenizer)
    train_set = tokenized_datasets['train']
    # The evaluation loader is built from the validation split.
    validation_set = tokenized_datasets['validation']

    train = DataLoader(dataset=train_set, shuffle=True, batch_size=batch_size, collate_fn=data_collator)
    test = DataLoader(dataset=validation_set, shuffle=False, batch_size=batch_size, collate_fn=data_collator)

    return train, test


# Build both loaders; `test_loader` iterates over the 'validation' split.
train_loader, test_loader = get_dataloaders(tokenizer, tokenized_datasets)

In [None]:
from transformers import AutoModelForSequenceClassification
# Load the "allegro/herbert-base-cased" checkpoint with a sequence-classification
# head that has one output per distinct label in `target_mapper`.
polemo_herbert = AutoModelForSequenceClassification.from_pretrained("allegro/herbert-base-cased", num_labels=len(target_mapper))
# polemo_herbert

In [None]:
# set up the optimizer and LR scheduler (the accelerator is currently disabled)
from transformers import AdamW, get_cosine_schedule_with_warmup
from trainers.utils import configure_optimizer

# Optimizer built by the project helper from the model's `.bert` module and its
# `.classifier` head, with a separate learning rate for each.
# NOTE(review): how `configure_optimizer` groups parameters is defined in
# trainers.utils and is not visible here — verify there.
optim = configure_optimizer(
    polemo_herbert.bert,
    polemo_herbert.classifier,
    AdamW,
    lr_backbone=5e-5,
    lr_head=5e-4,
    weight_decay=1e-3,
)

# CHANGED HERE
# train_loader, test_loader, polemo_model, optim = accelerator.prepare(
#     train_loader, test_loader, polemo_model, optim)

# Loaders keyed by phase name, as expected by the trainer.
loaders = dict(train=train_loader, test=test_loader)
# from torch.optim.lr_scheduler import CosineAnnealingLR
# scheduler = CosineAnnealingLR(optim, len(train_loader) // GRAD_ACCUM_STEPS * EPOCHS, 0)

# Batches per epoch divided by the accumulation factor, times the epoch count.
# Presumably the number of optimizer steps the trainer performs — TODO confirm
# against the trainer's stepping logic.
NUM_TRAINING_STEPS = (len(train_loader) // GRAD_ACCUM_STEPS) * EPOCHS

# NOTE(review): warmup covers half of all training steps and `num_cycles` is
# set to EPOCHS — confirm that both are intentional.
scheduler = get_cosine_schedule_with_warmup(
    optimizer=optim,
    num_warmup_steps=int(0.5 * NUM_TRAINING_STEPS),
    num_training_steps=NUM_TRAINING_STEPS,
    num_cycles=EPOCHS,
)

In [None]:
# polemo_herbert.bert.requires_grad_(False)
# polemo_herbert.bert.pooler.requires_grad_(True)

# for name, params in polemo_herbert.named_parameters():
#     if params.requires_grad and ('encoder' in name or 'embeddings' in name):
#         params.requires_grad = False
#     print(name, params.requires_grad)

In [None]:
from trainers.vanillaTrainerClassifier import VanillaTrainerClassifier

# CHANGED HERE
# Everything the trainer needs; the model and the loss are moved to `device`.
params_trainer = dict(
    model=polemo_herbert.to(device),
    tokenizer=tokenizer,
    loaders=loaders,
    criterion=nn.CrossEntropyLoss().to(device),
    optim=optim,
    scheduler=scheduler,
    # accelerator=accelerator,
    device=device,
)
trainer = VanillaTrainerClassifier(**params_trainer)

In [None]:
%tensorboard --logdir=classification

In [None]:
import collections
# Per-epoch run settings bundled in an ad-hoc named tuple.
config_run_epoch = collections.namedtuple(
    'RE', ['save_interval', 'grad_accum_steps', 'running_step']
)(save_interval=20, grad_accum_steps=GRAD_ACCUM_STEPS, running_step=40)

# CHANGED HERE
# Arguments of the experiment run: epoch range, experiment name,
# per-epoch configuration and the random seed.
params_run = dict(
    epoch_start=0,
    epoch_end=EPOCHS,
    exp_name='herbert_ner_replication, hf_dataset, backbone classifier split, dynamically_freeze_layers, step:2',
    config_run_epoch=config_run_epoch,
    random_seed=42,
)

trainer.run_exp(**params_run)

In [None]:
# Stop the run object held by the trainer's logger
# (presumably closes the experiment-tracking session — verify against the trainer's logger).
trainer.n_logger.run.stop()