In [None]:
from transformers import RobertaTokenizerFast, RobertaForTokenClassification, TrainingArguments, Trainer
from datasets import load_dataset
from tqdm.notebook import tqdm

import utils, torch, os, pandas as pd, numpy as np, importlib, json, wandb, pickle

importlib.reload(utils);

In [None]:
# Read local secrets (the W&B API key is looked up from this dict in a later cell)
# from keys.json in the current working directory.
with open('keys.json') as keys_file:
    keys = json.load(keys_file)

In [None]:
# Fast RoBERTa tokenizer for the 'roberta-base' checkpoint.
# NOTE(review): add_prefix_space=True is presumably needed because
# utils.process_dataset tokenizes pre-split words -- confirm in utils.
tokenizer = RobertaTokenizerFast.from_pretrained(
    'roberta-base',
    add_prefix_space=True,
)

In [None]:
# Dataset card: https://huggingface.co/datasets/tner/ontonotes5
# Loaded fully into memory, using one worker process per CPU reported by the OS.
ds = load_dataset(
    "tner/ontonotes5",
    keep_in_memory=True,
    num_proc=os.cpu_count(),
)

# Label lookup tables provided by the local utils module
# (presumably label->tag and tag->label; verify against utils.load_label_mapping).
(ds_label_tag_mapping, ds_tag_label_mapping) = utils.load_label_mapping()

In [None]:
# Run the shared preprocessing helper over the train split first, then the
# validation split, using the same tokenizer for both.
train_tokenized_dataset, validation_tokenized_dataset = (
    utils.process_dataset(ds[split_name], tokenizer)
    for split_name in ('train', 'validation')
)

In [None]:
# Weights & Biases configuration, picked up from the environment by the
# "wandb" reporting integration. The API key comes from the `keys` dict
# loaded from keys.json in an earlier cell.
os.environ["WANDB_PROJECT"] = "roberta-training"
os.environ["WANDB_API_KEY"] = keys['wandb-api-key']

batch_size = 8
# Single source of truth for the epoch count: used both for the step estimate
# below and for TrainingArguments, so the two cannot drift apart.
num_epochs = 3

# Rough estimate of the total number of training steps
# (assumes a single device and no gradient accumulation -- TODO confirm).
training_steps = len(train_tokenized_dataset['input_ids']) * num_epochs / batch_size

# Evaluate and checkpoint about every 10% of the run. Clamp to at least 1:
# for very small datasets the truncation would otherwise yield 0, which is
# not a valid step interval.
steps_per_actions = max(1, int(training_steps * 0.10))

run_name = "roberta-training-default-dataset"

training_args = TrainingArguments(
    run_name=run_name,
    output_dir=f"./checkpoints/{run_name}",
    eval_strategy="steps",
    learning_rate=2e-5,
    per_device_train_batch_size=batch_size,
    per_device_eval_batch_size=batch_size,
    num_train_epochs=num_epochs,
    weight_decay=0.01,
    save_steps=steps_per_actions,
    eval_steps=steps_per_actions,
    logging_steps=1,  # log every step to W&B
    report_to="wandb"
)

In [None]:
from sklearn.metrics import accuracy_score

# RoBERTa with a token-classification head; the number of output classes is
# taken from the size of the label mapping loaded earlier.
model = RobertaForTokenClassification.from_pretrained(
    'roberta-base',
    num_labels=len(ds_label_tag_mapping),
)


def compute_metrics(eval_pred):
    """Compute token-level accuracy over the non-ignored positions only.

    Args:
        eval_pred: Pair ``(logits, labels)``. ``logits`` holds per-position
            class scores with the class axis last; ``labels`` holds integer
            class ids of matching leading shape, where ``-100`` marks
            positions that must not contribute to the metric.

    Returns:
        dict: ``{"accuracy": float}`` -- the fraction of non-ignored positions
        whose argmax prediction equals the label. ``0.0`` is returned when
        every position is ignored, so the metric is always a finite number.
    """
    logits, labels = eval_pred
    predictions = np.argmax(logits, axis=-1)

    flat_labels = np.ravel(np.asarray(labels))
    flat_predictions = np.ravel(predictions)

    # Exclude ignored positions from the score. Overwriting their predictions
    # with -100 (so they equal the label) would count every ignored position
    # as a correct prediction and inflate the accuracy.
    keep = flat_labels != -100
    if not keep.any():
        # Nothing to score; avoid taking the mean of an empty array (NaN).
        return {"accuracy": 0.0}

    is_correct = flat_predictions[keep] == flat_labels[keep]
    return {"accuracy": float(np.mean(is_correct))}

# Build the Trainer from the model, arguments, tokenized splits and metric
# function defined in the cells above.
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_tokenized_dataset,
    eval_dataset=validation_tokenized_dataset,
    compute_metrics=compute_metrics
)

# Always close the W&B run. If training fails or is interrupted, the run is
# marked as failed and the original exception is re-raised; otherwise an
# errored cell would leave the run open in the live kernel.
try:
    trainer.train()
except BaseException:
    wandb.finish(exit_code=1)
    raise
else:
    wandb.finish()