In [2]:
import torch
from dataset import prepare_dataset
from transformers import AutoTokenizer, AutoModelForSequenceClassification

import sys

sys.path.append("..")
from config import NUM_LABELS

# Define the model

In [6]:
# Checkpoint identifier handed to both `from_pretrained` calls below, so the
# tokenizer and the encoder weights always come from the same source.
model_checkpoint = "distilbert/distilroberta-base"

# Use the GPU when one is visible to torch, otherwise stay on the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

tokenizer = AutoTokenizer.from_pretrained(model_checkpoint)

# The classifier head is sized to NUM_LABELS outputs. The load warning recorded
# under this cell reports that the head weights are newly initialised, i.e.
# they are only meaningful after fine-tuning.
model = AutoModelForSequenceClassification.from_pretrained(
    model_checkpoint,
    num_labels=NUM_LABELS,
).to(device)

Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at distilbert/distilroberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [5]:
# Build the dataset with the project helper `prepare_dataset`, passing it the
# tokenizer loaded above. Later cells read the "train" and "val" entries.
# NOTE(review): the split layout and label encoding are defined in dataset.py,
# not in this notebook — confirm there before relying on them.
dataset = prepare_dataset(tokenizer)

100%|██████████| 211225/211225 [00:01<00:00, 144347.19it/s]
Map: 100%|██████████| 168980/168980 [00:07<00:00, 21313.06 examples/s]
Map: 100%|██████████| 21122/21122 [00:00<00:00, 23128.87 examples/s]
Map: 100%|██████████| 21123/21123 [00:01<00:00, 19552.48 examples/s]


# Trainer

In [7]:
from transformers import Trainer, TrainingArguments, DataCollatorForTokenClassification

# Batch collator handed to the trainer; `padding=True` is forwarded to it.
# NOTE(review): this is the *token*-classification collator, while the model is
# a sequence classifier. `compute_loss` slices labels to NUM_LABELS, which
# suggests this collator widens the label tensor — confirm whether
# `DataCollatorWithPadding` was intended.
data_collator = DataCollatorForTokenClassification(tokenizer, padding=True)

# Training configuration, grouped by concern.
training_args = TrainingArguments(
    # Checkpointing: write to ./results once per epoch, keep at most 10.
    output_dir="./results",
    save_strategy="epoch",
    save_total_limit=10,
    # Optimisation schedule.
    learning_rate=5e-5,
    num_train_epochs=3,
    per_device_train_batch_size=32,
    per_device_eval_batch_size=32,
    # Evaluate once per epoch and, when training ends, reload the checkpoint
    # with the highest `eval_f1` (the key reported by `compute_metrics`).
    evaluation_strategy="epoch",
    metric_for_best_model="eval_f1",
    greater_is_better=True,
    load_best_model_at_end=True,
)

## Compute Metrics

In [8]:
from sklearn.metrics import (
    precision_score,
    recall_score,
    f1_score,
    accuracy_score,
    hamming_loss,
    roc_curve,
)


def compute_metrics(pred, threshold=0.5):
    """Compute multi-label evaluation metrics for the Hugging Face ``Trainer``.

    Args:
        pred: Evaluation result exposing ``predictions`` and ``label_ids``.
            ``predictions`` holds the raw model outputs (logits): the trainer
            in this notebook optimises ``BCEWithLogitsLoss``, so no sigmoid
            has been applied to them yet.
        threshold: Probability cut-off, strictly between 0 and 1, above which
            a label counts as predicted. Defaults to 0.5.

    Returns:
        dict with macro-averaged precision, recall and F1 plus subset accuracy.
        ``eval_f1`` is the key referenced by ``metric_for_best_model``.

    Raises:
        ValueError: if ``threshold`` is not strictly between 0 and 1.
    """
    import math  # local import so this cell does not depend on another cell's imports

    if not 0.0 < threshold < 1.0:
        raise ValueError("threshold must be a probability strictly between 0 and 1")

    logits = pred.predictions
    if isinstance(logits, tuple):
        # Some models return several outputs; the logits come first.
        logits = logits[0]

    # Keep only as many label columns as there are logits. `compute_loss` in
    # this notebook applies the same slice (`labels[:, :NUM_LABELS]`).
    # NOTE(review): presumably the collator pads labels past NUM_LABELS —
    # confirm; if labels are already (n_samples, n_labels) this is a no-op.
    labels = pred.label_ids[:, : logits.shape[-1]]

    # The threshold is a probability but the predictions are logits. Sigmoid is
    # monotonic, so sigmoid(x) > t is the same as x > log(t / (1 - t)).
    # Comparing in logit space avoids exp() overflow; t = 0.5 maps to 0.0.
    logit_threshold = math.log(threshold / (1.0 - threshold))
    preds = (logits > logit_threshold).astype(int)

    precision_macro = precision_score(labels, preds, average="macro", zero_division=0)
    recall_macro = recall_score(labels, preds, average="macro", zero_division=0)
    f1_macro = f1_score(labels, preds, average="macro", zero_division=0)
    accuracy = accuracy_score(labels, preds)

    # NOTE(review): the "*_micro" keys are kept so existing logs and dashboards
    # stay comparable, but the values are macro-averaged (average="macro").
    metrics = {
        "precision_micro": precision_macro,
        "recall_micro": recall_macro,
        "eval_accuracy": accuracy,  # For Hugging Face Trainer
        "eval_f1": f1_macro,  # For Hugging Face Trainer
    }

    return metrics

## Define the trainer

In [17]:
class MultiLabelTrainer(Trainer):
    """``Trainer`` whose loss is binary cross-entropy over independent labels."""

    def compute_loss(self, model, inputs, return_outputs=False, num_items_in_batch=None):
        """Return the multi-label BCE-with-logits loss for one batch.

        Args:
            model: The model being trained; called as ``model(**inputs)``.
            inputs: Batch dict. Its ``"labels"`` entry is popped, so the model
                never sees the labels and computes no loss of its own.
            return_outputs: When true, return ``(loss, outputs)``.
            num_items_in_batch: Accepted and ignored. Newer ``Trainer``
                versions pass this argument to ``compute_loss``; without it
                the override raises ``TypeError`` there.

        Returns:
            The scalar loss, or ``(loss, outputs)`` if ``return_outputs``.
        """
        labels = inputs.pop("labels")
        # Keep the first NUM_LABELS columns and cast to float, as BCE expects
        # float targets with the same shape as the logits.
        # NOTE(review): the slice implies the collated labels can be wider
        # than NUM_LABELS — confirm against the data collator.
        labels = labels[:, :NUM_LABELS].float()
        outputs = model(**inputs)
        logits = outputs.logits
        loss_fn = torch.nn.BCEWithLogitsLoss()
        loss = loss_fn(logits, labels)
        return (loss, outputs) if return_outputs else loss


# Assemble the trainer from the objects built in the cells above.
trainer = MultiLabelTrainer(
    # Model and run configuration.
    model=model,
    args=training_args,
    # Batching and evaluation hooks.
    data_collator=data_collator,
    compute_metrics=compute_metrics,
    # Data: optimise on "train", evaluate on "val".
    train_dataset=dataset["train"],
    eval_dataset=dataset["val"],
)

In [18]:
# Start fine-tuning with the trainer configured above.
# NOTE: the output recorded under this cell ends in KeyboardInterrupt with no
# epoch logged, so this notebook holds no completed training run.
trainer.train()

Epoch,Training Loss,Validation Loss


KeyboardInterrupt: 