In [1]:
# from datasets import load_dataset

In [1]:
!pip install transformers[torch]

Collecting torch (from transformers[torch])
  Obtaining dependency information for torch from https://files.pythonhosted.org/packages/a7/ad/fbe7d4cffb76da4e478438853b51305361c719cff929ab70a808e7fb75e7/torch-2.2.1-cp310-cp310-manylinux1_x86_64.whl.metadata
  Downloading torch-2.2.1-cp310-cp310-manylinux1_x86_64.whl.metadata (26 kB)
Collecting accelerate>=0.21.0 (from transformers[torch])
  Obtaining dependency information for accelerate>=0.21.0 from https://files.pythonhosted.org/packages/1b/da/24a54b9205fce3bdbaad521c35944d0b0a2d292ac5ae921e484b76312b43/accelerate-0.27.2-py3-none-any.whl.metadata
  Using cached accelerate-0.27.2-py3-none-any.whl.metadata (18 kB)
Collecting sympy (from torch->transformers[torch])
  Obtaining dependency information for sympy from https://files.pythonhosted.org/packages/d2/05/e6600db80270777c4a64238a98d442f0fd07cc8915be2a1c16da7f2b9e74/sympy-1.12-py3-none-any.whl.metadata
  Using cached sympy-1.12-py3-none-any.whl.metadata (12 kB)
Collecting netw

In [None]:

import os

from datasets import load_dataset
from sklearn.metrics import accuracy_score, precision_recall_fscore_support
from transformers import (
    AutoModelForSequenceClassification,
    AutoTokenizer,
    Trainer,
    TrainingArguments,
    set_seed,
)
from transformers.trainer_utils import get_last_checkpoint

# Pull the translated Dutch dataset from the Hugging Face Hub by its repository id.
dataset = load_dataset(path="GroNLP/ik-nlp-22_transqe")

In [ ]:

# Fix the random seeds *before* the model is instantiated: the 3-way
# classification head placed on top of the pretrained encoder is randomly
# initialised, so without a seed every fresh kernel starts from different weights.
SEED = 42  # same value as the TrainingArguments default seed
set_seed(SEED)

# Multilingual BERT checkpoint; the tokenizer and the model must come from the same name.
model_name = "bert-base-multilingual-cased"
tokenizer = AutoTokenizer.from_pretrained(model_name)
# num_labels=3 sizes the classification head for a three-class task.
model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=3)

In [ ]:

def tokenize_function(examples):
    """Tokenize a batch of premise/hypothesis sentence pairs.

    Args:
        examples: Batch mapping that provides the "premise_nl" and
            "hypothesis_nl" columns (lists of strings when invoked through a
            batched ``map``).

    Returns:
        The tokenizer output for the sentence pairs, truncated to the
        tokenizer's maximum length and left unpadded.
    """
    # Deliberately no padding here: inside a batched map, padding=True pads every
    # example to the longest sequence of its map batch, which bloats the stored
    # dataset and every training batch. The Trainer is given the tokenizer and
    # therefore pads each training/eval batch dynamically to its own longest item.
    return tokenizer(examples["premise_nl"], examples["hypothesis_nl"], truncation=True)

# Tokenize the dataset, feeding examples to the tokenizer in batches
tokenized_dataset = dataset.map(tokenize_function, batched=True)

In [ ]:

import os

# Directory that receives the training checkpoints.
output_dir = "./results"
# exist_ok=True makes the creation idempotent: re-running the cell is a no-op and
# there is no window between an existence check and the actual mkdir.
os.makedirs(output_dir, exist_ok=True)

# Hyper-parameters and checkpointing policy for the fine-tuning run.
training_args = TrainingArguments(
    output_dir=output_dir,
    evaluation_strategy="epoch",  # Evaluate at the end of every epoch
    learning_rate=2e-5,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=64,
    num_train_epochs=3,
    weight_decay=0.01,
    save_steps=1000,  # Save every 1000 steps (adjust as needed)
    save_total_limit=2,  # Keep only the last 2 saved models
)

def compute_metrics(pred):
    """Turn raw model predictions into accuracy and macro-averaged P/R/F1.

    Args:
        pred: Object exposing ``predictions`` (per-class scores; the arg-max over
            the last axis is taken as the predicted class) and ``label_ids``
            (the reference labels). Presumably the prediction object the Trainer
            hands to its ``compute_metrics`` callback.

    Returns:
        Dict with the keys 'accuracy', 'f1', 'precision' and 'recall'.
    """
    y_true = pred.label_ids
    y_pred = pred.predictions.argmax(-1)
    # The fourth return value (support) is not needed with macro averaging.
    macro_precision, macro_recall, macro_f1, _support = precision_recall_fscore_support(
        y_true, y_pred, average='macro'
    )
    return {
        'accuracy': accuracy_score(y_true, y_pred),
        'f1': macro_f1,
        'precision': macro_precision,
        'recall': macro_recall,
    }

In [ ]:

# Wire model, data splits, tokenizer and metric callback into the Trainer.
# Passing the tokenizer lets the Trainer pad each batch dynamically.
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_dataset["train"],
    eval_dataset=tokenized_dataset["validation"],
    tokenizer=tokenizer,
    compute_metrics=compute_metrics,
)

# Resume only if a checkpoint is actually present. `resume_from_checkpoint=True`
# raises ValueError when the output directory holds no checkpoint, which breaks
# the very first run from a fresh kernel; passing None starts training from scratch.
checkpoint_root = training_args.output_dir
last_checkpoint = get_last_checkpoint(checkpoint_root) if os.path.isdir(checkpoint_root) else None
trainer.train(resume_from_checkpoint=last_checkpoint)

In [ ]:

# Run a final evaluation pass with the trainer (no dataset is passed, so it
# presumably falls back to the eval_dataset the trainer was constructed with)
# and print the returned metrics.
eval_results = trainer.evaluate()
print(eval_results)