In [None]:
!pip install transformers datasets evaluate -q

from datasets import load_dataset
from transformers import AutoTokenizer, AutoModelForSequenceClassification, TrainingArguments, Trainer
import evaluate
import numpy as np

# Dataset and tokenizer: both are fetched by their hub identifiers.
DATASET_ID = "PlanTL-GOB-ES/tass2020"
TOKENIZER_CHECKPOINT = "dccuchile/bert-base-spanish-wwm-cased"

dataset = load_dataset(DATASET_ID)
tokenizer = AutoTokenizer.from_pretrained(TOKENIZER_CHECKPOINT)

def preprocess(examples):
    """Tokenize the ``"tweet"`` field of a batch of examples.

    Args:
        examples: Batch mapping passed in by ``dataset.map(..., batched=True)``;
            only its ``"tweet"`` entry is read.

    Returns:
        The output of the module-level ``tokenizer`` for those tweets, with
        truncation enabled and ``padding="max_length"`` requested.
    """
    tweets = examples["tweet"]
    encoded = tokenizer(tweets, padding="max_length", truncation=True)
    return encoded

# Tokenize the dataset, handing `preprocess` whole batches at a time.
dataset = dataset.map(preprocess, batched=True)

# Model: sequence-classification head configured for three labels.
model = AutoModelForSequenceClassification.from_pretrained(
    "dccuchile/bert-base-spanish-wwm-cased",
    num_labels=3,
)

# Metric object used by compute_metrics.
accuracy = evaluate.load("accuracy")
def compute_metrics(eval_pred):
    """Score a batch of evaluation predictions with the accuracy metric.

    Args:
        eval_pred: Pair unpacked as ``(logits, labels)``.

    Returns:
        Whatever ``accuracy.compute`` returns for the arg-max class of each
        row of logits compared against the reference labels.
    """
    scores, gold = eval_pred
    # The predicted class is the index of the largest logit on the last axis.
    predicted = np.asarray(scores).argmax(axis=-1)
    return accuracy.compute(predictions=predicted, references=gold)

# Training
import inspect  # stdlib; used to pick keyword names that match the installed transformers


def _subsample(split, size, seed=42):
    """Return a shuffled subset of ``split`` holding at most ``size`` rows.

    The size is clamped to ``len(split)`` so a split smaller than the requested
    sample does not make ``select`` index out of range.
    """
    return split.shuffle(seed=seed).select(range(min(size, len(split))))


# `evaluation_strategy` was renamed to `eval_strategy` in newer transformers
# releases and the old spelling was later dropped. The notebook installs an
# unpinned transformers, so use whichever name the installed version accepts.
_args_params = inspect.signature(TrainingArguments.__init__).parameters
_eval_kwarg = "eval_strategy" if "eval_strategy" in _args_params else "evaluation_strategy"

args = TrainingArguments(
    output_dir="./results",
    learning_rate=2e-5,
    per_device_train_batch_size=8,
    num_train_epochs=1,
    **{_eval_kwarg: "epoch"},  # evaluate at the end of every epoch
)

# Likewise, Trainer's `tokenizer` argument was superseded by `processing_class`.
_trainer_params = inspect.signature(Trainer.__init__).parameters
_tokenizer_kwarg = "processing_class" if "processing_class" in _trainer_params else "tokenizer"

trainer = Trainer(
    model=model,
    args=args,
    # Small fixed-seed subsets keep this demo run short.
    train_dataset=_subsample(dataset["train"], 500),
    eval_dataset=_subsample(dataset["test"], 100),
    compute_metrics=compute_metrics,
    **{_tokenizer_kwarg: tokenizer},
)

trainer.train()