In [1]:
from datasets import load_dataset

ds = load_dataset("zeroshot/twitter-financial-news-sentiment")

In [2]:
from transformers import AutoTokenizer
 
model_id = "answerdotai/ModernBERT-base"
tokenizer = AutoTokenizer.from_pretrained(model_id)
 

def tokenize(batch):
    return tokenizer(batch['text'], padding=True, truncation=True)
 

if "label" in ds["train"].features.keys():
    split_dataset =  ds.rename_column("label", "labels") 
tokenized_dataset = split_dataset.map(tokenize, batched=True, remove_columns=["text"])
 
tokenized_dataset["train"]

Dataset({
    features: ['labels', 'input_ids', 'attention_mask'],
    num_rows: 9543
})

In [3]:
from transformers import AutoModelForSequenceClassification
 
# Model id to load the tokenizer
model_id = "answerdotai/ModernBERT-base"
 
label2id = {
    "bearish": 0,
    "bullish": 1,
    "neutral": 2,
}

id2label = {v: k for k, v in label2id.items()}

# Download the model from huggingface.co/models
model = AutoModelForSequenceClassification.from_pretrained(
    model_id, num_labels=len(label2id), label2id=label2id, id2label=id2label,
)


Some weights of ModernBertForSequenceClassification were not initialized from the model checkpoint at answerdotai/ModernBERT-base and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [4]:
import numpy as np
from sklearn.metrics import f1_score, precision_score, recall_score
 
def compute_metrics(eval_pred):
    predictions, labels = eval_pred
    predictions = np.argmax(predictions, axis=1)
    f1 = f1_score(
            labels, predictions, labels=labels, pos_label=1, average="weighted"
        )
    precision = precision_score(
        labels, predictions, labels=labels, pos_label=1, average="weighted"
    )
    recall = recall_score(
        labels, predictions, labels=labels, pos_label=1, average="weighted"
    )

    return {"f1": float(f1) if f1 == 1 else f1, "precision": precision, "recall": recall}


In [5]:
from huggingface_hub import HfFolder
from transformers import Trainer, TrainingArguments
from transformers import DataCollatorWithPadding

data_collator = DataCollatorWithPadding(tokenizer=tokenizer)

 
# Define training args
training_args = TrainingArguments(
    output_dir= "ModernBERT-tweet-classifier-classhead",
    per_device_train_batch_size=32,
    per_device_eval_batch_size=32,
    learning_rate=5e-5,
        num_train_epochs=5,
    bf16=True, # bfloat16 training 
    optim="adamw_torch_fused", # improved optimizer 
    # logging & evaluation strategies
    logging_strategy="steps",
    logging_steps=100,
    eval_strategy="epoch",
    save_strategy="epoch",
    save_total_limit=2,
    load_best_model_at_end=True,
    metric_for_best_model="f1",
    # push to hub parameters
    push_to_hub=True,
    hub_strategy="every_save",
    hub_token=HfFolder.get_token(),
    report_to="wandb",
    eval_on_start=True,
)
 
# Create a Trainer instance
trainer = Trainer(
    model=model,
    args=training_args,
    data_collator=data_collator,
    train_dataset=tokenized_dataset["train"],
    eval_dataset=tokenized_dataset["validation"],
    compute_metrics=compute_metrics,
)
trainer.train()

Detected kernel version 5.4.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33mlaz4rz[0m to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin
[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.


Epoch,Training Loss,Validation Loss,F1,Precision,Recall
0,No log,1.064507,0.546752,0.635066,0.484772
1,0.419100,0.363271,0.89102,0.926499,0.861485
2,0.189500,0.348081,0.920915,0.921402,0.920526
3,0.059200,0.574013,0.908952,0.900123,0.918499
4,0.020200,0.744886,0.913626,0.912068,0.915428
5,0.006900,0.774458,0.915512,0.915282,0.915758


TrainOutput(global_step=1495, training_loss=0.15074130059484653, metrics={'train_runtime': 127.9271, 'train_samples_per_second': 372.986, 'train_steps_per_second': 11.686, 'total_flos': 2879473011343668.0, 'train_loss': 0.15074130059484653, 'epoch': 5.0})