In [6]:
from datasets import load_dataset

ds = load_dataset("zeroshot/twitter-financial-news-sentiment")

In [7]:
from transformers import AutoTokenizer
 
model_id = "answerdotai/ModernBERT-base"
tokenizer = AutoTokenizer.from_pretrained(model_id)
 

def tokenize(batch):
    return tokenizer(batch['text'], padding=True, truncation=True)
 

if "label" in ds["train"].features.keys():
    split_dataset =  ds.rename_column("label", "labels") 
tokenized_dataset = split_dataset.map(tokenize, batched=True, remove_columns=["text"])
 
tokenized_dataset["train"]

Dataset({
    features: ['labels', 'input_ids', 'attention_mask'],
    num_rows: 9543
})

In [8]:
from transformers import AutoModelForSequenceClassification
 
# Model id to load the tokenizer
model_id = "answerdotai/ModernBERT-base"
 
label2id = {
    "bearish": 0,
    "bullish": 1,
    "neutral": 2,
}

id2label = {v: k for k, v in label2id.items()}

# Download the model from huggingface.co/models
model = AutoModelForSequenceClassification.from_pretrained(
    model_id, num_labels=len(label2id), label2id=label2id, id2label=id2label,
)


Some weights of ModernBertForSequenceClassification were not initialized from the model checkpoint at answerdotai/ModernBERT-base and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [9]:
import numpy as np
from sklearn.metrics import f1_score, precision_score, recall_score
 
def compute_metrics(eval_pred):
    predictions, labels = eval_pred
    predictions = np.argmax(predictions, axis=1)
    f1 = f1_score(
            labels, predictions, labels=labels, pos_label=1, average="weighted"
        )
    precision = precision_score(
        labels, predictions, labels=labels, pos_label=1, average="weighted"
    )
    recall = recall_score(
        labels, predictions, labels=labels, pos_label=1, average="weighted"
    )

    return {"f1": float(f1) if f1 == 1 else f1, "precision": precision, "recall": recall}


In [10]:
from huggingface_hub import HfFolder
from transformers import Trainer, TrainingArguments
from transformers import DataCollatorWithPadding

data_collator = DataCollatorWithPadding(tokenizer=tokenizer)

 
# Define training args
training_args = TrainingArguments(
    output_dir= "ModernBERT-tweet-classifier-classhead",
    per_device_train_batch_size=32,
    per_device_eval_batch_size=16,
    learning_rate=5e-5,
        num_train_epochs=5,
    bf16=True, # bfloat16 training 
    optim="adamw_torch_fused", # improved optimizer 
    # logging & evaluation strategies
    logging_strategy="steps",
    logging_steps=100,
    eval_strategy="epoch",
    save_strategy="epoch",
    save_total_limit=2,
    load_best_model_at_end=True,
    use_mps_device=True,
    metric_for_best_model="f1",
    # push to hub parameters
    push_to_hub=True,
    hub_strategy="every_save",
    hub_token=HfFolder.get_token(),
    report_to="wandb",
    eval_on_start=True,
)
 
# Create a Trainer instance
trainer = Trainer(
    model=model,
    args=training_args,
    data_collator=data_collator,
    train_dataset=tokenized_dataset["train"],
    eval_dataset=tokenized_dataset["validation"],
    compute_metrics=compute_metrics,
)
trainer.train()



Epoch,Training Loss,Validation Loss,F1,Precision,Recall
0,No log,1.201814,0.188182,0.494794,0.160465
1,0.462800,0.311006,0.907214,0.921499,0.89423
2,0.215200,0.315851,0.920609,0.906906,0.935709
3,0.070700,0.417724,0.922647,0.912639,0.933362
4,0.034900,0.56136,0.916846,0.916824,0.916868
5,0.006000,0.619553,0.917486,0.920047,0.915038


'(MaxRetryError("HTTPSConnectionPool(host='hf-hub-lfs-us-east-1.s3-accelerate.amazonaws.com', port=443): Max retries exceeded with url: /repos/1d/65/1d6533ac13c36c96174f62b730498542d905c3c289a1b108d3204a5f2e90c053/b0e489542055a4f19f310cccbfa5ee0ed4bc5ccebcaf1e6d59ebe855723c7425?X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Content-Sha256=UNSIGNED-PAYLOAD&X-Amz-Credential=AKIA2JU7TKAQLC2QXPN7%2F20250210%2Fus-east-1%2Fs3%2Faws4_request&X-Amz-Date=20250210T140709Z&X-Amz-Expires=86400&X-Amz-Signature=30dd45ae999d3a7d9394244782a0277773c53dc3cb8625eec7adaf3048652220&X-Amz-SignedHeaders=host&partNumber=16&uploadId=2kjDwAuegeN76rOtnLZhzS1cN1Zx985RLoSkvbdhraWM7nWGyqJmhjrtYqqR1Txw8_OxEfC6Ae1XjA8lfLgaxnIw90u4RHyYjvnl7wZ1xYRopjwzAqVIPzZ0Oxb.0DUS&x-id=UploadPart (Caused by SSLError(SSLError(5, '[SYS] unknown error (_ssl.c:2427)')))"), '(Request ID: fd25a737-fed6-4502-900d-d99feb63e0d0)')' thrown while requesting PUT https://hf-hub-lfs-us-east-1.s3-accelerate.amazonaws.com/repos/1d/65/1d6533ac13c36c96174f6

TrainOutput(global_step=1495, training_loss=0.16563701725325058, metrics={'train_runtime': 1125.8532, 'train_samples_per_second': 42.381, 'train_steps_per_second': 1.328, 'total_flos': 2879473011343668.0, 'train_loss': 0.16563701725325058, 'epoch': 5.0})