In [1]:
from datasets import load_dataset

ds = load_dataset("zeroshot/twitter-financial-news-sentiment")

In [2]:
from transformers import AutoTokenizer
 
model_id = "answerdotai/ModernBERT-base"
tokenizer = AutoTokenizer.from_pretrained(model_id)
 

def tokenize(batch):
    return tokenizer(batch['text'], padding=True, truncation=True)
 

if "label" in ds["train"].features.keys():
    split_dataset =  ds.rename_column("label", "labels") 
tokenized_dataset = split_dataset.map(tokenize, batched=True, remove_columns=["text"])
 
tokenized_dataset["train"]

Dataset({
    features: ['labels', 'input_ids', 'attention_mask'],
    num_rows: 9543
})

In [48]:
from transformers import AutoModelForMaskedLM
 
model_id = "answerdotai/ModernBERT-Large-Instruct"
 
label2id = {
    "bearish": 0,
    "bullish": 1,
    "neutral": 2,
}

id2label = {v: k for k, v in label2id.items()}

model = AutoModelForMaskedLM.from_pretrained(model_id)


config.json:   0%|          | 0.00/1.41k [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/792M [00:00<?, ?B/s]

In [49]:
input_ids = tokenizer("This is a bearish tweet [MASK]", return_tensors="pt").input_ids
for token in input_ids[0]:
    print(tokenizer.decode(token.item()))

[CLS]
This
 is
 a
 bear
ish
 tweet
[MASK]
[SEP]


In [50]:
tokenizer._convert_id_to_token(model(input_ids).logits[:, -2, :].argmax(-1))

Compiling the model with `torch.compile` and using a `torch.cpu` device is not supported. Falling back to non-compiled mode.


'[MASK]'

In [51]:
import numpy as np
from sklearn.metrics import f1_score, precision_score, recall_score
 
def compute_metrics(eval_pred):
    predictions, labels = eval_pred
    predictions = np.argmax(predictions, axis=1)
    f1 = f1_score(
            labels, predictions, labels=labels, pos_label=1, average="weighted"
        )
    precision = precision_score(
        labels, predictions, labels=labels, pos_label=1, average="weighted"
    )
    recall = recall_score(
        labels, predictions, labels=labels, pos_label=1, average="weighted"
    )

    return {"f1": float(f1) if f1 == 1 else f1, "precision": precision, "recall": recall}


In [52]:
from tqdm import tqdm
from transformers import pipeline

fill_mask = pipeline("fill-mask", model=model_id, tokenizer=model_id)

template = """You will be given a question and options. Select the right answer.
    QUESTION: What is the financial sentiment of the following tweet: "{tweet}"?
    CHOICES:
    - A: bullish
    - B: neutral
    - C: bearish
    ANSWER: [unusedo] [MASK]"""

model.to("cuda")

predictions = []
ground_truth = ds["validation"]["label"]
for example in tqdm(ds["validation"]):
    prediction = fill_mask(template.format(tweet=example["text"]))[0]["token_str"].strip()
    predictions.append(prediction)
    

tokenizer_config.json:   0%|          | 0.00/20.9k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/3.58M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/694 [00:00<?, ?B/s]

Device set to use cuda:0
100%|██████████| 2388/2388 [01:55<00:00, 20.68it/s]


In [53]:
label2id = {
    "bearish": 0,
    "bullish": 1,
    "neutral": 2,
}

letter2id = {
    "A": 1,
    "B": 2,
    "C": 0,
}

answers = [letter2id.get(p, -1) for p in predictions]
answers = np.array(answers)

f1 = f1_score(ground_truth, answers, average="weighted")

print("f1 score for the fill-mask pipeline:", f1)

f1 score for the fill-mask pipeline: 0.5702823610636942


In [54]:
np.unique_counts(predictions)

UniqueCountsResult(values=array(['A', 'B', 'C', 'negative', 'neutral', 'positive'], dtype='<U8'), counts=array([  95, 1814,   35,  250,  187,    7]))