In [1]:
from datasets import load_dataset
from transformers import BertTokenizer, BertForSequenceClassification, pipeline

# 1. Load dataset
ds = load_dataset("zeroshot/twitter-financial-news-sentiment")

  from .autonotebook import tqdm as notebook_tqdm


In [None]:
# Use train split (you can change to 'test' if you want)
dataset = ds["train"]

# 2. Load FinBERT model + tokenizer
model_baseline = BertForSequenceClassification.from_pretrained(
    "ahmedrachid/FinancialBERT-Sentiment-Analysis",
    num_labels=3
)
tokenizer = BertTokenizer.from_pretrained(
    "ahmedrachid/FinancialBERT-Sentiment-Analysis"
)

nlp_baseline = pipeline(
    "sentiment-analysis",
    model=model_baseline,
    tokenizer=tokenizer
)

# 3. Take first 10 rows
sample = dataset.select(range(10))

texts = list(sample["text"])
true_labels = list(sample["label"])

# Optional: label mapping (check dataset card if unsure)
label_map = {
    0: "negative",
    1: "neutral",
    2: "positive"
}

# 4. Run inference
predictions = nlp(texts)

# 5. Print results
for i in range(len(texts)):
    print(f"Text: {texts[i]}")
    print(f"True label: {label_map[true_labels[i]]}")
    print(f"Predicted: {predictions[i]['label']} "
          f"(score={predictions[i]['score']:.4f})")
    print("-" * 80)

In [6]:
correct = 0
for pred, true in zip(predictions, true_labels):
    if pred["label"] == label_map[true]:
        correct += 1

print("Accuracy:", correct / len(true_labels))

Accuracy: 0.4


Fine Tune Hyperparamters of the model

In [None]:
import numpy as np
import torch
from datasets import load_dataset
from transformers import (
    AutoTokenizer,
    AutoModelForSequenceClassification,
    TrainingArguments,
    Trainer,
    DataCollatorWithPadding,
)
import evaluate

# --------- CUDA sanity check ----------
print("Torch:", torch.__version__)
print("CUDA available:", torch.cuda.is_available())
if torch.cuda.is_available():
    print("GPU:", torch.cuda.get_device_name(0))

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# 1) Load dataset
ds = load_dataset("zeroshot/twitter-financial-news-sentiment")
train_ds = ds["train"]
val_ds = ds["validation"]

# 2) Load model/tokenizer
model_name = "ahmedrachid/FinancialBERT-Sentiment-Analysis"
tokenizer = AutoTokenizer.from_pretrained(model_name)

id2label = {0: "Bearish", 1: "Bullish", 2: "Neutral"}
label2id = {v: k for k, v in id2label.items()}

model = AutoModelForSequenceClassification.from_pretrained(
    model_name,
    num_labels=3,
    id2label=id2label,
    label2id=label2id,
)

# Move model to GPU (Trainer will also handle this, but this is explicit and harmless)
model.to(device)

# 3) Tokenize
def tokenize_fn(batch):
    return tokenizer(batch["text"], truncation=True)

train_tok = train_ds.map(tokenize_fn, batched=True)
val_tok = val_ds.map(tokenize_fn, batched=True)

train_tok = train_tok.rename_column("label", "labels")
val_tok = val_tok.rename_column("label", "labels")

cols_to_keep = ["input_ids", "attention_mask", "labels"]
train_tok.set_format(type="torch", columns=cols_to_keep)
val_tok.set_format(type="torch", columns=cols_to_keep)

data_collator = DataCollatorWithPadding(tokenizer=tokenizer)

# 4) Metrics
acc = evaluate.load("accuracy")
f1 = evaluate.load("f1")

def compute_metrics(eval_pred):
    logits, labels = eval_pred
    preds = np.argmax(logits, axis=-1)
    return {
        "accuracy": acc.compute(predictions=preds, references=labels)["accuracy"],
        "macro_f1": f1.compute(predictions=preds, references=labels, average="macro")["f1"],
    }

# 5) Training config
use_fp16 = torch.cuda.is_available()  # fp16 only makes sense on GPU

training_args = TrainingArguments(
    output_dir="./finbert_twitter_ft",
    evaluation_strategy="epoch",   # <-- use this name; some versions don't accept eval_strategy
    save_strategy="epoch",
    learning_rate=2e-5,
    num_train_epochs=3,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=32,
    weight_decay=0.01,
    logging_steps=50,
    load_best_model_at_end=True,
    metric_for_best_model="macro_f1",
    fp16=use_fp16,                 # <-- enables mixed precision on NVIDIA GPU
    dataloader_num_workers=0,      # safer on Windows; avoids hanging
    report_to="none",              # avoids needing wandb, etc.
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_tok,
    eval_dataset=val_tok,
    tokenizer=tokenizer,
    data_collator=data_collator,
    compute_metrics=compute_metrics,
)

trainer.train()
trainer.evaluate()

trainer.save_model("./finbert_twitter_ft/best")
tokenizer.save_pretrained("./finbert_twitter_ft/best")


Torch: 2.9.1+cpu
CUDA available: False


KeyboardInterrupt: 

Testing Inference based on Best Model

In [None]:
save_dir = "./finbert_twitter_ft/best"

example_sentences = [
    "TSLA beats earnings expectations and raises full-year guidance.",
    "Apple shares fall after reporting weaker-than-expected iPhone sales.",
    "The company reported results largely in line with analyst expectations.",
    "Amazon warns of margin pressure due to rising logistics costs.",
    "NVIDIA stock surges as demand for AI chips remains strong.",
    "The firm announced a restructuring plan, sending shares lower.",
    "Revenue growth slowed quarter-over-quarter, but profitability improved.",
    "Investors remain cautious ahead of the Federal Reserve meeting.",
    "Strong cash flow and reduced debt boosted investor confidence.",
    "The outlook remains uncertain amid macroeconomic headwinds."
]

tokenizer = AutoTokenizer.from_pretrained(save_dir)
model = AutoModelForSequenceClassification.from_pretrained(save_dir)

# optional: move to GPU if available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)
model.eval()

def predict_sentiment(text: str):
    inputs = tokenizer(text, return_tensors="pt", truncation=True).to(device)
    with torch.no_grad():
        out = model(**inputs)
    pred_id = out.logits.argmax(dim=-1).item()
    return model.config.id2label[pred_id]

for text in sentences:
    label = predict_sentiment(text)
    print(f"{label.upper():8} | {text}")

NEUTRAL  | TSLA beats earnings expectations and raises full-year guidance.
NEGATIVE | Apple shares fall after reporting weaker-than-expected iPhone sales.
POSITIVE | The company reported results largely in line with analyst expectations.
NEGATIVE | Amazon warns of margin pressure due to rising logistics costs.
NEUTRAL  | NVIDIA stock surges as demand for AI chips remains strong.
NEGATIVE | The firm announced a restructuring plan, sending shares lower.
POSITIVE | Revenue growth slowed quarter-over-quarter, but profitability improved.
NEGATIVE | Investors remain cautious ahead of the Federal Reserve meeting.
NEUTRAL  | Strong cash flow and reduced debt boosted investor confidence.
NEGATIVE | The outlook remains uncertain amid macroeconomic headwinds.
