In [None]:
!pip install transformers datasets evaluate accelerate -q
!pip install torch --upgrade -q

import torch
import numpy as np
import pandas as pd
from datasets import load_dataset
from transformers import (
    DistilBertTokenizer,
    DistilBertForSequenceClassification,
    Trainer,
    TrainingArguments,
    pipeline
)
from sklearn.metrics import accuracy_score, f1_score, classification_report
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
# Fix the global torch and NumPy RNG seeds so repeated runs start from the same state
SEED = 42
torch.manual_seed(SEED)
np.random.seed(SEED)

# Download (or reuse the cached copy of) the financial-tweet sentiment dataset
dataset = load_dataset(path="zeroshot/twitter-financial-news-sentiment")

# Label ids as published on the dataset card for
# zeroshot/twitter-financial-news-sentiment:
#   0 = Bearish (negative), 1 = Bullish (positive), 2 = Neutral
# The names must follow that order; otherwise the saved model config and every
# per-class report attach the wrong sentiment name to each class id.
id2label = {0: "negative", 1: "positive", 2: "neutral"}
# Inverse mapping, derived from id2label so the two can never disagree.
label_map = {name: idx for idx, name in id2label.items()}

# Tokenizer matching the DistilBERT checkpoint that is fine-tuned below
tokenizer = DistilBertTokenizer.from_pretrained("distilbert-base-uncased")

def tokenize_function(examples):
    """Tokenize the ``"text"`` field of ``examples``.

    Every sequence is truncated and padded to exactly 128 tokens, so all
    encoded rows have the same length.

    Args:
        examples: mapping with a ``"text"`` entry (a batch of rows when used
            with ``Dataset.map(..., batched=True)``).

    Returns:
        The tokenizer's output for ``examples["text"]``.
    """
    encode_options = {"padding": "max_length", "truncation": True, "max_length": 128}
    return tokenizer(examples["text"], **encode_options)

# Tokenize the train split and the validation split (the latter is used as the test set)
tokenized_train, tokenized_test = (
    dataset[split_name].map(tokenize_function, batched=True)
    for split_name in ("train", "validation")
)

In [None]:
# Pretrained DistilBERT encoder with a 3-way classification head; the label
# name <-> id mappings are stored in the model config.
classification_head_config = {
    "num_labels": 3,
    "id2label": id2label,
    "label2id": label_map,
}
model = DistilBertForSequenceClassification.from_pretrained(
    "distilbert-base-uncased",
    **classification_head_config,
)

# Training arguments
# `evaluation_strategy` was renamed to `eval_strategy` in newer transformers
# releases (the old keyword is deprecated and later rejected). The install cell
# does not pin a version, so pick whichever keyword this installation defines.
_eval_strategy_key = (
    "eval_strategy"
    if "eval_strategy" in TrainingArguments.__dataclass_fields__
    else "evaluation_strategy"
)

training_args = TrainingArguments(
    output_dir="./results",
    learning_rate=2e-5,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=16,
    num_train_epochs=3,
    weight_decay=0.01,
    save_strategy="no",
    logging_dir="./logs",
    # Disable third-party experiment trackers: with the default setting an
    # installed wandb package interrupts training with an interactive API-key
    # prompt, which blocks an unattended "Restart & Run All".
    report_to="none",
    # Evaluate on the eval dataset at the end of every epoch.
    **{_eval_strategy_key: "epoch"},
)

# Metrics
def compute_metrics(p):
    """Compute evaluation metrics for the Trainer.

    Args:
        p: prediction object exposing ``predictions`` (per-class scores, the
            class axis last) and ``label_ids`` (true class ids).

    Returns:
        dict with ``"accuracy"``, weighted ``"f1_score"`` and ``"report"``
        (the multi-line text from ``classification_report``).
    """
    # Class names indexed by the dataset's label ids:
    # 0 = Bearish (negative), 1 = Bullish (positive), 2 = Neutral.
    class_names = ["negative", "positive", "neutral"]

    preds = p.predictions.argmax(-1)
    labels = p.label_ids
    acc = accuracy_score(labels, preds)
    f1 = f1_score(labels, preds, average="weighted")
    report = classification_report(
        labels,
        preds,
        # Pin the label ids so each name stays attached to its own id even when
        # a class is missing from the predictions or the references.
        labels=list(range(len(class_names))),
        target_names=class_names,
    )
    return {
        "accuracy": acc,
        "f1_score": f1,
        # NOTE(review): this is a string, not a scalar; it is kept because the
        # evaluation cell prints results['eval_report'].
        "report": report,
    }

# Wire the model, hyper-parameters, tokenized splits and metric callback together
trainer_config = {
    "model": model,
    "args": training_args,
    "train_dataset": tokenized_train,
    "eval_dataset": tokenized_test,
    "compute_metrics": compute_metrics,
}
trainer = Trainer(**trainer_config)

# Run fine-tuning; the returned TrainOutput is displayed as the cell result
print("Training...")
trainer.train()

Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Training...


[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.


<IPython.core.display.Javascript object>

[34m[1mwandb[0m: Logging into wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)
[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize
wandb: Paste an API key from your profile and hit enter:

 ··········


[34m[1mwandb[0m: No netrc file found, creating one.
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc
[34m[1mwandb[0m: Currently logged in as: [33mmuhammadmoaz808[0m ([33mmuhammadmoaz808-city-university[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


Epoch,Training Loss,Validation Loss,Accuracy,F1 Score,Report
1,0.5765,0.416573,0.836683,0.841464,precision recall f1-score support  neutral 0.59 0.87 0.70 347  positive 0.84 0.71 0.77 475  negative 0.92 0.87 0.90 1566  accuracy 0.84 2388  macro avg 0.78 0.81 0.79 2388 weighted avg 0.86 0.84 0.84 2388
2,0.3393,0.351477,0.875209,0.875062,precision recall f1-score support  neutral 0.75 0.82 0.79 347  positive 0.84 0.77 0.80 475  negative 0.91 0.92 0.92 1566  accuracy 0.88 2388  macro avg 0.84 0.84 0.84 2388 weighted avg 0.88 0.88 0.88 2388
3,0.2246,0.378295,0.875628,0.875587,precision recall f1-score support  neutral 0.78 0.79 0.79 347  positive 0.82 0.81 0.81 475  negative 0.91 0.92 0.91 1566  accuracy 0.88 2388  macro avg 0.84 0.84 0.84 2388 weighted avg 0.88 0.88 0.88 2388


TrainOutput(global_step=1791, training_loss=0.348966069224159, metrics={'train_runtime': 478.1101, 'train_samples_per_second': 59.88, 'train_steps_per_second': 3.746, 'total_flos': 948119197089024.0, 'train_loss': 0.348966069224159, 'epoch': 3.0})

In [None]:
# Run a final evaluation pass and print the headline metrics plus the per-class report
results = trainer.evaluate()
print(
    "\nEvaluation Results:",
    f"Accuracy: {results['eval_accuracy']:.4f}",
    f"F1 Score: {results['eval_f1_score']:.4f}",
    "\nClassification Report:",
    results['eval_report'],
    sep="\n",
)

# Persist the fine-tuned weights and the tokenizer files side by side
save_dir = "./sentiment_model"
model.save_pretrained(save_dir)
tokenizer.save_pretrained(save_dir)


Evaluation Results:
Accuracy: 0.8756
F1 Score: 0.8756

Classification Report:
              precision    recall  f1-score   support

     neutral       0.78      0.79      0.79       347
    positive       0.82      0.81      0.81       475
    negative       0.91      0.92      0.91      1566

    accuracy                           0.88      2388
   macro avg       0.84      0.84      0.84      2388
weighted avg       0.88      0.88      0.88      2388



('./sentiment_model/tokenizer_config.json',
 './sentiment_model/special_tokens_map.json',
 './sentiment_model/vocab.txt',
 './sentiment_model/added_tokens.json')

In [None]:
from transformers import pipeline
import torch

# Pretrained Twitter RoBERTa sentiment pipeline (a different checkpoint from the
# DistilBERT model fine-tuned above). Device index 0 is used when CUDA is
# available, otherwise -1.
pipeline_device = 0 if torch.cuda.is_available() else -1
classifier = pipeline(
    "text-classification",
    model="cardiffnlp/twitter-roberta-base-sentiment-latest",
    device=pipeline_device,
)

# Phrases that signal a lukewarm or undecided opinion and force a neutral verdict.
_NEUTRAL_PHRASES = (
    "not sure", "don't know", "okay", "nothing special",
    "not bad", "not great", "so-so",
)
# Heuristic confidence values used by the rules below (not model probabilities).
_PHRASE_RULE_CONFIDENCE = 0.7
_LOW_NEUTRAL_THRESHOLD = 0.6
_LOW_NEUTRAL_CONFIDENCE = 0.8


def improved_predict(text, classifier_fn=None):
    """Classify ``text`` and post-process the result with neutral-phrase rules.

    Args:
        text: the sentence to classify.
        classifier_fn: optional callable returning ``[{"label": ..., "score": ...}]``
            for a text. Defaults to the module-level ``classifier`` pipeline.

    Returns:
        dict with ``"text"`` (the input), ``"sentiment"`` (upper-cased label)
        and ``"confidence"`` (the model score, or a heuristic value when one of
        the rules below fires).
    """
    if classifier_fn is None:
        classifier_fn = classifier  # resolved at call time from the notebook namespace

    result = classifier_fn(text)[0]
    label = result["label"]
    confidence = result["score"]

    # Rule 1: lukewarm wording forces a neutral verdict.
    lowered = text.lower()
    if any(phrase in lowered for phrase in _NEUTRAL_PHRASES):
        if label.lower() == "neutral":
            # The model already agrees; only lift its score to the rule's floor.
            confidence = max(confidence, _PHRASE_RULE_CONFIDENCE)
        else:
            # The model's score belongs to the class being overridden, so it
            # must not be reported as confidence in "neutral".
            label = "neutral"
            confidence = _PHRASE_RULE_CONFIDENCE

    # Rule 2: a neutral prediction below the threshold is reported with a fixed
    # confidence. NOTE(review): this raises the score exactly when the model is
    # least certain; kept as-is because it is the original heuristic.
    if label.lower() == "neutral" and confidence < _LOW_NEUTRAL_THRESHOLD:
        confidence = _LOW_NEUTRAL_CONFIDENCE

    return {
        "text": text,
        "sentiment": label.upper(),
        "confidence": confidence,
    }

# Free-text course feedback used to spot-check the rule-adjusted classifier
sample_texts = [
    "A deep and interesting course that helped me understand how modern AI works.",
    "So far so good",
    "Some topics were hard to understand, and training took a long time.",
    "A good course that taught me a lot, but some parts were challenging.",
    "it was a satisfying journey especially because of the teacher good character and ethics , it made me enjoy the course",
    "The deep learning course was an engaging and insightful experience, an opportunity to learn techniques that will help in accomplishing the future projects.",
    "The most frustrating part was debugging deep learning models, especially dealing with vanishing gradients and hyperparameter tuning. Sometimes, getting a model to converge felt more like trial and error than an exact science.",
    "it was okay",
    "Nothing so far",
    "nothing with the course personally but the fact it had a clash with another course of mine",
    "I was eager to learn new concepts and expand my knowledge.",
    "Tough"
]

print("=== FINAL IMPROVED PREDICTIONS ===")
# map() is lazy, so each text is classified right before its two lines are printed
for text, pred in zip(sample_texts, map(improved_predict, sample_texts)):
    print(f"\nText: {text}")
    print(f"Sentiment: {pred['sentiment']} (Confidence: {pred['confidence']:.4f})")

Some weights of the model checkpoint at cardiffnlp/twitter-roberta-base-sentiment-latest were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Device set to use cuda:0
You seem to be using the pipelines sequentially on GPU. In order to maximize efficiency please use a dataset


=== FINAL IMPROVED PREDICTIONS ===

Text: A deep and interesting course that helped me understand how modern AI works.
Sentiment: POSITIVE (Confidence: 0.9368)

Text: So far so good
Sentiment: POSITIVE (Confidence: 0.9565)

Text: Some topics were hard to understand, and training took a long time.
Sentiment: NEGATIVE (Confidence: 0.8163)

Text: A good course that taught me a lot, but some parts were challenging.
Sentiment: POSITIVE (Confidence: 0.9295)

Text: it was a satisfying journey especially because of the teacher good character and ethics , it made me enjoy the course
Sentiment: POSITIVE (Confidence: 0.9863)

Text: The deep learning course was an engaging and insightful experience, an opportunity to learn techniques that will help in accomplishing the future projects.
Sentiment: POSITIVE (Confidence: 0.9785)

Text: The most frustrating part was debugging deep learning models, especially dealing with vanishing gradients and hyperparameter tuning. Sometimes, getting a model to conv

In [None]:
# Confusion Matrix
from sklearn.metrics import confusion_matrix

def plot_confusion_matrix(y_true, y_pred):
    """Draw a 3x3 confusion-matrix heatmap (rows = actual, columns = predicted).

    Args:
        y_true: true class ids (0, 1 or 2).
        y_pred: predicted class ids, aligned element-wise with ``y_true``.
    """
    # Class names indexed by the dataset's label ids:
    # 0 = Bearish (negative), 1 = Bullish (positive), 2 = Neutral.
    class_names = ["negative", "positive", "neutral"]

    # Pin the label ids so the matrix is always 3x3 and stays aligned with the
    # tick labels even if a class never occurs in y_true / y_pred.
    cm = confusion_matrix(y_true, y_pred, labels=list(range(len(class_names))))

    fig, ax = plt.subplots(figsize=(8, 6))
    sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',
                xticklabels=class_names,
                yticklabels=class_names,
                ax=ax)
    ax.set_xlabel('Predicted')
    ax.set_ylabel('Actual')
    ax.set_title('Confusion Matrix')
    plt.show()

# Get predictions for test set
# (This cell is written with plain ASCII whitespace only: the recorded run
# failed with "invalid non-printable character U+00A0".)
predictions = trainer.predict(tokenized_test)
y_pred = np.argmax(predictions.predictions, axis=-1)
# Use the labels returned with the predictions so they are aligned row-for-row
# with y_pred.
y_true = predictions.label_ids

plot_confusion_matrix(y_true, y_pred)

SyntaxError: invalid non-printable character U+00A0 (<ipython-input-1-bbc05423f057>, line 20)