In [1]:
! pip install optuna

Collecting optuna
  Downloading optuna-4.6.0-py3-none-any.whl.metadata (17 kB)
Collecting colorlog (from optuna)
  Downloading colorlog-6.10.1-py3-none-any.whl.metadata (11 kB)
Downloading optuna-4.6.0-py3-none-any.whl (404 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m404.7/404.7 kB[0m [31m28.8 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading colorlog-6.10.1-py3-none-any.whl (11 kB)
Installing collected packages: colorlog, optuna
Successfully installed colorlog-6.10.1 optuna-4.6.0


In [16]:
#!/usr/bin/env python3
"""
Fine-tune FinBERT model for sentiment analysis using raw_dataset.csv
Uses HuggingFace Transformers with robust hyperparameter optimization
"""
import os
import json
import re
import pandas as pd
import numpy as np
import torch
from torch.utils.data import Dataset
from transformers import (
    AutoTokenizer,
    AutoModelForSequenceClassification,
    TrainingArguments,
    Trainer
)
try:
    from transformers import EarlyStoppingCallback
except ImportError:
    from transformers.trainer_callback import EarlyStoppingCallback
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix, f1_score
import optuna
from datetime import datetime
import warnings
warnings.filterwarnings('ignore')


class SentimentDataset(Dataset):
    """Dataset class for sentiment analysis"""

    def __init__(self, texts, labels, tokenizer, max_length=512):
        self.texts = texts
        self.labels = labels
        self.tokenizer = tokenizer
        self.max_length = max_length

    def __len__(self):
        return len(self.texts)

    def __getitem__(self, idx):
        text = str(self.texts[idx])
        label = self.labels[idx]

        encoding = self.tokenizer(
            text,
            truncation=True,
            padding='max_length',
            max_length=self.max_length,
            return_tensors='pt'
        )

        return {
            'input_ids': encoding['input_ids'].flatten(),
            'attention_mask': encoding['attention_mask'].flatten(),
            'labels': torch.tensor(label, dtype=torch.long)
        }


def clean_text(text):
    """Clean text by removing URLs, special characters, extra spaces, etc."""
    if pd.isna(text):
        return ""
    text = str(text)
    text = re.sub(r'http\S+|www\S+|https\S+', '', text, flags=re.MULTILINE)
    text = re.sub(r'\S+@\S+', '', text)
    text = re.sub(r'[^a-zA-Z0-9\s.,!?]', '', text)
    text = re.sub(r'\s+', ' ', text)
    text = text.strip()
    return text


def compute_metrics(eval_pred):
    """Compute metrics for evaluation"""
    predictions, labels = eval_pred
    predictions = np.argmax(predictions, axis=1)

    accuracy = accuracy_score(labels, predictions)
    f1 = f1_score(labels, predictions, average='weighted')

    return {
        'accuracy': accuracy,
        'f1': f1
    }


def load_and_prepare_data(csv_path):
    """Load and prepare data from CSV"""
    print(f"Loading data from {csv_path}...")
    df = pd.read_csv(csv_path)

    # Drop null values
    df = df.dropna(subset=['description', 'sentiment'])

    # Map sentiment labels
    sent_map = {'neutral': 0, 'positive': 1, 'negative': 2}
    df['label'] = df['sentiment'].map(sent_map)
    df = df.dropna(subset=['label'])

    # Clean text
    print("Cleaning text...")
    df['cleaned_description'] = df['description'].apply(clean_text)
    df = df[df['cleaned_description'].str.len() > 0]

    # Convert to lowercase
    df['cleaned_description'] = df['cleaned_description'].str.lower()

    texts = df['cleaned_description'].tolist()
    labels = df['label'].astype(int).tolist()

    print(f"Loaded {len(texts)} samples")
    print(f"Label distribution:")
    print(pd.Series(labels).value_counts().sort_index())

    return texts, labels


def objective(trial, texts, labels, tokenizer, model_name, output_dir_base):
    """Optuna objective function for hyperparameter optimization"""

    # Suggest hyperparameters
    learning_rate = trial.suggest_float('learning_rate', 1e-5, 5e-4, log=True)
    batch_size = trial.suggest_categorical('batch_size', [8, 16, 32])
    weight_decay = trial.suggest_float('weight_decay', 0.0, 0.3)
    warmup_steps = trial.suggest_int('warmup_steps', 100, 1000)
    max_length = trial.suggest_categorical('max_length', [128, 256, 512])

    # Split data
    X_train, X_val, y_train, y_val = train_test_split(
        texts, labels, test_size=0.2, random_state=42, stratify=labels
    )

    # Create datasets
    train_dataset = SentimentDataset(X_train, y_train, tokenizer, max_length)
    val_dataset = SentimentDataset(X_val, y_val, tokenizer, max_length)

    # Load model
    model = AutoModelForSequenceClassification.from_pretrained(
        model_name,
        num_labels=3
    )

    # Training arguments
    output_dir = os.path.join(output_dir_base, f"trial_{trial.number}")
    training_args = TrainingArguments(
        output_dir=output_dir,
        num_train_epochs=5,  # Shorter for hyperparameter search
        per_device_train_batch_size=batch_size,
        per_device_eval_batch_size=batch_size,
        learning_rate=learning_rate,
        weight_decay=weight_decay,
        warmup_steps=warmup_steps,
        logging_dir=os.path.join(output_dir, 'logs'),
        logging_steps=50,
        eval_strategy="epoch",
        save_strategy="epoch",
        load_best_model_at_end=True,
        metric_for_best_model="accuracy",
        greater_is_better=True,
        save_total_limit=2,
        fp16=torch.cuda.is_available(),
        report_to="none"
    )

    # Trainer
    trainer = Trainer(
        model=model,
        args=training_args,
        train_dataset=train_dataset,
        eval_dataset=val_dataset,
        compute_metrics=compute_metrics,
        callbacks=[EarlyStoppingCallback(early_stopping_patience=2)]
    )

    # Train
    trainer.train()

    # Evaluate
    eval_results = trainer.evaluate()

    # Clean up
    import shutil
    if os.path.exists(output_dir):
        shutil.rmtree(output_dir)

    return eval_results['eval_accuracy']


def main():
    """Main training function"""
    print("="*60)
    print("FinBERT Fine-tuning for Sentiment Analysis")
    print("="*60)

    # Configuration
    csv_path = "/content/raw_dataset.csv"
    model_name = "yiyanghkust/finbert-tone"  # FinBERT specifically for sentiment analysis
    output_dir_base = "finbert_models"
    os.makedirs(output_dir_base, exist_ok=True)

    # Load data
    texts, labels = load_and_prepare_data(csv_path)

    # Load tokenizer
    print(f"\nLoading tokenizer: {model_name}")
    tokenizer = AutoTokenizer.from_pretrained(model_name)

    # Hyperparameter optimization
    print("\n" + "="*60)
    print("Starting Hyperparameter Optimization with Optuna")
    print("="*60)
    print("This will test different hyperparameter combinations...\n")

    study = optuna.create_study(
        direction='maximize',
        study_name='finbert_optimization'
    )

    study.optimize(
        lambda trial: objective(trial, texts, labels, tokenizer, model_name, output_dir_base),
        n_trials=10,  # Number of hyperparameter search trials
        show_progress_bar=True
    )

    print("\n" + "="*60)
    print("Hyperparameter Optimization Complete!")
    print("="*60)
    print(f"\nBest trial:")
    best_trial = study.best_trial
    print(f"  Accuracy: {best_trial.value:.4f}")
    print(f"\n  Best hyperparameters:")
    for key, value in best_trial.params.items():
        print(f"    {key}: {value}")

    # Use best hyperparameters for final training
    best_params = best_trial.params

    # Split data for final training
    X_train, X_test, y_train, y_test = train_test_split(
        texts, labels, test_size=0.2, random_state=42, stratify=labels
    )

    X_train, X_val, y_train, y_val = train_test_split(
        X_train, y_train, test_size=0.2, random_state=42, stratify=y_train
    )

    print(f"\nTrain samples: {len(X_train)}")
    print(f"Validation samples: {len(X_val)}")
    print(f"Test samples: {len(X_test)}")

    # Create datasets
    train_dataset = SentimentDataset(X_train, y_train, tokenizer, best_params['max_length'])
    val_dataset = SentimentDataset(X_val, y_val, tokenizer, best_params['max_length'])
    test_dataset = SentimentDataset(X_test, y_test, tokenizer, best_params['max_length'])

    # Load model
    print(f"\nLoading model: {model_name}")
    model = AutoModelForSequenceClassification.from_pretrained(
        model_name,
        num_labels=3
    )

    # Final training arguments with best hyperparameters
    final_output_dir = os.path.join(output_dir_base, "final_model")
    training_args = TrainingArguments(
        output_dir=final_output_dir,
        num_train_epochs=50,  # Full 50 epochs as requested
        per_device_train_batch_size=best_params['batch_size'],
        per_device_eval_batch_size=best_params['batch_size'],
        learning_rate=best_params['learning_rate'],
        weight_decay=best_params['weight_decay'],
        warmup_steps=best_params['warmup_steps'],
        logging_dir=os.path.join(final_output_dir, 'logs'),
        logging_steps=100,
        eval_strategy="epoch",
        save_strategy="epoch",
        load_best_model_at_end=True,
        metric_for_best_model="accuracy",
        greater_is_better=True,
        save_total_limit=3,
        fp16=torch.cuda.is_available(),
        report_to="none",
        seed=42
    )

    # Trainer
    trainer = Trainer(
        model=model,
        args=training_args,
        train_dataset=train_dataset,
        eval_dataset=val_dataset,
        compute_metrics=compute_metrics,
        callbacks=[EarlyStoppingCallback(early_stopping_patience=5)]
    )

    # Train
    print("\n" + "="*60)
    print("Starting Final Training (50 epochs)")
    print("="*60)
    print(f"Using best hyperparameters from optimization")
    print(f"Training will stop early if validation accuracy doesn't improve for 5 epochs\n")

    trainer.train()

    # Evaluate on test set
    print("\n" + "="*60)
    print("Evaluating on Test Set")
    print("="*60)

    test_results = trainer.evaluate(test_dataset)
    print(f"\nTest Accuracy: {test_results['eval_accuracy']:.4f}")
    print(f"Test F1 Score: {test_results['eval_f1']:.4f}")

    # Get predictions for detailed metrics
    predictions = trainer.predict(test_dataset)
    y_pred = np.argmax(predictions.predictions, axis=1)
    y_true = y_test

    print("\nClassification Report:")
    print(classification_report(y_true, y_pred, target_names=['positive', 'negative', 'neutral']))

    print("\nConfusion Matrix:")
    print(confusion_matrix(y_true, y_pred))

    # Save model
    print(f"\nSaving model to {final_output_dir}...")
    trainer.save_model()
    tokenizer.save_pretrained(final_output_dir)

    # Save metadata
    metadata = {
        "model_name": model_name,
        "model_type": "FinBERT",
        "task": "sentiment_classification",
        "num_labels": 3,
        "label_mapping": {"0": "positive", "1": "negative", "2": "neutral"},
        "best_hyperparameters": {k: float(v) if isinstance(v, (int, float)) else v
                                 for k, v in best_params.items()},
        "test_accuracy": float(test_results['eval_accuracy']),
        "test_f1": float(test_results['eval_f1']),
        "training_date": datetime.now().isoformat(),
        "num_epochs": 50,
        "train_samples": len(X_train),
        "val_samples": len(X_val),
        "test_samples": len(X_test),
        "max_length": best_params['max_length']
    }

    metadata_path = os.path.join(final_output_dir, "model_metadata.json")
    with open(metadata_path, 'w') as f:
        json.dump(metadata, f, indent=2)
    print(f"Metadata saved to {metadata_path}")

    print("\n" + "="*60)
    print("Training Complete!")
    print("="*60)
    print(f"\nModel saved to: {final_output_dir}")
    print(f"Test Accuracy: {test_results['eval_accuracy']:.4f}")


if __name__ == "__main__":
    main()



FinBERT Fine-tuning for Sentiment Analysis
Loading data from /content/raw_dataset.csv...
Cleaning text...
Loaded 13386 samples
Label distribution:
0    3631
1    8296
2    1459
Name: count, dtype: int64

Loading tokenizer: yiyanghkust/finbert-tone


[I 2025-11-17 06:17:44,558] A new study created in memory with name: finbert_optimization



Starting Hyperparameter Optimization with Optuna
This will test different hyperparameter combinations...



  0%|          | 0/10 [00:00<?, ?it/s]

Epoch,Training Loss,Validation Loss,Accuracy,F1
1,0.2251,0.20821,0.958178,0.957695
2,0.1121,0.165859,0.96826,0.968253
3,0.0477,0.11853,0.975355,0.97548
4,0.027,0.125192,0.978715,0.978756
5,0.0047,0.12436,0.979836,0.979824


[I 2025-11-17 06:23:03,089] Trial 0 finished with value: 0.9798356982823002 and parameters: {'learning_rate': 9.631949600148405e-05, 'batch_size': 16, 'weight_decay': 0.21205592102337273, 'warmup_steps': 444, 'max_length': 512}. Best is trial 0 with value: 0.9798356982823002.


Epoch,Training Loss,Validation Loss,Accuracy,F1
1,0.1677,0.203537,0.964152,0.963983
2,0.1083,0.120918,0.974981,0.974904
3,0.0029,0.142995,0.980956,0.980984
4,0.0004,0.152077,0.980956,0.980922
5,0.0003,0.156183,0.98245,0.982449


[I 2025-11-17 06:29:58,199] Trial 1 finished with value: 0.9824495892457058 and parameters: {'learning_rate': 4.182259279695663e-05, 'batch_size': 8, 'weight_decay': 0.23018756357034373, 'warmup_steps': 271, 'max_length': 512}. Best is trial 1 with value: 0.9824495892457058.


Epoch,Training Loss,Validation Loss,Accuracy,F1
1,0.166,0.154617,0.970127,0.969908
2,0.096,0.122041,0.973488,0.97352
3,0.0368,0.123865,0.978342,0.978333
4,0.024,0.132844,0.981703,0.981696
5,0.0087,0.149374,0.981703,0.981709


[I 2025-11-17 06:33:29,094] Trial 2 finished with value: 0.9817027632561613 and parameters: {'learning_rate': 4.00331512967185e-05, 'batch_size': 16, 'weight_decay': 0.28786750000831074, 'warmup_steps': 618, 'max_length': 128}. Best is trial 1 with value: 0.9824495892457058.


Epoch,Training Loss,Validation Loss,Accuracy,F1
1,0.7121,0.644998,0.799851,0.752789
2,0.999,0.896178,0.619866,0.474401
3,0.9637,0.898933,0.619866,0.474401


[I 2025-11-17 06:35:37,771] Trial 3 finished with value: 0.7998506348020911 and parameters: {'learning_rate': 0.00027593739032178664, 'batch_size': 16, 'weight_decay': 0.21495260500016614, 'warmup_steps': 225, 'max_length': 128}. Best is trial 1 with value: 0.9824495892457058.


Epoch,Training Loss,Validation Loss,Accuracy,F1
1,0.1007,0.118791,0.971994,0.97198
2,0.0796,0.133213,0.976848,0.976777
3,0.0257,0.154547,0.976475,0.976447
4,0.0116,0.149026,0.980583,0.980534
5,0.0034,0.150013,0.979836,0.979806


[I 2025-11-17 06:39:17,636] Trial 4 finished with value: 0.9805825242718447 and parameters: {'learning_rate': 3.928879345220552e-05, 'batch_size': 16, 'weight_decay': 0.03354173427908128, 'warmup_steps': 161, 'max_length': 256}. Best is trial 1 with value: 0.9824495892457058.


Epoch,Training Loss,Validation Loss,Accuracy,F1
1,0.132,0.118168,0.972367,0.972418
2,0.0674,0.095195,0.976848,0.976877
3,0.0331,0.13156,0.977969,0.977926
4,0.0155,0.10915,0.981703,0.981707
5,0.0076,0.111598,0.983196,0.983185


[I 2025-11-17 06:41:29,406] Trial 5 finished with value: 0.9831964152352501 and parameters: {'learning_rate': 2.7109460186975377e-05, 'batch_size': 32, 'weight_decay': 0.21613644651013658, 'warmup_steps': 138, 'max_length': 128}. Best is trial 5 with value: 0.9831964152352501.


Epoch,Training Loss,Validation Loss,Accuracy,F1
1,0.2413,0.214355,0.954444,0.954341
2,0.1422,0.181207,0.955564,0.955909
3,0.0836,0.129748,0.972741,0.9726
4,0.0242,0.151311,0.974608,0.974521
5,0.0334,0.149076,0.978715,0.978686


[I 2025-11-17 06:44:59,816] Trial 6 finished with value: 0.9787154592979835 and parameters: {'learning_rate': 9.965583027391777e-05, 'batch_size': 16, 'weight_decay': 0.18509701586154342, 'warmup_steps': 627, 'max_length': 128}. Best is trial 5 with value: 0.9831964152352501.


Epoch,Training Loss,Validation Loss,Accuracy,F1
1,0.3399,0.167964,0.945482,0.945301
2,0.2133,0.28763,0.928305,0.930059
3,0.1501,0.139114,0.960045,0.960672
4,0.0683,0.145191,0.96938,0.969526
5,0.0335,0.147188,0.9705,0.97049


[I 2025-11-17 06:49:50,847] Trial 7 finished with value: 0.9705003734129948 and parameters: {'learning_rate': 0.0002186062828343901, 'batch_size': 32, 'weight_decay': 0.13669197362893865, 'warmup_steps': 385, 'max_length': 512}. Best is trial 5 with value: 0.9831964152352501.


Epoch,Training Loss,Validation Loss,Accuracy,F1
1,0.0733,0.096013,0.976102,0.976211
2,0.0768,0.088918,0.981329,0.98131
3,0.0229,0.109701,0.978715,0.978735
4,0.0095,0.096461,0.98245,0.982449
5,0.0125,0.092895,0.98357,0.98356


[I 2025-11-17 06:55:08,943] Trial 8 finished with value: 0.9835698282300224 and parameters: {'learning_rate': 1.0308139592563334e-05, 'batch_size': 16, 'weight_decay': 0.2237015172724854, 'warmup_steps': 417, 'max_length': 512}. Best is trial 8 with value: 0.9835698282300224.


Epoch,Training Loss,Validation Loss,Accuracy,F1
1,1.0419,0.893594,0.619866,0.474401
2,0.9033,0.895228,0.619866,0.474401
3,0.9999,0.906497,0.619866,0.474401


[I 2025-11-17 06:59:21,112] Trial 9 finished with value: 0.619865571321882 and parameters: {'learning_rate': 0.00025851200048274424, 'batch_size': 8, 'weight_decay': 0.23086622604054047, 'warmup_steps': 114, 'max_length': 512}. Best is trial 8 with value: 0.9835698282300224.

Hyperparameter Optimization Complete!

Best trial:
  Accuracy: 0.9836

  Best hyperparameters:
    learning_rate: 1.0308139592563334e-05
    batch_size: 16
    weight_decay: 0.2237015172724854
    warmup_steps: 417
    max_length: 512

Train samples: 8566
Validation samples: 2142
Test samples: 2678

Loading model: yiyanghkust/finbert-tone

Starting Final Training (50 epochs)
Using best hyperparameters from optimization
Training will stop early if validation accuracy doesn't improve for 5 epochs



Epoch,Training Loss,Validation Loss,Accuracy,F1
1,0.1316,0.192762,0.964986,0.964983
2,0.0694,0.151387,0.974323,0.974343
3,0.0496,0.150927,0.97619,0.976203
4,0.0298,0.185857,0.972456,0.972517
5,0.026,0.191361,0.978525,0.978512
6,0.0019,0.234066,0.972923,0.973044
7,0.0077,0.223401,0.972923,0.972932
8,0.0078,0.25235,0.974323,0.974414
9,0.007,0.22887,0.97479,0.974787
10,0.002,0.236678,0.974323,0.974338



Evaluating on Test Set



Test Accuracy: 0.9810
Test F1 Score: 0.9809

Classification Report:
              precision    recall  f1-score   support

    positive       0.97      0.96      0.97       726
    negative       0.99      0.99      0.99      1660
     neutral       0.97      0.96      0.96       292

    accuracy                           0.98      2678
   macro avg       0.98      0.97      0.97      2678
weighted avg       0.98      0.98      0.98      2678


Confusion Matrix:
[[ 699   18    9]
 [  11 1648    1]
 [   9    3  280]]

Saving model to finbert_models/final_model...
Metadata saved to finbert_models/final_model/model_metadata.json

Training Complete!

Model saved to: finbert_models/final_model
Test Accuracy: 0.9810


In [4]:
# from google.colab import files
# import os

# zip_filename = '/content/finbert_models/final_model.zip'

# if os.path.exists(zip_filename):
#     files.download(zip_filename)
#     print(f"Downloading {zip_filename}...")
# else:
#     print(f"Error: The file {zip_filename} was not found.")
#     print("Please ensure the folder was successfully zipped in the previous step.")

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

Downloading /content/finbert_models/final_model.zip...


In [24]:
import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification, pipeline

# Load the fine-tuned model and tokenizer
model_path = "finbert_models/final_model"  # Path to your saved model
tokenizer = AutoTokenizer.from_pretrained(model_path)
model = AutoModelForSequenceClassification.from_pretrained(model_path)

# Option 1: Using pipeline (easiest for quick inference)
sentiment_pipeline = pipeline("text-classification", model=model, tokenizer=tokenizer)

# Example texts
new_texts = [
    "AAPL slipped briefly in early trading, but strong demand for its latest devices helped the stock recover and close the day with modest gains."
]

# Run inference
results = sentiment_pipeline(new_texts)
print(results)


Device set to use cuda:0


[{'label': 'Positive', 'score': 0.971278727054596}]
