In [1]:
# 04b_model_finetuning_small_data.ipynb
# Purpose: Small-data optimized fine-tuning of 4 models on multi-bank manually labeled data
# Banks: JP Morgan (JPM) and HSBC
# Models: FinBERT (yiyanghkust), FinBERT (ProsusAI), DistilRoBERTa, CardiffNLP (Twitter-RoBERTa)
# Input: Manual validation data from both banks (small datasets)
# Output: Fine-tuned models optimized for small-data financial sentiment analysis

print("="*70)
print("SMALL-DATA OPTIMIZED FINE-TUNING PIPELINE")
print("Multi-Bank Financial Sentiment Analysis")
print("="*70)

## 1. INITIAL SETUP AND IMPORTS

# Disable wandb logging to prevent API key prompts
import os
os.environ["WANDB_DISABLED"] = "true"
os.environ["WANDB_MODE"] = "disabled"

# Core imports
import pandas as pd
import numpy as np
import json
import torch
import torch.nn as nn
from pathlib import Path
from typing import Dict, List, Optional, Tuple
import warnings
warnings.filterwarnings('ignore')

# Enhanced transformers and ML libraries
import transformers
from transformers import (
    AutoTokenizer, AutoModelForSequenceClassification,
    TrainingArguments, Trainer, DataCollatorWithPadding,
    EarlyStoppingCallback, get_linear_schedule_with_warmup
)
from sklearn.metrics import accuracy_score, precision_recall_fscore_support, confusion_matrix
from sklearn.utils.class_weight import compute_class_weight
from torch.utils.data import Dataset, DataLoader
from torch.optim import AdamW
from tqdm import tqdm
import gc
import random

# Data augmentation imports
from collections import Counter
import re

print(f"Small-data optimized libraries loaded successfully")
print(f"Transformers version: {transformers.__version__}")
print(f"PyTorch version: {torch.__version__}")
print(f"CUDA available: {torch.cuda.is_available()}")



SMALL-DATA OPTIMIZED FINE-TUNING PIPELINE
Multi-Bank Financial Sentiment Analysis
Small-data optimized libraries loaded successfully
Transformers version: 4.56.1
PyTorch version: 2.8.0+cu126
CUDA available: True


In [2]:
## 2. MOUNT DRIVE AND LOAD CONFIGURATION

from google.colab import drive
drive.mount("/content/drive")

# Load enhanced configuration
config_path = Path("/content/drive/MyDrive/CAM_DS_AI_Project_Enhanced/configs/enhanced_config.json")
try:
    with open(config_path, "r") as f:
        enhanced_config = json.load(f)

    SEED = enhanced_config["SEED"]
    BANKS = enhanced_config["BANKS"]
    QUARTERS = enhanced_config["QUARTERS"]
    MODELS = enhanced_config["MODELS"]
    drive_base = Path(enhanced_config["drive_base"])
    colab_base = Path(enhanced_config["colab_base"])

    print(f"Enhanced configuration loaded for banks: {', '.join([bank.upper() for bank in BANKS])}")
except Exception as e:
    print(f"Configuration loading failed: {e}")
    # Fallback configuration
    SEED = 42
    BANKS = ["jpm", "hsbc"]
    MODELS = {
        "finbert_yiyanghkust": "yiyanghkust/finbert-tone",
        "finbert_prosusai": "ProsusAI/finbert",
        "distilroberta": "j-hartmann/emotion-english-distilroberta-base",
        "cardiffnlp_roberta": "cardiffnlp/twitter-roberta-base-sentiment-latest"
    }
    drive_base = Path("/content/drive/MyDrive/CAM_DS_AI_Project_Enhanced")
    print("Using fallback configuration")

# Set seeds for reproducibility
torch.manual_seed(SEED)
np.random.seed(SEED)
random.seed(SEED)



Mounted at /content/drive
Enhanced configuration loaded for banks: JPM, HSBC


In [3]:
## 3. DEFINE ENHANCED PATHS

finetuning_paths = {}
for bank in BANKS:
    finetuning_paths[bank] = {
        "manual_validation": drive_base / f"data/manual_validation/{bank}",
        "results_finetuning": drive_base / f"results/finetuning/{bank}",
        "models_finetuned": drive_base / f"models/finetuned/{bank}",
        "results_comparison": drive_base / f"results/comparison/{bank}",
        "results_sentiment": drive_base / f"results/sentiment/{bank}"
    }

    # Ensure directories exist
    for path in finetuning_paths[bank].values():
        path.mkdir(parents=True, exist_ok=True)

print(f"Enhanced paths configured for {len(BANKS)} banks and {len(MODELS)} models")



Enhanced paths configured for 2 banks and 4 models


In [4]:
## 4. SMALL DATA AUGMENTATION UTILITIES

class TextAugmenter:
    """Text augmentation techniques for small datasets."""

    def __init__(self, seed=42):
        self.seed = seed
        np.random.seed(seed)
        random.seed(seed)

        # Financial domain synonyms
        self.financial_synonyms = {
            'profit': ['earnings', 'revenue', 'income', 'gains'],
            'loss': ['deficit', 'shortfall', 'decline', 'decrease'],
            'growth': ['expansion', 'increase', 'rise', 'improvement'],
            'strong': ['robust', 'solid', 'healthy', 'positive'],
            'weak': ['poor', 'disappointing', 'concerning', 'negative'],
            'exceed': ['surpass', 'outperform', 'beat', 'top'],
            'below': ['under', 'beneath', 'short of', 'less than']
        }

        # Negation patterns
        self.negation_words = ['not', 'no', 'never', 'none', 'nothing', 'neither', 'nor']

    def synonym_replacement(self, text: str, n: int = 1) -> str:
        """Replace words with financial domain synonyms."""
        words = text.split()
        new_words = words.copy()

        replaced = 0
        for i, word in enumerate(words):
            word_lower = word.lower().strip('.,!?;')
            if word_lower in self.financial_synonyms and replaced < n:
                synonyms = self.financial_synonyms[word_lower]
                new_word = random.choice(synonyms)
                # Preserve original case
                if word[0].isupper():
                    new_word = new_word.capitalize()
                new_words[i] = new_word
                replaced += 1

        return ' '.join(new_words)

    def random_insertion(self, text: str, n: int = 1) -> str:
        """Insert financial context words."""
        words = text.split()
        context_words = ['financial', 'quarterly', 'annual', 'market', 'business']

        for _ in range(n):
            if len(words) > 1:
                new_word = random.choice(context_words)
                random_idx = random.randint(0, len(words))
                words.insert(random_idx, new_word)

        return ' '.join(words)

    def random_swap(self, text: str, n: int = 1) -> str:
        """Randomly swap two words."""
        words = text.split()

        for _ in range(n):
            if len(words) >= 2:
                idx1, idx2 = random.sample(range(len(words)), 2)
                words[idx1], words[idx2] = words[idx2], words[idx1]

        return ' '.join(words)

    def augment_text(self, text: str, num_augmentations: int = 1) -> List[str]:
        """Generate multiple augmented versions of text."""
        augmentations = []

        # Original text
        augmentations.append(text)

        # Generate augmentations
        techniques = [
            self.synonym_replacement,
            self.random_insertion,
            self.random_swap
        ]

        for _ in range(num_augmentations):
            technique = random.choice(techniques)
            try:
                augmented = technique(text)
                if augmented != text and len(augmented.strip()) > 0:
                    augmentations.append(augmented)
                else:
                    augmentations.append(text)  # Fallback to original
            except:
                augmentations.append(text)  # Fallback on error

        return augmentations[:num_augmentations + 1]  # Include original

def balance_small_dataset(df: pd.DataFrame, text_col: str = 'text',
                         label_col: str = 'human_label',
                         min_samples_per_class: int = 10) -> pd.DataFrame:
    """Balance small dataset through targeted augmentation."""

    print(f"Balancing small dataset...")
    label_counts = df[label_col].value_counts()
    print(f"Original distribution: {label_counts.to_dict()}")

    augmenter = TextAugmenter(seed=SEED)
    balanced_data = []

    # Add all original data
    for _, row in df.iterrows():
        row_dict = row.to_dict()
        row_dict['augmented'] = False  # Mark as original
        balanced_data.append(row_dict)

    # Augment minority classes
    max_samples = int(label_counts.max())  # Convert to int to avoid numpy array issues
    target_samples = max(min_samples_per_class, max_samples)

    for label in label_counts.index:
        current_count = int(label_counts[label])  # Convert to int
        needed_samples = target_samples - current_count

        if needed_samples > 0:
            label_data = df[df[label_col] == label]
            print(f"Augmenting {label}: need {needed_samples} more samples")

            # Generate augmentations
            augmentations_per_sample = max(1, needed_samples // len(label_data) + 1)

            for _, row in label_data.iterrows():
                if needed_samples <= 0:
                    break

                augmented_texts = augmenter.augment_text(
                    str(row[text_col]),
                    num_augmentations=augmentations_per_sample
                )[1:]  # Skip original

                for aug_text in augmented_texts:
                    if needed_samples <= 0:
                        break

                    aug_row = row.to_dict()
                    aug_row[text_col] = aug_text
                    aug_row['augmented'] = True
                    balanced_data.append(aug_row)
                    needed_samples -= 1

    # Create balanced dataframe
    balanced_df = pd.DataFrame(balanced_data)

    # Ensure augmented column exists
    if 'augmented' not in balanced_df.columns:
        balanced_df['augmented'] = False

    final_counts = balanced_df[label_col].value_counts()
    print(f"Balanced distribution: {final_counts.to_dict()}")
    print(f"Total samples: {len(balanced_df)} (original: {len(df)})")

    return balanced_df


In [5]:
## 5. SMALL DATA OPTIMIZED DATASET CLASS

class SmallDataSentimentDataset(Dataset):
    """Dataset optimized for small data fine-tuning."""

    def __init__(self, texts: List[str], labels: List[str], tokenizer, max_length: int = 128,
                 bank_codes: List[str] = None, use_longer_sequences: bool = True):
        self.texts = texts
        self.labels = labels
        self.tokenizer = tokenizer
        # Use longer sequences for small data to capture more context
        self.max_length = min(max_length * 2, 256) if use_longer_sequences else max_length
        self.bank_codes = bank_codes or ['unknown'] * len(texts)

        # Enhanced label mapping with validation
        unique_labels = sorted(list(set(labels)))
        self.label_to_id = {label: i for i, label in enumerate(unique_labels)}
        self.id_to_label = {i: label for label, i in self.label_to_id.items()}

        print(f"Small-data optimized dataset created:")
        print(f"  Samples: {len(texts)}")
        print(f"  Banks: {len(set(self.bank_codes))}")
        print(f"  Max length: {self.max_length}")
        print(f"  Label mapping: {self.label_to_id}")

        # Validate label distribution
        label_dist = pd.Series(labels).value_counts()
        print(f"  Label distribution: {label_dist.to_dict()}")

    def __len__(self):
        return len(self.texts)

    def __getitem__(self, idx):
        text = str(self.texts[idx])
        label = self.labels[idx]

        # Enhanced tokenization with longer sequences for small data
        try:
            encoding = self.tokenizer(
                text,
                truncation=True,
                padding='max_length',
                max_length=self.max_length,
                return_tensors='pt',
                add_special_tokens=True
            )
        except Exception as e:
            print(f"Tokenization error for sample {idx}: {e}")
            # Fallback to empty text
            encoding = self.tokenizer(
                "",
                truncation=True,
                padding='max_length',
                max_length=self.max_length,
                return_tensors='pt',
                add_special_tokens=True
            )

        return {
            'input_ids': encoding['input_ids'].flatten(),
            'attention_mask': encoding['attention_mask'].flatten(),
            'labels': torch.tensor(self.label_to_id[label], dtype=torch.long)
        }


In [6]:
## 6. ENHANCED DATA LOADING AND PREPARATION

def load_and_augment_training_data():
    """Load and augment training data for small dataset optimization."""
    print("\n" + "="*60)
    print("LOADING AND AUGMENTING SMALL TRAINING DATA")
    print("="*60)

    all_train_data = []
    all_val_data = []
    bank_statistics = {}

    for bank in BANKS:
        print(f"\n📂 Loading {bank.upper()} training data...")

        # Load training data
        train_path = finetuning_paths[bank]["manual_validation"] / f"train_manual_labels_{bank}.csv"
        val_path = finetuning_paths[bank]["manual_validation"] / f"val_manual_labels_{bank}.csv"

        bank_stats = {"train_samples": 0, "val_samples": 0, "label_distribution": {}}

        if train_path.exists():
            try:
                train_df = pd.read_csv(train_path)

                # Filter valid records
                valid_mask = (
                    train_df['human_label'].notna() &
                    (train_df['human_label'] != '') &
                    train_df['text'].notna() &
                    (train_df['text'] != '')
                )
                train_df = train_df[valid_mask]

                if len(train_df) > 0:
                    # Balance small dataset with augmentation
                    print(f"  Original training samples: {len(train_df)}")
                    train_df = balance_small_dataset(train_df, min_samples_per_class=8)

                    # Add bank code for tracking
                    train_df['bank_code'] = bank
                    all_train_data.append(train_df)
                    bank_stats["train_samples"] = len(train_df)
                    bank_stats["label_distribution"] = train_df['human_label'].value_counts().to_dict()

                    print(f"  ✅ Augmented training: {len(train_df)} samples")
                else:
                    print(f"  ⚠️ No valid training samples after filtering")

            except Exception as e:
                print(f"  ❌ Error loading training data: {e}")
        else:
            print(f"  ⚠️ Training file not found: {train_path}")

        # Load validation data (no augmentation for validation)
        if val_path.exists():
            try:
                val_df = pd.read_csv(val_path)

                # Filter valid records
                valid_mask = (
                    val_df['human_label'].notna() &
                    (val_df['human_label'] != '') &
                    val_df['text'].notna() &
                    (val_df['text'] != '')
                )
                val_df = val_df[valid_mask]

                if len(val_df) > 0:
                    # Add bank code for tracking
                    val_df['bank_code'] = bank
                    val_df['augmented'] = False  # Mark as original
                    all_val_data.append(val_df)
                    bank_stats["val_samples"] = len(val_df)

                    print(f"  ✅ Validation: {len(val_df)} samples")
                else:
                    print(f"  ⚠️ No valid validation samples after filtering")

            except Exception as e:
                print(f"  ❌ Error loading validation data: {e}")
        else:
            print(f"  ⚠️ Validation file not found: {val_path}")

        bank_statistics[bank] = bank_stats

    # Combine all data
    combined_train_df = pd.concat(all_train_data, ignore_index=True) if all_train_data else pd.DataFrame()
    combined_val_df = pd.concat(all_val_data, ignore_index=True) if all_val_data else pd.DataFrame()

    print(f"\n📊 Small-data augmented summary:")
    print(f"  Total training samples: {len(combined_train_df)}")
    print(f"  Total validation samples: {len(combined_val_df)}")

    if len(combined_train_df) > 0:
        print(f"  Combined label distribution:")
        combined_label_dist = combined_train_df['human_label'].value_counts()
        for label, count in combined_label_dist.items():
            pct = (count / len(combined_train_df)) * 100
            print(f"    {label}: {count} ({pct:.1f}%)")

        # Augmentation statistics
        aug_stats = combined_train_df['augmented'].value_counts()
        print(f"  Augmentation breakdown:")
        print(f"    Original: {aug_stats.get(False, 0)}")
        print(f"    Augmented: {aug_stats.get(True, 0)}")

    return combined_train_df, combined_val_df, bank_statistics

def prepare_small_data_training(train_df: pd.DataFrame, val_df: pd.DataFrame) -> Tuple[Dict, Dict]:
    """Enhanced preparation for small dataset training."""
    print("\n" + "="*50)
    print("SMALL DATA TRAINING PREPARATION")
    print("="*50)

    if len(train_df) == 0 or len(val_df) == 0:
        print("❌ Insufficient data for training")
        return None, None

    # Extract features
    train_texts = train_df['text'].astype(str).tolist()
    train_labels = train_df['human_label'].tolist()
    train_banks = train_df['bank_code'].tolist()

    val_texts = val_df['text'].astype(str).tolist()
    val_labels = val_df['human_label'].tolist()
    val_banks = val_df['bank_code'].tolist()

    print(f"Training samples: {len(train_texts)}")
    print(f"Validation samples: {len(val_texts)}")

    # Enhanced class weight calculation for small data
    unique_labels = sorted(list(set(train_labels)))
    try:
        # Use more aggressive class weighting for small datasets
        class_weights = compute_class_weight(
            class_weight='balanced',
            classes=np.array(unique_labels),
            y=np.array(train_labels)
        )
        # Apply smoothing to avoid extreme weights
        class_weights = np.clip(class_weights, 0.5, 3.0)
        class_weight_dict = {label: weight for label, weight in zip(unique_labels, class_weights)}
    except Exception as e:
        print(f"Error computing class weights: {e}")
        class_weight_dict = {label: 1.0 for label in unique_labels}

    print(f"Small-data class weights: {class_weight_dict}")

    training_data = {
        'texts': train_texts,
        'labels': train_labels,
        'bank_codes': train_banks,
        'class_weights': class_weight_dict,
        'unique_labels': unique_labels,
        'label_distribution': pd.Series(train_labels).value_counts().to_dict(),
        'is_small_dataset': True,
        'total_samples': len(train_texts)
    }

    validation_data = {
        'texts': val_texts,
        'labels': val_labels,
        'bank_codes': val_banks
    }

    return training_data, validation_data

# Load and prepare small dataset
combined_train_df, combined_val_df, bank_statistics = load_and_augment_training_data()
training_data, validation_data = prepare_small_data_training(combined_train_df, combined_val_df)




LOADING AND AUGMENTING SMALL TRAINING DATA

📂 Loading JPM training data...
  Original training samples: 39
Balancing small dataset...
Original distribution: {'positive': 15, 'neutral': 15, 'negative': 9}
Augmenting negative: need 6 more samples
Balanced distribution: {'negative': 15, 'positive': 15, 'neutral': 15}
Total samples: 45 (original: 39)
  ✅ Augmented training: 45 samples
  ✅ Validation: 10 samples

📂 Loading HSBC training data...
  Original training samples: 55
Balancing small dataset...
Original distribution: {'positive': 31, 'neutral': 16, 'negative': 8}
Augmenting neutral: need 15 more samples
Augmenting negative: need 23 more samples
Balanced distribution: {'negative': 31, 'positive': 31, 'neutral': 31}
Total samples: 93 (original: 55)
  ✅ Augmented training: 93 samples
  ✅ Validation: 14 samples

📊 Small-data augmented summary:
  Total training samples: 138
  Total validation samples: 24
  Combined label distribution:
    negative: 46 (33.3%)
    positive: 46 (33.3%)
  

In [7]:
## 7. SMALL DATA OPTIMIZED TRAINING CONFIGURATION

def compute_small_data_metrics(eval_pred):
    """Metrics computation optimized for small datasets."""
    predictions, labels = eval_pred
    predictions = np.argmax(predictions, axis=1)

    # Calculate metrics with zero_division handling for small datasets
    precision, recall, f1, _ = precision_recall_fscore_support(
        labels, predictions, average='weighted', zero_division=0
    )
    accuracy = accuracy_score(labels, predictions)

    # Per-class metrics for small datasets
    per_class_precision, per_class_recall, per_class_f1, support = precision_recall_fscore_support(
        labels, predictions, average=None, zero_division=0
    )

    # Macro averages (important for imbalanced small datasets)
    macro_precision, macro_recall, macro_f1, _ = precision_recall_fscore_support(
        labels, predictions, average='macro', zero_division=0
    )

    # Confusion matrix
    try:
        cm = confusion_matrix(labels, predictions)
    except:
        cm = None

    metrics = {
        'accuracy': accuracy,
        'f1': f1,
        'precision': precision,
        'recall': recall,
        'f1_macro': macro_f1,
        'precision_macro': macro_precision,
        'recall_macro': macro_recall,
        'confusion_matrix': cm.tolist() if cm is not None else None
    }

    # Add per-class metrics
    unique_labels = sorted(list(set(labels)))
    for i, label in enumerate(unique_labels):
        if i < len(per_class_precision):
            metrics[f'precision_class_{label}'] = per_class_precision[i]
            metrics[f'recall_class_{label}'] = per_class_recall[i]
            metrics[f'f1_class_{label}'] = per_class_f1[i]
            metrics[f'support_class_{label}'] = support[i]

    return metrics


class SmallDataTrainer(Trainer):
    """Trainer optimized for small datasets with enhanced regularization."""

    def __init__(self, class_weights=None, **kwargs):
        super().__init__(**kwargs)
        self.class_weights = class_weights

        if class_weights is not None:
            # Convert class weights to tensor
            weight_tensor = torch.tensor(list(class_weights.values()), dtype=torch.float)
            if torch.cuda.is_available():
                weight_tensor = weight_tensor.cuda()
            self.loss_fn = nn.CrossEntropyLoss(weight=weight_tensor, label_smoothing=0.1)
        else:
            self.loss_fn = nn.CrossEntropyLoss(label_smoothing=0.1)

    def compute_loss(self, model, inputs, return_outputs=False, **kwargs):
        """Enhanced loss computation with label smoothing for small data."""
        labels = inputs.get("labels")
        outputs = model(**inputs)
        logits = outputs.get("logits")

        # Apply custom loss function with label smoothing
        loss = self.loss_fn(logits.view(-1, self.model.config.num_labels), labels.view(-1))

        return (loss, outputs) if return_outputs else loss


def get_small_data_training_args(model_name: str, output_dir: Path, data_size: int) -> TrainingArguments:
    """Training arguments optimized for small datasets."""

    # Base configuration for small data
    base_config = {
        "learning_rate": 5e-6,  # Lower learning rate
        "warmup_steps": min(50, data_size // 4),  # Proportional warmup
        "weight_decay": 0.1,  # Higher weight decay for regularization
        "num_train_epochs": 10,  # More epochs for small data
        "per_device_train_batch_size": 4,  # Smaller batch size
        "per_device_eval_batch_size": 8,
        "gradient_accumulation_steps": 4,  # Simulate larger batch size
    }

    # Model-specific adjustments
    model_configs = {
        "finbert_yiyanghkust": {
            "learning_rate": 3e-6,
            "warmup_steps": min(100, data_size // 3),
            "weight_decay": 0.05
        },
        "finbert_prosusai": {
            "learning_rate": 3e-6,
            "warmup_steps": min(100, data_size // 3),
            "weight_decay": 0.05
        },
        "distilroberta": {
            "learning_rate": 5e-6,
            "warmup_steps": min(150, data_size // 2),
            "weight_decay": 0.1
        },
        "cardiffnlp_roberta": {
            "learning_rate": 2e-6,
            "warmup_steps": min(100, data_size // 3),
            "weight_decay": 0.05
        }
    }

    # Apply model-specific config
    config = base_config.copy()
    if model_name in model_configs:
        config.update(model_configs[model_name])

    return TrainingArguments(
        output_dir=str(output_dir),
        num_train_epochs=config["num_train_epochs"],
        per_device_train_batch_size=config["per_device_train_batch_size"],
        per_device_eval_batch_size=config["per_device_eval_batch_size"],
        gradient_accumulation_steps=config["gradient_accumulation_steps"],
        learning_rate=config["learning_rate"],
        weight_decay=config["weight_decay"],
        warmup_steps=config["warmup_steps"],
        logging_dir=str(output_dir / "logs"),
        logging_steps=10,  # More frequent logging for small data
        eval_strategy="steps",
        eval_steps=max(20, data_size // 8),  # Proportional evaluation
        save_strategy="steps",
        save_steps=max(40, data_size // 4),  # Proportional saving
        save_total_limit=5,  # Keep more checkpoints
        load_best_model_at_end=True,
        metric_for_best_model="f1_macro",  # Use macro F1 for imbalanced small data
        greater_is_better=True,
        fp16=torch.cuda.is_available(),
        dataloader_pin_memory=False,
        remove_unused_columns=True,
        report_to="none",
        seed=SEED,
        max_grad_norm=0.5,  # Gradient clipping for stability
        lr_scheduler_type="cosine",  # Cosine scheduler for small data
        dataloader_num_workers=0  # Avoid multiprocessing issues with small data
    )



In [8]:
## 8. SMALL DATA FINE-TUNING FUNCTION

def fine_tune_small_data_model(
    model_name: str,
    model_path: str,
    training_data: Dict,
    validation_data: Dict,
    bank: str
) -> Dict:
    """Fine-tuning optimized for small datasets."""

    print(f"\n🚀 Starting small-data fine-tuning: {model_name}")
    print(f"   Bank: {bank.upper()}")
    print(f"   Training samples: {len(training_data['texts'])}")
    print(f"   Validation samples: {len(validation_data['texts'])}")

    try:
        # Clear GPU memory
        if torch.cuda.is_available():
            torch.cuda.empty_cache()
        gc.collect()

        # Initialize tokenizer and model with small-data optimizations
        print(f"📥 Loading model: {model_path}")
        tokenizer = AutoTokenizer.from_pretrained(model_path)

        # Add padding token if missing
        if tokenizer.pad_token is None:
            tokenizer.pad_token = tokenizer.eos_token

        # Load model with dropout adjustments for small data
        num_labels = len(training_data['unique_labels'])
        model = AutoModelForSequenceClassification.from_pretrained(
            model_path,
            num_labels=num_labels,
            ignore_mismatched_sizes=True,
            hidden_dropout_prob=0.3,  # Increased dropout for regularization
            attention_probs_dropout_prob=0.2
        )

        # Resize token embeddings if needed
        model.resize_token_embeddings(len(tokenizer))

        print(f"✅ Model loaded with {num_labels} labels")

        # Create small-data optimized datasets
        train_dataset = SmallDataSentimentDataset(
            texts=training_data['texts'],
            labels=training_data['labels'],
            tokenizer=tokenizer,
            max_length=128,
            bank_codes=training_data['bank_codes'],
            use_longer_sequences=True  # Use longer sequences for small data
        )

        val_dataset = SmallDataSentimentDataset(
            texts=validation_data['texts'],
            labels=validation_data['labels'],
            tokenizer=tokenizer,
            max_length=128,
            bank_codes=validation_data['bank_codes'],
            use_longer_sequences=True
        )

        # Prepare output directory
        output_dir = finetuning_paths[bank]["models_finetuned"] / model_name
        output_dir.mkdir(parents=True, exist_ok=True)

        # Small-data training arguments
        training_args = get_small_data_training_args(
            model_name, output_dir, len(training_data['texts'])
        )

        # Create small-data trainer
        trainer = SmallDataTrainer(
            model=model,
            args=training_args,
            train_dataset=train_dataset,
            eval_dataset=val_dataset,
            tokenizer=tokenizer,
            data_collator=DataCollatorWithPadding(tokenizer=tokenizer),
            compute_metrics=compute_small_data_metrics,
            class_weights=training_data['class_weights'],
            callbacks=[EarlyStoppingCallback(early_stopping_patience=5)]  # More patience for small data
        )

        print(f"🎯 Starting small-data training...")

        # Train the model
        training_result = trainer.train()

        print(f"✅ Training completed!")
        print(f"   Final training loss: {training_result.training_loss:.4f}")

        # Evaluate the model
        print(f"📊 Evaluating model...")
        eval_results = trainer.evaluate()

        # Save the model and tokenizer
        print(f"💾 Saving small-data optimized model...")
        model.save_pretrained(output_dir)
        tokenizer.save_pretrained(output_dir)

        # Create label mapping file
        label_mapping = {
            'label_to_id': train_dataset.label_to_id,
            'id_to_label': train_dataset.id_to_label
        }

        with open(output_dir / "label_mapping.json", "w") as f:
            json.dump(label_mapping, f, indent=2)

        # Save detailed results with small-data metrics
        detailed_results = {
            'model_name': model_name,
            'bank': bank,
            'training_samples': len(training_data['texts']),
            'validation_samples': len(validation_data['texts']),
            'num_labels': num_labels,
            'training_loss': training_result.training_loss,
            'evaluation_results': eval_results,
            'class_weights': training_data['class_weights'],
            'label_distribution': training_data['label_distribution'],
            'label_mapping': label_mapping,
            'model_path': str(output_dir),
            'tokenizer_path': str(output_dir),
            'small_data_optimized': True,
            'augmentation_used': True,
            'optimization_techniques': [
                'data_augmentation', 'increased_dropout', 'label_smoothing',
                'cosine_scheduler', 'gradient_clipping', 'higher_weight_decay'
            ]
        }

        results_path = finetuning_paths[bank]["results_finetuning"] / f"{model_name}_results.json"
        with open(results_path, "w") as f:
            json.dump(detailed_results, f, indent=2, default=str)

        # Also save enhanced finetuning results for comparison notebook compatibility
        enhanced_results_path = finetuning_paths[bank]["results_finetuning"] / f"enhanced_finetuning_results_{bank}.json"
        if not enhanced_results_path.exists():
            enhanced_summary = {}
        else:
            try:
                with open(enhanced_results_path, "r") as f:
                    enhanced_summary = json.load(f)
            except:
                enhanced_summary = {}

        enhanced_summary[model_name] = detailed_results
        with open(enhanced_results_path, "w") as f:
            json.dump(enhanced_summary, f, indent=2, default=str)

        print(f"📋 Results saved to: {results_path}")
        print(f"💾 Model saved to: {output_dir}")

        # Clear memory
        del model, trainer, train_dataset, val_dataset
        if torch.cuda.is_available():
            torch.cuda.empty_cache()
        gc.collect()

        return detailed_results

    except Exception as e:
        print(f"❌ Error fine-tuning {model_name}: {str(e)}")

        # Clear memory on error
        if torch.cuda.is_available():
            torch.cuda.empty_cache()
        gc.collect()

        return {
            'model_name': model_name,
            'bank': bank,
            'error': str(e),
            'status': 'failed'
        }


In [9]:
## 9. MAIN SMALL-DATA FINE-TUNING PIPELINE

def run_small_data_finetuning_pipeline():
    """Run the complete small-data optimized fine-tuning pipeline."""

    print("\n" + "="*80)
    print("SMALL-DATA OPTIMIZED FINE-TUNING PIPELINE")
    print("="*80)

    if training_data is None or validation_data is None:
        print("❌ Cannot proceed - insufficient training data")
        return

    # Check if dataset is actually small
    total_samples = training_data['total_samples']
    print(f"\nDataset size analysis:")
    print(f"  Total training samples: {total_samples}")

    if total_samples < 100:
        print("  ✅ Small dataset detected - applying all optimizations")
    elif total_samples < 500:
        print("  ⚠️ Medium dataset - applying selective optimizations")
    else:
        print("  ℹ️ Large dataset - consider using standard fine-tuning")

    all_results = {}
    successful_models = []
    failed_models = []

    # Fine-tune each model for each bank
    for bank in BANKS:
        print(f"\n🏦 Processing bank: {bank.upper()}")
        bank_results = {}

        for model_name, model_path in MODELS.items():
            print(f"\n🤖 Fine-tuning model: {model_name}")
            print(f"   Source: {model_path}")

            try:
                # Fine-tune the model with small-data optimizations
                result = fine_tune_small_data_model(
                    model_name=model_name,
                    model_path=model_path,
                    training_data=training_data,
                    validation_data=validation_data,
                    bank=bank
                )

                bank_results[model_name] = result

                if 'error' not in result:
                    successful_models.append(f"{bank}_{model_name}")
                    print(f"✅ {model_name} fine-tuned successfully for {bank.upper()}")

                    # Print key metrics
                    if 'evaluation_results' in result:
                        eval_res = result['evaluation_results']
                        print(f"   📊 Accuracy: {eval_res.get('eval_accuracy', 0):.4f}")
                        print(f"   📊 F1 (weighted): {eval_res.get('eval_f1', 0):.4f}")
                        print(f"   📊 F1 (macro): {eval_res.get('eval_f1_macro', 0):.4f}")
                        print(f"   📊 Precision: {eval_res.get('eval_precision', 0):.4f}")
                        print(f"   📊 Recall: {eval_res.get('eval_recall', 0):.4f}")
                else:
                    failed_models.append(f"{bank}_{model_name}")
                    print(f"❌ {model_name} failed for {bank.upper()}: {result.get('error', 'Unknown error')}")

            except Exception as e:
                error_result = {
                    'model_name': model_name,
                    'bank': bank,
                    'error': str(e),
                    'status': 'failed'
                }
                bank_results[model_name] = error_result
                failed_models.append(f"{bank}_{model_name}")
                print(f"❌ Unexpected error with {model_name} for {bank.upper()}: {str(e)}")

        all_results[bank] = bank_results

    # Generate summary report
    print("\n" + "="*80)
    print("SMALL-DATA FINE-TUNING SUMMARY REPORT")
    print("="*80)

    print(f"\n📊 Overall Statistics:")
    print(f"   Total models attempted: {len(BANKS) * len(MODELS)}")
    print(f"   Successfully fine-tuned: {len(successful_models)}")
    print(f"   Failed: {len(failed_models)}")
    print(f"   Success rate: {len(successful_models)/(len(BANKS) * len(MODELS)) * 100:.1f}%")

    if successful_models:
        print(f"\n✅ Successfully fine-tuned models:")
        for model in successful_models:
            print(f"   • {model}")

    if failed_models:
        print(f"\n❌ Failed models:")
        for model in failed_models:
            print(f"   • {model}")

    # Create comprehensive summary
    summary = {
        'pipeline_summary': {
            'total_models': len(BANKS) * len(MODELS),
            'successful': len(successful_models),
            'failed': len(failed_models),
            'success_rate': len(successful_models)/(len(BANKS) * len(MODELS)) * 100,
            'banks_processed': BANKS,
            'models_processed': list(MODELS.keys()),
            'small_data_optimized': True,
            'optimization_techniques_applied': [
                'data_augmentation', 'class_balancing', 'increased_dropout',
                'label_smoothing', 'cosine_scheduling', 'gradient_clipping',
                'longer_sequences', 'more_epochs', 'lower_learning_rate'
            ]
        },
        'training_data_summary': {
            'total_training_samples': len(training_data['texts']),
            'total_validation_samples': len(validation_data['texts']),
            'label_distribution': training_data['label_distribution'],
            'class_weights': training_data['class_weights'],
            'augmentation_applied': True,
            'dataset_classification': 'small' if total_samples < 100 else 'medium'
        },
        'bank_statistics': bank_statistics,
        'detailed_results': all_results,
        'successful_models': successful_models,
        'failed_models': failed_models
    }

    # Save comprehensive summary
    summary_path = drive_base / "results/finetuning/small_data_finetuning_summary.json"
    summary_path.parent.mkdir(parents=True, exist_ok=True)

    with open(summary_path, "w") as f:
        json.dump(summary, f, indent=2, default=str)

    # Save bank-specific enhanced finetuning results for comparison notebook
    for bank in BANKS:
        bank_finetuning_results = {}
        if bank in all_results:
            for model_name, result in all_results[bank].items():
                if 'error' not in result:
                    bank_finetuning_results[model_name] = result

        bank_results_path = finetuning_paths[bank]["results_finetuning"] / f"enhanced_finetuning_results_{bank}.json"
        with open(bank_results_path, "w") as f:
            json.dump(bank_finetuning_results, f, indent=2, default=str)

    print(f"\n📋 Comprehensive summary saved to: {summary_path}")

    # Performance comparison table
    if successful_models:
        print(f"\n📈 Small-Data Model Performance Comparison:")
        print(f"{'Bank':<8} {'Model':<20} {'Accuracy':<10} {'F1 (W)':<10} {'F1 (M)':<10} {'Precision':<12} {'Recall':<10}")
        print("-" * 80)

        for bank in BANKS:
            if bank in all_results:
                for model_name, result in all_results[bank].items():
                    if 'evaluation_results' in result:
                        eval_res = result['evaluation_results']
                        acc = eval_res.get('eval_accuracy', 0)
                        f1_w = eval_res.get('eval_f1', 0)
                        f1_m = eval_res.get('eval_f1_macro', 0)
                        prec = eval_res.get('eval_precision', 0)
                        rec = eval_res.get('eval_recall', 0)

                        print(f"{bank.upper():<8} {model_name:<20} {acc:<10.4f} {f1_w:<10.4f} {f1_m:<10.4f} {prec:<12.4f} {rec:<10.4f}")

    print(f"\n🎉 Small-data fine-tuning pipeline completed!")
    print(f"📁 All results saved in: {drive_base}/results/finetuning/")
    print(f"🤖 Fine-tuned models saved in: {drive_base}/models/finetuned/")

    return summary


In [10]:
## 10. EXECUTE SMALL-DATA FINE-TUNING PIPELINE

# Run the complete small-data optimized fine-tuning pipeline
if __name__ == "__main__":
    # Verify data availability
    if training_data is None or validation_data is None:
        print("❌ No training data available. Please ensure manual validation data exists.")
        print("Expected files:")
        for bank in BANKS:
            train_file = finetuning_paths[bank]["manual_validation"] / f"train_manual_labels_{bank}.csv"
            val_file = finetuning_paths[bank]["manual_validation"] / f"val_manual_labels_{bank}.csv"
            print(f"  {train_file}")
            print(f"  {val_file}")
    else:
        # Execute the pipeline
        pipeline_summary = run_small_data_finetuning_pipeline()

        # Additional memory cleanup
        if torch.cuda.is_available():
            torch.cuda.empty_cache()
        gc.collect()

        print(f"\n🏁 All small-data fine-tuning operations completed!")
        print(f"📊 Check the summary for detailed results and model performance metrics.")




SMALL-DATA OPTIMIZED FINE-TUNING PIPELINE

Dataset size analysis:
  Total training samples: 138
  ⚠️ Medium dataset - applying selective optimizations

🏦 Processing bank: JPM

🤖 Fine-tuning model: finbert_yiyanghkust
   Source: yiyanghkust/finbert-tone

🚀 Starting small-data fine-tuning: finbert_yiyanghkust
   Bank: JPM
   Training samples: 138
   Validation samples: 24
📥 Loading model: yiyanghkust/finbert-tone


config.json:   0%|          | 0.00/533 [00:00<?, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]

pytorch_model.bin:   0%|          | 0.00/439M [00:00<?, ?B/s]

✅ Model loaded with 3 labels
Small-data optimized dataset created:
  Samples: 138
  Banks: 2
  Max length: 256
  Label mapping: {'negative': 0, 'neutral': 1, 'positive': 2}
  Label distribution: {'negative': 46, 'positive': 46, 'neutral': 46}
Small-data optimized dataset created:
  Samples: 24
  Banks: 2
  Max length: 256
  Label mapping: {'negative': 0, 'neutral': 1, 'positive': 2}
  Label distribution: {'positive': 12, 'neutral': 8, 'negative': 4}


model.safetensors:   0%|          | 0.00/439M [00:00<?, ?B/s]

🎯 Starting small-data training...


Step,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall,F1 Macro,Precision Macro,Recall Macro,Confusion Matrix,Precision Class 0,Recall Class 0,F1 Class 0,Support Class 0,Precision Class 1,Recall Class 1,F1 Class 1,Support Class 1,Precision Class 2,Recall Class 2,F1 Class 2,Support Class 2
20,8.4608,10.491673,0.208333,0.120635,0.086247,0.208333,0.209524,0.146853,0.375,"[[4, 0, 0], [7, 1, 0], [0, 12, 0]]",0.363636,1.0,0.533333,4,0.076923,0.125,0.095238,8,0.0,0.0,0.0,12
40,7.1288,8.881717,0.208333,0.113519,0.081585,0.208333,0.19195,0.132867,0.375,"[[4, 0, 0], [7, 1, 0], [2, 10, 0]]",0.307692,1.0,0.470588,4,0.090909,0.125,0.105263,8,0.0,0.0,0.0,12
60,4.6846,6.45546,0.208333,0.173183,0.325758,0.208333,0.21604,0.287879,0.319444,"[[3, 0, 1], [7, 1, 0], [1, 10, 1]]",0.272727,0.75,0.4,4,0.090909,0.125,0.105263,8,0.5,0.083333,0.142857,12
80,3.2667,5.392152,0.208333,0.173183,0.325758,0.208333,0.21604,0.287879,0.319444,"[[3, 0, 1], [7, 1, 0], [1, 10, 1]]",0.272727,0.75,0.4,4,0.090909,0.125,0.105263,8,0.5,0.083333,0.142857,12


✅ Training completed!
   Final training loss: 5.9209
📊 Evaluating model...


💾 Saving small-data optimized model...
📋 Results saved to: /content/drive/MyDrive/CAM_DS_AI_Project_Enhanced/results/finetuning/jpm/finbert_yiyanghkust_results.json
💾 Model saved to: /content/drive/MyDrive/CAM_DS_AI_Project_Enhanced/models/finetuned/jpm/finbert_yiyanghkust
✅ finbert_yiyanghkust fine-tuned successfully for JPM
   📊 Accuracy: 0.2083
   📊 F1 (weighted): 0.1732
   📊 F1 (macro): 0.2160
   📊 Precision: 0.3258
   📊 Recall: 0.2083

🤖 Fine-tuning model: finbert_prosusai
   Source: ProsusAI/finbert

🚀 Starting small-data fine-tuning: finbert_prosusai
   Bank: JPM
   Training samples: 138
   Validation samples: 24
📥 Loading model: ProsusAI/finbert


tokenizer_config.json:   0%|          | 0.00/252 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/758 [00:00<?, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/438M [00:00<?, ?B/s]

✅ Model loaded with 3 labels
Small-data optimized dataset created:
  Samples: 138
  Banks: 2
  Max length: 256
  Label mapping: {'negative': 0, 'neutral': 1, 'positive': 2}
  Label distribution: {'negative': 46, 'positive': 46, 'neutral': 46}
Small-data optimized dataset created:
  Samples: 24
  Banks: 2
  Max length: 256
  Label mapping: {'negative': 0, 'neutral': 1, 'positive': 2}
  Label distribution: {'positive': 12, 'neutral': 8, 'negative': 4}


model.safetensors:   0%|          | 0.00/438M [00:00<?, ?B/s]

🎯 Starting small-data training...


Step,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall,F1 Macro,Precision Macro,Recall Macro,Confusion Matrix,Precision Class 0,Recall Class 0,F1 Class 0,Support Class 0,Precision Class 1,Recall Class 1,F1 Class 1,Support Class 1,Precision Class 2,Recall Class 2,F1 Class 2,Support Class 2
20,2.3495,3.012719,0.041667,0.017544,0.011111,0.041667,0.035088,0.022222,0.083333,"[[1, 0, 3], [2, 0, 6], [12, 0, 0]]",0.066667,0.25,0.105263,4,0.0,0.0,0.0,8,0.0,0.0,0.0,12
40,2.1584,2.634763,0.041667,0.043478,0.045455,0.041667,0.028986,0.030303,0.027778,"[[0, 0, 4], [2, 0, 6], [11, 0, 1]]",0.0,0.0,0.0,4,0.0,0.0,0.0,8,0.090909,0.083333,0.086957,12
60,1.6559,2.068153,0.083333,0.083333,0.083333,0.083333,0.055556,0.055556,0.055556,"[[0, 0, 4], [2, 0, 6], [10, 0, 2]]",0.0,0.0,0.0,4,0.0,0.0,0.0,8,0.166667,0.166667,0.166667,12
80,1.4939,1.905996,0.166667,0.153846,0.142857,0.166667,0.102564,0.095238,0.111111,"[[0, 0, 4], [2, 0, 6], [8, 0, 4]]",0.0,0.0,0.0,4,0.0,0.0,0.0,8,0.285714,0.333333,0.307692,12


✅ Training completed!
   Final training loss: 1.9094
📊 Evaluating model...


💾 Saving small-data optimized model...
📋 Results saved to: /content/drive/MyDrive/CAM_DS_AI_Project_Enhanced/results/finetuning/jpm/finbert_prosusai_results.json
💾 Model saved to: /content/drive/MyDrive/CAM_DS_AI_Project_Enhanced/models/finetuned/jpm/finbert_prosusai
✅ finbert_prosusai fine-tuned successfully for JPM
   📊 Accuracy: 0.1667
   📊 F1 (weighted): 0.1538
   📊 F1 (macro): 0.1026
   📊 Precision: 0.1429
   📊 Recall: 0.1667

🤖 Fine-tuning model: distilroberta
   Source: j-hartmann/emotion-english-distilroberta-base

🚀 Starting small-data fine-tuning: distilroberta
   Bank: JPM
   Training samples: 138
   Validation samples: 24
📥 Loading model: j-hartmann/emotion-english-distilroberta-base


tokenizer_config.json:   0%|          | 0.00/294 [00:00<?, ?B/s]

config.json: 0.00B [00:00, ?B/s]

vocab.json: 0.00B [00:00, ?B/s]

merges.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/239 [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/329M [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/329M [00:00<?, ?B/s]

Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at j-hartmann/emotion-english-distilroberta-base and are newly initialized because the shapes did not match:
- classifier.out_proj.weight: found shape torch.Size([7, 768]) in the checkpoint and torch.Size([3, 768]) in the model instantiated
- classifier.out_proj.bias: found shape torch.Size([7]) in the checkpoint and torch.Size([3]) in the model instantiated
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


✅ Model loaded with 3 labels
Small-data optimized dataset created:
  Samples: 138
  Banks: 2
  Max length: 256
  Label mapping: {'negative': 0, 'neutral': 1, 'positive': 2}
  Label distribution: {'negative': 46, 'positive': 46, 'neutral': 46}
Small-data optimized dataset created:
  Samples: 24
  Banks: 2
  Max length: 256
  Label mapping: {'negative': 0, 'neutral': 1, 'positive': 2}
  Label distribution: {'positive': 12, 'neutral': 8, 'negative': 4}
🎯 Starting small-data training...


Step,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall,F1 Macro,Precision Macro,Recall Macro,Confusion Matrix,Precision Class 0,Recall Class 0,F1 Class 0,Support Class 0,Precision Class 1,Recall Class 1,F1 Class 1,Support Class 1,Precision Class 2,Recall Class 2,F1 Class 2,Support Class 2
20,1.1242,1.082184,0.333333,0.284615,0.261111,0.333333,0.206838,0.196296,0.236111,"[[0, 0, 4], [0, 1, 7], [1, 4, 7]]",0.0,0.0,0.0,4,0.2,0.125,0.153846,8,0.388889,0.583333,0.466667,12
40,1.0833,1.062721,0.416667,0.336806,0.308333,0.416667,0.243056,0.233333,0.291667,"[[0, 0, 4], [0, 1, 7], [0, 3, 9]]",0.0,0.0,0.0,4,0.25,0.125,0.166667,8,0.45,0.75,0.5625,12
60,1.0588,1.03321,0.5,0.442857,0.416667,0.5,0.342857,0.333333,0.375,"[[0, 0, 4], [0, 3, 5], [0, 3, 9]]",0.0,0.0,0.0,4,0.5,0.375,0.428571,8,0.5,0.75,0.6,12
80,1.0393,1.008775,0.625,0.588172,0.70614,0.625,0.536559,0.776316,0.513889,"[[1, 0, 3], [0, 3, 5], [0, 1, 11]]",1.0,0.25,0.4,4,0.75,0.375,0.5,8,0.578947,0.916667,0.709677,12


✅ Training completed!
   Final training loss: 1.0714
📊 Evaluating model...


💾 Saving small-data optimized model...
📋 Results saved to: /content/drive/MyDrive/CAM_DS_AI_Project_Enhanced/results/finetuning/jpm/distilroberta_results.json
💾 Model saved to: /content/drive/MyDrive/CAM_DS_AI_Project_Enhanced/models/finetuned/jpm/distilroberta
✅ distilroberta fine-tuned successfully for JPM
   📊 Accuracy: 0.6250
   📊 F1 (weighted): 0.5882
   📊 F1 (macro): 0.5366
   📊 Precision: 0.7061
   📊 Recall: 0.6250

🤖 Fine-tuning model: cardiffnlp_roberta
   Source: cardiffnlp/twitter-roberta-base-sentiment-latest

🚀 Starting small-data fine-tuning: cardiffnlp_roberta
   Bank: JPM
   Training samples: 138
   Validation samples: 24
📥 Loading model: cardiffnlp/twitter-roberta-base-sentiment-latest


config.json:   0%|          | 0.00/929 [00:00<?, ?B/s]

vocab.json: 0.00B [00:00, ?B/s]

merges.txt: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/239 [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/501M [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/501M [00:00<?, ?B/s]

Some weights of the model checkpoint at cardiffnlp/twitter-roberta-base-sentiment-latest were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


✅ Model loaded with 3 labels
Small-data optimized dataset created:
  Samples: 138
  Banks: 2
  Max length: 256
  Label mapping: {'negative': 0, 'neutral': 1, 'positive': 2}
  Label distribution: {'negative': 46, 'positive': 46, 'neutral': 46}
Small-data optimized dataset created:
  Samples: 24
  Banks: 2
  Max length: 256
  Label mapping: {'negative': 0, 'neutral': 1, 'positive': 2}
  Label distribution: {'positive': 12, 'neutral': 8, 'negative': 4}
🎯 Starting small-data training...


Step,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall,F1 Macro,Precision Macro,Recall Macro,Confusion Matrix,Precision Class 0,Recall Class 0,F1 Class 0,Support Class 0,Precision Class 1,Recall Class 1,F1 Class 1,Support Class 1,Precision Class 2,Recall Class 2,F1 Class 2,Support Class 2
20,1.0841,1.005201,0.666667,0.610526,0.566434,0.666667,0.477193,0.438228,0.527778,"[[0, 3, 1], [0, 6, 2], [0, 2, 10]]",0.0,0.0,0.0,4,0.545455,0.75,0.631579,8,0.769231,0.833333,0.8,12
40,0.9137,1.007972,0.583333,0.548148,0.52381,0.583333,0.424691,0.412698,0.444444,"[[0, 3, 1], [0, 4, 4], [2, 0, 10]]",0.0,0.0,0.0,4,0.571429,0.5,0.533333,8,0.666667,0.833333,0.740741,12
60,0.8265,1.046299,0.666667,0.658832,0.683333,0.666667,0.618708,0.655556,0.611111,"[[2, 1, 1], [0, 4, 4], [2, 0, 10]]",0.5,0.5,0.5,4,0.8,0.5,0.615385,8,0.666667,0.833333,0.740741,12
80,0.8743,1.015232,0.666667,0.658832,0.683333,0.666667,0.618708,0.655556,0.611111,"[[2, 1, 1], [0, 4, 4], [2, 0, 10]]",0.5,0.5,0.5,4,0.8,0.5,0.615385,8,0.666667,0.833333,0.740741,12


✅ Training completed!
   Final training loss: 0.9296
📊 Evaluating model...


💾 Saving small-data optimized model...
📋 Results saved to: /content/drive/MyDrive/CAM_DS_AI_Project_Enhanced/results/finetuning/jpm/cardiffnlp_roberta_results.json
💾 Model saved to: /content/drive/MyDrive/CAM_DS_AI_Project_Enhanced/models/finetuned/jpm/cardiffnlp_roberta
✅ cardiffnlp_roberta fine-tuned successfully for JPM
   📊 Accuracy: 0.6667
   📊 F1 (weighted): 0.6588
   📊 F1 (macro): 0.6187
   📊 Precision: 0.6833
   📊 Recall: 0.6667

🏦 Processing bank: HSBC

🤖 Fine-tuning model: finbert_yiyanghkust
   Source: yiyanghkust/finbert-tone

🚀 Starting small-data fine-tuning: finbert_yiyanghkust
   Bank: HSBC
   Training samples: 138
   Validation samples: 24
📥 Loading model: yiyanghkust/finbert-tone
✅ Model loaded with 3 labels
Small-data optimized dataset created:
  Samples: 138
  Banks: 2
  Max length: 256
  Label mapping: {'negative': 0, 'neutral': 1, 'positive': 2}
  Label distribution: {'negative': 46, 'positive': 46, 'neutral': 46}
Small-data optimized dataset created:
  Samples: 2

Step,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall,F1 Macro,Precision Macro,Recall Macro,Confusion Matrix,Precision Class 0,Recall Class 0,F1 Class 0,Support Class 0,Precision Class 1,Recall Class 1,F1 Class 1,Support Class 1,Precision Class 2,Recall Class 2,F1 Class 2,Support Class 2
20,8.4608,10.491673,0.208333,0.120635,0.086247,0.208333,0.209524,0.146853,0.375,"[[4, 0, 0], [7, 1, 0], [0, 12, 0]]",0.363636,1.0,0.533333,4,0.076923,0.125,0.095238,8,0.0,0.0,0.0,12
40,7.1288,8.881717,0.208333,0.113519,0.081585,0.208333,0.19195,0.132867,0.375,"[[4, 0, 0], [7, 1, 0], [2, 10, 0]]",0.307692,1.0,0.470588,4,0.090909,0.125,0.105263,8,0.0,0.0,0.0,12
60,4.6846,6.45546,0.208333,0.173183,0.325758,0.208333,0.21604,0.287879,0.319444,"[[3, 0, 1], [7, 1, 0], [1, 10, 1]]",0.272727,0.75,0.4,4,0.090909,0.125,0.105263,8,0.5,0.083333,0.142857,12
80,3.2667,5.392152,0.208333,0.173183,0.325758,0.208333,0.21604,0.287879,0.319444,"[[3, 0, 1], [7, 1, 0], [1, 10, 1]]",0.272727,0.75,0.4,4,0.090909,0.125,0.105263,8,0.5,0.083333,0.142857,12


✅ Training completed!
   Final training loss: 5.9209
📊 Evaluating model...


💾 Saving small-data optimized model...
📋 Results saved to: /content/drive/MyDrive/CAM_DS_AI_Project_Enhanced/results/finetuning/hsbc/finbert_yiyanghkust_results.json
💾 Model saved to: /content/drive/MyDrive/CAM_DS_AI_Project_Enhanced/models/finetuned/hsbc/finbert_yiyanghkust
✅ finbert_yiyanghkust fine-tuned successfully for HSBC
   📊 Accuracy: 0.2083
   📊 F1 (weighted): 0.1732
   📊 F1 (macro): 0.2160
   📊 Precision: 0.3258
   📊 Recall: 0.2083

🤖 Fine-tuning model: finbert_prosusai
   Source: ProsusAI/finbert

🚀 Starting small-data fine-tuning: finbert_prosusai
   Bank: HSBC
   Training samples: 138
   Validation samples: 24
📥 Loading model: ProsusAI/finbert
✅ Model loaded with 3 labels
Small-data optimized dataset created:
  Samples: 138
  Banks: 2
  Max length: 256
  Label mapping: {'negative': 0, 'neutral': 1, 'positive': 2}
  Label distribution: {'negative': 46, 'positive': 46, 'neutral': 46}
Small-data optimized dataset created:
  Samples: 24
  Banks: 2
  Max length: 256
  Label ma

Step,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall,F1 Macro,Precision Macro,Recall Macro,Confusion Matrix,Precision Class 0,Recall Class 0,F1 Class 0,Support Class 0,Precision Class 1,Recall Class 1,F1 Class 1,Support Class 1,Precision Class 2,Recall Class 2,F1 Class 2,Support Class 2
20,2.3495,3.012719,0.041667,0.017544,0.011111,0.041667,0.035088,0.022222,0.083333,"[[1, 0, 3], [2, 0, 6], [12, 0, 0]]",0.066667,0.25,0.105263,4,0.0,0.0,0.0,8,0.0,0.0,0.0,12
40,2.1584,2.634763,0.041667,0.043478,0.045455,0.041667,0.028986,0.030303,0.027778,"[[0, 0, 4], [2, 0, 6], [11, 0, 1]]",0.0,0.0,0.0,4,0.0,0.0,0.0,8,0.090909,0.083333,0.086957,12
60,1.6559,2.068153,0.083333,0.083333,0.083333,0.083333,0.055556,0.055556,0.055556,"[[0, 0, 4], [2, 0, 6], [10, 0, 2]]",0.0,0.0,0.0,4,0.0,0.0,0.0,8,0.166667,0.166667,0.166667,12
80,1.4939,1.905996,0.166667,0.153846,0.142857,0.166667,0.102564,0.095238,0.111111,"[[0, 0, 4], [2, 0, 6], [8, 0, 4]]",0.0,0.0,0.0,4,0.0,0.0,0.0,8,0.285714,0.333333,0.307692,12


✅ Training completed!
   Final training loss: 1.9094
📊 Evaluating model...


💾 Saving small-data optimized model...
📋 Results saved to: /content/drive/MyDrive/CAM_DS_AI_Project_Enhanced/results/finetuning/hsbc/finbert_prosusai_results.json
💾 Model saved to: /content/drive/MyDrive/CAM_DS_AI_Project_Enhanced/models/finetuned/hsbc/finbert_prosusai
✅ finbert_prosusai fine-tuned successfully for HSBC
   📊 Accuracy: 0.1667
   📊 F1 (weighted): 0.1538
   📊 F1 (macro): 0.1026
   📊 Precision: 0.1429
   📊 Recall: 0.1667

🤖 Fine-tuning model: distilroberta
   Source: j-hartmann/emotion-english-distilroberta-base

🚀 Starting small-data fine-tuning: distilroberta
   Bank: HSBC
   Training samples: 138
   Validation samples: 24
📥 Loading model: j-hartmann/emotion-english-distilroberta-base


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at j-hartmann/emotion-english-distilroberta-base and are newly initialized because the shapes did not match:
- classifier.out_proj.weight: found shape torch.Size([7, 768]) in the checkpoint and torch.Size([3, 768]) in the model instantiated
- classifier.out_proj.bias: found shape torch.Size([7]) in the checkpoint and torch.Size([3]) in the model instantiated
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


✅ Model loaded with 3 labels
Small-data optimized dataset created:
  Samples: 138
  Banks: 2
  Max length: 256
  Label mapping: {'negative': 0, 'neutral': 1, 'positive': 2}
  Label distribution: {'negative': 46, 'positive': 46, 'neutral': 46}
Small-data optimized dataset created:
  Samples: 24
  Banks: 2
  Max length: 256
  Label mapping: {'negative': 0, 'neutral': 1, 'positive': 2}
  Label distribution: {'positive': 12, 'neutral': 8, 'negative': 4}
🎯 Starting small-data training...


Step,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall,F1 Macro,Precision Macro,Recall Macro,Confusion Matrix,Precision Class 0,Recall Class 0,F1 Class 0,Support Class 0,Precision Class 1,Recall Class 1,F1 Class 1,Support Class 1,Precision Class 2,Recall Class 2,F1 Class 2,Support Class 2
20,1.1242,1.082184,0.333333,0.284615,0.261111,0.333333,0.206838,0.196296,0.236111,"[[0, 0, 4], [0, 1, 7], [1, 4, 7]]",0.0,0.0,0.0,4,0.2,0.125,0.153846,8,0.388889,0.583333,0.466667,12
40,1.0833,1.062721,0.416667,0.336806,0.308333,0.416667,0.243056,0.233333,0.291667,"[[0, 0, 4], [0, 1, 7], [0, 3, 9]]",0.0,0.0,0.0,4,0.25,0.125,0.166667,8,0.45,0.75,0.5625,12
60,1.0588,1.03321,0.5,0.442857,0.416667,0.5,0.342857,0.333333,0.375,"[[0, 0, 4], [0, 3, 5], [0, 3, 9]]",0.0,0.0,0.0,4,0.5,0.375,0.428571,8,0.5,0.75,0.6,12
80,1.0393,1.008775,0.625,0.588172,0.70614,0.625,0.536559,0.776316,0.513889,"[[1, 0, 3], [0, 3, 5], [0, 1, 11]]",1.0,0.25,0.4,4,0.75,0.375,0.5,8,0.578947,0.916667,0.709677,12


✅ Training completed!
   Final training loss: 1.0714
📊 Evaluating model...


💾 Saving small-data optimized model...
📋 Results saved to: /content/drive/MyDrive/CAM_DS_AI_Project_Enhanced/results/finetuning/hsbc/distilroberta_results.json
💾 Model saved to: /content/drive/MyDrive/CAM_DS_AI_Project_Enhanced/models/finetuned/hsbc/distilroberta
✅ distilroberta fine-tuned successfully for HSBC
   📊 Accuracy: 0.6250
   📊 F1 (weighted): 0.5882
   📊 F1 (macro): 0.5366
   📊 Precision: 0.7061
   📊 Recall: 0.6250

🤖 Fine-tuning model: cardiffnlp_roberta
   Source: cardiffnlp/twitter-roberta-base-sentiment-latest

🚀 Starting small-data fine-tuning: cardiffnlp_roberta
   Bank: HSBC
   Training samples: 138
   Validation samples: 24
📥 Loading model: cardiffnlp/twitter-roberta-base-sentiment-latest


Some weights of the model checkpoint at cardiffnlp/twitter-roberta-base-sentiment-latest were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


✅ Model loaded with 3 labels
Small-data optimized dataset created:
  Samples: 138
  Banks: 2
  Max length: 256
  Label mapping: {'negative': 0, 'neutral': 1, 'positive': 2}
  Label distribution: {'negative': 46, 'positive': 46, 'neutral': 46}
Small-data optimized dataset created:
  Samples: 24
  Banks: 2
  Max length: 256
  Label mapping: {'negative': 0, 'neutral': 1, 'positive': 2}
  Label distribution: {'positive': 12, 'neutral': 8, 'negative': 4}
🎯 Starting small-data training...


Step,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall,F1 Macro,Precision Macro,Recall Macro,Confusion Matrix,Precision Class 0,Recall Class 0,F1 Class 0,Support Class 0,Precision Class 1,Recall Class 1,F1 Class 1,Support Class 1,Precision Class 2,Recall Class 2,F1 Class 2,Support Class 2
20,1.0841,1.005201,0.666667,0.610526,0.566434,0.666667,0.477193,0.438228,0.527778,"[[0, 3, 1], [0, 6, 2], [0, 2, 10]]",0.0,0.0,0.0,4,0.545455,0.75,0.631579,8,0.769231,0.833333,0.8,12
40,0.9137,1.007972,0.583333,0.548148,0.52381,0.583333,0.424691,0.412698,0.444444,"[[0, 3, 1], [0, 4, 4], [2, 0, 10]]",0.0,0.0,0.0,4,0.571429,0.5,0.533333,8,0.666667,0.833333,0.740741,12
60,0.8265,1.046299,0.666667,0.658832,0.683333,0.666667,0.618708,0.655556,0.611111,"[[2, 1, 1], [0, 4, 4], [2, 0, 10]]",0.5,0.5,0.5,4,0.8,0.5,0.615385,8,0.666667,0.833333,0.740741,12
80,0.8743,1.015232,0.666667,0.658832,0.683333,0.666667,0.618708,0.655556,0.611111,"[[2, 1, 1], [0, 4, 4], [2, 0, 10]]",0.5,0.5,0.5,4,0.8,0.5,0.615385,8,0.666667,0.833333,0.740741,12


✅ Training completed!
   Final training loss: 0.9296
📊 Evaluating model...


💾 Saving small-data optimized model...
📋 Results saved to: /content/drive/MyDrive/CAM_DS_AI_Project_Enhanced/results/finetuning/hsbc/cardiffnlp_roberta_results.json
💾 Model saved to: /content/drive/MyDrive/CAM_DS_AI_Project_Enhanced/models/finetuned/hsbc/cardiffnlp_roberta
✅ cardiffnlp_roberta fine-tuned successfully for HSBC
   📊 Accuracy: 0.6667
   📊 F1 (weighted): 0.6588
   📊 F1 (macro): 0.6187
   📊 Precision: 0.6833
   📊 Recall: 0.6667

SMALL-DATA FINE-TUNING SUMMARY REPORT

📊 Overall Statistics:
   Total models attempted: 8
   Successfully fine-tuned: 8
   Failed: 0
   Success rate: 100.0%

✅ Successfully fine-tuned models:
   • jpm_finbert_yiyanghkust
   • jpm_finbert_prosusai
   • jpm_distilroberta
   • jpm_cardiffnlp_roberta
   • hsbc_finbert_yiyanghkust
   • hsbc_finbert_prosusai
   • hsbc_distilroberta
   • hsbc_cardiffnlp_roberta

📋 Comprehensive summary saved to: /content/drive/MyDrive/CAM_DS_AI_Project_Enhanced/results/finetuning/small_data_finetuning_summary.json

📈 Small

In [11]:
## 11. UTILITY FUNCTIONS FOR SMALL-DATA MODELS

def load_finetuned_model(model_name: str, bank: str):
    """Load a fine-tuned model for inference."""
    model_path = finetuning_paths[bank]["models_finetuned"] / model_name

    if not model_path.exists():
        raise FileNotFoundError(f"Fine-tuned model not found: {model_path}")

    tokenizer = AutoTokenizer.from_pretrained(model_path)
    model = AutoModelForSequenceClassification.from_pretrained(model_path)

    # Load label mapping
    label_mapping_path = model_path / "label_mapping.json"
    if label_mapping_path.exists():
        with open(label_mapping_path, "r") as f:
            label_mapping = json.load(f)
    else:
        label_mapping = None

    return model, tokenizer, label_mapping

def load_all_finetuned_models_for_comparison():
    """Load all fine-tuned models for use in comparison notebook."""
    print("Loading all fine-tuned models for comparison...")

    loaded_models = {}

    for bank in BANKS:
        loaded_models[bank] = {}
        for model_name in MODELS.keys():
            try:
                model, tokenizer, label_mapping = load_finetuned_model(model_name, bank)
                loaded_models[bank][model_name] = {
                    'model': model,
                    'tokenizer': tokenizer,
                    'label_mapping': label_mapping,
                    'model_path': str(finetuning_paths[bank]["models_finetuned"] / model_name)
                }
                print(f"  ✅ Loaded {bank}.{model_name}")
            except Exception as e:
                print(f"  ❌ Failed to load {bank}.{model_name}: {e}")
                loaded_models[bank][model_name] = None

    return loaded_models

def predict_sentiment(text: str, model, tokenizer, label_mapping=None):
    """Predict sentiment for a single text using fine-tuned model."""
    model.eval()

    # Tokenize input (use longer sequences for small-data models)
    inputs = tokenizer(
        text,
        truncation=True,
        padding=True,
        max_length=256,  # Longer for small-data models
        return_tensors='pt'
    )

    # Get predictions
    with torch.no_grad():
        outputs = model(**inputs)
        predictions = torch.nn.functional.softmax(outputs.logits, dim=-1)
        predicted_class = torch.argmax(predictions, dim=-1).item()
        confidence = torch.max(predictions).item()

    # Convert to label if mapping available
    if label_mapping and 'id_to_label' in label_mapping:
        predicted_label = label_mapping['id_to_label'][str(predicted_class)]
    else:
        predicted_label = predicted_class

    return {
        'predicted_label': predicted_label,
        'confidence': confidence,
        'raw_predictions': predictions.numpy()
    }

def create_comparison_compatibility_bridge():
    """Create compatibility files and structure for 05_model_comparison.ipynb."""
    print("Creating compatibility bridge for model comparison notebook...")

    # Ensure comparison results directories exist
    for bank in BANKS:
        comparison_results_dir = finetuning_paths[bank]["results_comparison"]
        comparison_results_dir.mkdir(parents=True, exist_ok=True)

        # Create enhanced comparison results file structure
        comparison_template = {
            'timestamp': pd.Timestamp.now().isoformat(),
            'bank': bank,
            'models_finetuned': [],
            'finetuning_status': 'completed',
            'available_for_comparison': True,
            'small_data_optimized': True,
            'optimization_techniques': [
                'data_augmentation', 'class_balancing', 'increased_dropout',
                'label_smoothing', 'cosine_scheduling', 'gradient_clipping'
            ]
        }

        # Check which models were successfully fine-tuned
        for model_name in MODELS.keys():
            model_path = finetuning_paths[bank]["models_finetuned"] / model_name
            if model_path.exists():
                comparison_template['models_finetuned'].append(model_name)

        # Save compatibility file
        compatibility_file = comparison_results_dir / f"finetuning_status_{bank}.json"
        with open(compatibility_file, "w") as f:
            json.dump(comparison_template, f, indent=2, default=str)

        print(f"  Created compatibility file for {bank.upper()}: {compatibility_file}")

    # Create global compatibility summary
    global_compatibility = {
        'banks_processed': BANKS,
        'models_available': list(MODELS.keys()),
        'finetuning_complete': True,
        'comparison_ready': True,
        'small_data_optimized': True,
        'paths': {
            'finetuning_results': str(drive_base / "results/finetuning"),
            'models': str(drive_base / "models/finetuned"),
            'comparison_ready': str(drive_base / "results/comparison")
        }
    }

    global_compatibility_file = drive_base / "results" / "finetuning_comparison_bridge.json"
    global_compatibility_file.parent.mkdir(parents=True, exist_ok=True)
    with open(global_compatibility_file, "w") as f:
        json.dump(global_compatibility, f, indent=2, default=str)

    print(f"  Global compatibility bridge: {global_compatibility_file}")

    return global_compatibility

print("\n🎯 Small-data optimized fine-tuning notebook ready!")
print("This version includes:")
print("  • Text augmentation for minority classes")
print("  • Balanced dataset creation")
print("  • Increased regularization (dropout, weight decay)")
print("  • Label smoothing for robustness")
print("  • Cosine learning rate scheduling")
print("  • Gradient clipping for stability")
print("  • Longer input sequences")
print("  • Macro F1 optimization for imbalanced data")


🎯 Small-data optimized fine-tuning notebook ready!
This version includes:
  • Text augmentation for minority classes
  • Balanced dataset creation
  • Increased regularization (dropout, weight decay)
  • Label smoothing for robustness
  • Cosine learning rate scheduling
  • Gradient clipping for stability
  • Longer input sequences
  • Macro F1 optimization for imbalanced data
