# 🦙 Scam-Detector-LoRA Model Testing on Kaggle Dataset

This notebook tests the **scam-detector-lora** model (Llama-3-8B with LoRA adapter) on the Kaggle fraud detection dataset.

**Tasks:**
1. **Classification**: Predict fraud categories (job_scam, phishing, legitimate, etc.)
2. **Reasoning**: Generate explanations for why a message is fraud or legitimate

**Model Details:**
- Base Model: `unsloth/llama-3-8b-instruct-bnb-4bit`
- LoRA Configuration: r=16, alpha=16, targets all attention/FFN modules
- Task Type: Causal Language Modeling with instruction tuning

---

## 📋 Kaggle Setup Instructions

**To run this notebook on Kaggle:**

1. **Add Input Datasets:**
   - Add the `scam-detector-lora` model as input data
     - Path will be: `/kaggle/input/scam-detector-lora/other/default/1/scam-detector-lora`
   - Add your fraud detection dataset (e.g., `fraud-dataset`)
     - Path will be: `/kaggle/input/fraud-dataset/final_fraud_detection_dataset.csv`

2. **Enable GPU Accelerator:**
   - Go to Settings → Accelerator → Select "GPU T4 x2" or "GPU P100"
   - This is required for running the Llama-3-8B model efficiently

3. **Set Internet to ON:**
   - Required to download the base model `unsloth/llama-3-8b-instruct-bnb-4bit`

4. **Run all cells sequentially**

The notebook will automatically detect available paths and adjust accordingly.

## 1️⃣ Environment Setup and Imports

In [1]:
# Debug: Check available input paths in Kaggle
from pathlib import Path
import os

# Only runs on Kaggle: print each entry under /kaggle/input with a short preview
if Path('/kaggle').exists():
    print("🔍 Available datasets in /kaggle/input:")
    for dataset_dir in sorted(Path('/kaggle/input').iterdir()):
        print(f"  📁 {dataset_dir.name}")
        if not dataset_dir.is_dir():
            continue
        # Preview at most five direct children of each dataset directory
        children = sorted(dataset_dir.iterdir())
        for child in children[:5]:
            print(f"     └─ {child.relative_to(dataset_dir)}")
    print("\n")

🔍 Available datasets in /kaggle/input:
  📁 fraud-dataset
     └─ final_fraud_detection_dataset.csv
  📁 scam-detector-lora
     └─ other




In [None]:
# Install required packages
import sys
import subprocess

def install_packages():
    """Install the notebook's Python dependencies with pip.

    pip is invoked through ``sys.executable`` so the packages are installed
    for the interpreter that is running this notebook. ``check_call`` raises
    ``subprocess.CalledProcessError`` if pip exits with a non-zero status.
    """
    requirements = [
        # Model stack
        "transformers>=4.40.0",
        "peft>=0.17.0",
        "torch>=2.0.0",
        "accelerate>=0.27.0",
        "bitsandbytes>=0.43.0",
        # Data handling and evaluation
        "pandas",
        "numpy",
        "scikit-learn",
        "tqdm",
    ]
    pip_command = [sys.executable, "-m", "pip", "install", "-q", *requirements]

    print("📦 Installing required packages...")
    subprocess.check_call(pip_command)
    print("✅ Packages installed successfully!")

# Install dependencies (comment out the call below if your environment already provides them)
install_packages()

In [3]:
import os
import warnings
warnings.filterwarnings('ignore')

import numpy as np
import pandas as pd
from pathlib import Path
from tqdm import tqdm
from datetime import datetime

import torch
import transformers
import peft
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
from peft import PeftModel, PeftConfig

from sklearn.model_selection import train_test_split
from sklearn.metrics import (
    accuracy_score, 
    precision_recall_fscore_support,
    classification_report,
    confusion_matrix
)

# Record the library versions and the accelerator used for this run
for _label, _module in (("PyTorch", torch), ("Transformers", transformers), ("PEFT", peft)):
    print(f"🔧 {_label} version: {_module.__version__}")

if torch.cuda.is_available():
    print(f"🔧 Device: {torch.cuda.get_device_name(0)}")
else:
    print("🔧 Device: CPU")

2025-10-20 09:35:12.965660: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1760952913.206375      13 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1760952913.273706      13 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


🔧 PyTorch version: 2.6.0+cu124
🔧 Transformers version: 4.53.3
🔧 PEFT version: 0.16.0
🔧 Device: CPU


## 2️⃣ Configuration and Paths

In [4]:
# Detect environment (Kaggle or Local)
IS_KAGGLE = Path('/kaggle').exists()

if IS_KAGGLE:
    # Kaggle paths - update these based on your Kaggle dataset names
    OUTPUT_DIR = Path('/kaggle/working')
    # Dataset path - adjust the dataset name if different
    DATASET_PATH = Path('/kaggle/input/fraud-dataset/final_fraud_detection_dataset.csv')
    # Model path - using the uploaded scam-detector-lora
    MODEL_PATH = Path('/kaggle/input/scam-detector-lora/other/default/1/scam-detector-lora')
else:
    # Local paths
    BASE_DIR = Path('/Users/admin/Desktop/Workbench/Baseline Demo')
    OUTPUT_DIR = BASE_DIR / 'runs'
    DATASET_PATH = BASE_DIR / 'final_fraud_detection_dataset.csv'
    MODEL_PATH = BASE_DIR / 'models' / 'scam-detector-lora'

OUTPUT_DIR.mkdir(parents=True, exist_ok=True)

print(f"🗂️  Running on: {'Kaggle' if IS_KAGGLE else 'Local'}")
print(f"📂 Dataset: {DATASET_PATH}")
print(f"🤖 Model: {MODEL_PATH}")
print(f"💾 Output: {OUTPUT_DIR}")

# Check the dataset path; on Kaggle, fall back to searching every attached input
if DATASET_PATH.exists():
    print("✅ Dataset found!")
else:
    print(f"⚠️  Dataset not found at {DATASET_PATH}")
    if IS_KAGGLE:
        # sorted() makes the choice deterministic when several copies exist
        possible_paths = sorted(Path('/kaggle/input').glob('**/final_fraud_detection_dataset.csv'))
        if possible_paths:
            DATASET_PATH = possible_paths[0]
            print(f"✅ Found dataset at: {DATASET_PATH}")
        else:
            print("❌ Dataset not found. Please check Kaggle input datasets.")

# Check the adapter path; on Kaggle, fall back to searching every attached input
if MODEL_PATH.exists():
    print("✅ Model found!")
else:
    print(f"⚠️  Model not found at {MODEL_PATH}")
    if IS_KAGGLE:
        # Search for the adapter's config file instead of a directory name:
        # globbing '**/scam-detector-lora' also matches the dataset root
        # /kaggle/input/scam-detector-lora, which only wraps the real adapter
        # folder several levels deeper. Directories named 'scam-detector-lora'
        # are preferred when more than one adapter is attached.
        possible_models = sorted(
            (cfg.parent for cfg in Path('/kaggle/input').glob('**/adapter_config.json')),
            key=lambda p: (p.name != 'scam-detector-lora', str(p)),
        )
        if possible_models:
            MODEL_PATH = possible_models[0]
            print(f"✅ Found model at: {MODEL_PATH}")
        else:
            print("❌ Model not found. Please check Kaggle input datasets.")

🗂️  Running on: Kaggle
📂 Dataset: /kaggle/input/fraud-dataset/final_fraud_detection_dataset.csv
🤖 Model: /kaggle/input/scam-detector-lora/other/default/1/scam-detector-lora
💾 Output: /kaggle/working
✅ Dataset found!
✅ Model found!


In [5]:
# Run settings shared by the cells below
CONFIG = dict(
    # Model identifiers
    model_name='unsloth/llama-3-8b-instruct-bnb-4bit',
    adapter_path=str(MODEL_PATH),
    # Token budgets: prompt truncation length and new tokens for reasoning output
    max_length=512,
    generation_max_length=256,
    # Sampling settings
    temperature=0.7,
    top_p=0.9,
    top_k=50,
    repetition_penalty=1.1,
    # Evaluation settings
    test_size=0.2,
    random_seed=42,
    batch_size=8,  # Adjust based on GPU memory
    sample_size=500,  # Number of samples to test (set to None for full dataset)
)

# Fraud categories (order is preserved wherever this list is passed as `labels`)
FRAUD_CATEGORIES = [
    'job_scam', 'legitimate', 'phishing',
    'popup_scam', 'refund_scam', 'reward_scam',
    'sms_spam', 'ssn_scam', 'tech_support_scam',
]

print("✅ Configuration loaded")

✅ Configuration loaded


## 3️⃣ Load Dataset

In [6]:
# Read the full labelled dataset from disk
print("📊 Loading dataset...")
df = pd.read_csv(DATASET_PATH)

# Quick structural overview
print(f"\n📈 Dataset shape: {df.shape}")
print(f"\n📋 Columns: {list(df.columns)}")
print("\n🔍 First few rows:")
display(df.head())

# Label balance across the detailed categories
print("\n📊 Category distribution:")
print(df['detailed_category'].value_counts())

# Evaluate on a seeded random subset unless sample_size is None
if CONFIG['sample_size'] is None:
    df_test = df.copy()
else:
    print(f"\n✂️  Sampling {CONFIG['sample_size']} examples for testing...")
    df_test = df.sample(
        n=min(CONFIG['sample_size'], len(df)),
        random_state=CONFIG['random_seed'],
    )

print(f"\n✅ Test set size: {len(df_test)} samples")

📊 Loading dataset...

📈 Dataset shape: (194913, 4)

📋 Columns: ['text', 'binary_label', 'detailed_category', 'data_type']

🔍 First few rows:


Unnamed: 0,text,binary_label,detailed_category,data_type
0,"Position Summary The Asset Manager will plan, ...",1,job_scam,text_classification
1,We offer interns that can develop web sites re...,1,job_scam,text_classification
2,We are a Health Benefits company. Helping peop...,1,job_scam,text_classification
3,Apply using below link Clinical Director - Sur...,1,job_scam,text_classification
4,Looking for an Assistant Accountant to join a ...,1,job_scam,text_classification



📊 Category distribution:
detailed_category
legitimate           101717
phishing              71857
popup_scam            11333
sms_spam               6988
reward_scam             606
tech_support_scam       605
refund_scam             604
ssn_scam                604
job_scam                599
Name: count, dtype: int64

✂️  Sampling 500 examples for testing...

✅ Test set size: 500 samples


## 4️⃣ Load Model with LoRA Adapter

In [7]:
print("🤖 Loading model and LoRA adapter...")

# The load below requests 4-bit bitsandbytes quantization. Warn up front when
# no CUDA device is visible so a missing accelerator is easy to diagnose.
# NOTE(review): whether loading can succeed on CPU depends on the installed
# bitsandbytes build - confirm before relying on it.
if not torch.cuda.is_available():
    print("⚠️  No CUDA GPU detected. Enable a GPU accelerator (see the setup instructions); "
          "4-bit loading may fail or be very slow without one.")

# Configure 4-bit quantization for efficient inference
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.float16
)

# Load base model.
# trust_remote_code is intentionally not enabled: it allows code shipped in the
# model repository to run locally, and a Llama checkpoint is expected to load
# through the standard transformers classes without it.
print("\n📥 Loading base model...")
base_model = AutoModelForCausalLM.from_pretrained(
    CONFIG['model_name'],
    quantization_config=bnb_config,
    device_map="auto",
)

# Load tokenizer from the adapter directory
print("📥 Loading tokenizer...")
tokenizer = AutoTokenizer.from_pretrained(CONFIG['adapter_path'])

# Ensure tokenizer has padding token (reuse EOS when none is defined)
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token
    tokenizer.pad_token_id = tokenizer.eos_token_id

# Attach the LoRA adapter to the base model and switch to inference mode
print("📥 Loading LoRA adapter...")
model = PeftModel.from_pretrained(base_model, CONFIG['adapter_path'])
model.eval()

print("\n✅ Model loaded successfully!")
print(f"🔧 Model device: {next(model.parameters()).device}")
print(f"🔧 Model dtype: {next(model.parameters()).dtype}")

🤖 Loading model and LoRA adapter...


PackageNotFoundError: No package metadata was found for bitsandbytes

## 5️⃣ Prompt Templates

In [None]:
def create_classification_prompt(text):
    """Build the chat prompt that asks the model for a single category label.

    Only the first 500 characters of ``text`` are embedded in the user turn.
    The returned string ends with an open assistant header followed by a
    blank line, so generation continues as the assistant's reply.
    """
    snippet = text[:500]
    return f"""<|begin_of_text|><|start_header_id|>system<|end_header_id|>

You are an expert fraud detection system. Analyze the given message and classify it into one of these categories:
- job_scam: Fraudulent job offers or work-from-home scams
- phishing: Attempts to steal personal information or credentials
- popup_scam: Fake alerts or popup warnings
- refund_scam: Fake refund or payment requests
- reward_scam: Fake prize or lottery winnings
- sms_spam: Unsolicited commercial messages
- ssn_scam: Social security number theft attempts
- tech_support_scam: Fake technical support scams
- legitimate: Genuine, non-fraudulent messages

Respond with ONLY the category name.<|eot_id|><|start_header_id|>user<|end_header_id|>

Classify this message:
{snippet}<|eot_id|><|start_header_id|>assistant<|end_header_id|>

"""

def create_reasoning_prompt(text):
    """Build the chat prompt that asks the model to explain its fraud verdict.

    Only the first 500 characters of ``text`` are embedded in the user turn.
    The returned string ends with an open assistant header followed by a
    blank line, so generation continues as the assistant's reply.
    """
    snippet = text[:500]
    return f"""<|begin_of_text|><|start_header_id|>system<|end_header_id|>

You are an expert fraud detection system. Analyze the given message and:
1. Determine if it's fraudulent or legitimate
2. Identify the specific type of fraud (if applicable)
3. Provide a detailed explanation of your reasoning
4. Point out specific red flags or indicators

Be thorough and educational in your explanation.<|eot_id|><|start_header_id|>user<|end_header_id|>

Analyze this message and explain your reasoning:
{snippet}<|eot_id|><|start_header_id|>assistant<|end_header_id|>

"""

# Render both prompt templates for one example message as a visual check
test_text = "URGENT: Your bank account has been compromised. Click here immediately to secure it."
print(
    "🧪 Sample Classification Prompt:",
    "="*80,
    create_classification_prompt(test_text),
    sep="\n",
)
print("\n" + "="*80)
print(
    "\n🧪 Sample Reasoning Prompt:",
    "="*80,
    create_reasoning_prompt(test_text),
    sep="\n",
)

## 6️⃣ Inference Functions

In [None]:
def generate_response(prompt, max_length=256, temperature=0.7, top_p=0.9, top_k=50):
    """Generate the model's reply to an already formatted prompt.

    Args:
        prompt: Prompt text, typically produced by one of the
            ``create_*_prompt`` helpers (which already include the chat
            special tokens).
        max_length: Maximum number of NEW tokens to generate; forwarded as
            ``max_new_tokens``.
        temperature: Sampling temperature forwarded to ``model.generate``.
        top_p: Nucleus-sampling threshold forwarded to ``model.generate``.
        top_k: Top-k cutoff forwarded to ``model.generate``.

    Returns:
        The decoded text of the newly generated tokens only (the prompt is
        not included), with surrounding whitespace stripped.
    """
    # The prompt templates already begin with the BOS token. Letting the
    # tokenizer add special tokens as well would put a second BOS in front of
    # the prompt, so only add them when the prompt does not carry one itself.
    bos_token = tokenizer.bos_token
    prompt_has_bos = bool(bos_token) and prompt.startswith(bos_token)

    inputs = tokenizer(
        prompt,
        return_tensors="pt",
        truncation=True,
        max_length=CONFIG['max_length'],
        add_special_tokens=not prompt_has_bos,
    )
    inputs = {k: v.to(model.device) for k, v in inputs.items()}
    prompt_token_count = inputs["input_ids"].shape[1]

    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=max_length,
            temperature=temperature,
            top_p=top_p,
            top_k=top_k,
            repetition_penalty=CONFIG['repetition_penalty'],
            do_sample=True,
            pad_token_id=tokenizer.pad_token_id,
            eos_token_id=tokenizer.eos_token_id,
        )

    # Decode only what was generated after the prompt. Decoding the whole
    # sequence and splitting on the word "assistant" would cut off any reply
    # that itself contains that word, and would leak the prompt (including
    # every category name) into the result whenever the split did not apply.
    generated_tokens = outputs[0][prompt_token_count:]
    response = tokenizer.decode(generated_tokens, skip_special_tokens=True)

    return response.strip()

def classify_message(text):
    """Classify a message into one of ``FRAUD_CATEGORIES``.

    The model's reply is mapped to a category in three steps:
      1. an exact match once surrounding punctuation/whitespace is removed;
      2. otherwise the category that is mentioned EARLIEST in the reply,
         matched as a whole word (so "illegitimate" does not count as
         "legitimate");
      3. otherwise the lower-cased first line of the reply is returned
         unchanged, which may not be a known category.

    Returns:
        The predicted label as a lower-case string.
    """
    import re

    prompt = create_classification_prompt(text)
    response = generate_response(prompt, max_length=50, temperature=0.3)
    response_lower = response.lower().strip()

    # Step 1: the model followed the instruction and replied with just the label
    bare_answer = response_lower.strip(" \t\r\n.,:;!\"'`*")
    if bare_answer in FRAUD_CATEGORIES:
        return bare_answer

    # Step 2: pick the first category mentioned in the reply. Scanning the
    # category list in its own order instead would let an earlier list entry
    # win over the label the model actually led with
    # (e.g. "phishing - not a legitimate message").
    earliest = None
    for category in FRAUD_CATEGORIES:
        hit = re.search(rf"(?<![a-z_]){re.escape(category)}(?![a-z_])", response_lower)
        if hit and (earliest is None or hit.start() < earliest[0]):
            earliest = (hit.start(), category)
    if earliest is not None:
        return earliest[1]

    # Step 3: return the first line of response if no match found
    return response_lower.split('\n')[0].strip()

def explain_reasoning(text):
    """Return the model's free-text explanation of why ``text`` is or is not fraud.

    The output length and temperature come from ``CONFIG['generation_max_length']``
    and ``CONFIG['temperature']``.
    """
    return generate_response(
        create_reasoning_prompt(text),
        max_length=CONFIG['generation_max_length'],
        temperature=CONFIG['temperature'],
    )

print("✅ Inference functions defined")

## 7️⃣ Test on Sample Messages

In [None]:
# Try the pipeline on a few hand-written messages before the full run
sample_messages = [
    {'text': message, 'expected': label}
    for message, label in (
        (
            "URGENT: Your bank account has been compromised. Click here to verify your identity immediately or your account will be closed.",
            'phishing',
        ),
        (
            "Hi John, thanks for your email. The meeting is scheduled for tomorrow at 2 PM in conference room B. See you there!",
            'legitimate',
        ),
        (
            "Congratulations! You've won $10,000 in our lottery. Send your bank details to claim your prize.",
            'reward_scam',
        ),
        (
            "ALERT: Your computer is infected with 5 viruses! Call 1-800-FAKE-TECH immediately for free virus removal.",
            'tech_support_scam',
        ),
    )
]

print("🧪 Testing model on sample messages\n")
print("="*100)

for example_no, sample in enumerate(sample_messages, start=1):
    message, expected = sample['text'], sample['expected']

    print(f"\n📝 Example {example_no}:")
    print(f"Text: {message[:100]}...")
    print(f"Expected: {expected}")

    # Classification
    predicted = classify_message(message)
    verdict = '✅' if predicted == expected else '❌'
    print(f"Predicted: {predicted}")
    print(f"Match: {verdict}")

    # Reasoning (the printed preview is capped at 300 characters)
    print(f"\n💭 Reasoning:")
    reasoning = explain_reasoning(message)
    if len(reasoning) > 300:
        print(reasoning[:300] + "...")
    else:
        print(reasoning)
    print("\n" + "-"*100)

## 8️⃣ Batch Classification on Test Set

In [None]:
print("🚀 Running batch classification on test set...\n")

# Parallel lists: predictions[i] is the model's label for ground_truth[i]
predictions, ground_truth = [], []

# Rows are classified one at a time; a failure on one row is recorded as the
# label 'error' so the two lists stay aligned with df_test.
for idx, row in tqdm(df_test.iterrows(), total=len(df_test), desc="Classifying"):
    text = row['text']
    true_label = row['detailed_category']

    try:
        predicted_label = classify_message(text)
    except Exception as e:
        print(f"\n⚠️  Error processing sample {idx}: {e}")
        predicted_label = 'error'

    predictions.append(predicted_label)
    ground_truth.append(true_label)

print("\n✅ Classification completed!")

## 9️⃣ Evaluation Metrics

In [None]:
# Overall accuracy over every sample (predictions outside FRAUD_CATEGORIES count as wrong)
accuracy = accuracy_score(ground_truth, predictions)
print(f"\n📊 Overall Accuracy: {accuracy:.4f} ({accuracy*100:.2f}%)\n")

# Predictions that are not a known category ('error' or free-text fallbacks)
# get no row in the labelled metrics below, so report how many there are.
n_invalid_predictions = sum(p not in FRAUD_CATEGORIES for p in predictions)
if n_invalid_predictions:
    print(f"⚠️  {n_invalid_predictions} prediction(s) are not a known category and are scored as misses\n")

# Calculate precision, recall, F1 per class
precision, recall, f1, support = precision_recall_fscore_support(
    ground_truth,
    predictions,
    labels=FRAUD_CATEGORIES,
    average=None,
    zero_division=0
)

# Create metrics dataframe (one row per category, in FRAUD_CATEGORIES order)
metrics_df = pd.DataFrame({
    'Category': FRAUD_CATEGORIES,
    'Precision': precision,
    'Recall': recall,
    'F1-Score': f1,
    'Support': support
})

print("\n📈 Per-Category Metrics:")
print("="*80)
display(metrics_df.sort_values('F1-Score', ascending=False))

# Calculate macro and weighted averages over the same label set as the
# per-category table and the classification report. Without `labels`, every
# distinct unknown prediction string would be treated as an extra class with
# zero scores and the averages would disagree with the report.
precision_macro, recall_macro, f1_macro, _ = precision_recall_fscore_support(
    ground_truth, predictions, labels=FRAUD_CATEGORIES, average='macro', zero_division=0
)
precision_weighted, recall_weighted, f1_weighted, _ = precision_recall_fscore_support(
    ground_truth, predictions, labels=FRAUD_CATEGORIES, average='weighted', zero_division=0
)

print("\n📊 Average Metrics:")
print("="*80)
print(f"Macro Avg    - Precision: {precision_macro:.4f}, Recall: {recall_macro:.4f}, F1: {f1_macro:.4f}")
print(f"Weighted Avg - Precision: {precision_weighted:.4f}, Recall: {recall_weighted:.4f}, F1: {f1_weighted:.4f}")

## 🔟 Detailed Classification Report

In [None]:
# Text report of per-class metrics, restricted to the known categories
print("\n📋 Detailed Classification Report:", "="*80, sep="\n")
print(
    classification_report(
        ground_truth,
        predictions,
        labels=FRAUD_CATEGORIES,
        zero_division=0,
    )
)

## 1️⃣1️⃣ Confusion Matrix

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns

# Confusion matrix over the known categories (true labels on the y axis, predictions on the x axis)
cm = confusion_matrix(ground_truth, predictions, labels=FRAUD_CATEGORIES)

# Draw the annotated heatmap on a new figure
plt.figure(figsize=(12, 10))
ax = sns.heatmap(
    cm,
    annot=True,
    fmt='d',
    cmap='Blues',
    xticklabels=FRAUD_CATEGORIES,
    yticklabels=FRAUD_CATEGORIES,
)
ax.set(
    xlabel='Predicted',
    ylabel='True',
    title='Confusion Matrix - Scam Detector LoRA',
)
plt.xticks(rotation=45, ha='right')
plt.yticks(rotation=0)
plt.tight_layout()

# Save the figure before showing it
plt.savefig(OUTPUT_DIR / 'confusion_matrix.png', dpi=300, bbox_inches='tight')
plt.show()

print(f"\n💾 Confusion matrix saved to: {OUTPUT_DIR / 'confusion_matrix.png'}")

## 1️⃣2️⃣ Generate Reasoning for Sample Cases

In [None]:
# Sample cases for reasoning generation (correct and incorrect predictions)
print("🧠 Generating detailed reasoning for sample cases...\n")
print("="*100)

# Positions (shared by predictions, ground_truth and df_test rows) of hits and misses
correct_mask = [p == t for p, t in zip(predictions, ground_truth)]
correct_indices = [i for i, correct in enumerate(correct_mask) if correct]
incorrect_indices = [i for i, correct in enumerate(correct_mask) if not correct]


def _pick_cases(rng, indices, k=3):
    """Return up to ``k`` distinct positions from ``indices`` as plain ints."""
    k = min(k, len(indices))
    if k == 0:
        return []
    return rng.choice(indices, size=k, replace=False).tolist()


def _explain_cases(case_indices, is_correct):
    """Print and collect the model's reasoning for the given df_test positions."""
    marker = "✅" if is_correct else "❌"
    collected = []
    for idx in case_indices:
        row = df_test.iloc[idx]
        text = row['text']
        true_label = ground_truth[idx]
        pred_label = predictions[idx]

        # str() keeps the preview working when the text cell is not a string
        # (e.g. a missing value), which is a likely source of 'error' predictions.
        print(f"\n📝 Text: {str(text)[:200]}...")
        print(f"{marker} True: {true_label} | Predicted: {pred_label}")
        print(f"\n💭 Reasoning:")
        try:
            reasoning = explain_reasoning(text)
        except Exception as e:
            # Same policy as the batch loop: record the failure and keep going
            reasoning = f"[reasoning unavailable: {e}]"
        print(reasoning)
        print("\n" + "-"*100)

        collected.append({
            'text': text,
            'true_label': true_label,
            'predicted_label': pred_label,
            'correct': is_correct,
            'reasoning': reasoning
        })
    return collected


# Sample 3 correct and 3 incorrect, seeded so the same cases are chosen on every run
_case_rng = np.random.default_rng(CONFIG['random_seed'])
sample_correct = _pick_cases(_case_rng, correct_indices)
sample_incorrect = _pick_cases(_case_rng, incorrect_indices)

reasoning_results = []

print("\n✅ CORRECT PREDICTIONS:\n")
reasoning_results.extend(_explain_cases(sample_correct, True))

print("\n\n❌ INCORRECT PREDICTIONS:\n")
reasoning_results.extend(_explain_cases(sample_incorrect, False))

## 1️⃣3️⃣ Save Results

In [None]:
# Per-sample results: the evaluated rows plus the prediction and a hit/miss flag
results_df = df_test.assign(
    predicted_category=predictions,
    correct=[p == t for p, t in zip(predictions, ground_truth)],
)

# All artefacts of this run share one timestamp in their file names
timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
results_file = OUTPUT_DIR / f'scam_detector_lora_results_{timestamp}.csv'
reasoning_file = OUTPUT_DIR / f'scam_detector_lora_reasoning_{timestamp}.csv'
metrics_file = OUTPUT_DIR / f'scam_detector_lora_metrics_{timestamp}.csv'
summary_file = OUTPUT_DIR / f'scam_detector_lora_summary_{timestamp}.txt'

# Classification results
results_df.to_csv(results_file, index=False)
print(f"\n💾 Classification results saved to: {results_file}")

# Reasoning samples
reasoning_df = pd.DataFrame(reasoning_results)
reasoning_df.to_csv(reasoning_file, index=False)
print(f"💾 Reasoning results saved to: {reasoning_file}")

# Per-category metrics
metrics_df.to_csv(metrics_file, index=False)
print(f"💾 Metrics saved to: {metrics_file}")

# Plain-text summary report, assembled in memory and written in one call
rule = "="*80 + "\n"
summary_parts = [
    rule,
    "SCAM DETECTOR LORA - TEST RESULTS SUMMARY\n",
    rule + "\n",
    f"Test Date: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n",
    f"Model: {CONFIG['model_name']}\n",
    f"Adapter: {CONFIG['adapter_path']}\n",
    f"Test Samples: {len(df_test)}\n\n",
    f"Overall Accuracy: {accuracy:.4f} ({accuracy*100:.2f}%)\n\n",
    f"Macro Avg    - Precision: {precision_macro:.4f}, Recall: {recall_macro:.4f}, F1: {f1_macro:.4f}\n",
    f"Weighted Avg - Precision: {precision_weighted:.4f}, Recall: {recall_weighted:.4f}, F1: {f1_weighted:.4f}\n\n",
    rule,
    "PER-CATEGORY METRICS\n",
    rule + "\n",
    metrics_df.to_string(),
    "\n\n",
    rule,
    "CLASSIFICATION REPORT\n",
    rule + "\n",
    classification_report(ground_truth, predictions, labels=FRAUD_CATEGORIES, zero_division=0),
]
with open(summary_file, 'w') as f:
    f.write("".join(summary_parts))

print(f"💾 Summary report saved to: {summary_file}")
print("\n✅ All results saved successfully!")
print("\n✅ All results saved successfully!")

## 1️⃣4️⃣ Summary Statistics

In [None]:
print("\n" + "="*80)
print("📊 FINAL SUMMARY")
print("="*80)
print(f"\n🎯 Overall Accuracy: {accuracy:.4f} ({accuracy*100:.2f}%)")

# Three best and three worst categories by F1, taken from the per-category table
print(f"\n📈 Best Performing Categories (by F1-Score):")
top_categories = metrics_df.nlargest(3, 'F1-Score')[['Category', 'F1-Score', 'Support']]
for _, row in top_categories.iterrows():
    print(f"   {row['Category']:20s} F1: {row['F1-Score']:.4f} (n={int(row['Support'])})")

print(f"\n📉 Categories Needing Improvement (by F1-Score):")
bottom_categories = metrics_df.nsmallest(3, 'F1-Score')[['Category', 'F1-Score', 'Support']]
for _, row in bottom_categories.iterrows():
    print(f"   {row['Category']:20s} F1: {row['F1-Score']:.4f} (n={int(row['Support'])})")

# Exact-category accuracy restricted to samples whose true label is a fraud category
print(f"\n🎯 Fraud Detection Performance:")
fraud_categories = [c for c in FRAUD_CATEGORIES if c != 'legitimate']
fraud_mask = [t in fraud_categories for t in ground_truth]
fraud_truth = [t for t, is_fraud in zip(ground_truth, fraud_mask) if is_fraud]
fraud_preds = [p for p, is_fraud in zip(predictions, fraud_mask) if is_fraud]
# Guard against a sample that happens to contain no fraud rows at all
fraud_accuracy = accuracy_score(fraud_truth, fraud_preds) if fraud_truth else float('nan')
print(f"   Fraud Category Accuracy: {fraud_accuracy:.4f} ({fraud_accuracy*100:.2f}%)")


def _binary_prediction(label):
    """Collapse a predicted label to 'legitimate', 'fraud' or 'invalid'.

    Predictions outside FRAUD_CATEGORIES ('error', free-text fallbacks) map to
    'invalid' so they are always scored as wrong instead of being credited as
    a fraud detection.
    """
    if label == 'legitimate':
        return 'legitimate'
    if label in fraud_categories:
        return 'fraud'
    return 'invalid'


# Binary classification (fraud vs legitimate)
binary_ground_truth = ['fraud' if t != 'legitimate' else 'legitimate' for t in ground_truth]
binary_predictions = [_binary_prediction(p) for p in predictions]
binary_accuracy = accuracy_score(binary_ground_truth, binary_predictions)
print(f"   Binary Classification (Fraud vs Legitimate): {binary_accuracy:.4f} ({binary_accuracy*100:.2f}%)")

print("\n" + "="*80)
print("✅ Testing completed successfully!")
print("="*80)