# Phase 7: Fine-Tuning LLM for Phishing Detection

## Setup Instructions

1. Open this notebook in Google Colab
2. Go to Runtime > Change runtime type > Select GPU (T4)
3. Upload your datasets to Colab or mount Google Drive
4. Run all cells

**Expected Time**: 15-30 minutes with GPU

## Step 1: Install Dependencies

In [None]:
# Install the fine-tuning stack used below (transformers, datasets, accelerate,
# peft, bitsandbytes, trl) plus pandas and scikit-learn; -q keeps pip quiet.
# NOTE(review): no versions are pinned, so each fresh runtime gets the latest
# releases. The SFTTrainer keyword arguments used in Step 8 may not be accepted
# by every trl release — consider pinning once a working combination is confirmed.
!pip install -q transformers datasets accelerate peft bitsandbytes trl pandas scikit-learn

## Step 2: Check GPU

In [None]:
import torch

# Report whether a CUDA device is visible and, if so, its name and memory size.
cuda_ready = torch.cuda.is_available()
print(f"GPU Available: {cuda_ready}")
if cuda_ready:
    device_index = 0
    total_bytes = torch.cuda.get_device_properties(device_index).total_memory
    print(f"GPU Name: {torch.cuda.get_device_name(device_index)}")
    # Reported in decimal gigabytes (bytes / 1e9).
    print(f"GPU Memory: {total_bytes / 1e9:.2f} GB")

## Step 3: Upload Datasets

Upload these files from your local machine:
- `enron_preprocessed_3k.csv`
- `combined_preprocessed_2k.csv`

Or mount Google Drive if you've uploaded them there.

In [None]:
# Option 1: Upload files directly
from google.colab import files

# Ask for each CSV in turn; files.upload() is called once per prompt.
for upload_prompt in (
    "Upload enron_preprocessed_3k.csv:",
    "\nUpload combined_preprocessed_2k.csv:",
):
    print(upload_prompt)
    uploaded = files.upload()

In [None]:
# Option 2: Mount Google Drive (uncomment if using Drive)
# from google.colab import drive
# drive.mount('/content/drive')
# 
# enron_path = '/content/drive/MyDrive/phishing-detection/enron_preprocessed_3k.csv'
# combined_path = '/content/drive/MyDrive/phishing-detection/combined_preprocessed_2k.csv'

## Step 4: Load and Prepare Data

In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from datasets import Dataset

# Resolve the CSV locations. The optional Google Drive cell defines
# `enron_path` / `combined_path`; when it was not run, fall back to the files
# uploaded into the current working directory.
enron_csv = globals().get('enron_path', 'enron_preprocessed_3k.csv')
combined_csv = globals().get('combined_path', 'combined_preprocessed_2k.csv')

# Load datasets
print("Loading datasets...")
enron_df = pd.read_csv(enron_csv)
combined_df = pd.read_csv(combined_csv)

# Fail early with a clear message if a file lacks the columns the later
# cells read ('text' for the email body, 'label' for the class).
for csv_name, frame in ((enron_csv, enron_df), (combined_csv, combined_df)):
    missing_columns = {'text', 'label'} - set(frame.columns)
    if missing_columns:
        raise ValueError(
            f"{csv_name} is missing required column(s): {sorted(missing_columns)}"
        )

print(f"Enron: {len(enron_df)} emails")
print(f"Combined: {len(combined_df)} emails")

# Use Enron for training (80/20 split), stratified on the label so both
# splits keep the same class balance. The combined dataset is never trained
# on; it is only used for evaluation.
train_df, test_enron_df = train_test_split(
    enron_df,
    test_size=0.2,
    random_state=42,
    stratify=enron_df['label']
)

print(f"\nTraining: {len(train_df)} emails")
print(f"Test (Enron): {len(test_enron_df)} emails")
print(f"Test (Combined): {len(combined_df)} emails")

## Step 5: Format Data for Training

In [None]:
def format_prompt(email_text, label=None):
    """Build the instruction prompt for one email.

    Without a label the prompt ends at "Classification:" so the model can
    complete it. With a label the answer is appended after a single space:
    "phishing" when ``label == 1``, "legitimate" for any other value.
    """
    instruction = (
        "Classify this email as 'phishing' or 'legitimate'.\n\n"
        f"Email:\n{email_text}\n\n"
        "Classification:"
    )

    if label is None:
        return instruction

    answer = "phishing" if label == 1 else "legitimate"
    return f"{instruction} {answer}"

# Build one fully-labelled prompt per training email (text + expected answer).
train_texts = [
    format_prompt(text, label)
    for text, label in zip(train_df['text'], train_df['label'])
]

# Wrap the prompts in a single-column HuggingFace dataset.
train_dataset = Dataset.from_dict({"text": train_texts})

print(f"Training dataset: {len(train_dataset)} examples")

# Show the first 300 characters of the first prompt as a sanity check.
preview = train_texts[0][:300]
print(f"\nExample:\n{preview}...")

## Step 6: Load Model with LoRA

In [None]:
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training

# Model configuration
model_name = "Qwen/Qwen2.5-1.5B-Instruct"

print(f"Loading model: {model_name}")

# 4-bit quantization for memory efficiency: NF4 weights with double
# quantization, computing in float16.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.float16,
    bnb_4bit_use_double_quant=True,
)

# Load the quantized base model; device_map="auto" lets the library decide
# where to place the weights.
# NOTE(review): trust_remote_code=True allows code shipped with the checkpoint
# to run — confirm it is actually required for this model.
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    quantization_config=bnb_config,
    device_map="auto",
    trust_remote_code=True
)

# Load tokenizer
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)

# Only fall back to EOS when the tokenizer defines no pad token. Overwriting an
# existing pad token with EOS makes the two indistinguishable, so anything that
# masks padding out of the loss would also mask genuine end-of-sequence tokens.
# NOTE(review): presumably this checkpoint already ships a pad token — confirm.
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "right"

print("Model loaded successfully!")

## Step 7: Configure LoRA

In [None]:
# Get the quantized base model ready for k-bit training before adding adapters.
model = prepare_model_for_kbit_training(model)

# Names of the modules that receive LoRA adapters.
adapter_targets = [
    "q_proj", "k_proj", "v_proj", "o_proj",
    "gate_proj", "up_proj", "down_proj",
]

# LoRA hyper-parameters: rank 16, alpha 32, 5% dropout, no bias terms trained.
lora_config = LoraConfig(
    task_type="CAUSAL_LM",
    r=16,
    lora_alpha=32,
    lora_dropout=0.05,
    bias="none",
    target_modules=adapter_targets,
)

# Wrap the model so that the adapters are attached.
model = get_peft_model(model, lora_config)

# Count trainable vs. total parameters in a single pass over the model.
trainable_params = 0
total_params = 0
for param in model.parameters():
    param_count = param.numel()
    total_params += param_count
    if param.requires_grad:
        trainable_params += param_count

trainable_pct = 100 * trainable_params / total_params
print(f"Trainable parameters: {trainable_params:,} ({trainable_pct:.2f}%)")

## Step 8: Training Configuration

In [None]:
from transformers import TrainingArguments
from trl import SFTTrainer

# Training arguments.
# Batch of 4 per device with 4 gradient-accumulation steps, i.e. 16 sequences
# per optimizer step on a single device. fp16 mixed precision, a paged 8-bit
# AdamW optimizer, logging every 10 steps, and no external experiment tracker.
# NOTE(review): per the transformers documentation a positive max_steps
# overrides num_train_epochs, so the run length here is governed by
# max_steps=200 rather than by one epoch — confirm that is the intent; if one
# epoch is fewer than 200 optimizer steps this trains for longer, not shorter.
training_args = TrainingArguments(
    output_dir="./phishing-finetuned",
    num_train_epochs=1,
    per_device_train_batch_size=4,
    gradient_accumulation_steps=4,
    learning_rate=2e-4,
    fp16=True,
    logging_steps=10,
    save_strategy="epoch",
    optim="paged_adamw_8bit",
    warmup_steps=10,
    max_steps=200,  # Limit steps for faster training
    report_to="none"
)

# Create trainer: supervised fine-tuning on the "text" column of train_dataset,
# with sequences capped at 512 tokens.
# NOTE(review): prompts longer than 512 tokens are presumably truncated from
# the right, which would drop the label appended at the end — verify.
# NOTE(review): these keyword arguments (tokenizer, dataset_text_field,
# max_seq_length) may not be accepted by every trl release — confirm against
# the installed version.
trainer = SFTTrainer(
    model=model,
    train_dataset=train_dataset,
    tokenizer=tokenizer,
    args=training_args,
    dataset_text_field="text",
    max_seq_length=512,
)

print("Trainer configured successfully!")

## Step 9: Train Model

**This will take 15-30 minutes**

In [None]:
import time

banner = "=" * 60

print("Starting training...")
print("This will take 15-30 minutes with GPU T4")
print(banner)

# Wall-clock timing around the whole fine-tuning run.
start_time = time.time()
trainer.train()
end_time = time.time()

# Elapsed time in minutes; the results cell stores this value later on.
training_time = (end_time - start_time) / 60

print(banner)
print(f"Training completed in {training_time:.2f} minutes!")

## Step 10: Save Model

In [None]:
# Write the fine-tuned model and its tokenizer into the same directory so the
# two can be reloaded together.
for artifact in (model, tokenizer):
    artifact.save_pretrained("./phishing-finetuned-final")

print("Model saved to ./phishing-finetuned-final")

## Step 11: Evaluation Function

In [None]:
def classify_email(email_text, model, tokenizer, max_length=512):
    """Classify a single email with the (fine-tuned) causal language model.

    Args:
        email_text: Raw email text to classify.
        model: Model exposing ``generate`` and ``device``.
        tokenizer: Tokenizer matching ``model``.
        max_length: Maximum number of prompt tokens fed to the model.

    Returns:
        1 for phishing, 0 for legitimate, or None when neither label appears
        in the generated continuation.
    """
    prompt = format_prompt(email_text)
    inputs = tokenizer(prompt, return_tensors="pt")

    if inputs["input_ids"].shape[1] > max_length:
        # Truncating the whole prompt from the right would cut off the trailing
        # "Classification:" cue, so shorten the email body instead and rebuild
        # the prompt around it.
        overhead = len(tokenizer(format_prompt(""))["input_ids"])
        # Keep a few tokens of slack: decoding and re-encoding the trimmed body
        # does not always reproduce exactly the same number of tokens.
        budget = max(max_length - overhead - 4, 1)
        body_ids = tokenizer(
            str(email_text),
            add_special_tokens=False,
            truncation=True,
            max_length=budget,
        )["input_ids"]
        prompt = format_prompt(tokenizer.decode(body_ids, skip_special_tokens=True))
        # Final truncation is only a safety net for the rare case where the
        # rebuilt prompt is still slightly too long.
        inputs = tokenizer(
            prompt, return_tensors="pt", truncation=True, max_length=max_length
        )

    inputs = inputs.to(model.device)
    prompt_token_count = inputs["input_ids"].shape[1]

    with torch.no_grad():
        # Greedy decoding; no temperature is passed because it has no effect
        # when do_sample=False.
        outputs = model.generate(
            **inputs,
            max_new_tokens=10,
            do_sample=False,
            pad_token_id=tokenizer.eos_token_id
        )

    # Decode only the newly generated tokens. Slicing the decoded text by
    # len(prompt) breaks whenever the decoded prompt differs from the input
    # string (for example after truncation).
    new_tokens = outputs[0][prompt_token_count:]
    response = tokenizer.decode(new_tokens, skip_special_tokens=True).strip().lower()

    # Take whichever label appears first: the continuation may go on to echo
    # the instruction text, which itself mentions both labels.
    label_hits = [
        (response.find(word), value)
        for word, value in (("phishing", 1), ("legitimate", 0))
        if word in response
    ]
    if not label_hits:
        return None  # Failed to classify
    return min(label_hits)[1]

def evaluate_dataset(df, model, tokenizer, dataset_name):
    """Evaluate the model on a labelled dataset and print a summary.

    Args:
        df: DataFrame with a 'text' column (email body) and a 'label' column.
        model: Model passed through to ``classify_email``.
        tokenizer: Tokenizer passed through to ``classify_email``.
        dataset_name: Human-readable name used in the printed output.

    Returns:
        Dict with 'accuracy', 'precision', 'recall', 'f1', 'speed'
        (emails per second), 'success_rate' (percent of emails that produced
        a label) and 'failed' (count of emails that did not).

    Note:
        Accuracy, precision, recall and F1 are computed only over emails for
        which ``classify_email`` returned a label; failures are excluded from
        those metrics and reported separately.
    """
    from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
    import time

    print(f"\nEvaluating on {dataset_name}...")

    total = len(df)
    predictions = []
    true_labels = []
    failed = 0

    start_time = time.time()

    # Count by position rather than by index label: after df.sample() the
    # index labels are arbitrary, so they cannot be used for progress output.
    for position, (_, row) in enumerate(df.iterrows(), start=1):
        pred = classify_email(row['text'], model, tokenizer)

        if pred is not None:
            predictions.append(pred)
            true_labels.append(row['label'])
        else:
            failed += 1

        if position % 50 == 0:
            print(f"Processed {position}/{total} emails...")

    total_time = time.time() - start_time

    # Calculate metrics
    if predictions:
        accuracy = float(accuracy_score(true_labels, predictions))
        precision = float(precision_score(true_labels, predictions, zero_division=0))
        recall = float(recall_score(true_labels, predictions, zero_division=0))
        f1 = float(f1_score(true_labels, predictions, zero_division=0))
    else:
        # Nothing was classified, so there is nothing to score.
        print("No emails were classified; reporting all metrics as 0.")
        accuracy = precision = recall = f1 = 0.0

    # Guard the ratios against an empty frame or a zero elapsed time.
    speed = total / total_time if total_time > 0 else 0.0
    success_rate = (len(predictions) / total) * 100 if total else 0.0

    print(f"\n{dataset_name} Results:")
    print("="*60)
    print(f"Accuracy:      {accuracy*100:.2f}%")
    print(f"Precision:     {precision*100:.2f}%")
    print(f"Recall:        {recall*100:.2f}%")
    print(f"F1 Score:      {f1*100:.2f}%")
    print(f"Speed:         {speed:.3f} emails/second")
    print(f"Success Rate:  {success_rate:.2f}% ({len(predictions)}/{total})")
    print(f"Failed:        {failed}")
    print("="*60)

    return {
        'accuracy': accuracy,
        'precision': precision,
        'recall': recall,
        'f1': f1,
        'speed': speed,
        'success_rate': success_rate,
        'failed': failed
    }

## Step 12: Evaluate on Enron Test Set

In [None]:
# Evaluate on at most 100 held-out Enron emails to keep the run short; the
# fixed seed makes the sample reproducible.
enron_sample_size = min(100, len(test_enron_df))
test_enron_sample = test_enron_df.sample(n=enron_sample_size, random_state=42)

enron_results = evaluate_dataset(
    df=test_enron_sample,
    model=model,
    tokenizer=tokenizer,
    dataset_name="Enron Test Set",
)

## Step 13: Evaluate on Combined Dataset

In [None]:
# Evaluate on at most 100 emails from the combined dataset to keep the run
# short; the fixed seed makes the sample reproducible.
combined_sample_size = min(100, len(combined_df))
combined_sample = combined_df.sample(n=combined_sample_size, random_state=42)

combined_results = evaluate_dataset(
    df=combined_sample,
    model=model,
    tokenizer=tokenizer,
    dataset_name="Combined Dataset",
)

## Step 14: Compare with Previous Results

In [None]:
def _fine_tuned_row(results):
    """Format this run's metrics as one (approach, accuracy, F1, speed) row."""
    # Built outside the table f-string: reusing the enclosing quote character
    # inside a nested f-string is a SyntaxError before Python 3.12.
    return (
        'Fine-Tuned LLM (NEW)',
        f"{results['accuracy']*100:.2f}%",
        f"{results['f1']*100:.2f}%",
        f"{results['speed']:.3f}",
    )


def _print_comparison(title, rows):
    """Print one comparison table: title, column header, then one line per row."""
    print(f"\n{title}:")
    print("-" * 80)
    print(f"{'Approach':<30} {'Accuracy':<12} {'F1 Score':<12} {'Speed (emails/s)':<20}")
    print("-" * 80)
    for approach, accuracy, f1, speed in rows:
        print(f"{approach:<30} {accuracy:<12} {f1:<12} {speed:<20}")


print("\n" + "="*80)
print("COMPARISON WITH OTHER APPROACHES")
print("="*80)

# The baseline figures are hard-coded here; only the fine-tuned row is
# computed in this notebook.
_print_comparison("Enron Dataset", [
    ('Traditional ML', '98.00%', '98.03%', '601,765'),
    ('Single LLM (Zero-Shot)', '91.00%', '90.53%', '0.625'),
    _fine_tuned_row(enron_results),
    ('Debate System', '76.00%', '72.09%', '0.133'),
    ('LangGraph', '55.00%', '18.18%', '0.165'),
])

_print_comparison("Combined Dataset", [
    ('Traditional ML', '99.50%', '99.50%', '125,178'),
    ('Single LLM (Zero-Shot)', '97.00%', '96.70%', '0.453'),
    _fine_tuned_row(combined_results),
    ('Debate System', '54.00%', '4.17%', '0.120'),
    ('LangGraph', '53.00%', '0.00%', '0.145'),
])
print("="*80)

## Step 15: Save Results

In [None]:
import json

# Imported here as well so this cell still works when the optional upload
# cell (the only other place `files` is imported) was skipped, e.g. when the
# datasets came from Google Drive.
from google.colab import files

# Metrics from both evaluations plus the wall-clock training time in minutes.
results = {
    'enron': enron_results,
    'combined': combined_results,
    'training_time_minutes': training_time
}

with open('phase7_finetuned_results.json', 'w', encoding='utf-8') as f:
    json.dump(results, f, indent=2)

print("Results saved to phase7_finetuned_results.json")

# Download results
files.download('phase7_finetuned_results.json')

## Step 16: Test Individual Emails (Optional)

In [None]:
# Hand-written samples for a quick sanity check of the classifier.
test_emails = [
    "Dear user, your account has been suspended. Click here to verify your identity immediately.",
    "Hi team, the meeting is scheduled for tomorrow at 2 PM in conference room B.",
    "URGENT: Your PayPal account will be closed unless you update your information now!"
]

# Maps classify_email's return value to a display name; anything else
# (i.e. None) is shown as FAILED.
verdict_names = {1: "PHISHING", 0: "LEGITIMATE"}

print("Testing individual emails:\n")
for number, email in enumerate(test_emails, start=1):
    pred = classify_email(email, model, tokenizer)
    classification = verdict_names.get(pred, "FAILED")
    print(f"Email {number}: {classification}")
    print(f"Text: {email[:100]}...\n")

## Summary

✅ Model fine-tuned successfully  
✅ Evaluated on both datasets  
✅ Results saved  

**Next Steps:**
1. Download the results JSON file
2. Update your project documentation with Phase 7 results
3. Compare fine-tuned performance with other approaches

**Expected Improvement:**
- On the Enron test set, the fine-tuned model should score about 2-5 percentage points above the zero-shot LLM (91.00%)
- Target: 93-97% accuracy on Enron (between the zero-shot LLM at 91.00% and traditional ML at 98.00%)