In [None]:
# Cell 1: Setup
import sys
sys.path.append('../src')

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from src.email_detector.detector import EmailPhishingDetector
from src.utils.data_loader import create_sample_email_data
from src.utils.visualization import plot_confusion_matrix

%matplotlib inline
print("‚úÖ Setup complete!")


In [None]:
# Cell 2: Load and Prepare Data
# Builds the sample email frame and extracts the 'label' column as a plain list.
emails_df = create_sample_email_data()
labels = emails_df['label'].tolist()

# Summing the labels counts the phishing rows.
# NOTE(review): this assumes labels are 0/1 with 1 == phishing - confirm
# against create_sample_email_data().
phishing_count = sum(labels)
safe_count = len(labels) - phishing_count

print(f"📧 Total emails: {len(emails_df)}")
print(f"🔴 Phishing: {phishing_count}")
print(f"🟢 Safe: {safe_count}")

In [None]:
# Cell 3: Train Email Detector
# Instantiates the detector and fits it on the full sample frame and its labels.
detector = EmailPhishingDetector()

print("🚀 Training email phishing detector...")
# The return value of train() is kept under `features` as in the original
# notebook; it is not used again in the cells shown here.
features = detector.train(emails_df, labels)

print("\n✅ Training complete!")
print(f"📊 Total features: {len(detector.feature_names)}")


In [None]:
# Cell 4: Feature Importance
# Ranks the trained model's features by importance, prints the ranking and
# draws it as a horizontal bar chart.
TOP_N = 15  # maximum number of features to report

feature_importance = np.asarray(detector.model.feature_importances_)
# argsort is ascending: take the last TOP_N entries and reverse them so the
# most important feature comes first.
top_indices = np.argsort(feature_importance)[-TOP_N:][::-1]
top_names = [detector.feature_names[idx] for idx in top_indices]
top_values = feature_importance[top_indices]
shown = len(top_indices)  # may be smaller than TOP_N when the model has few features

print(f"🎯 Top {shown} Most Important Features:")
for rank, (name, value) in enumerate(zip(top_names, top_values), 1):
    print(f"{rank}. {name}: {value:.4f}")

# Visualize
fig, ax = plt.subplots(figsize=(10, 6))
positions = range(shown)
ax.barh(positions, top_values)
ax.set_yticks(positions)
ax.set_yticklabels(top_names)
# barh places position 0 at the bottom; flip the axis so the chart reads
# top-down in the same order as the printed ranking.
ax.invert_yaxis()
ax.set_xlabel('Importance')
ax.set_title(f'Top {shown} Feature Importances')
fig.tight_layout()
plt.show()

In [None]:
# Cell 5: Test Predictions
# Runs the trained detector on two hand-written emails and prints, for each,
# the prediction, its confidence and up to three risk factors.
test_emails = [
    {
        'text': "URGENT! Your account will be suspended! Click here NOW!",
        'sender': "security@fake-bank.com",
        'subject': "URGENT ACTION REQUIRED"
    },
    {
        'text': "Hi team, please review the attached document for our meeting.",
        'sender': "colleague@company.com",
        'subject': "Meeting Document"
    }
]

print("🧪 Testing predictions:\n")

for i, email in enumerate(test_emails, 1):
    # Arguments are passed positionally in the order text, sender, subject.
    result = detector.predict_with_explanation(
        email['text'],
        email['sender'],
        email['subject']
    )

    print(f"Test Email {i}:")
    print(f"  Subject: {email['subject']}")
    print(f"  Prediction: {result['prediction']}")
    # NOTE(review): printed with a '%' suffix, so 'confidence' is presumably
    # already on a 0-100 scale - confirm.
    print(f"  Confidence: {result['confidence']:.1f}%")
    print(f"  Risk Factors: {len(result['risk_factors'])}")
    # Only the first three factors are listed to keep the output short.
    for factor in result['risk_factors'][:3]:
        print(f"    • {factor}")
    print()

In [None]:
# Cell 6: XAI - LIME Explanation
# Scores one suspicious email and prints the first ten (feature, weight)
# pairs from the 'lime_explanation' entry of the result.
print("🔍 Explainable AI Analysis:")

result = detector.predict_with_explanation(
    "URGENT! Your PayPal account will be suspended! Verify now!",
    "security@paypal-fake.com",
    "Account Suspension Warning"
)

print(f"\nPrediction: {result['prediction']}")
print(f"Confidence: {result['confidence']:.1f}%")

print("\n💡 Top LIME Explanations:")
for feature, weight in result['lime_explanation'][:10]:
    # Positive weights are reported as raising the phishing risk; zero and
    # negative weights are reported as lowering it.
    direction = "↗️ Increases" if weight > 0 else "↘️ Decreases"
    print(f"  {feature}: {direction} phishing risk ({weight:.3f})")


In [None]:
# Cell 7: Save Model
# Persists the trained detector to disk and points to the next notebook.
import os

model_path = '../models/email_detector_v1.pkl'
# Derive the directory from model_path so the two can never drift apart.
os.makedirs(os.path.dirname(model_path), exist_ok=True)

detector.save_model(model_path)

print(f"✅ Model saved to {model_path}")
print("\n➡️ Next: Web log analysis in 03_web_log_analysis.ipynb")