# Spam Detection Analysis Notebook

This notebook provides an interactive analysis of the spam detection project results.

## Overview
- Load and visualize training results
- Analyze model performance
- Explore clustering results
- Test predictions interactively


In [None]:
# Import required libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import joblib
import os
from sklearn.metrics import classification_report, confusion_matrix

# Set style
# 'seaborn-v0_8' only exists in matplotlib >= 3.6; earlier releases ship the
# same style sheet under the name 'seaborn'. Pick whichever one this
# installation provides so the cell does not fail on older environments.
plt.style.use(
    'seaborn-v0_8' if 'seaborn-v0_8' in plt.style.available else 'seaborn'
)
sns.set_palette("husl")

print("Libraries imported successfully!")


## 1. Load Data and Results


In [None]:
# Read the three dataset splits written to the outputs/ directory
train_df, val_df, test_df = (
    pd.read_csv(csv_path)
    for csv_path in ('outputs/train.csv', 'outputs/val.csv', 'outputs/test.csv')
)

# Report how many rows each split contains
print(
    f"Training data: {train_df.shape[0]} messages",
    f"Validation data: {val_df.shape[0]} messages",
    f"Test data: {test_df.shape[0]} messages",
    sep="\n",
)

# Show how often each value of the 'label' column occurs in the training split
print("\nClass distribution:")
print(train_df['label'].value_counts())


## 2. Load Trained Model


In [None]:
# Restore the serialized pipeline from disk, if it has been produced
model_path = 'outputs/models/spam_pipeline.joblib'
if not os.path.exists(model_path):
    # Leave `model` as None so later cells can detect the missing artifact
    model = None
    print("Model not found. Please run the training pipeline first.")
else:
    model = joblib.load(model_path)
    print("Model loaded successfully!")
    print(f"Model type: {type(model).__name__}")


## 3. Interactive Prediction Testing


In [None]:
def predict_message_interactive(message):
    """
    Classify one message with the global ``model`` and print the verdict.

    Args:
        message: Message text to classify. It is handed to the model as a
            one-element list.

    Returns:
        None. The message, a SPAM / NOT SPAM verdict and a confidence
        percentage are printed. When the global ``model`` is None, only a
        notice asking to run training is printed.
    """
    # Local import: the notebook's import cell does not bring in `math`.
    import math

    if model is None:
        print("Model not loaded. Please run training first.")
        return

    # Make prediction
    prediction = model.predict([message])[0]

    # Get confidence
    try:
        proba = model.predict_proba([message])[0]
    except AttributeError:
        # The model exposes no predict_proba, so fall back to the decision
        # margin. Squash |margin| through a logistic so that a larger
        # distance from the boundary yields a HIGHER confidence (0.5 at the
        # boundary, approaching 1.0 far away). The previous formula,
        # 1 / (1 + |margin|), ran the other way round. This is an
        # uncalibrated heuristic, not a true probability.
        decision = model.decision_function([message])[0]
        confidence = 1 / (1 + math.exp(-abs(decision)))
    else:
        # Confidence is the probability of the most likely class.
        confidence = max(proba)

    # Format result
    # NOTE(review): assumes the model emits the string label 'spam' --
    # confirm against the training labels; any other value prints NOT SPAM.
    if prediction == 'spam':
        emoji = "🚨"
        status = "SPAM"
    else:
        emoji = "✅"
        status = "NOT SPAM"

    print(f"Message: {message}")
    print(f"Result: {emoji} {status} (confidence: {confidence:.2%})")

# Sample inputs used for a quick smoke test of the predictor
test_messages = [
    "Congratulations! You have won $1000 cash prize!",
    "Hey, how are you doing today?",
    "URGENT! Your account has been compromised. Click here to verify.",
    "Thanks for the great presentation today.",
    "Free entry in 2 a wkly comp to win FA Cup final tkts."
]

# Print a header, then one prediction per sample followed by a divider
print("Testing sample messages:", "=" * 50, sep="\n")
for msg in test_messages:
    predict_message_interactive(msg)
    print("-" * 30)


## 4. Custom Message Testing


In [None]:
# Replace the placeholder text below with the message you want to classify
custom_message = "Enter your message here to test the spam detector"

# Only run the predictor once the placeholder has actually been edited
if custom_message == "Enter your message here to test the spam detector":
    print("Please modify the 'custom_message' variable above to test your own message.")
else:
    predict_message_interactive(custom_message)
