# Email Spam Classifier: Model Evaluation & Streamlit GUI

This notebook visualizes the performance of the trained VotingClassifier (MultinomialNB + RandomForest) and provides a Streamlit GUI for interactive predictions.

In [None]:
# Import Required Libraries
import os

import joblib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import streamlit as st
from sklearn.metrics import accuracy_score, precision_score, confusion_matrix, classification_report, roc_curve, auc, precision_recall_curve
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import label_binarize

from project.preprocessing import batch_preprocess

In [None]:
# Load Trained Model and Vectorizer
MODEL_PATH = os.path.join('project', 'final_model.pkl')
DATA_PATH = '190k_mails_spam.csv'  # Adjust if needed

# Load model and vectorizer.
# SECURITY: joblib.load unpickles the file, which can execute arbitrary code.
# Only load a bundle that was produced by this project's own training run.
model_bundle = joblib.load(MODEL_PATH)
model = model_bundle['model']
vectorizer = model_bundle['vectorizer']

# Load data
df = pd.read_csv(DATA_PATH)
df.columns = [c.lower() for c in df.columns]

# Heuristic column detection: first column whose name contains one of the keywords,
# falling back to the first column (text) / last column (label).
text_col = next((c for c in df.columns if 'text' in c or 'message' in c or 'email' in c), df.columns[0])
label_col = next((c for c in df.columns if 'label' in c or 'spam' in c or 'target' in c), df.columns[-1])

# If both heuristics land on the same column, selecting [text_col, label_col] would
# yield duplicate column names and fail later with an obscure error; fail early instead.
if text_col == label_col:
    raise ValueError(
        f"Could not tell the text column from the label column (both resolved to "
        f"{text_col!r}); available columns: {list(df.columns)}"
    )
df = df[[text_col, label_col]].dropna()

# Preprocess text
df['clean_text'] = batch_preprocess(df[text_col].astype(str).tolist())

# 66-22-12 split (same as training):
#   first split holds out 34% of the data; the second split sends 35.3% of that
#   hold-out to validation (0.34 * 0.353 ~= 12%) and leaves the rest as test (~22%).
train_df, temp_df = train_test_split(df, test_size=0.34, random_state=42, stratify=df[label_col])
test_df, val_df = train_test_split(temp_df, test_size=0.353, random_state=42, stratify=temp_df[label_col])

X_test = vectorizer.transform(test_df['clean_text'])
y_test = test_df[label_col]
X_val = vectorizer.transform(val_df['clean_text'])
y_val = val_df[label_col]

In [None]:
# Define Prediction Function
def predict_spam(texts):
    """Classify one or more raw email texts with the loaded model.

    Parameters
    ----------
    texts : str or iterable of str
        Raw (un-preprocessed) email text(s). A single string is treated as a
        batch of one; without that, the string itself would be passed on as
        the batch and presumably be iterated character by character.

    Returns
    -------
    preds
        Output of ``model.predict`` for the vectorized texts, one entry per text.
    probs
        Output of ``model.predict_proba`` for the same texts, or ``None`` when
        the model does not expose ``predict_proba``. Callers in this notebook
        read column 1 as the spam probability (assumes the positive class is
        the second entry of the model's classes — TODO confirm).
    """
    # Guard against the classic "string is an iterable of characters" pitfall.
    if isinstance(texts, str):
        texts = [texts]
    clean_texts = batch_preprocess(texts)
    X = vectorizer.transform(clean_texts)
    preds = model.predict(X)
    probs = model.predict_proba(X) if hasattr(model, 'predict_proba') else None
    return preds, probs

In [None]:
# Visualize Accuracy and Precision
# Hard predictions for both held-out sets (reused by the confusion-matrix cell).
y_test_pred = model.predict(X_test)
y_val_pred = model.predict(X_val)

# Score each set with the default (binary, pos_label=1) precision.
acc_test, prec_test = accuracy_score(y_test, y_test_pred), precision_score(y_test, y_test_pred)
acc_val, prec_val = accuracy_score(y_val, y_val_pred), precision_score(y_val, y_val_pred)

# One row per evaluation set.
metrics_df = pd.DataFrame(
    [
        ('Test', acc_test, prec_test),
        ('Validation', acc_val, prec_val),
    ],
    columns=['Set', 'Accuracy', 'Precision'],
)

score_ax = metrics_df.set_index('Set')[['Accuracy', 'Precision']].plot(
    kind='bar',
    ylim=(0,1),
    figsize=(7,4),
    title='Accuracy & Precision',
)
score_ax.set_ylabel('Score')
plt.show()

In [None]:
# Plot Confusion Matrix
# Side-by-side heatmaps: test set on the left, validation set on the right.
fig, axes = plt.subplots(1, 2, figsize=(12,5))
cm_test = confusion_matrix(y_test, y_test_pred)
cm_val = confusion_matrix(y_val, y_val_pred)

# (matrix, colormap, title) for each panel, in left-to-right order.
panel_specs = [
    (cm_test, 'Blues', 'Test Set Confusion Matrix'),
    (cm_val, 'Greens', 'Validation Set Confusion Matrix'),
]
for panel_ax, (matrix, palette, heading) in zip(axes, panel_specs):
    sns.heatmap(matrix, annot=True, fmt='d', cmap=palette, ax=panel_ax)
    panel_ax.set_title(heading)
    panel_ax.set_xlabel('Predicted')
    panel_ax.set_ylabel('Actual')

plt.tight_layout()
plt.show()

In [None]:
# Visualize Model Performance (ROC Curve, Precision-Recall Curve)

# Binarize labels if not already 0/1. With classes=[min, max] the larger label
# is encoded as 1, i.e. it is treated as the positive class.
if set(y_test.unique()) != {0,1}:
    y_test_bin = label_binarize(y_test, classes=[y_test.min(), y_test.max()]).ravel()
else:
    y_test_bin = y_test

# Both curves need a continuous score per sample, not hard class predictions.
if hasattr(model, 'predict_proba'):
    y_score = model.predict_proba(X_test)[:,1]
elif hasattr(model, 'decision_function'):
    y_score = model.decision_function(X_test)
else:
    # Same exception type the unguarded call would have raised, but actionable.
    raise AttributeError(
        "model exposes neither predict_proba nor decision_function, so ROC / "
        "precision-recall curves cannot be computed (a VotingClassifier needs "
        "voting='soft' to provide predict_proba)"
    )

fpr, tpr, _ = roc_curve(y_test_bin, y_score)
roc_auc = auc(fpr, tpr)

precision, recall, _ = precision_recall_curve(y_test_bin, y_score)

# Explicit figure/axes handles instead of the pyplot state machine.
curves_fig, (roc_ax, pr_ax) = plt.subplots(1, 2, figsize=(12,5))

roc_ax.plot(fpr, tpr, label=f'ROC curve (AUC = {roc_auc:.2f})')
roc_ax.plot([0,1],[0,1],'k--')  # chance-level diagonal
roc_ax.set_xlabel('False Positive Rate')
roc_ax.set_ylabel('True Positive Rate')
roc_ax.set_title('ROC Curve')
roc_ax.legend()

pr_ax.plot(recall, precision, label='Precision-Recall curve')
pr_ax.set_xlabel('Recall')
pr_ax.set_ylabel('Precision')
pr_ax.set_title('Precision-Recall Curve')
pr_ax.legend()

curves_fig.tight_layout()
plt.show()

In [None]:
# Example: Predict on Sample Texts
sample_texts = [
    "Congratulations! You've won a free ticket. Reply to claim.",
    "Hi, can we reschedule our meeting to tomorrow?",
    "Urgent: Your account will be suspended unless you verify now.",
    "Lunch at 1pm? Let me know."
]
preds, probs = predict_spam(sample_texts)

# predict_spam returns probs=None when the model has no predict_proba, so the
# probabilities cannot be zipped unconditionally; print them only when available.
for idx, (text, pred) in enumerate(zip(sample_texts, preds)):
    report = f"Text: {text}\nPrediction: {'Spam' if pred else 'Ham'}"
    if probs is not None:
        report += f" | Probability (Spam): {probs[idx][1]:.2f}"
    print(report + "\n")

In [None]:
# Streamlit GUI for Prediction
# NOTE(review): Streamlit widgets are meant to be served via `streamlit run <script>.py`;
# executed inside a notebook kernel this cell presumably will not render an
# interactive UI — consider moving it to a standalone app script. TODO confirm.
st.title('Email Spam Classifier')
st.write('Enter your email text below and click Predict to see if it is spam or not.')

user_input = st.text_area('Email Text', '')
predict_clicked = st.button('Predict')

if predict_clicked and not user_input.strip():
    # Nothing but whitespace was entered.
    st.warning('Please enter some text.')
elif predict_clicked:
    # Classify the single message as a batch of one.
    pred, prob = predict_spam([user_input])
    if pred[0]:
        label = 'Spam'
    else:
        label = 'Ham'
    st.write(f'**Prediction:** {label}')
    # prob is None when the model cannot produce class probabilities.
    if prob is not None:
        st.write(f'**Probability (Spam):** {prob[0][1]:.2f}')