In [2]:
pip install streamlit scikit-learn pandas numpy

Collecting streamlit
  Downloading streamlit-1.49.1-py3-none-any.whl.metadata (9.5 kB)
Collecting pydeck<1,>=0.8.0b4 (from streamlit)
  Downloading pydeck-0.9.1-py2.py3-none-any.whl.metadata (4.1 kB)
Downloading streamlit-1.49.1-py3-none-any.whl (10.0 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m10.0/10.0 MB[0m [31m61.7 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading pydeck-0.9.1-py2.py3-none-any.whl (6.9 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m6.9/6.9 MB[0m [31m95.4 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: pydeck, streamlit
Successfully installed pydeck-0.9.1 streamlit-1.49.1


In [1]:
import pandas as pd
import numpy as np
import gradio as gr
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix, classification_report

# ---------------------------
# Load Dataset
# ---------------------------
data = pd.read_csv("spam_ham_dataset.csv")  # replace with your dataset filename

# Features and Target
X = data["text"]
y = data["label"]   # or use 'label_num' if already numeric

# Encode labels to numeric if needed
encoder = LabelEncoder()
y_encoded = encoder.fit_transform(y)

# ---------------------------
# Text Preprocessing using TF-IDF
# ---------------------------
vectorizer = TfidfVectorizer(stop_words='english', max_features=3000)  # limit to top 3000 words
X_tfidf = vectorizer.fit_transform(X)



In [3]:
# ---------------------------
# Train-test split
# ---------------------------
X_train, X_test, y_train, y_test = train_test_split(
    X_tfidf, y_encoded, test_size=0.2, random_state=42
)

# ---------------------------
# Train Naive Bayes Model
# ---------------------------
model = MultinomialNB()
model.fit(X_train, y_train)

# ---------------------------
# Classification Metrics
# ---------------------------
y_pred = model.predict(X_test)

print(f"\n--- Classification Metrics ---")
print("Accuracy :", accuracy_score(y_test, y_pred))
print("Precision:", precision_score(y_test, y_pred, average="weighted"))
print("Recall   :", recall_score(y_test, y_pred, average="weighted"))
print("F1-score :", f1_score(y_test, y_pred, average="weighted"))
print("\nConfusion Matrix:\n", confusion_matrix(y_test, y_pred))
print("\nClassification Report:\n", classification_report(y_test, y_pred, target_names=encoder.classes_))




--- Classification Metrics ---
Accuracy : 0.9449275362318841
Precision: 0.9464348470925827
Recall   : 0.9449275362318841
F1-score : 0.9453847504718782

Confusion Matrix:
 [[705  37]
 [ 20 273]]

Classification Report:
               precision    recall  f1-score   support

         ham       0.97      0.95      0.96       742
        spam       0.88      0.93      0.91       293

    accuracy                           0.94      1035
   macro avg       0.93      0.94      0.93      1035
weighted avg       0.95      0.94      0.95      1035



In [4]:
# ---------------------------
# Gradio Interface
# ---------------------------
def predict_spam(message):
    # Transform the input message using the same TF-IDF vectorizer
    msg_tfidf = vectorizer.transform([message])
    prediction = model.predict(msg_tfidf)[0]
    label = encoder.inverse_transform([prediction])[0]
    return f"This message is classified as: {label.upper()}"

demo = gr.Interface(
    fn=predict_spam,
    inputs=gr.Textbox(label="Enter email message"),
    outputs="text",
    title="📧 Spam Mail Detection using Naive Bayes",
    description="Enter a message to classify as HAM (not spam) or SPAM.\nModel uses TF-IDF + MultinomialNB."
)

demo.launch()

It looks like you are running Gradio on a hosted Jupyter notebook, which requires `share=True`. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://48f51acf41b6e7c1ec.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


