<a href="https://colab.research.google.com/github/Naqeebullah11/Abasyn_Internship-ML/blob/main/NLP_Based_Spam_Classifier.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

NLP Text Classification with Gradio




# 1. Import Libraries

In [None]:
!pip install -q scikit-learn pandas gradio

import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import Pipeline
from sklearn.metrics import precision_recall_curve, classification_report
import gradio as gr


# 2. Load Dataset (Spam or Not Spam)


In [None]:
# Tab-separated file with no header row; the two columns are named here.
url = "https://raw.githubusercontent.com/justmarkham/pycon-2016-tutorial/master/data/sms.tsv"
df = pd.read_csv(url, sep="\t", header=None, names=["label", "message"])

# Encode the text labels as integers (ham -> 0, spam -> 1). Series.map turns
# any value missing from the mapping into NaN without warning, so reject
# unexpected labels explicitly before encoding.
LABEL_TO_ID = {"ham": 0, "spam": 1}
unknown_labels = set(df["label"]) - set(LABEL_TO_ID)
if unknown_labels:
    raise ValueError(
        f"Unexpected label values in dataset: {sorted(unknown_labels, key=str)}"
    )
df["label"] = df["label"].map(LABEL_TO_ID)


# 3. Train-Test Split


In [None]:
# Hold out 20% of the messages for testing. Stratifying on the label keeps the
# spam/ham ratio the same in both splits; the fixed seed makes the split
# repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    df["message"],
    df["label"],
    test_size=0.2,
    stratify=df["label"],
    random_state=42,
)

# Build pipeline


In [None]:
# TF-IDF features (unigrams and bigrams, English stop words removed, terms kept
# only if they appear in at least 2 documents and in at most 95% of them)
# feeding a logistic regression with balanced class weights.
model = Pipeline(
    steps=[
        (
            "tfidf",
            TfidfVectorizer(
                ngram_range=(1, 2),
                stop_words="english",
                min_df=2,
                max_df=0.95,
            ),
        ),
        (
            "clf",
            LogisticRegression(class_weight="balanced", solver="liblinear"),
        ),
    ]
)

# Train model


In [None]:
# Fit the vectorizer and the classifier together, on the training split only.
model.fit(X_train, y=y_train)

# Evaluate on test set


In [None]:
# Column 1 of predict_proba is the probability of class 1 (spam); it is kept in
# y_proba in addition to the hard predictions used for the report below.
y_proba = model.predict_proba(X_test)[:, 1]
y_pred = model.predict(X_test)
print("📊 Model Performance Report:")
print(classification_report(y_test, y_pred, target_names=["Not Spam", "Spam"]))

📊 Model Performance Report:
              precision    recall  f1-score   support

    Not Spam       0.99      0.98      0.99       966
        Spam       0.90      0.93      0.91       149

    accuracy                           0.98      1115
   macro avg       0.94      0.95      0.95      1115
weighted avg       0.98      0.98      0.98      1115



# Find best threshold (maximize F1-score)


In [None]:
# NOTE: the threshold is tuned on the test split (y_test / y_proba), so any
# metric later reported at this threshold on the same split will be optimistic.
precisions, recalls, thresholds = precision_recall_curve(y_test, y_proba)

# The small epsilon avoids division by zero where precision + recall == 0.
f1_scores = 2 * (precisions * recalls) / (precisions + recalls + 1e-6)

# precision_recall_curve returns one more precision/recall point than it
# returns thresholds; the final point has no threshold attached. Search only
# the points that do, so the chosen index is always valid for `thresholds`.
best_threshold = thresholds[np.argmax(f1_scores[:-1])]
print(f"\n✅ Best threshold (auto-selected): {best_threshold:.2f}")



✅ Best threshold (auto-selected): 0.66


# Prediction function for Gradio


In [None]:
def predict_message(text, threshold=best_threshold):
    """Classify one message as spam or not spam at the given threshold.

    Args:
        text: The message to classify. ``None`` is treated as an empty message.
        threshold: Minimum spam probability at which the message is labelled
            spam. Defaults to the value ``best_threshold`` had when this
            function was defined.

    Returns:
        A single-entry dict mapping the predicted label to the model's
        probability for that label: the spam probability when the label is
        spam, otherwise its complement.
    """
    # `text or ""` keeps a missing (None) input from crashing the vectorizer.
    spam_proba = float(model.predict_proba([text or ""])[0, 1])
    if spam_proba >= threshold:
        return {"🚨 Spam": spam_proba}
    return {"✅ Not Spam": 1 - spam_proba}


# Gradio Interface


In [None]:
# The slider only spans 0.1-0.9, so clamp the tuned threshold into that range
# before using it as the slider's starting value.
slider_default = min(max(float(best_threshold), 0.1), 0.9)

demo = gr.Interface(
    fn=predict_message,
    inputs=[
        gr.Textbox(lines=3, placeholder="Type a message..."),
        gr.Slider(0.1, 0.9, value=slider_default, step=0.05, label="Spam Threshold"),
    ],
    outputs=gr.Label(num_top_classes=2),
    title="📧 Spam Detector (AI Project)",
    # The default threshold is chosen by maximizing F1, not accuracy.
    description=(
        "Enter a message to check if it's Spam or Not Spam. "
        "The default threshold is auto-selected to maximize the F1-score."
    ),
)

# share=True publishes the demo on a temporary public URL.
demo.launch(share=True)


Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://43300cf8aef0a7932d.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


