In [1]:
!pip install gradio nltk scikit-learn pandas

Collecting gradio
  Downloading gradio-5.29.0-py3-none-any.whl.metadata (16 kB)
Collecting aiofiles<25.0,>=22.0 (from gradio)
  Downloading aiofiles-24.1.0-py3-none-any.whl.metadata (10 kB)
Collecting fastapi<1.0,>=0.115.2 (from gradio)
  Downloading fastapi-0.115.12-py3-none-any.whl.metadata (27 kB)
Collecting ffmpy (from gradio)
  Downloading ffmpy-0.5.0-py3-none-any.whl.metadata (3.0 kB)
Collecting gradio-client==1.10.0 (from gradio)
  Downloading gradio_client-1.10.0-py3-none-any.whl.metadata (7.1 kB)
Collecting groovy~=0.1 (from gradio)
  Downloading groovy-0.1.2-py3-none-any.whl.metadata (6.1 kB)
Collecting pydub (from gradio)
  Downloading pydub-0.25.1-py2.py3-none-any.whl.metadata (1.4 kB)
Collecting python-multipart>=0.0.18 (from gradio)
  Downloading python_multipart-0.0.20-py3-none-any.whl.metadata (1.8 kB)
Collecting ruff>=0.9.3 (from gradio)
  Downloading ruff-0.11.9-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (25 kB)
Collecting safehttpx<0.2.0,>=0.1.6

In [4]:
import pandas as pd
import numpy as np
import string
import nltk
import gradio as gr
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import accuracy_score
from nltk.corpus import stopwords

# Fetch the NLTK stop-word corpus and keep the English entries in a set so
# that membership checks in preprocess() are constant-time.
nltk.download('stopwords')
stop_words = {*stopwords.words('english')}

# Load the tab-separated SMS dataset (no header row: column 0 is the label,
# column 1 is the raw message text).
url = "https://raw.githubusercontent.com/justmarkham/pycon-2016-tutorial/master/data/sms.tsv"
df = pd.read_table(url, names=['label', 'text'], header=None)

# Preprocessing
# Translation table that deletes every ASCII punctuation character in one pass.
_PUNCTUATION_TABLE = str.maketrans('', '', string.punctuation)


def preprocess(text):
    """Normalise a raw message into a space-separated string of tokens.

    Steps: lower-case, delete ASCII punctuation, split on whitespace, drop
    tokens found in the module-level ``stop_words`` set, re-join with spaces.

    Args:
        text: The raw message. Anything that is not a ``str`` (e.g. ``None``
            from an empty UI field or a NaN from a missing dataset cell) is
            treated as an empty message.

    Returns:
        The cleaned message; ``''`` when nothing survives the filtering.
    """
    if not isinstance(text, str):
        return ''

    # NOTE(review): punctuation is removed *before* the stop-word check, so a
    # stop-word entry that itself contains an apostrophe can never match its
    # stripped form — confirm whether that matters for the list in use.
    tokens = text.lower().translate(_PUNCTUATION_TABLE).split()
    return ' '.join(token for token in tokens if token not in stop_words)

# Clean every message once; classify_email() applies the same preprocess()
# at inference time so training and serving see identical tokenisation.
df['cleaned'] = df['text'].apply(preprocess)

# Encode labels and fail fast on anything other than 'ham'/'spam': an
# unmapped label would otherwise become NaN and surface later as an obscure
# error inside model.fit().
y = df['label'].map({'ham': 0, 'spam': 1})
if y.isna().any():
    unexpected = sorted(df.loc[y.isna(), 'label'].astype(str).unique())
    raise ValueError(f"Unexpected label values in dataset: {unexpected}")
y = y.astype(int)

# Train-test split on the *text* first, so the vocabulary is learned from the
# training portion only and the held-out accuracy is not inflated by leakage.
text_train, text_test, y_train, y_test = train_test_split(
    df['cleaned'], y, test_size=0.2, random_state=42
)

# Feature Extraction: fit on train, then only transform everything else.
vectorizer = CountVectorizer()
X_train = vectorizer.fit_transform(text_train)
X_test = vectorizer.transform(text_test)
# Full-corpus matrix kept under its original name for any later cell that uses it.
X = vectorizer.transform(df['cleaned'])

# Train model
model = MultinomialNB()
model.fit(X_train, y_train)

# Evaluate on the held-out 20 %
acc = accuracy_score(y_test, model.predict(X_test))

# Stylesheet handed to gr.Blocks(css=...) below. It defines a dark colour
# palette via CSS variables plus the classes this notebook attaches itself:
#   .gr-textbox                      -> elem_classes of the input Textbox
#   .stats / .stat-item / .stat-*    -> the "Model Statistics" panel
#   .output-panel, .spam, .not-spam  -> result markup built by update_output()
# NOTE(review): .gr-interface, .gr-button, .example-container and .example-btn
# are not attached through elem_classes anywhere in this notebook; whether
# they match Gradio's own generated markup depends on the Gradio version —
# verify in the browser.
custom_css = """
:root {
    --primary: #6e48aa;
    --secondary: #9d50bb;
    --accent: #4776E6;
    --dark: #121212;
    --light: #f8f9fa;
    --success: #28a745;
    --danger: #dc3545;
}

body {
    background: linear-gradient(135deg, var(--dark), #1a1a2e) !important;
    color: var(--light) !important;
    font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif !important;
}

.gradio-container {
    max-width: 800px !important;
    margin: 2rem auto !important;
    border-radius: 15px !important;
    box-shadow: 0 10px 30px rgba(0, 0, 0, 0.3) !important;
    background: rgba(30, 30, 30, 0.8) !important;
    backdrop-filter: blur(10px) !important;
    border: 1px solid rgba(255, 255, 255, 0.1) !important;
}

.gr-interface {
    padding: 2rem !important;
}

h1 {
    text-align: center !important;
    background: linear-gradient(to right, var(--primary), var(--secondary)) !important;
    -webkit-background-clip: text !important;
    background-clip: text !important;
    color: transparent !important;
    font-weight: 700 !important;
    margin-bottom: 0.5rem !important;
}

.gr-textbox {
    border-radius: 10px !important;
    border: 1px solid rgba(255, 255, 255, 0.1) !important;
    background: rgba(20, 20, 20, 0.7) !important;
    color: white !important;
    padding: 1rem !important;
    font-size: 1rem !important;
    transition: all 0.3s ease !important;
}

.gr-textbox:focus {
    border-color: var(--accent) !important;
    box-shadow: 0 0 0 2px rgba(71, 118, 230, 0.3) !important;
}

.gr-button {
    background: linear-gradient(to right, var(--primary), var(--secondary)) !important;
    border: none !important;
    color: white !important;
    border-radius: 50px !important;
    padding: 0.75rem 2rem !important;
    font-weight: 600 !important;
    text-transform: uppercase !important;
    letter-spacing: 0.5px !important;
    transition: all 0.3s ease !important;
    margin: 0 auto !important;
    display: block !important;
}

.gr-button:hover {
    transform: translateY(-2px) !important;
    box-shadow: 0 5px 15px rgba(110, 72, 170, 0.4) !important;
}

.output-panel {
    border-radius: 10px !important;
    padding: 1.5rem !important;
    margin-top: 1rem !important;
    text-align: center !important;
    font-size: 1.2rem !important;
    font-weight: 600 !important;
}

.not-spam {
    background: rgba(40, 167, 69, 0.2) !important;
    border: 1px solid var(--success) !important;
    color: var(--success) !important;
}

.spam {
    background: rgba(220, 53, 69, 0.2) !important;
    border: 1px solid var(--danger) !important;
    color: var(--danger) !important;
}

.stats {
    display: flex;
    justify-content: space-around;
    margin: 1.5rem 0;
    background: rgba(255, 255, 255, 0.05);
    padding: 1rem;
    border-radius: 10px;
}

.stat-item {
    text-align: center;
}

.stat-value {
    font-size: 1.5rem;
    font-weight: 700;
    background: linear-gradient(to right, var(--primary), var(--secondary));
    -webkit-background-clip: text;
    background-clip: text;
    color: transparent;
}

.stat-label {
    font-size: 0.9rem;
    opacity: 0.8;
}

.example-container {
    margin: 1.5rem 0;
}

.example-btn {
    background: rgba(255, 255, 255, 0.05) !important;
    border: 1px solid rgba(255, 255, 255, 0.1) !important;
    margin: 0.25rem !important;
    padding: 0.5rem 1rem !important;
    border-radius: 20px !important;
    transition: all 0.2s ease !important;
}

.example-btn:hover {
    background: rgba(110, 72, 170, 0.2) !important;
    border-color: var(--primary) !important;
}

footer {
    text-align: center;
    margin-top: 2rem;
    opacity: 0.7;
    font-size: 0.9rem;
}
"""

# Classification function
def classify_email(email):
    """Classify a single message as genuine or spam.

    The message is cleaned with ``preprocess``, vectorised with the fitted
    module-level ``vectorizer`` and scored by the module-level ``model``.

    Args:
        email: Raw message text entered by the user.

    Returns:
        A dict with three string values:
            ``"result"``     – human-readable verdict,
            ``"confidence"`` – probability of the predicted class as a
                               percentage with one decimal (or ``"N/A"``),
            ``"class"``      – CSS class for the result panel
                               (``"not-spam"``, ``"spam"`` or ``""``).
    """
    # A blank message would be vectorised to an all-zero row and reported as
    # a confident "Genuine Message"; report that there is nothing to analyse
    # instead of presenting that as a verdict.
    if not isinstance(email, str) or not email.strip():
        return {
            "result": "ℹ️ Please enter a message to analyze.",
            "confidence": "N/A",
            "class": ""
        }

    cleaned = preprocess(email)
    vectorized = vectorizer.transform([cleaned])
    prediction = model.predict(vectorized)[0]
    # Columns of predict_proba follow the 0 (ham) / 1 (spam) label encoding.
    proba = model.predict_proba(vectorized)[0]

    if prediction == 0:
        return {
            "result": "✅ Genuine Message",
            "confidence": f"{proba[0]*100:.1f}%",
            "class": "not-spam"
        }
    return {
        "result": "⚠️ Spam Alert!",
        "confidence": f"{proba[1]*100:.1f}%",
        "class": "spam"
    }

# Sample messages offered through gr.Examples in the UI below; selecting one
# loads it into the input textbox.
examples = [
    "Congratulations! You've won a $1000 Walmart gift card. Click here to claim your prize now!",
    "Hi John, just checking in to see if you're still coming to the meeting tomorrow at 2pm.",
    "URGENT: Your bank account has been compromised. Click this link to secure your account immediately!",
    "Mom, can you pick up some milk on your way home? Thanks!",
    "FREE iPhone 15 Pro for our valued customers. Limited time offer! Reply YES to claim."
]

# Dashboard figures are computed from the fitted model and the loaded data so
# the numbers shown in the UI always match what was actually trained.
test_predictions = model.predict(X_test)
spam_mask = np.asarray(y_test) == 1
# Share of held-out spam messages the model flags as spam (recall on class 1).
spam_recall = (
    float((test_predictions[spam_mask] == 1).mean()) if spam_mask.any() else 0.0
)
total_messages = len(df)

# Text shown in the result panel before any analysis and after "Clear".
RESULT_PLACEHOLDER = "Your analysis will appear here..."

# Create blocks interface
with gr.Blocks(css=custom_css, theme=gr.themes.Soft()) as demo:
    with gr.Column():
        gr.Markdown("""
        # ✉️ Advanced Spam Detection System
        """)

        with gr.Row():
            # Left column: message input, action buttons and example messages.
            with gr.Column(scale=2):
                input_text = gr.Textbox(
                    label="Enter your message",
                    placeholder="Paste an email or SMS message here...",
                    lines=6,
                    elem_classes=["gr-textbox"]
                )

                with gr.Row():
                    submit_btn = gr.Button("Analyze Message", variant="primary")
                    clear_btn = gr.Button("Clear")

                with gr.Accordion("Try these examples", open=False):
                    gr.Examples(
                        examples=examples,
                        inputs=input_text,
                        label="Click any example to load it"
                    )

            # Right column: model statistics and the analysis result.
            with gr.Column(scale=1):
                with gr.Group():
                    gr.Markdown("### 📊 Model Statistics")
                    with gr.Group(elem_classes=["stats"]):
                        with gr.Column():
                            gr.Markdown(f'<div class="stat-value">{acc*100:.1f}%</div><div class="stat-label">Accuracy</div>',
                                       elem_classes=["stat-item"])
                        with gr.Column():
                            gr.Markdown(f'<div class="stat-value">{total_messages:,}</div><div class="stat-label">Messages Analyzed</div>',
                                       elem_classes=["stat-item"])
                        with gr.Column():
                            gr.Markdown(f'<div class="stat-value">{spam_recall*100:.1f}%</div><div class="stat-label">Spam Detection Rate</div>',
                                       elem_classes=["stat-item"])

                with gr.Group():
                    gr.Markdown("### 📝 Message Analysis")
                    output_result = gr.Markdown(
                        RESULT_PLACEHOLDER,
                        elem_classes=["output-panel"]
                    )
                    output_confidence = gr.Markdown(
                        "",
                        elem_classes=["output-panel"]
                    )

        gr.Markdown("""
        <footer>
            Powered by Naive Bayes Classifier | Trained on UCI SMS Spam Collection
        </footer>
        """)

    # Event handlers
    def update_output(email):
        """Classify ``email`` and return the HTML for the two result panels.

        Only fixed strings produced by classify_email() are interpolated into
        the markup; the user's message itself is never echoed back.
        """
        result = classify_email(email)
        return [
            f'<div class="output-panel {result["class"]}">{result["result"]}</div>',
            f'<div class="output-panel">Confidence: {result["confidence"]}</div>'
        ]

    submit_btn.click(
        fn=update_output,
        inputs=input_text,
        outputs=[output_result, output_confidence]
    )

    # "Clear" resets the whole form: empties the input box, restores the
    # placeholder in the result panel and blanks the confidence panel.
    clear_btn.click(
        fn=lambda: ["", RESULT_PLACEHOLDER, ""],
        outputs=[input_text, output_result, output_confidence]
    )

# Launch the interface
demo.launch()

[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


It looks like you are running Gradio on a hosted a Jupyter notebook. For the Gradio app to work, sharing must be enabled. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://caaaa1dcf2fb218860.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


