<a href="https://colab.research.google.com/github/Hadia-git-sketch/ArchTechnologies_1st_MonthTasks/blob/main/EmailSpamDetection.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
import pandas as pd
import gradio as gr
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import classification_report, accuracy_score

# --- 1. DATA PREPARATION & TRAINING ---
# Pull the SMS Spam Collection: a tab-separated file whose two columns are
# named on load.
url = "https://raw.githubusercontent.com/justmarkham/pycon-2016-tutorial/master/data/sms.tsv"
df = pd.read_csv(url, sep='\t', names=['label', 'text'])

# Numeric target column: ham -> 0, spam -> 1.
df['label_num'] = df['label'].map({'ham': 0, 'spam': 1})

# Hold back 20% of the rows for evaluation; the fixed seed keeps the split
# reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    df['text'],
    df['label_num'],
    test_size=0.2,
    random_state=42,
)

# TF-IDF features with English stop words removed. The vectorizer is fitted
# on the training texts only and then reused, unchanged, on the test texts.
vectorizer = TfidfVectorizer(stop_words='english')
X_train_tfidf = vectorizer.fit_transform(X_train)
X_test_tfidf = vectorizer.transform(X_test)

# Fit the multinomial Naive Bayes classifier (fit() returns the estimator).
model = MultinomialNB().fit(X_train_tfidf, y_train)

# --- 2. EVALUATION (Required for Submission) ---
# Score the held-out split: overall accuracy plus a per-class
# precision/recall/F1 table.
y_pred = model.predict(X_test_tfidf)
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
report = classification_report(
    y_true=y_test,
    y_pred=y_pred,
    target_names=['Ham', 'Spam'],  # display names for classes 0 and 1
)

print(f"Model Accuracy: {accuracy:.2%}")
print("\nEvaluation Report:\n", report)

# --- 3. INTERFACE FUNCTION ---
def classify_email(message):
    """Classify a message as spam or ham and report the model's confidence.

    Args:
        message: Raw text of the email/SMS to analyze. ``None``, empty and
            whitespace-only values are all treated as missing input.

    Returns:
        A prompt asking for text when ``message`` is missing; otherwise a
        two-line string holding the predicted label and the highest class
        probability formatted as a percentage.
    """
    # `not message` covers None and "" so that .strip() is never called on
    # None (which would raise AttributeError); .strip() covers blank text.
    if not message or not message.strip():
        return "Please enter text to analyze."

    vec_message = vectorizer.transform([message])
    prediction = model.predict(vec_message)[0]
    probs = model.predict_proba(vec_message)[0]

    # The emoji are written as Unicode escapes so the file's encoding cannot
    # corrupt them; the previous literals were UTF-8 bytes mis-decoded as
    # Windows-1252 and rendered as garbage in the UI.
    if prediction == 1:
        label = "\U0001F6A8 SPAM"  # police-car-light emoji
    else:
        label = "\u2705 HAM (Safe)"  # check-mark emoji
    confidence = f"{max(probs) * 100:.2f}%"

    return f"Result: {label}\nConfidence: {confidence}"

# --- 4. GRADIO GUI ---
# Input and output widgets are built first, then wired to classify_email.
message_box = gr.Textbox(
    lines=4,
    placeholder="Enter email/message text here...",
    label="Email Content",
)
analysis_box = gr.Textbox(label="AI Analysis")

app = gr.Interface(
    fn=classify_email,
    inputs=message_box,
    outputs=analysis_box,
    title="Arch Technologies: Email Spam Guard",
    # Shows the accuracy measured on the held-out split in the page header.
    description=f"Task 1: Email Spam Classification. Model Accuracy: {accuracy:.2%}",
    theme="soft",
)

# Start the web UI only when this file is executed directly.
if __name__ == "__main__":
    app.launch()

Model Accuracy: 97.85%

Evaluation Report:
               precision    recall  f1-score   support

         Ham       0.98      1.00      0.99       966
        Spam       1.00      0.84      0.91       149

    accuracy                           0.98      1115
   macro avg       0.99      0.92      0.95      1115
weighted avg       0.98      0.98      0.98      1115

It looks like you are running Gradio on a hosted Jupyter notebook, which requires `share=True`. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://1d976beef2e84e156a.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)
