In [1]:
%%writefile app.py
import streamlit as st
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.pipeline import Pipeline
from sklearn.model_selection import train_test_split

# Streamlit re-runs this script from the top on every widget interaction.
# Caching the load + fit keeps the CSV from being re-read and the model from
# being re-trained on each button click.
# NOTE: st.cache_resource requires a Streamlit release that provides it
# (older releases only had st.cache).
@st.cache_resource
def _load_data_and_train_model(csv_path):
    """Load the labelled messages and fit the spam classifier.

    Args:
        csv_path: Path to a CSV file with 'Message' and 'Category' columns.

    Returns:
        A tuple ``(data, splits, pipeline)``: the loaded DataFrame, the
        ``(X_train, X_test, y_train, y_test)`` result of
        ``train_test_split``, and the fitted pipeline.

    Raises:
        KeyError: If the 'Message' or 'Category' column is missing.
    """
    data = pd.read_csv(csv_path, encoding='latin-1')
    # Rows without a message or a label cannot be vectorised / fitted.
    data = data.dropna(subset=['Message', 'Category'])

    # Splitting the dataset into training and testing sets
    splits = train_test_split(
        data['Message'], data['Category'], test_size=0.2, random_state=42
    )
    train_messages, _, train_labels, _ = splits

    # Build and train the Naive Bayes model using a pipeline
    pipeline = Pipeline([
        ('tfidf', TfidfVectorizer(stop_words='english')),
        ('nb', MultinomialNB(alpha=0.1)),
    ])
    pipeline.fit(train_messages, train_labels)
    return data, splits, pipeline


df, (X_train, X_test, y_train, y_test), model_pipeline = (
    _load_data_and_train_model("spam.csv")
)
X = df['Message']
y = df['Category']

# Streamlit App
st.title("Email Spam Detection System")
st.markdown("Enter your email message below and click **Check** to determine if it is spam.")

# Text input area
user_message = st.text_area("Enter your message here:")

if st.button("Check"):
    if not user_message.strip():
        st.error("Please enter a message to analyze.")
    else:
        # predict() expects an iterable of documents, hence the one-item list.
        prediction = model_pipeline.predict([user_message])[0]
        # str() guards against label values that are not plain Python strings.
        if str(prediction).lower() == "spam":
            st.error("The message is **spam**.")
        else:
            st.success("The message is **legitimate (not spam)**.")

        # Pie chart for dataset distribution
        # Count the messages per category; this was previously never
        # computed, so drawing the chart raised NameError.
        label_counts = df['Category'].value_counts()
        # Pick the colour per label instead of by position: value_counts()
        # orders by frequency, so positional colours could paint spam green.
        slice_colors = [
            '#ff9999' if str(label).lower() == 'spam' else '#99ff99'
            for label in label_counts.index
        ]
        fig, ax = plt.subplots()
        ax.pie(
            label_counts,
            labels=label_counts.index,
            autopct='%1.1f%%',
            startangle=90,
            colors=slice_colors,
        )
        ax.set_title("Dataset Distribution: Spam vs. Legitimate")
        st.pyplot(fig)
        # pyplot keeps a reference to every figure it creates; close it so
        # figures do not accumulate across script reruns.
        plt.close(fig)

Writing app.py


In [None]:
# !streamlit run app.py