In [1]:
import pandas as pd

# Load the raw e-mail corpus; the path is relative to the notebook's working directory.
data = pd.read_csv('spam_ham_dataset.csv')

# Encode the string labels as integers (ham -> 0, spam -> 1) for the classifier.
label_codes = data['label'].map({'ham': 0, 'spam': 1})

# Series.map() yields NaN for any value missing from the dict, so fail loudly here
# instead of passing NaN targets on to the later cells.
if label_codes.isna().any():
    unexpected = sorted(data.loc[label_codes.isna(), 'label'].astype(str).unique())
    raise ValueError(f"Unexpected values in the 'label' column: {unexpected}")

data['label'] = label_codes
print(data['label'].unique())

[0 1]


In [2]:
from sklearn.feature_extraction.text import TfidfVectorizer
# Turn the raw e-mail text into TF-IDF features (vocabulary capped at 1000 terms)
# so it can be fed to the logistic-regression model.
# NOTE(review): the vectorizer is fitted on every row here, before the train/test
# split done in a later cell, so held-out e-mails influence the vocabulary and IDF
# weights — confirm this is acceptable for the reported metrics.
vectorizer = TfidfVectorizer(max_features=1000)

# Missing texts become empty strings so the vectorizer only receives str values.
email_texts = data['text'].fillna('')

# fit_transform returns a sparse matrix; convert it to a dense NumPy array.
X = vectorizer.fit_transform(email_texts).toarray()


In [3]:
# Sanity check: print the dimensions of the feature matrix built above.
print(X.shape)

(5171, 1000)


In [4]:
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
# Hold out 20% of the rows for evaluation; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    data['label'],
    test_size=0.2,
    random_state=42,
)

# Fit a logistic-regression classifier using scikit-learn's default settings.
model = LogisticRegression()
model.fit(X_train, y_train)

In [5]:
from sklearn.metrics import accuracy_score

# Score the held-out split: predict labels, then compare them with the true ones.
y_pred = model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)

# accuracy is a fraction in [0, 1]; report it as a percentage.
print(f"Model Accuracy: {accuracy * 100:.2f}%")


Model Accuracy: 97.58%


In [6]:
from sklearn.metrics import classification_report, confusion_matrix

# Per-class precision/recall/F1 report, followed on the next line by the confusion matrix.
print(
    classification_report(y_test, y_pred),
    confusion_matrix(y_test, y_pred),
    sep="\n",
)


              precision    recall  f1-score   support

           0       0.98      0.98      0.98       742
           1       0.96      0.96      0.96       293

    accuracy                           0.98      1035
   macro avg       0.97      0.97      0.97      1035
weighted avg       0.98      0.98      0.98      1035

[[729  13]
 [ 12 281]]


In [7]:
import pickle

# Persist the fitted classifier and the fitted vectorizer side by side; both are
# needed later to classify new text.
for artifact_path, artifact in (
    ('spam_classifier.pkl', model),
    ('vectorizer.pkl', vectorizer),
):
    with open(artifact_path, 'wb') as artifact_file:
        pickle.dump(artifact, artifact_file)


In [8]:
import pickle
from IPython.display import display
import ipywidgets as widgets

# Reload both artifacts from disk so the demo below runs against the persisted files.
# pickle.load can execute arbitrary code: only load pickles from a trusted source.
with open('spam_classifier.pkl', 'rb') as model_file:
    model = pickle.load(model_file)

with open('vectorizer.pkl', 'rb') as vectorizer_file:
    vectorizer = pickle.load(vectorizer_file)

def predict_email(email_text):
    """Classify a single e-mail using the module-level ``vectorizer`` and ``model``.

    Args:
        email_text: Raw text of the e-mail to classify.

    Returns:
        "Spam" when the model predicts label 1, otherwise "Ham".
    """
    # The vectorizer expects an iterable of documents, hence the one-item list.
    features = vectorizer.transform([email_text])
    predicted_label = model.predict(features)[0]
    if predicted_label == 1:
        return "Spam"
    return "Ham"

# Widgets for the interactive demo: a result label, a trigger button and a text box.
output_label = widgets.Label()
classify_button = widgets.Button(description="Classify")
email_input = widgets.Textarea(
    description='Email:',
    placeholder='Enter email text here...',
    layout=widgets.Layout(height='100px', width='80%'),
)

def classify_email(_):
    """Button click handler: classify the text box contents and show the result.

    Blank or whitespace-only input is not sent to the model; a prompt is shown
    instead, because such text gives the vectorizer nothing to score and the
    resulting "prediction" would not reflect any e-mail content.

    Args:
        _: Object passed in by the button's click callback; unused.
    """
    email_text = email_input.value
    if not email_text.strip():
        output_label.value = "Please enter some email text to classify."
        return
    output_label.value = f"Prediction: {predict_email(email_text)}"

# Run classify_email each time the button is clicked.
classify_button.on_click(classify_email)

# Render the text box, the button and the result label, in that order.
display(email_input, classify_button, output_label)


Textarea(value='', description='Email:', layout=Layout(height='100px', width='80%'), placeholder='Enter email …

Button(description='Classify', style=ButtonStyle())

Label(value='')