In [6]:
import pandas as pd
import numpy as np
import re
import string
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report

# Load the spam dataset. The path is hard-coded; replace it with the location
# of your own copy of spam.csv when running elsewhere.
df = pd.read_csv("/content/spam.csv", encoding='latin-1')

# Keep only the label and text columns. The explicit .copy() makes `df` an
# independent frame, so adding/overwriting columns on it later does not
# trigger pandas' SettingWithCopyWarning.
df = df[['v1', 'v2']].copy()
df.columns = ['label', 'message']

# Convert labels to binary (ham=0, spam=1). Series.map turns every value that
# is not a key of the mapping into NaN, so fail loudly here rather than letting
# NaN labels reach model training.
label_codes = df['label'].map({'ham': 0, 'spam': 1})
unmapped = label_codes.isna()
if unmapped.any():
    unexpected = sorted(df.loc[unmapped, 'label'].astype(str).unique())
    raise ValueError(f"Unexpected label values in dataset: {unexpected}")
df['label'] = label_codes

# Text preprocessing helpers, built once instead of on every call.
_DIGITS_RE = re.compile(r'\d+')
_PUNCTUATION_TABLE = str.maketrans('', '', string.punctuation)


def preprocess_text(text):
    """Normalize a message for vectorization.

    Lowercases the text, removes every run of digits, then deletes all
    characters listed in ``string.punctuation``. Whitespace is left untouched,
    so removed tokens may leave consecutive spaces behind.

    Args:
        text: The raw message. ``None`` and float NaN (what pandas yields for
            empty CSV cells) are treated as an empty message; any other
            non-string value is converted with ``str()`` first.

    Returns:
        The cleaned string.
    """
    # `text != text` is only true for NaN.
    if text is None or (isinstance(text, float) and text != text):
        return ""
    if not isinstance(text, str):
        text = str(text)

    text = text.lower()
    text = _DIGITS_RE.sub('', text)  # Remove numbers
    return text.translate(_PUNCTUATION_TABLE)  # Remove punctuation

df['cleaned_text'] = df['message'].apply(preprocess_text)

# Split the cleaned text BEFORE vectorizing. Fitting TF-IDF on the full dataset
# would let the test messages influence the vocabulary and IDF weights (data
# leakage) and make the held-out evaluation optimistic. test_size and
# random_state are unchanged, so the row partition is the same as when the
# already-vectorized matrix was split.
text_train, text_test, y_train, y_test = train_test_split(
    df['cleaned_text'], df['label'], test_size=0.2, random_state=42
)

# Convert text to numerical features using TF-IDF, learned from training data only
vectorizer = TfidfVectorizer(stop_words='english', max_features=5000)
X_train = vectorizer.fit_transform(text_train)
X_test = vectorizer.transform(text_test)

# Feature matrix for the whole dataset, kept under its original name in case
# other cells reference it; it is not used for fitting.
X = vectorizer.transform(df['cleaned_text'])

# Train Logistic Regression Model
model = LogisticRegression()
model.fit(X_train, y_train)

# Predictions on the held-out split
y_pred = model.predict(X_test)

def classify_email(email_text):
    """Classify a single email as spam or not spam.

    The text is cleaned with ``preprocess_text``, turned into a one-row TF-IDF
    matrix by the module-level ``vectorizer``, and scored by the module-level
    ``model``.

    Args:
        email_text: Raw email content.

    Returns:
        ``"Spam"`` when the model predicts label 1, otherwise ``"Not Spam"``.
    """
    cleaned = preprocess_text(email_text)
    # transform() expects an iterable of documents, hence the one-item list.
    features = vectorizer.transform([cleaned])
    predicted_label = model.predict(features)[0]
    if predicted_label == 1:
        return "Spam"
    return "Not Spam"

# Interactive check: read one email from standard input, classify it with the
# trained model, and report the verdict.
email_input = input("Enter email content: ")
result = classify_email(email_input)
verdict_line = f"The email is classified as: {result}"
print(verdict_line)



Enter email content: chool of Digital Sciences, DUK . <sods@duk.ac.in> 12:43 PM (1 hour ago) to student-mdcs24, student-mdgi24, student-msda24, student-mdai24, aayisha.cs24, adithyaviju.cs24, akhil.cs24, anagham.cs24, ananthakrishnan.cs24, ARSHED, govindk.cs24, hadi.cs24, ibrahim.cs24, sreelakshmi.cs24, suhana.cs24  Dear Students, As part of the MIC - AICTE Faculty Development Program (FDP), Academic office require Gallery Hall 45 on the following dates and times to conduct the inaugural and closing events:  April 7th from 9:00 AM to 12:00 PM  (Web Technology - Prof Sanil P Nair)  April 11th from 2:00 PM to 6:00 PM (HD2 (Industry Readiness Program -  Prof Sanil P Nair)  To accommodate these events, kindly request that classes scheduled in Room No. 45 during these periods be relocated to Room No. 44.
The email is classified as: Not Spam
