In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score

# ---------------------------------------------------------------------------
# Load the labelled mail data and normalise it.
# ---------------------------------------------------------------------------
df = pd.read_csv("C:/Users/Sangee/Desktop/mail_data.csv")

# Keep every non-null cell as-is and substitute '' for the missing ones so the
# vectoriser below never sees NaN.
data = df.where(df.notna(), '')

# Integer label: 1 for 'ham', 0 for every other category value.
data['category'] = [1 if label == 'ham' else 0 for label in data['Category']]
data = data.drop(columns=['Category'])

# ---------------------------------------------------------------------------
# Hold out 20% of the rows for testing, keeping the class ratio (stratify).
# ---------------------------------------------------------------------------
X = data['Message']
y = data['category']
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
    stratify=y,
)
y_train = y_train.astype('int')
y_test = y_test.astype('int')

# ---------------------------------------------------------------------------
# TF-IDF features: vocabulary is learned from the training split only and
# then re-used, unchanged, for the test split.
# ---------------------------------------------------------------------------
feature_extraction = TfidfVectorizer(lowercase=True, stop_words='english', min_df=1)
X_train_feature = feature_extraction.fit_transform(X_train)
X_test_feature = feature_extraction.transform(X_test)

# ---------------------------------------------------------------------------
# Fit the classifier (class weights balanced) and report accuracy on both
# splits.
# ---------------------------------------------------------------------------
model = LogisticRegression(class_weight='balanced').fit(X_train_feature, y_train)

train_accuracy = accuracy_score(y_train, model.predict(X_train_feature))
test_accuracy = accuracy_score(y_test, model.predict(X_test_feature))
print("Training Accuracy:", train_accuracy)
print("Test Accuracy:", test_accuracy)

# ---------------------------------------------------------------------------
# Mutable state shared with the interactive spam-handling code.
# ---------------------------------------------------------------------------
spam_reports = []           # messages the user reported as spam
blocked_senders = set()     # sender identifiers the user chose to block
messages = list(X_test)     # message store the "delete" option removes from

def handle_spam(message, sender="Unknown Sender"):
    """Ask the user what to do with a message that was flagged as spam.

    Prints a three-option menu, reads one choice from standard input and
    applies it to the module-level ``spam_reports``, ``blocked_senders`` or
    ``messages`` collections. An unrecognised choice is reported to the user
    and leaves all three collections untouched.

    Args:
        message: Text of the message that was classified as spam.
        sender: Identifier added to ``blocked_senders`` when the user picks
            "Block Sender". Defaults to the placeholder ``"Unknown Sender"``,
            which is what is recorded when the caller has no sender
            information.

    Returns:
        None. The function works through printing and by mutating the
        module-level collections.
    """
    print("\nOptions for Spam Handling:")
    print("1. Report Spam")
    print("2. Block Sender")
    print("3. Delete Message")

    # Strip surrounding whitespace so an answer such as " 2" is still accepted.
    choice = input("Choose an option (1-3): ").strip()

    if choice == '1':
        spam_reports.append(message)
        print("✅ Message reported as spam.")

    elif choice == '2':
        blocked_senders.add(sender)
        print(f"🚫 Sender '{sender}' has been blocked.")

    elif choice == '3':
        # EAFP: list.remove raises ValueError when the message is not stored.
        try:
            messages.remove(message)
        except ValueError:
            print("⚠ Message not found.")
        else:
            print("🗑 Message deleted.")

    else:
        # Previously an invalid answer was ignored without any feedback.
        print("⚠ Invalid option. No action taken.")

# Interactive loop: classify each message typed by the user until they enter
# 'exit', offering the spam-handling menu whenever a message is flagged.

# predict_proba orders its columns like model.classes_, so look up the column
# that belongs to label 1 (ham) once instead of assuming its position.
ham_column = list(model.classes_).index(1)

while True:
    user_mail = input("\nEnter your email message (or type 'exit' to quit): ")
    if user_mail.strip().lower() == 'exit':
        break

    # A blank message carries no text to classify; ask again rather than
    # reporting a prediction for it.
    if not user_mail.strip():
        print("⚠ Empty message. Please enter some text.")
        continue

    input_data_features = feature_extraction.transform([user_mail])
    prediction = model.predict(input_data_features)
    prediction_prob = model.predict_proba(input_data_features)[:, ham_column]  # Probability of being ham

    # Report the probability of the class that was actually predicted, so a
    # confidently detected spam message shows a high confidence, not a low one.
    is_ham = prediction[0] == 1
    confidence = prediction_prob[0] if is_ham else 1 - prediction_prob[0]

    print("\nPrediction Result:")
    print(f"Confidence: {confidence:.4f}")
    if is_ham:
        print("✅ Ham mail")
    else:
        print("🚨 Spam mail")
        handle_spam(user_mail)

# Summary of the actions taken during the session.
print("\nBlocked Senders:", blocked_senders)
print("Spam Reports:", spam_reports)

Training Accuracy: 0.9928202827013687
Test Accuracy: 0.968609865470852



Enter your email message (or type 'exit' to quit):  SIX chances to win CASH! From 100 to 20,000 pounds txt> CSH11 and send to 87575. Cost 150p/day, 6days, 16+ TsandCs apply Reply HL 4 info



Prediction Result:
Confidence: 0.0263
🚨 Spam mail

Options for Spam Handling:
1. Report Spam
2. Block Sender
3. Delete Message


Choose an option (1-3):  2


🚫 Sender 'Unknown Sender' has been blocked.



Enter your email message (or type 'exit' to quit):  I'm gonna be home soon



Prediction Result:
Confidence: 0.9639
✅ Ham mail
