In [6]:
import numpy as np
import pandas as pd
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics import accuracy_score, classification_report
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import MultinomialNB

In [7]:
# Load the raw ham/spam message dataset. latin-1 maps every byte to a character,
# so decoding cannot raise even if the file is not valid UTF-8.
data = pd.read_csv("spam.csv", encoding='latin-1')

In [8]:
# Preview the first rows to see which columns the CSV actually contains.
data.head()

Unnamed: 0,v1,v2,Unnamed: 2,Unnamed: 3,Unnamed: 4
0,ham,"Go until jurong point, crazy.. Available only ...",,,
1,ham,Ok lar... Joking wif u oni...,,,
2,spam,Free entry in 2 a wkly comp to win FA Cup fina...,,,
3,ham,U dun say so early hor... U c already then say...,,,
4,ham,"Nah I don't think he goes to usf, he lives aro...",,,


In [9]:
# Keep only the label (v1) and message text (v2); the "Unnamed: 2..4" columns are
# empty in the previewed rows. .copy() makes `data` an independent frame so the
# later in-place edits (renaming columns, recoding labels) do not operate on a
# slice of the raw frame and trigger pandas' SettingWithCopyWarning.
data = data[['v1', 'v2']].copy()

In [10]:
# Display the frame to confirm only v1 and v2 remain.
data

Unnamed: 0,v1,v2
0,ham,"Go until jurong point, crazy.. Available only ..."
1,ham,Ok lar... Joking wif u oni...
2,spam,Free entry in 2 a wkly comp to win FA Cup fina...
3,ham,U dun say so early hor... U c already then say...
4,ham,"Nah I don't think he goes to usf, he lives aro..."
...,...,...
5567,spam,This is the 2nd time we have tried 2 contact u...
5568,ham,Will Ì_ b going to esplanade fr home?
5569,ham,"Pity, * was in mood for that. So...any other s..."
5570,ham,The guy did some bitching but I acted like i'd...


In [11]:
# Rename by column name rather than by position: the mapping stays correct if the
# column order ever changes, and re-running the cell is a harmless no-op because
# rename() ignores keys that are no longer present. rename() also returns a new
# DataFrame instead of mutating the existing one.
data = data.rename(columns={'v1': 'label', 'v2': 'message'})

In [12]:
# Display the frame to check the new column names.
data

Unnamed: 0,label,message
0,ham,"Go until jurong point, crazy.. Available only ..."
1,ham,Ok lar... Joking wif u oni...
2,spam,Free entry in 2 a wkly comp to win FA Cup fina...
3,ham,U dun say so early hor... U c already then say...
4,ham,"Nah I don't think he goes to usf, he lives aro..."
...,...,...
5567,spam,This is the 2nd time we have tried 2 contact u...
5568,ham,Will Ì_ b going to esplanade fr home?
5569,ham,"Pity, * was in mood for that. So...any other s..."
5570,ham,The guy did some bitching but I acted like i'd...


In [20]:
# Recode the text labels as integers (ham -> 0, spam -> 1).
# Series.map() turns every value missing from the mapping into NaN, so running
# this cell a second time on the already-numeric column would wipe all labels.
# Only map while the column is still non-numeric, then verify nothing was lost.
label_codes = {'ham': 0, 'spam': 1}
if not pd.api.types.is_numeric_dtype(data['label']):
    data['label'] = data['label'].map(label_codes)
assert data['label'].isin([0, 1]).all(), "label column contains values other than ham/spam"

In [22]:
# Display the frame; the label column should now hold 0/1 instead of ham/spam.
data

Unnamed: 0,label,message
0,0,"Go until jurong point, crazy.. Available only ..."
1,0,Ok lar... Joking wif u oni...
2,1,Free entry in 2 a wkly comp to win FA Cup fina...
3,0,U dun say so early hor... U c already then say...
4,0,"Nah I don't think he goes to usf, he lives aro..."
...,...,...
5567,1,This is the 2nd time we have tried 2 contact u...
5568,0,Will Ì_ b going to esplanade fr home?
5569,0,"Pity, * was in mood for that. So...any other s..."
5570,0,The guy did some bitching but I acted like i'd...


In [24]:
# Count missing values per column (isna is the canonical spelling of isnull).
data.isna().sum()

label      0
message    0
dtype: int64

In [26]:
# Hold out 20% of the messages for evaluation; the fixed seed keeps the split
# reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    data['message'],
    data['label'],
    random_state=42,
    test_size=0.2,
)

In [40]:
# TF-IDF features over unigrams and bigrams, ignoring English stop words.
# TfidfVectorizer has to be imported in the notebook's import cell; the original
# import cell only brought in CountVectorizer, so this cell raised NameError on a
# fresh kernel.
vectorizer = TfidfVectorizer(stop_words='english', ngram_range=(1, 2))

# Learn the vocabulary and IDF weights from the training split only, then apply
# the same fitted transform to the test split.
X_train_vec = vectorizer.fit_transform(X_train)
X_test_vec = vectorizer.transform(X_test)

In [42]:
# Train a multinomial Naive Bayes classifier on the vectorized training messages.
# scikit-learn's fit() returns the estimator itself, so `model` and `output`
# refer to the same fitted object, exactly as before.
model = output = MultinomialNB().fit(X_train_vec, y_train)


In [44]:
# Predict 0/1 labels for the held-out test messages.
y_pred = model.predict(X_test_vec)


In [46]:
# Overall accuracy first, then per-class precision/recall/F1 on the test split.
print(f"Accuracy: {accuracy_score(y_test, y_pred)}")
print(classification_report(y_test, y_pred))

Accuracy: 0.9623318385650225
              precision    recall  f1-score   support

           0       0.96      1.00      0.98       965
           1       1.00      0.72      0.84       150

    accuracy                           0.96      1115
   macro avg       0.98      0.86      0.91      1115
weighted avg       0.96      0.96      0.96      1115



In [48]:
# Classify one message typed by the user with the fitted vectorizer and model.
user_message = input("Enter message:")
# transform() takes an iterable of documents, hence the one-element list.
user_message_vec = vectorizer.transform([user_message])
prediction = model.predict(user_message_vec)

# Label 1 is spam, 0 is ham.
print("Spam message" if prediction[0] == 1 else "Not Spam")



Enter message: Congratulations! you won a free ticket


Not Spam


In [6]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report


# Load the dataset and keep only the label/text columns. Building the frame in
# one chain (rename() returns a new DataFrame) means the label recoding below
# assigns into an independent frame rather than a slice of the raw CSV frame,
# which is what triggers pandas' SettingWithCopyWarning.
data = (
    pd.read_csv("spam.csv", encoding="latin-1")[['v1', 'v2']]
    .rename(columns={'v1': 'label', 'v2': 'message'})
)

# ham -> 0, spam -> 1. map() turns any other value into NaN, so fail loudly
# here instead of passing NaN labels on to the split and the model.
data['label'] = data['label'].map({'ham': 0, 'spam': 1})
assert data['label'].notna().all(), "label column contains values other than ham/spam"


# 80/20 split with a fixed seed; stratifying on the label keeps the ham/spam
# ratio the same in both parts.
X_train, X_test, y_train, y_test = train_test_split(
    data['message'], data['label'],
    stratify=data['label'],
    random_state=42,
    test_size=0.2,
)


# Bag-of-words counts over unigrams and bigrams, without English stop words;
# min_df=2 drops terms that occur in fewer than two training messages.
vectorizer = CountVectorizer(min_df=2, ngram_range=(1, 2), stop_words='english')

# Vocabulary is learned from the training split and reused for the test split.
X_train_vec = vectorizer.fit_transform(X_train)
X_test_vec = vectorizer.transform(X_test)


# Logistic regression with balanced class weights (ham heavily outnumbers spam in
# this data); max_iter is raised to 1000 for the solver.
# fit() returns the estimator, so construction and training can be chained.
model = LogisticRegression(max_iter=1000, class_weight='balanced').fit(X_train_vec, y_train)


# Score the held-out messages and report accuracy plus per-class metrics.
y_pred = model.predict(X_test_vec)
print(f"Accuracy: {accuracy_score(y_test, y_pred)}")
print(classification_report(y_test, y_pred))


# Probability cut-off at or above which a message is reported as spam.
# NOTE(review): this is lower than the 0.5 boundary model.predict() uses, so the
# interactive detector flags more messages than the accuracy/classification
# report printed for the test set reflects.
SPAM_THRESHOLD = 0.35

print("\n--- Spam Detector Ready ---")

while True:
    # Strip surrounding whitespace so inputs such as " exit " still quit the loop.
    user_message = input("\nEnter message (or type 'exit'): ").strip()
    if user_message.lower() == 'exit':
        break
    if not user_message:
        # Nothing to classify; prompt again instead of scoring an empty document.
        continue

    # transform() takes an iterable of documents, hence the one-element list.
    user_message_vec = vectorizer.transform([user_message])

    # Row 0 is the single message; column 1 is the probability of class 1 (spam).
    spam_probability = model.predict_proba(user_message_vec)[0][1]

    if spam_probability >= SPAM_THRESHOLD:
        print(" Spam Message")
    else:
        print(" Not Spam Message")

    print(f"Spam Probability: {spam_probability:.2f}")


Accuracy: 0.9865470852017937
              precision    recall  f1-score   support

           0       0.98      1.00      0.99       966
           1       1.00      0.90      0.95       149

    accuracy                           0.99      1115
   macro avg       0.99      0.95      0.97      1115
weighted avg       0.99      0.99      0.99      1115


--- Spam Detector Ready ---



Enter message (or type 'exit'):  WINNER!!! Claim your prize now. Limited time offer. Call immediately.


 Spam Message
Spam Probability: 0.78



Enter message (or type 'exit'):  Are you coming to college tomorrow for the project discussion?


 Not Spam Message
Spam Probability: 0.02



Enter message (or type 'exit'):  exit
