In [1]:
!pip install pandas numpy scikit-learn


Collecting scikit-learn
  Downloading scikit_learn-1.7.1-cp313-cp313-win_amd64.whl.metadata (11 kB)
Collecting scipy>=1.8.0 (from scikit-learn)
  Downloading scipy-1.16.0-cp313-cp313-win_amd64.whl.metadata (60 kB)
Collecting threadpoolctl>=3.1.0 (from scikit-learn)
  Downloading threadpoolctl-3.6.0-py3-none-any.whl.metadata (13 kB)
Downloading scikit_learn-1.7.1-cp313-cp313-win_amd64.whl (8.7 MB)
   ---------------------------------------- 0.0/8.7 MB ? eta -:--:--
   -- ------------------------------------- 0.5/8.7 MB 3.2 MB/s eta 0:00:03
   ---- ----------------------------------- 1.0/8.7 MB 3.0 MB/s eta 0:00:03
   -------- ------------------------------- 1.8/8.7 MB 3.3 MB/s eta 0:00:03
   ------------- -------------------------- 2.9/8.7 MB 3.5 MB/s eta 0:00:02
   ---------------- ----------------------- 3.7/8.7 MB 3.7 MB/s eta 0:00:02
   ---------------------- ----------------- 5.0/8.7 MB 4.0 MB/s eta 0:00:01
   --------------------------- ------------ 6.0/8.7 MB 4.1 MB/s eta 0:00:01

In [2]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import accuracy_score


In [3]:
data = {
    'message': [
        'Congratulations! you have won a free ticket',
        'Hi, how are you doing today?',
        'Win cash prizes now!!!',
        'Are we still meeting tomorrow?',
        'Free entry in 2 a weekly competition!',
        'Let’s catch up for dinner',
        'Exclusive offer just for you',
        'Hello, please review the document'
    ],
    'label': ['spam', 'ham', 'spam', 'ham', 'spam', 'ham', 'spam', 'ham']
}

df = pd.DataFrame(data)
print(df)


                                       message label
0  Congratulations! you have won a free ticket  spam
1                 Hi, how are you doing today?   ham
2                       Win cash prizes now!!!  spam
3               Are we still meeting tomorrow?   ham
4        Free entry in 2 a weekly competition!  spam
5                    Let’s catch up for dinner   ham
6                 Exclusive offer just for you  spam
7            Hello, please review the document   ham


In [4]:
df['label'] = df['label'].map({'spam': 1, 'ham': 0})
print(df)


                                       message  label
0  Congratulations! you have won a free ticket      1
1                 Hi, how are you doing today?      0
2                       Win cash prizes now!!!      1
3               Are we still meeting tomorrow?      0
4        Free entry in 2 a weekly competition!      1
5                    Let’s catch up for dinner      0
6                 Exclusive offer just for you      1
7            Hello, please review the document      0


In [5]:
# Vectorize text data (convert messages into numerical form)
vectorizer = CountVectorizer()
X = vectorizer.fit_transform(df['message'])

# Labels
y = df['label']


In [6]:
# Split data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


In [7]:
# Train the Naive Bayes classifier
model = MultinomialNB()
model.fit(X_train, y_train)


0,1,2
,alpha,1.0
,force_alpha,True
,fit_prior,True
,class_prior,


In [8]:
# Predict using the model
y_pred = model.predict(X_test)


In [9]:
# Check model accuracy
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)


Accuracy: 0.5


In [10]:
# Try with a new message
sample = ["Congratulations! You won a free cruise ticket."]
sample_vec = vectorizer.transform(sample)
prediction = model.predict(sample_vec)

print("Prediction (1 = Spam, 0 = Not Spam):", prediction[0])


Prediction (1 = Spam, 0 = Not Spam): 1
