In [4]:
import sys
!{sys.executable} -m pip install pandas numpy scikit-learn

Collecting scikit-learn
  Downloading scikit_learn-1.8.0-cp313-cp313-win_amd64.whl.metadata (11 kB)
Collecting scipy>=1.10.0 (from scikit-learn)
  Downloading scipy-1.17.0-cp313-cp313-win_amd64.whl.metadata (60 kB)
Collecting joblib>=1.3.0 (from scikit-learn)
  Using cached joblib-1.5.3-py3-none-any.whl.metadata (5.5 kB)
Collecting threadpoolctl>=3.2.0 (from scikit-learn)
  Using cached threadpoolctl-3.6.0-py3-none-any.whl.metadata (13 kB)
Downloading scikit_learn-1.8.0-cp313-cp313-win_amd64.whl (8.0 MB)
   ---------------------------------------- 0.0/8.0 MB ? eta -:--:--
   - -------------------------------------- 0.3/8.0 MB ? eta -:--:--
   - -------------------------------------- 0.3/8.0 MB ? eta -:--:--
   - -------------------------------------- 0.3/8.0 MB ? eta -:--:--
   -- ------------------------------------- 0.5/8.0 MB 398.2 kB/s eta 0:00:19
   -- ------------------------------------- 0.5/8.0 MB 398.2 kB/s eta 0:00:19
   ----- ---------------------------------- 1.0/8.0 MB 696


[notice] A new release of pip is available: 25.2 -> 25.3
[notice] To update, run: python.exe -m pip install --upgrade pip


In [5]:
import pandas as pd
import numpy as np

from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix


In [6]:
data = {
    'label': ['ham','spam','ham','spam','ham','spam','ham','spam'],
    'message': [
        'Hey, are we meeting today?',
        'Congratulations! You won a free lottery ticket',
        'Can you call me later?',
        'Win cash prizes now!!!',
        'Let us study machine learning',
        'Limited offer, claim now',
        'How was your exam?',
        'Free entry in a contest, text now'
    ]
}

df = pd.DataFrame(data)
df.head()


Unnamed: 0,label,message
0,ham,"Hey, are we meeting today?"
1,spam,Congratulations! You won a free lottery ticket
2,ham,Can you call me later?
3,spam,Win cash prizes now!!!
4,ham,Let us study machine learning


In [7]:
df['label'] = df['label'].map({'ham':0, 'spam':1})
X = df['message']
y = df['label']


In [8]:
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.25, random_state=42
)


In [9]:
vectorizer = TfidfVectorizer(stop_words='english')
X_train_tfidf = vectorizer.fit_transform(X_train)
X_test_tfidf = vectorizer.transform(X_test)


In [10]:
model = MultinomialNB()
model.fit(X_train_tfidf, y_train)


0,1,2
,"alpha  alpha: float or array-like of shape (n_features,), default=1.0 Additive (Laplace/Lidstone) smoothing parameter (set alpha=0 and force_alpha=True, for no smoothing).",1.0
,"force_alpha  force_alpha: bool, default=True If False and alpha is less than 1e-10, it will set alpha to 1e-10. If True, alpha will remain unchanged. This may cause numerical errors if alpha is too close to 0. .. versionadded:: 1.2 .. versionchanged:: 1.4  The default value of `force_alpha` changed to `True`.",True
,"fit_prior  fit_prior: bool, default=True Whether to learn class prior probabilities or not. If false, a uniform prior will be used.",True
,"class_prior  class_prior: array-like of shape (n_classes,), default=None Prior probabilities of the classes. If specified, the priors are not adjusted according to the data.",


In [11]:
y_pred = model.predict(X_test_tfidf)


In [13]:
print("Accuracy:", accuracy_score(y_test, y_pred))
print(
    classification_report(
        y_test, 
        y_pred, 
        zero_division=0
    )
)
print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred))


Accuracy: 0.0
              precision    recall  f1-score   support

           0       0.00      0.00      0.00       0.0
           1       0.00      0.00      0.00       2.0

    accuracy                           0.00       2.0
   macro avg       0.00      0.00      0.00       2.0
weighted avg       0.00      0.00      0.00       2.0

Confusion Matrix:
 [[0 0]
 [2 0]]


In [14]:
new_email = ['Congratulations! You have won free coupons']
new_email_tfidf = vectorizer.transform(new_email)
prediction = model.predict(new_email_tfidf)

if prediction[0] == 1:
    print("Spam Email")
else:
    print("Not Spam (Ham)")


Not Spam (Ham)
