In [3]:
from sklearn.feature_extraction.text import CountVectorizer

In [4]:
from sklearn.naive_bayes import MultinomialNB

In [5]:
from sklearn.model_selection import train_test_split

In [6]:
from sklearn.metrics import classification_report

In [7]:
texts = [
    "Win money now!", "Buy cheap meds", "Hey, how are you?",
    "Your loan is approved", "Let's catch up tomorrow", "Free entry to win iPhone"
]

In [8]:
labels = [1, 1, 0, 1, 0, 1]

In [9]:
vectorizer = CountVectorizer()

In [10]:
X = vectorizer.fit_transform(texts)

In [44]:
X_train, X_test, y_train, y_test = train_test_split(X, labels, test_size=0.2, random_state=42)


In [45]:
model = MultinomialNB(alpha = 0.1,fit_prior =True)

In [46]:
model.fit(X_train, y_train)

In [47]:
y_pred = model.predict(X_test)

In [48]:
print(classification_report(y_test, y_pred))


              precision    recall  f1-score   support

           0       0.00      0.00      0.00         0
           1       1.00      0.50      0.67         2

    accuracy                           0.50         2
   macro avg       0.50      0.25      0.33         2
weighted avg       1.00      0.50      0.67         2



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [49]:
probabilities = model.predict_proba(X_test)
for i, (text, prob) in enumerate(zip(texts, probabilities)):
    spam_prob = prob[1]
    print(f"'{text}' -> Spam probability: {spam_prob:.3f}")

'Win money now!' -> Spam probability: 0.893
'Buy cheap meds' -> Spam probability: 0.430


In [50]:
new_texts = [
    "Congratulations! You've been selected for a prize.",
    "Let's meet for lunch today.",
    "Limited-time offer: Get free vacation tickets!",
    "Can you send me the report by tomorrow?"
]

new_vectors = vectorizer.transform(new_texts)

predictions = model.predict(new_vectors)

for text, label in zip(new_texts, predictions):
    print(f"{label} -> {text}")

0 -> Congratulations! You've been selected for a prize.
0 -> Let's meet for lunch today.
1 -> Limited-time offer: Get free vacation tickets!
0 -> Can you send me the report by tomorrow?


In [52]:
from sklearn.feature_extraction.text import TfidfVectorizer

In [53]:
vectorizer = TfidfVectorizer(stop_words='english', ngram_range=(1,2), max_df=0.9, min_df=2)
X = vectorizer.fit_transform(texts)

In [55]:
from sklearn.model_selection import StratifiedKFold, train_test_split

X_train, X_test, y_train, y_test = train_test_split(
    X, labels, test_size=0.3, random_state=42, stratify=labels
)

In [56]:
model = MultinomialNB(alpha=0.1)

In [57]:
model.fit(X_train, y_train)

In [58]:
y_pred = model.predict(X_test)

In [59]:
print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

           0       0.00      0.00      0.00         1
           1       0.50      1.00      0.67         1

    accuracy                           0.50         2
   macro avg       0.25      0.50      0.33         2
weighted avg       0.25      0.50      0.33         2



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [60]:
probabilities = model.predict_proba(X_test)
for i, (text, prob) in enumerate(zip(texts, probabilities)):
    spam_prob = prob[1]
    print(f"'{text}' -> Spam probability: {spam_prob:.3f}")

'Win money now!' -> Spam probability: 0.750
'Buy cheap meds' -> Spam probability: 0.750


In [61]:
new_texts = [
    "Congratulations! You've been selected for a prize.",
    "Let's meet for lunch today.",
    "Limited-time offer: Get free vacation tickets!",
    "Can you send me the report by tomorrow?"
]

new_vectors = vectorizer.transform(new_texts)

predictions = model.predict(new_vectors)

for text, label in zip(new_texts, predictions):
    print(f"{label} -> {text}")

1 -> Congratulations! You've been selected for a prize.
1 -> Let's meet for lunch today.
1 -> Limited-time offer: Get free vacation tickets!
1 -> Can you send me the report by tomorrow?
