In [11]:
import warnings
# Install a global "ignore" filter so that no warning of any category is shown
# for the rest of the session; this keeps the printed cell output compact.
# NOTE(review): the filter is not limited to a category or module, so it also
# hides warnings raised by the libraries used in later cells — confirm that
# silencing everything (rather than one specific warning) is intended.
warnings.filterwarnings("ignore")

import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.metrics import classification_report, accuracy_score

In [12]:
# Tiny hand-written corpus kept as (message, label) pairs so each text sits
# next to its own label. Label 1 is spam and label 0 is ham, matching the
# Ham/Spam ordering used when the classification report is printed.
labelled_messages = [
    ("Congratulations! You've won a $1000 Walmart gift card. Click to claim now!", 1),
    ("Hi John, are we still meeting tomorrow?", 0),
    ("Lowest price guaranteed, buy now!", 1),
    ("Dear customer, your bill is due. Pay here.", 0),
    ("Free entry in a contest to win a car. Text WIN to 12345", 1),
    ("Can you send me the project files?", 0),
]

# Unzip the pairs into the column-oriented dict the DataFrame is built from.
data = {
    'text': [message for message, _label in labelled_messages],
    'label': [label for _message, label in labelled_messages],
}
df = pd.DataFrame(data)

In [13]:
# Separate the raw message text (model input) from its 0/1 label (target).
x, y = df['text'], df['label']

In [14]:
# Hold out 20% of the rows for evaluation with a fixed seed for repeatability.
# stratify=y keeps the label proportions the same in both parts, so on a
# dataset this small neither the training set nor the test set can end up
# containing only one class by chance.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.2,
    random_state=10,
    stratify=y,
)

In [16]:
# Text featurizers to compare, keyed by the name printed in the results.
vectorizers = dict([
    ('CountVectorizer', CountVectorizer()),
    ('TfidfVectorizer', TfidfVectorizer()),
])

# Classifiers to compare, keyed by the name printed in the results.
# LogisticRegression gets a raised iteration cap (max_iter=1000) and the SVM
# uses a linear kernel.
models = dict([
    ('MultinomialNB', MultinomialNB()),
    ('LogisticRegression', LogisticRegression(max_iter=1000)),
    ('SVM', SVC(kernel='linear')),
])

In [25]:
# Evaluate every vectorizer/classifier pairing on the held-out split and print
# its accuracy and per-class report.
for vect_name, vectorizer in vectorizers.items():
    print(f"\n--- Using {vect_name} ---")
    # Fit the vectorizer on the training texts only; the test texts are just
    # transformed with what was learned from the training texts.
    x_train_vect = vectorizer.fit_transform(x_train)
    x_test_vect = vectorizer.transform(x_test)

    for model_name, model in models.items():
        model.fit(x_train_vect, y_train)
        y_pred = model.predict(x_test_vect)
        print(f"\nModel: {model_name}")
        print("Accuracy:", accuracy_score(y_test, y_pred))
        # labels=[0, 1] pins the report to both classes so the two
        # target_names always line up (0 -> Ham, 1 -> Spam); without it the
        # call raises ValueError when only one class shows up in
        # y_test/y_pred. zero_division=0 states explicitly that an undefined
        # precision/recall is reported as 0 instead of depending on the
        # warning-based default.
        print(classification_report(
            y_test,
            y_pred,
            labels=[0, 1],
            target_names=['Ham', 'Spam'],
            zero_division=0,
        ))


--- Using CountVectorizer ---

Model: MultinomialNB
Accuracy: 0.5
              precision    recall  f1-score   support

         Ham       0.00      0.00      0.00         1
        Spam       0.50      1.00      0.67         1

    accuracy                           0.50         2
   macro avg       0.25      0.50      0.33         2
weighted avg       0.25      0.50      0.33         2


Model: LogisticRegression
Accuracy: 0.5
              precision    recall  f1-score   support

         Ham       0.50      1.00      0.67         1
        Spam       0.00      0.00      0.00         1

    accuracy                           0.50         2
   macro avg       0.25      0.50      0.33         2
weighted avg       0.25      0.50      0.33         2


Model: SVM
Accuracy: 0.5
              precision    recall  f1-score   support

         Ham       0.50      1.00      0.67         1
        Spam       0.00      0.00      0.00         1

    accuracy                           0.50     