# Naive Bayes

In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer
from sklearn.naive_bayes import GaussianNB, MultinomialNB, BernoulliNB
from sklearn.metrics import confusion_matrix, accuracy_score, classification_report



In [3]:
# Read the raw CSV; the file is decoded as latin1 rather than pandas' default UTF-8.
DATA_PATH = '/assets/naive-bayes.csv'
data = pd.read_csv(DATA_PATH, encoding="latin1")


In [4]:
# Column 'v2' supplies the inputs to vectorize and column 'v1' the class labels.
# 20% of the rows are held out for testing; random_state pins the shuffle so the
# split is the same on every run.
messages, labels = data['v2'], data['v1']
X_train, X_test, y_train, y_test = train_test_split(
    messages,
    labels,
    test_size=0.2,
    random_state=42,
)

# Build the vocabulary from the training split only, then encode the test split
# with that same vocabulary so both matrices share one column layout.
vectorizer = CountVectorizer()
X_train_freq = vectorizer.fit_transform(X_train)
X_test_freq = vectorizer.transform(X_test)

### 1. Gaussian Naive Bayes

In [5]:
# Gaussian NB is given dense arrays, so the sparse count matrices are
# densified with .toarray() at both fit and predict time.
gnb = GaussianNB().fit(X_train_freq.toarray(), y_train)
y_pred_gnb = gnb.predict(X_test_freq.toarray())

### 2. Multinomial Naive Bayes

In [6]:
# Multinomial NB is trained directly on the sparse term-count matrix.
mnb = MultinomialNB().fit(X_train_freq, y_train)
y_pred_mnb = mnb.predict(X_test_freq)

### 3. Bernoulli Naive Bayes

In [7]:
# Bernoulli NB gets its own vectorizer: binary=True records only whether a term
# occurs in a document (0/1) instead of how many times.
vectorizer_binary = CountVectorizer(binary=True)
X_train_binary = vectorizer_binary.fit_transform(X_train)
X_test_binary = vectorizer_binary.transform(X_test)

# Fit on the binary training features, then predict the held-out split.
bnb = BernoulliNB().fit(X_train_binary, y_train)
y_pred_bnb = bnb.predict(X_test_binary)

## Evaluation

In [8]:
def evaluate_model(y_true, y_pred, model_name):
    """Print a three-part evaluation summary for one classifier.

    Writes, in order, the confusion matrix, the accuracy score and the
    classification report to stdout, each under a heading that includes
    ``model_name``.

    Args:
        y_true: Ground-truth labels.
        y_pred: Labels predicted by the model, aligned with ``y_true``.
        model_name: Display name inserted into each heading.

    Returns:
        None. The function only prints.
    """
    # Each metric is computed immediately before it is printed.
    print(f"\nConfusion Matrix for {model_name}:\n")
    matrix = confusion_matrix(y_true, y_pred)
    print(matrix)

    accuracy = accuracy_score(y_true, y_pred)
    print(f"\nAccuracy for {model_name}: {accuracy}")

    print(f"\nClassification Report for {model_name}:\n")
    report = classification_report(y_true, y_pred)
    print(report)

# Report every classifier against the same held-out labels, in the order the
# models were trained: Gaussian, Multinomial, Bernoulli.
model_predictions = (
    ('Gaussian Naive Bayes', y_pred_gnb),
    ('Multinomial Naive Bayes', y_pred_mnb),
    ('Bernoulli Naive Bayes', y_pred_bnb),
)
for model_name, y_pred in model_predictions:
    evaluate_model(y_test, y_pred, model_name)


Confusion Matrix for Gaussian Naive Bayes:

[[866  99]
 [ 12 138]]

Accuracy for Gaussian Naive Bayes: 0.9004484304932735

Classification Report for Gaussian Naive Bayes:

              precision    recall  f1-score   support

         ham       0.99      0.90      0.94       965
        spam       0.58      0.92      0.71       150

    accuracy                           0.90      1115
   macro avg       0.78      0.91      0.83      1115
weighted avg       0.93      0.90      0.91      1115


Confusion Matrix for Multinomial Naive Bayes:

[[963   2]
 [ 16 134]]

Accuracy for Multinomial Naive Bayes: 0.9838565022421525

Classification Report for Multinomial Naive Bayes:

              precision    recall  f1-score   support

         ham       0.98      1.00      0.99       965
        spam       0.99      0.89      0.94       150

    accuracy                           0.98      1115
   macro avg       0.98      0.95      0.96      1115
weighted avg       0.98      0.98      0.98   