### Gaussian Naive Bayes
- **Use Case:** Suitable for continuous features that are assumed to be normally distributed within each class.
- **Common Applications:** Classification problems involving continuous features, such as the Iris dataset.

In [4]:
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score, classification_report

# Fetch the Iris bunch, then unpack its feature matrix and class labels.
data = load_iris()
X, y = data.data, data.target

# Hold out 30% of the samples for evaluation; the fixed seed keeps the
# partition identical from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
)

# Construct the Gaussian Naive Bayes estimator and fit it on the training
# portion in one step (fit() hands back the fitted estimator).
gnb = GaussianNB().fit(X_train, y_train)

# Label the held-out samples.
y_pred = gnb.predict(X_test)

# Score the predictions: overall accuracy plus per-class precision/recall/F1.
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
report = classification_report(y_true=y_test, y_pred=y_pred)

print(f'Gaussian Naive Bayes Accuracy: {accuracy:.2f}')
print(f'Classification Report:\n{report}')


Gaussian Naive Bayes Accuracy: 0.98
Classification Report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        19
           1       1.00      0.92      0.96        13
           2       0.93      1.00      0.96        13

    accuracy                           0.98        45
   macro avg       0.98      0.97      0.97        45
weighted avg       0.98      0.98      0.98        45



### Multinomial Naive Bayes
- **Use Case:** Ideal for discrete data, such as word counts in text classification.
- **Common Applications:** Document classification, spam filtering, sentiment analysis.
- **Example:**


In [21]:
# NOTE(review): the recorded output below appears to be the shape of the
# newsgroups document-term matrix that the *following* cell assigns to X, not
# the Iris features assigned to X in the cell above. This cell's execution
# count (21) is also later than the following cell's (8), so it was run out of
# order. On a fresh top-to-bottom run X would still be the Iris array at this
# point -- consider moving this check after the vectorization cell.
X.shape

(2257, 35788)

In [8]:
from sklearn.datasets import fetch_20newsgroups
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import accuracy_score, classification_report

# Pull the training subset of 20 Newsgroups, restricted to four topics.
categories = [
    'alt.atheism',
    'soc.religion.christian',
    'comp.graphics',
    'sci.med',
]
newsgroups_data = fetch_20newsgroups(categories=categories, subset='train')

# Encode every document as a sparse vector of raw token counts.
# NOTE(review): the vocabulary is learned from the whole corpus *before* the
# train/test split below, so tokens that occur only in held-out documents are
# already part of the feature space. For a strict evaluation, split the raw
# texts first and fit the vectorizer on the training portion only.
vectorizer = CountVectorizer()
X, y = vectorizer.fit_transform(newsgroups_data.data), newsgroups_data.target

# Reserve 30% of the documents for evaluation; the fixed seed keeps the
# partition identical from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
)

# Construct the Multinomial Naive Bayes estimator and fit it on the training
# documents in one step (fit() hands back the fitted estimator).
mnb = MultinomialNB().fit(X_train, y_train)

# Label the held-out documents.
y_pred = mnb.predict(X_test)

# Score the predictions; the report rows are labelled with newsgroup names.
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
report = classification_report(
    y_true=y_test,
    y_pred=y_pred,
    target_names=newsgroups_data.target_names,
)

print(f'Multinomial Naive Bayes Accuracy: {accuracy:.2f}')
print(f'Classification Report:\n{report}')


Multinomial Naive Bayes Accuracy: 0.97
Classification Report:
                        precision    recall  f1-score   support

           alt.atheism       0.95      0.95      0.95       129
         comp.graphics       0.95      0.98      0.97       169
               sci.med       0.98      0.97      0.98       189
soc.religion.christian       0.97      0.96      0.96       191

              accuracy                           0.97       678
             macro avg       0.96      0.97      0.97       678
          weighted avg       0.97      0.97      0.97       678




### Bernoulli Naive Bayes
- **Use Case:** Best suited for binary/boolean features, such as presence or absence of a word in a document.
- **Common Applications:** Binary text classification, document categorization.

In [3]:
from sklearn.datasets import fetch_20newsgroups
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import BernoulliNB
from sklearn.metrics import accuracy_score, classification_report

# Pull the training subset of 20 Newsgroups, restricted to four topics.
categories = [
    'alt.atheism',
    'soc.religion.christian',
    'comp.graphics',
    'sci.med',
]
newsgroups_data = fetch_20newsgroups(categories=categories, subset='train')

# Encode every document as a sparse 0/1 vector: binary=True records whether a
# token is present rather than how many times it occurs.
# NOTE(review): the vocabulary is learned from the whole corpus *before* the
# train/test split below, so tokens that occur only in held-out documents are
# already part of the feature space. For a strict evaluation, split the raw
# texts first and fit the vectorizer on the training portion only.
vectorizer = CountVectorizer(binary=True)
X, y = vectorizer.fit_transform(newsgroups_data.data), newsgroups_data.target

# Reserve 30% of the documents for evaluation; the fixed seed keeps the
# partition identical from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
)

# Construct the Bernoulli Naive Bayes estimator and fit it on the training
# documents in one step (fit() hands back the fitted estimator).
bnb = BernoulliNB().fit(X_train, y_train)

# Label the held-out documents.
y_pred = bnb.predict(X_test)

# Score the predictions; the report rows are labelled with newsgroup names.
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
report = classification_report(
    y_true=y_test,
    y_pred=y_pred,
    target_names=newsgroups_data.target_names,
)

print(f'Bernoulli Naive Bayes Accuracy: {accuracy:.2f}')
print(f'Classification Report:\n{report}')

Bernoulli Naive Bayes Accuracy: 0.87
Classification Report:
                        precision    recall  f1-score   support

           alt.atheism       0.98      0.88      0.93       129
         comp.graphics       0.67      1.00      0.80       169
               sci.med       0.99      0.77      0.87       189
soc.religion.christian       0.98      0.84      0.90       191

              accuracy                           0.87       678
             macro avg       0.91      0.87      0.88       678
          weighted avg       0.91      0.87      0.87       678

