# 1_Naive_Bayes_Classifier.ipynb

In [2]:
# --- Imports ---
import numpy as np
import pandas as pd
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split, cross_val_score, StratifiedKFold
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import confusion_matrix, classification_report, roc_auc_score, accuracy_score

In [3]:
# --- Load dataset ---
iris = load_iris()
X = iris.data
y = iris.target

In [4]:
# --- Normalize/Scale the data ---
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

In [5]:
# --- PCA for dimensionality reduction ---
pca = PCA(n_components=2)
X_pca = pca.fit_transform(X_scaled)

In [6]:
# --- Train-Test Split ---
X_train, X_test, y_train, y_test = train_test_split(X_pca, y, test_size=0.2, random_state=42)

# --- Model training ---
model = GaussianNB()
model.fit(X_train, y_train)

In [7]:
# --- Predictions ---
y_pred = model.predict(X_test)

In [8]:
# --- Evaluation Metrics ---
cm = confusion_matrix(y_test, y_pred)
accuracy = accuracy_score(y_test, y_pred)
report = classification_report(y_test, y_pred, output_dict=True)

print("Confusion Matrix:\n", cm)
print("\nClassification Report:\n", classification_report(y_test, y_pred))
print(f"Accuracy: {accuracy:.4f}")

Confusion Matrix:
 [[10  0  0]
 [ 0  8  1]
 [ 0  0 11]]

Classification Report:
               precision    recall  f1-score   support

           0       1.00      1.00      1.00        10
           1       1.00      0.89      0.94         9
           2       0.92      1.00      0.96        11

    accuracy                           0.97        30
   macro avg       0.97      0.96      0.97        30
weighted avg       0.97      0.97      0.97        30

Accuracy: 0.9667


In [9]:
# --- Cross-validation ---
kfold = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
cv_scores = cross_val_score(model, X_pca, y, cv=kfold)

print("\nCross-validation scores:", cv_scores)
print("Average CV Score:", np.mean(cv_scores))


Cross-validation scores: [0.93333333 0.9        0.9        0.96666667 0.76666667]
Average CV Score: 0.8933333333333333
