# Step-by-step implementation of Gaussian Naïve Bayes on the Iris dataset

In [1]:
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, confusion_matrix
import numpy as np

# Fetch the Iris bunch and pull out the feature matrix and the label vector
iris = load_iris()
X, y = iris.data, iris.target

# Hold out 30% of the samples for testing; the fixed seed keeps the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
)

# Estimate per-class Gaussian parameters and class priors from the training split
classes = np.unique(y_train)
mean, var, priors = {}, {}, {}

for c in classes:
    # Training rows whose label is the current class
    X_c = X_train[y_train == c]
    # Feature-wise mean and population variance (NumPy's default, ddof=0)
    mean[c] = np.mean(X_c, axis=0)
    var[c] = np.var(X_c, axis=0)
    # Prior = fraction of the training samples that belong to this class
    priors[c] = len(X_c) / len(X_train)

# Define Gaussian probability function
def gaussian_prob(x, mean, var, eps=1e-6):
    """Evaluate the Gaussian density N(x; mean, var + eps) element-wise.

    Parameters
    ----------
    x : scalar or array-like
        Point(s) at which the density is evaluated.
    mean : scalar or array-like
        Mean of the Gaussian; must broadcast against ``x``.
    var : scalar or array-like
        Variance of the Gaussian; must broadcast against ``x``.
    eps : float, optional
        Smoothing term added to ``var`` so that a zero variance does not
        lead to a division by zero. Defaults to ``1e-6``.

    Returns
    -------
    numpy.ndarray or numpy scalar
        Density value(s), with the broadcast shape of the inputs.
    """
    x = np.asarray(x, dtype=float)
    mean = np.asarray(mean, dtype=float)
    # Smooth the variance once and reuse it, so the normalising coefficient and
    # the exponent describe the same Gaussian (previously eps was added to
    # 2*pi*var in one place and to 2*var in the other).
    smoothed_var = np.asarray(var, dtype=float) + eps
    coeff = 1.0 / np.sqrt(2.0 * np.pi * smoothed_var)
    exponent = np.exp(-((x - mean) ** 2) / (2.0 * smoothed_var))
    return coeff * exponent

# Predict function
def predict(X):
    """Predict a class label for each row of ``X`` with Gaussian Naive Bayes.

    Uses the module-level ``classes``, ``priors``, ``mean`` and ``var``
    together with ``gaussian_prob``.

    Parameters
    ----------
    X : iterable of array-like
        Samples to classify; each element is one feature vector.

    Returns
    -------
    numpy.ndarray
        One predicted label per sample, taken from ``classes``.
    """
    y_pred = []
    for x in X:
        # Log-prior plus summed per-feature log-likelihood for every class,
        # kept in the same order as ``classes``.
        posteriors = [
            np.log(priors[c]) + np.sum(np.log(gaussian_prob(x, mean[c], var[c])))
            for c in classes
        ]
        # argmax yields a position in ``classes``; map it back to the actual
        # label so the result is right even when labels are not 0..n-1.
        y_pred.append(classes[np.argmax(posteriors)])
    # Use the label dtype so an empty input still gives a label-typed array
    return np.array(y_pred, dtype=classes.dtype)

# Predict and evaluate
y_pred = predict(X_test)
# Compute both metrics first, then report them
step_accuracy = accuracy_score(y_test, y_pred)
step_confusion = confusion_matrix(y_test, y_pred)
print("Step-by-step Accuracy:", step_accuracy)
print("Confusion Matrix:\n", step_confusion)


Step-by-step Accuracy: 0.9777777777777777
Confusion Matrix:
 [[19  0  0]
 [ 0 12  1]
 [ 0  0 13]]


# In-built GaussianNB from sklearn

In [2]:
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score, confusion_matrix

# Fit scikit-learn's Gaussian Naive Bayes on the training split
gnb = GaussianNB().fit(X_train, y_train)

# Label the held-out samples
y_pred_inbuilt = gnb.predict(X_test)

# Report accuracy and the confusion matrix on the test split
inbuilt_accuracy = accuracy_score(y_test, y_pred_inbuilt)
inbuilt_confusion = confusion_matrix(y_test, y_pred_inbuilt)
print("In-built GaussianNB Accuracy:", inbuilt_accuracy)
print("Confusion Matrix:\n", inbuilt_confusion)


In-built GaussianNB Accuracy: 0.9777777777777777
Confusion Matrix:
 [[19  0  0]
 [ 0 12  1]
 [ 0  0 13]]


# GridSearchCV to Find Best K for KNN Classifier

In [3]:
from sklearn.datasets import load_iris
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import GridSearchCV, train_test_split

# Load the Iris features and labels
iris = load_iris()
X, y = iris.data, iris.target

# 70/30 train/test split with a fixed seed
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42
)

# Candidate neighbour counts: every integer from 1 through 20
param_grid = {'n_neighbors': list(range(1, 21))}

# 5-fold cross-validated search over the grid, scored by accuracy
knn = KNeighborsClassifier()
grid = GridSearchCV(estimator=knn, param_grid=param_grid, scoring='accuracy', cv=5)
grid.fit(X_train, y_train)

# Report the winning setting and its mean cross-validated accuracy
print("Best K value:", grid.best_params_)
print("Best Accuracy:", grid.best_score_)


Best K value: {'n_neighbors': 1}
Best Accuracy: 0.9523809523809523
