**1. (Gaussian Naïve Bayes Classifier) Implement a Gaussian Naïve Bayes
classifier on the Iris dataset from sklearn.datasets using
(i) a step-by-step implementation
(ii) the in-built function**


In [1]:
import numpy as np
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# Load the Iris measurements and hold out 30% of the rows for evaluation.
iris = load_iris()
X, y = iris.data, iris.target

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42
)

# Per-class Gaussian parameters, keyed by class label:
#   mean[label]   -> per-feature mean of the training rows of that class
#   var[label]    -> per-feature (population, ddof=0) variance of those rows
#   priors[label] -> fraction of training rows that belong to that class
classes = np.unique(y_train)
mean, var, priors = {}, {}, {}
n_train = X_train.shape[0]

for label in classes:
    rows = X_train[y_train == label]
    priors[label] = rows.shape[0] / n_train
    mean[label] = rows.mean(axis=0)
    var[label] = rows.var(axis=0)

def gaussian_probability(x, mean, var, eps=1e-9):
    """Evaluate the univariate Gaussian density N(x | mean, var) element-wise.

    Parameters
    ----------
    x : scalar or array-like
        Point(s) at which the density is evaluated.
    mean : scalar or array-like
        Mean of the Gaussian; broadcast against ``x``.
    var : scalar or array-like
        Variance of the Gaussian; broadcast against ``x``.
    eps : float, optional
        Lower bound applied to ``var`` before it is used as a divisor.
        Variances that are already >= ``eps`` are left untouched, so results
        for well-conditioned inputs are unchanged.

    Returns
    -------
    numpy.ndarray or numpy.float64
        Density value(s) with the broadcast shape of the inputs.
    """
    # A feature that is constant within a class has zero variance; dividing by
    # it would produce nan/inf, so clamp the variance to a tiny positive floor.
    var = np.maximum(var, eps)
    exponent = np.exp(-((x - mean) ** 2) / (2 * var))
    return (1 / np.sqrt(2 * np.pi * var)) * exponent

def _gaussian_log_likelihood(x, mean, var, eps=1e-9):
    """Return the sum over features of log N(x_j | mean_j, var_j)."""
    # Clamp zero variances so the division and the logarithm stay finite.
    var = np.maximum(var, eps)
    return np.sum(-0.5 * np.log(2 * np.pi * var) - ((x - mean) ** 2) / (2 * var))


def predict(X):
    """Predict a class label for every sample in ``X`` with Gaussian Naive Bayes.

    Uses the module-level ``classes``, ``priors``, ``mean`` and ``var`` and,
    for each sample, picks the class that maximises
    ``log(prior) + sum_j log N(x_j | mean_j, var_j)``.

    The Gaussian log-density is evaluated in closed form rather than as
    ``np.log`` of the density: for samples far from a class mean the density
    underflows to 0, its logarithm becomes ``-inf`` for every class, and the
    argmax then silently falls back to the first class.

    Parameters
    ----------
    X : iterable of array-like
        Samples to classify; each item is one feature vector.

    Returns
    -------
    numpy.ndarray
        Predicted labels, one per sample, in input order.
    """
    # The log-priors do not depend on the sample, so compute them once.
    log_priors = [np.log(priors[c]) for c in classes]

    y_pred = []
    for x in X:
        posteriors = [
            log_prior + _gaussian_log_likelihood(x, mean[c], var[c])
            for c, log_prior in zip(classes, log_priors)
        ]
        y_pred.append(classes[np.argmax(posteriors)])
    return np.array(y_pred)

# Score the hand-written classifier on the held-out test split.
y_pred = predict(X_test)
step_by_step_accuracy = accuracy_score(y_test, y_pred)
print("Step-by-step Gaussian NB Accuracy:", step_by_step_accuracy)


Step-by-step Gaussian NB Accuracy: 0.9777777777777777


**2. Explore the GridSearchCV tool in scikit-learn. This is a tool that is
often used for tuning hyperparameters of machine learning models. Use
this tool to find the best value of K for a K-NN classifier using any dataset.**

In [2]:
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score

# Load Iris and hold out 30% of the samples for the final evaluation.
iris = load_iris()
X, y = iris.data, iris.target

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42
)

# Candidate neighbourhood sizes K = 1..20, each scored by accuracy under
# 5-fold cross-validation on the training split only.
knn = KNeighborsClassifier()
param_grid = {'n_neighbors': range(1, 21)}
grid = GridSearchCV(estimator=knn, param_grid=param_grid, cv=5, scoring='accuracy')
grid.fit(X_train, y_train)

print("Best K value:", grid.best_params_['n_neighbors'])
print("Best cross-validation accuracy:", grid.best_score_)

# Evaluate the best estimator found by the search on the untouched test split.
best_knn = grid.best_estimator_
y_pred = best_knn.predict(X_test)
print("Test set accuracy with best K:", accuracy_score(y_test, y_pred))


Best K value: 1
Best cross-validation accuracy: 0.9523809523809523
Test set accuracy with best K: 1.0
