1.  (Gaussian Naïve Bayes Classifier) Implement a Gaussian Naïve Bayes 
classifier on the Iris dataset from sklearn.datasets using 
 
        (i) Step-by-step implementation 
        (ii) In-built function

(i)

In [1]:
import numpy as np
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, confusion_matrix

In [2]:
# Load the Iris data and hold out 30% of the rows for testing
# (fixed random_state so the split is reproducible).
data = load_iris()
X, y = data.data, data.target

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42
)

classes = np.unique(y_train)
n_features = X_train.shape[1]

# Per-class parameters of the Gaussian Naive Bayes model, keyed by class label.
priors, means, vars_ = {}, {}, {}

for label in classes:
    X_label = X_train[y_train == label]
    # Prior: fraction of training rows that carry this label.
    priors[label] = X_label.shape[0] / X_train.shape[0]
    # Feature-wise mean and variance over the rows of this class.
    means[label] = X_label.mean(axis=0)
    # The small constant keeps every variance strictly positive, since the
    # likelihood later divides by it and takes its logarithm.
    vars_[label] = X_label.var(axis=0) + 1e-9

In [3]:
def gaussian_log_likelihood(x, mean, var):
    """Return the log-density of ``x`` under independent per-feature Gaussians.

    Args:
        x: Feature values of one sample.
        mean: Per-feature means, broadcastable against ``x``.
        var: Per-feature variances, broadcastable against ``x``.

    Returns:
        The sum over features of ``log N(x_i | mean_i, var_i)``.
    """
    # Normalisation term: log(2 * pi * var) for each feature.
    log_norm = np.log(2 * np.pi * var)
    # Squared deviation from the mean, scaled by the variance.
    scaled_sq_dev = ((x - mean) ** 2) / var
    return -0.5 * np.sum(log_norm) - 0.5 * np.sum(scaled_sq_dev)

def predict(X):
    """Predict a class label for every sample in ``X``.

    Uses the module-level ``classes``, ``priors``, ``means`` and ``vars_``
    together with ``gaussian_log_likelihood``. For each sample the score of a
    class is ``log(prior) + log-likelihood``; the class with the highest score
    is chosen.

    Args:
        X: Iterable of samples; each sample is scored against ``means[c]``
            and ``vars_[c]`` for every class ``c``.

    Returns:
        A NumPy array holding one predicted label (an element of ``classes``)
        per sample.
    """
    preds = []
    for x in X:
        class_scores = [
            np.log(priors[c]) + gaussian_log_likelihood(x, means[c], vars_[c])
            for c in classes
        ]
        # argmax gives a position within ``classes``; translate it back to the
        # actual label so the result is correct even when the labels are not
        # the integers 0..k-1.
        preds.append(classes[np.argmax(class_scores)])
    return np.array(preds)

# Score the hand-written classifier on the held-out samples.
y_pred = predict(X_test)
manual_accuracy = accuracy_score(y_test, y_pred)
manual_confusion = confusion_matrix(y_test, y_pred)
print("Accuracy (manual GNB):", manual_accuracy)
print("Confusion Matrix (manual GNB):\n", manual_confusion)

Accuracy (manual GNB): 0.9777777777777777
Confusion Matrix (manual GNB):
 [[19  0  0]
 [ 0 12  1]
 [ 0  0 13]]


(ii)

In [4]:
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score, confusion_matrix
from sklearn.model_selection import train_test_split
from sklearn.datasets import load_iris

In [5]:
# Load the Iris data and hold out 30% of the rows for testing
# (fixed random_state so the split is reproducible).
data = load_iris()
X, y = data.data, data.target

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42
)

# Fit scikit-learn's Gaussian Naive Bayes on the training split, then label
# the held-out samples (fit() returns the fitted estimator itself).
gnb = GaussianNB()
y_pred = gnb.fit(X_train, y_train).predict(X_test)

sklearn_accuracy = accuracy_score(y_test, y_pred)
sklearn_confusion = confusion_matrix(y_test, y_pred)
print("Accuracy (sklearn GaussianNB):", sklearn_accuracy)
print("Confusion Matrix (sklearn GaussianNB):\n", sklearn_confusion)

Accuracy (sklearn GaussianNB): 0.9777777777777777
Confusion Matrix (sklearn GaussianNB):
 [[19  0  0]
 [ 0 12  1]
 [ 0  0 13]]


2.  Explore the GridSearchCV tool in scikit-learn. This tool is often used for tuning the hyperparameters of machine learning models. Use it to find the best value of K for a K-NN classifier on any dataset.

In [6]:
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score

In [13]:
# Load the Iris data and hold out 30% of the rows for testing
# (fixed random_state so the split is reproducible).
data = load_iris()
X, y = data.data, data.target

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42
)

# Candidate neighbourhood sizes: K = 1, 2, ..., 20.
param_grid = {'n_neighbors': range(1, 21)}

# Search over every candidate K with cv=5 cross-validation on the training
# split only, so the test split stays untouched until the final check.
knn = KNeighborsClassifier()
grid = GridSearchCV(estimator=knn, param_grid=param_grid, cv=5)
grid.fit(X_train, y_train)

print("Best K value:", grid.best_params_)
print("Best cross-validation score:", grid.best_score_)

# Evaluate the best estimator found by the search on the held-out samples.
best_knn = grid.best_estimator_
y_pred = best_knn.predict(X_test)
test_accuracy = accuracy_score(y_test, y_pred)
print("Test Accuracy:", test_accuracy)

Best K value: {'n_neighbors': 1}
Best cross-validation score: 0.9523809523809523
Test Accuracy: 1.0
