# Assignment 6

Solutions for:

1. Gaussian Naïve Bayes Classifier on the Iris dataset

   (i) Step-by-step implementation

   (ii) In-built function

2. Use GridSearchCV to find best K for K-NN Classifier

In [None]:
import numpy as np
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
# Load the Iris dataset and pull out the feature matrix and label vector.
iris = datasets.load_iris()
X, y = iris.data, iris.target

# Hold out 30% of the samples for testing. The split is stratified on the
# labels and uses a fixed seed so that it is reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
    stratify=y,
)

## Q1 (i) Manual step-by-step implementation of Gaussian Naive Bayes

In [None]:
def train_gnb(X, y, var_smoothing=1e-9):
    """Estimate the Gaussian Naive Bayes parameters of every class.

    Args:
        X: Array-like of shape (n_samples, n_features) with the training
            features. It is converted with ``np.asarray``, so nested Python
            lists are accepted as well as NumPy arrays.
        y: Array-like of shape (n_samples,) with the class label of each
            row of ``X``.
        var_smoothing: Constant added to every per-feature variance so that
            a feature that is constant within a class does not produce a
            zero variance (and a division by zero at prediction time).
            Defaults to ``1e-9``.

    Returns:
        A tuple ``(classes, priors, means, variances)`` where ``classes`` is
        the sorted array of unique labels and the other three are dicts keyed
        by label: the class prior (fraction of training rows), the
        per-feature mean, and the per-feature smoothed population variance.
    """
    # Boolean-mask indexing below requires real arrays, not lists.
    X = np.asarray(X)
    y = np.asarray(y)

    classes = np.unique(y)
    n_samples = X.shape[0]
    priors = {}
    means = {}
    variances = {}
    for c in classes:
        Xc = X[y == c]
        priors[c] = Xc.shape[0] / n_samples
        means[c] = Xc.mean(axis=0)
        variances[c] = Xc.var(axis=0) + var_smoothing
    return classes, priors, means, variances

def gaussian_prob(x, mean, var):
    """Evaluate the univariate normal density N(mean, var) at ``x``.

    All three arguments may be scalars or NumPy arrays; the computation is
    element-wise and follows NumPy broadcasting.

    Args:
        x: Point(s) at which to evaluate the density.
        mean: Mean(s) of the distribution.
        var: Variance(s) of the distribution.

    Returns:
        The density value(s), with the broadcast shape of the inputs.
    """
    squared_deviation = (x - mean) ** 2
    kernel = np.exp(-squared_deviation / (2 * var))
    normalizer = 1 / np.sqrt(2 * np.pi * var)
    return normalizer * kernel

def predict_gnb(X, classes, priors, means, variances):
    """Predict the most probable class of every sample under Gaussian NB.

    Scoring is done in log space: for each class the score is
    ``log(prior) + sum_j log N(x_j | mean_j, var_j)``. Multiplying the raw
    densities instead underflows to 0.0 once there are many features or a
    sample lies far from every class mean, which makes all classes tie and
    the first class win regardless of the data.

    Args:
        X: Iterable of samples (typically an array of shape
            (n_samples, n_features)).
        classes: Sequence of class labels; the prediction for a sample is
            the element of ``classes`` with the highest score.
        priors: Dict mapping each label to its prior probability.
        means: Dict mapping each label to its per-feature means.
        variances: Dict mapping each label to its per-feature variances
            (expected to be strictly positive).

    Returns:
        A NumPy array with one predicted label per sample.
    """
    # Everything that does not depend on the sample is computed once per class.
    class_params = []
    # A zero prior legitimately maps to -inf; silence the divide warning.
    with np.errstate(divide='ignore'):
        for c in classes:
            var = np.asarray(variances[c], dtype=float)
            class_params.append((
                np.log(priors[c]),
                -0.5 * np.log(2 * np.pi * var),  # log of the normalizing coefficient
                np.asarray(means[c], dtype=float),
                2 * var,
            ))

    preds = []
    for x in X:
        log_posteriors = [
            log_prior + np.sum(log_coeff - (x - mean) ** 2 / two_var)
            for log_prior, log_coeff, mean, two_var in class_params
        ]
        preds.append(classes[np.argmax(log_posteriors)])
    return np.array(preds)

# Fit the hand-written Gaussian NB on the training split and predict the test split.
classes, priors, means, variances = train_gnb(X_train, y_train)
y_pred_manual = predict_gnb(X_test, classes, priors, means, variances)

# Report accuracy, the per-class metrics and the confusion matrix.
print(
    'Accuracy (manual GaussianNB):',
    accuracy_score(y_test, y_pred_manual),
)
print(
    '\nClassification report (manual GaussianNB):',
    classification_report(y_test, y_pred_manual),
    sep='\n',
)
print(
    '\nConfusion matrix (manual GaussianNB):',
    confusion_matrix(y_test, y_pred_manual),
    sep='\n',
)

## Q1 (ii) Using sklearn's GaussianNB

In [None]:
from sklearn.naive_bayes import GaussianNB
# Fit scikit-learn's GaussianNB on the training split (fit returns the
# estimator itself) and predict the held-out test split.
clf = GaussianNB().fit(X_train, y_train)
y_pred_sklearn = clf.predict(X_test)

# Report accuracy, the per-class metrics and the confusion matrix.
print(
    'Accuracy (sklearn GaussianNB):',
    accuracy_score(y_test, y_pred_sklearn),
)
print(
    '\nClassification report (sklearn GaussianNB):',
    classification_report(y_test, y_pred_sklearn),
    sep='\n',
)
print(
    '\nConfusion matrix (sklearn GaussianNB):',
    confusion_matrix(y_test, y_pred_sklearn),
    sep='\n',
)

## Q2 Use GridSearchCV to find best K for K-NN Classifier

In [None]:
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import GridSearchCV
# Candidate neighbourhood sizes: K = 1 .. 20.
param_grid = {'n_neighbors': list(range(1, 21))}
knn = KNeighborsClassifier()

# 5-fold cross-validated grid search over K, scored by accuracy, run on the
# training split only.
gs = GridSearchCV(
    estimator=knn,
    param_grid=param_grid,
    cv=5,
    scoring='accuracy',
)
gs.fit(X_train, y_train)
print('Best parameters from GridSearchCV:', gs.best_params_)
print('Best cross-validation score:', gs.best_score_)

# Evaluate the best estimator found by the search on the held-out test split.
best_knn = gs.best_estimator_
y_test_pred = best_knn.predict(X_test)
print(
    'Test set accuracy with best K:',
    accuracy_score(y_test, y_test_pred),
)
print(
    '\nClassification report (best KNN):',
    classification_report(y_test, y_test_pred),
    sep='\n',
)
print(
    '\nConfusion matrix (best KNN):',
    confusion_matrix(y_test, y_test_pred),
    sep='\n',
)