In [None]:
# Q1 (i)
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.datasets import load_iris
from collections import Counter
# Load the Iris dataset and hold out 30% of the samples for testing.
# The fixed random_state makes the split reproducible across runs.
iris = load_iris()
X, y = iris.data, iris.target
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
)
class GaussianNaiveBayes:
    """Gaussian Naive Bayes classifier for continuous features.

    Each feature is modelled, per class, as an independent normal
    distribution whose mean and variance are estimated from the training
    data. Prediction picks the class with the largest log-posterior
    (log prior + sum of per-feature log-densities).

    Parameters
    ----------
    var_smoothing : float, default=1e-9
        Fraction of the largest per-feature variance of the training data
        that is added to every class variance. This keeps the variance
        strictly positive when a feature is constant within a class, which
        would otherwise cause a division by zero.

    Attributes (set by ``fit``)
    ---------------------------
    classes : ndarray of shape (n_classes,)
        Sorted unique labels seen during ``fit``.
    mean, var : ndarray of shape (n_classes, n_features)
        Per-class feature means and (smoothed) variances. Row ``i``
        belongs to ``classes[i]``.
    priors : ndarray of shape (n_classes,)
        Relative frequency of each class in the training data.
    """

    def __init__(self, var_smoothing=1e-9):
        self.var_smoothing = var_smoothing

    def fit(self, X, y):
        """Estimate per-class means, variances and priors.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
        y : array-like of shape (n_samples,)
            Class labels; any values accepted by ``np.unique`` (they do
            not have to be the integers 0..n_classes-1).

        Returns
        -------
        self : GaussianNaiveBayes
        """
        X = np.asarray(X, dtype=np.float64)
        y = np.asarray(y)
        n_samples, n_features = X.shape
        self.classes = np.unique(y)
        n_classes = len(self.classes)

        self.mean = np.zeros((n_classes, n_features), dtype=np.float64)
        self.var = np.zeros((n_classes, n_features), dtype=np.float64)
        self.priors = np.zeros(n_classes, dtype=np.float64)

        # Variance floor, scaled to the data so it is negligible unless a
        # class variance is (near) zero.
        epsilon = self.var_smoothing * X.var(axis=0).max() if X.size else 0.0

        # Index the parameter tables by class *position*, not by the label
        # value, so arbitrary labels (e.g. [1, 2] or strings) work.
        for idx, c in enumerate(self.classes):
            X_c = X[y == c]
            self.mean[idx, :] = X_c.mean(axis=0)
            self.var[idx, :] = X_c.var(axis=0) + epsilon
            self.priors[idx] = X_c.shape[0] / float(n_samples)

        return self

    def _gaussian_density(self, class_idx, x):
        """Return the per-feature normal density of ``x`` under class ``class_idx``."""
        mean = self.mean[class_idx]
        var = self.var[class_idx]
        return (1.0 / np.sqrt(2 * np.pi * var)) * np.exp(-(x - mean) ** 2 / (2 * var))

    def _log_gaussian_density(self, class_idx, x):
        """Return the per-feature log-density of ``x`` under class ``class_idx``.

        Computed directly in log space: taking ``np.log`` of the density
        underflows to ``-inf`` for samples far from the class mean.
        """
        mean = self.mean[class_idx]
        var = self.var[class_idx]
        return -0.5 * np.log(2 * np.pi * var) - (x - mean) ** 2 / (2 * var)

    def _class_likelihood(self, X):
        """Return, for each row of ``X``, the label with the highest log-posterior."""
        log_priors = np.log(self.priors)
        predictions = []
        for x in X:
            log_posteriors = [
                log_priors[idx] + np.sum(self._log_gaussian_density(idx, x))
                for idx in range(len(self.classes))
            ]
            predictions.append(self.classes[np.argmax(log_posteriors)])
        return predictions

    def predict(self, X):
        """Predict a class label for every sample in ``X``.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)

        Returns
        -------
        list
            One predicted label (an element of ``self.classes``) per sample.
        """
        return self._class_likelihood(X)
# Train the from-scratch classifier and predict labels for the held-out split.
model = GaussianNaiveBayes()
model.fit(X_train, y_train)
y_pred = model.predict(X_test)

# Accuracy is the fraction of test samples whose prediction matches the true label.
accuracy = np.mean(y_pred == y_test)
print(f'Accuracy: {accuracy:.2f}')

Accuracy: 0.98


In [None]:
# Q1(ii)
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score
# Baseline: scikit-learn's GaussianNB fitted on the X_train / y_train split
# and scored on X_test / y_test.
gnb = GaussianNB().fit(X_train, y_train)
y_pred_inbuilt = gnb.predict(X_test)

accuracy_inbuilt = accuracy_score(y_true=y_test, y_pred=y_pred_inbuilt)
print(f'Accuracy (In-built): {accuracy_inbuilt:.2f}')

Accuracy (In-built): 1.00


In [None]:
# Q2
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score

# Reload Iris and recreate the 70/30 split (fixed seed, so it is reproducible).
iris = load_iris()
X, y = iris.data, iris.target
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
)

# Search K = 1..30 with 5-fold cross-validation on the training split only,
# ranking candidates by accuracy.
knn = KNeighborsClassifier()
param_grid = {'n_neighbors': [k for k in range(1, 31)]}
grid_search = GridSearchCV(
    knn,
    param_grid,
    cv=5,
    scoring='accuracy',
)
grid_search.fit(X_train, y_train)

best_k = grid_search.best_params_['n_neighbors']
print(f'Best value of K: {best_k}')

# Score the best estimator found by the search on the held-out test split.
best_knn = grid_search.best_estimator_
y_pred = best_knn.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f'Accuracy with best K ({best_k}): {accuracy:.2f}')

Best value of K: 1
Accuracy with best K (1): 1.00
