In [4]:
import numpy as np
import pandas as pd
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score


In [7]:
# Load the Iris dataset and separate the feature matrix from the class labels.
iris = load_iris()
X, y = iris.data, iris.target

In [8]:
# Hold out 30% of the samples for testing; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
)

In [11]:
class GaussianNaiveBayes:
    """Gaussian Naive Bayes classifier implemented with NumPy.

    Each feature is modelled, per class, as an independent normal
    distribution. Prediction picks the class with the highest log-posterior
    ``log P(c) + sum_j log N(x_j | mean_cj, var_cj)``.

    Parameters
    ----------
    var_smoothing : float, default=1e-9
        Fraction of the largest per-feature variance of the training data
        that is added to every class variance. This keeps the variances
        strictly positive, so a feature that is constant within a class does
        not cause a division by zero.

    Attributes (set by ``fit``)
    ---------------------------
    classes : ndarray of shape (n_classes,)
        Sorted unique labels seen during ``fit``.
    mean : ndarray of shape (n_classes, n_features)
        Per-class feature means.
    var : ndarray of shape (n_classes, n_features)
        Per-class feature variances, including the smoothing term.
    priors : ndarray of shape (n_classes,)
        Relative frequency of each class in the training data.
    """

    def __init__(self, var_smoothing=1e-9):
        self.var_smoothing = var_smoothing

    def fit(self, X, y):
        """Estimate per-class means, variances and priors.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
        y : array-like of shape (n_samples,)

        Returns
        -------
        self : GaussianNaiveBayes
            The fitted estimator, so calls can be chained.

        Raises
        ------
        ValueError
            If ``X`` is not 2-D, is empty, or its row count differs from ``y``.
        """
        X = np.asarray(X, dtype=float)
        y = np.asarray(y)
        if X.ndim != 2:
            raise ValueError("X must be a 2-D array of shape (n_samples, n_features)")
        if X.size == 0:
            raise ValueError("X must contain at least one sample and one feature")
        if X.shape[0] != y.shape[0]:
            raise ValueError("X and y must contain the same number of samples")

        self.classes = np.unique(y)
        n_classes, n_features = len(self.classes), X.shape[1]
        self.mean = np.zeros((n_classes, n_features))
        self.var = np.zeros((n_classes, n_features))
        self.priors = np.zeros(n_classes)

        # Scale the smoothing term by the data so it stays negligible next to
        # real variances; fall back to the raw value if every feature is constant.
        epsilon = self.var_smoothing * X.var(axis=0).max()
        if epsilon <= 0:
            epsilon = self.var_smoothing

        for idx, c in enumerate(self.classes):
            X_c = X[y == c]
            self.mean[idx, :] = X_c.mean(axis=0)
            self.var[idx, :] = X_c.var(axis=0) + epsilon
            self.priors[idx] = X_c.shape[0] / X.shape[0]

        return self

    def gaussian_pdf(self, class_idx, x):
        """Return the per-feature normal density of ``x`` under class ``class_idx``.

        The density can underflow to 0 for points far from the class mean,
        which is why prediction uses ``_log_gaussian_pdf`` instead.
        """
        mean = self.mean[class_idx]
        var = self.var[class_idx]
        numerator = np.exp(-(x - mean) ** 2 / (2 * var))
        denominator = np.sqrt(2 * np.pi * var)
        return numerator / denominator

    def _log_gaussian_pdf(self, class_idx, x):
        """Return the per-feature log-density of ``x`` under class ``class_idx``."""
        mean = self.mean[class_idx]
        var = self.var[class_idx]
        # Expanding log(pdf) analytically avoids exp() underflow followed by log(0).
        return -0.5 * np.log(2 * np.pi * var) - (x - mean) ** 2 / (2 * var)

    def predict_single(self, x):
        """Return the most probable class label for one sample ``x``."""
        x = np.asarray(x, dtype=float)
        log_posteriors = [
            np.log(self.priors[idx]) + np.sum(self._log_gaussian_pdf(idx, x))
            for idx in range(len(self.classes))
        ]
        return self.classes[np.argmax(log_posteriors)]

    def predict(self, X):
        """Return an array with the predicted class label for every row of ``X``."""
        X = np.asarray(X, dtype=float)
        return np.array([self.predict_single(x) for x in X])

In [14]:
# Train the hand-written classifier on the training split, then label the
# held-out samples. fit() and predict() are kept as separate statements
# rather than chained.
gnb_custom = GaussianNaiveBayes()
gnb_custom.fit(X_train, y_train)
y_pred_custom = gnb_custom.predict(X_test)

In [15]:
# Fraction of held-out samples whose predicted label equals the true label.
accuracy_custom = accuracy_score(y_true=y_test, y_pred=y_pred_custom)
print(f'Step-by-Step Implementation Accuracy: {accuracy_custom * 100:.2f}%')

Step-by-Step Implementation Accuracy: 97.78%


In [16]:
from sklearn.naive_bayes import GaussianNB

In [17]:
# Reference implementation; var_smoothing is spelled out at its documented default.
gnb_sklearn = GaussianNB(var_smoothing=1e-9)

In [19]:
# scikit-learn's fit() returns the estimator, so training and prediction chain.
y_pred_sklearn = gnb_sklearn.fit(X_train, y_train).predict(X_test)

In [20]:
# Score the scikit-learn predictions on the same held-out labels.
accuracy_sklearn = accuracy_score(y_true=y_test, y_pred=y_pred_sklearn)
print(f'In-Built Function Accuracy: {accuracy_sklearn * 100:.2f}%')

In-Built Function Accuracy: 97.78%


Q2: Tune a k-nearest-neighbours classifier on Iris with GridSearchCV

In [21]:
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score


In [22]:
# Load Iris and hold out 30% of the samples for testing (seed fixed at 42).
iris = load_iris()
X, y = iris.data, iris.target
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42
)

In [23]:
# Base estimator with default settings; the grid below lists the values to try.
knn = KNeighborsClassifier()

# Search space: k from 1 to 30, two neighbour-weighting schemes, two distance metrics.
param_grid = dict(
    n_neighbors=list(range(1, 31)),
    weights=['uniform', 'distance'],
    metric=['euclidean', 'manhattan'],
)

In [24]:
# Exhaustive search over param_grid using 5-fold cross-validation scored by accuracy.
grid_search = GridSearchCV(
    estimator=knn,
    param_grid=param_grid,
    cv=5,
    scoring='accuracy',
    verbose=1,
)
grid_search.fit(X_train, y_train)

# Report the winning configuration and its cross-validated score.
print(f"Best Hyperparameters: {grid_search.best_params_}")
print(f"Best Accuracy: {grid_search.best_score_:.2f}")

Fitting 5 folds for each of 120 candidates, totalling 600 fits
Best Hyperparameters: {'metric': 'euclidean', 'n_neighbors': 18, 'weights': 'distance'}
Best Accuracy: 0.96


In [25]:
# Take the best model found by the search and label the held-out samples with it.
best_knn = grid_search.best_estimator_
y_pred = best_knn.predict(X_test)

# Accuracy on the test split, which the grid search never saw.
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
print(f"Test Set Accuracy: {accuracy * 100:.2f}%")

Test Set Accuracy: 100.00%
