#### Q1.
Implement a Gaussian Naive Bayes classifier on the Iris dataset from `sklearn.datasets` using:

    (i) Step-by-step implementation

    (ii) In-built function

In [1]:
import numpy as np
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.naive_bayes import GaussianNB

# --- Step-by-step Gaussian Naive Bayes ---
def log_gaussian(x, mean, var, eps=1e-9):
    """Log-density of samples under a Gaussian with independent features.

    Each feature is treated as an independent 1-D normal distribution, so the
    per-feature log-densities are summed over the last axis.

    Parameters
    ----------
    x : array-like, shape (n_samples, n_features) or (n_features,)
        Sample(s) to score.
    mean : array-like, shape (n_features,)
        Per-feature mean.
    var : array-like, shape (n_features,)
        Per-feature variance.
    eps : float, default 1e-9
        Constant added to ``var`` so that a zero-variance feature does not
        cause a division by zero or ``log(0)``.

    Returns
    -------
    numpy.ndarray of shape (n_samples,) for 2-D ``x``, or a NumPy scalar for
    a single 1-D sample.
    """
    x = np.asarray(x)
    # Smooth the variance before it is used as a divisor and inside the log.
    var = np.asarray(var) + eps
    quad_term = -0.5 * ((x - mean) ** 2) / var
    norm_term = -0.5 * np.log(2 * np.pi * var)
    # axis=-1 sums over features for both a batch (2-D) and a single sample (1-D).
    return np.sum(quad_term + norm_term, axis=-1)

def fit_gnb(X, y):
    """Estimate Gaussian Naive Bayes parameters from labelled data.

    Parameters
    ----------
    X : array-like, shape (n_samples, n_features)
        Training features. Converted with ``np.asarray`` so that plain
        Python lists can be boolean-mask indexed per class.
    y : array-like, shape (n_samples,)
        Class label for each row of ``X``.

    Returns
    -------
    dict with keys
        'cls' : sorted array of the distinct labels found in ``y``
        'pri' : {label: fraction of training rows with that label}
        'mu'  : {label: per-feature mean of that class's rows}
        'var' : {label: per-feature variance of that class's rows, computed
                 with ``np.var`` defaults}
    """
    X = np.asarray(X)
    y = np.asarray(y)
    cls = np.unique(y)
    pri, mu, var = {}, {}, {}
    for c in cls:
        Xc = X[y == c]  # rows belonging to class c
        pri[c] = len(Xc) / len(X)
        mu[c] = np.mean(Xc, axis=0)
        var[c] = np.var(Xc, axis=0)
    return {'cls': cls, 'pri': pri, 'mu': mu, 'var': var}

def predict_gnb(model, X):
    """Predict a class label for every row of ``X``.

    ``model`` is the dictionary produced by ``fit_gnb``. For each class the
    score is log(prior) plus the Gaussian log-likelihood from
    ``log_gaussian``; the label with the largest score is returned per row.
    """
    labels = model['cls']
    # One score vector per class, kept in the same order as `labels`.
    per_class = [
        np.log(model['pri'][k]) + log_gaussian(X, model['mu'][k], model['var'][k])
        for k in labels
    ]
    # Columns = classes, rows = samples.
    score_table = np.column_stack(per_class)
    best_column = np.argmax(score_table, axis=1)
    return labels[best_column]

# --- Load the Iris data and hold out a stratified 20% test split ---
iris = load_iris()
X, y = iris.data, iris.target
Xtr, Xte, ytr, yte = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# --- Manual implementation: fit and predict ---
m = fit_gnb(Xtr, ytr)
yp = predict_gnb(m, Xte)

# --- In-built sklearn GaussianNB: fit and predict on the same split ---
nb = GaussianNB()
nb.fit(Xtr, ytr)
pred2 = nb.predict(Xte)

# --- Report both models with the same metrics, manual first ---
for heading, predicted in (("Manual GaussianNB:", yp), ("\nSklearn GaussianNB:", pred2)):
    print(heading)
    print("Accuracy:", round(accuracy_score(yte, predicted), 4))
    print(classification_report(yte, predicted, target_names=iris.target_names))
    print("Confusion Matrix:\n", confusion_matrix(yte, predicted))


Manual GaussianNB:
Accuracy: 0.9667
              precision    recall  f1-score   support

      setosa       1.00      1.00      1.00        10
  versicolor       1.00      0.90      0.95        10
   virginica       0.91      1.00      0.95        10

    accuracy                           0.97        30
   macro avg       0.97      0.97      0.97        30
weighted avg       0.97      0.97      0.97        30

Confusion Matrix:
 [[10  0  0]
 [ 0  9  1]
 [ 0  0 10]]

Sklearn GaussianNB:
Accuracy: 0.9667
              precision    recall  f1-score   support

      setosa       1.00      1.00      1.00        10
  versicolor       1.00      0.90      0.95        10
   virginica       0.91      1.00      0.95        10

    accuracy                           0.97        30
   macro avg       0.97      0.97      0.97        30
weighted avg       0.97      0.97      0.97        30

Confusion Matrix:
 [[10  0  0]
 [ 0  9  1]
 [ 0  0 10]]


#### Q2.
Explore the GridSearchCV tool in scikit-learn. This tool is often used for tuning the hyperparameters of machine learning models. Use it to find the best value of K for a K-NN classifier on any dataset.

In [2]:
# --- KNN with GridSearchCV ---
import numpy as np
import pandas as pd
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

# --- Load and split data ---
# Stratified 80/20 split with a fixed random_state so the split is repeatable.
iris = load_iris()
X, y = iris.data, iris.target
X_tr, X_te, y_tr, y_te = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)

# --- Scale features ---
# The scaler is fitted on the training split only, then applied unchanged to the test split.
sc = StandardScaler()
X_tr_s = sc.fit_transform(X_tr)
X_te_s = sc.transform(X_te)

# --- GridSearch for best K ---
# Candidate n_neighbors values are 1..20, each evaluated with cv=5 on the scaled training data.
# NOTE(review): the scaler above was fitted on all of X_tr before the search, so the rows used
# as validation folds have already contributed to the scaling statistics. Putting the scaler and
# the classifier in a sklearn Pipeline and searching over the pipeline's n_neighbors parameter
# would keep scaling inside each fold; the reported CV score may shift slightly if that is done.
params = {'n_neighbors': list(range(1, 21))}
knn = KNeighborsClassifier()
grid = GridSearchCV(knn, param_grid=params, cv=5)
grid.fit(X_tr_s, y_tr)

print("Best K:", grid.best_params_['n_neighbors'])
print("Best CV Score:", round(grid.best_score_, 4))

# --- Evaluate on test set ---
# The held-out test split is scored once, using the estimator chosen by the search.
best_knn = grid.best_estimator_
y_pred = best_knn.predict(X_te_s)

print("\nTest Accuracy:", round(accuracy_score(y_te, y_pred), 4))
print("Classification Report:\n", classification_report(y_te, y_pred, target_names=iris.target_names))
print("Confusion Matrix:\n", confusion_matrix(y_te, y_pred))


Best K: 5
Best CV Score: 0.9667

Test Accuracy: 0.9333
Classification Report:
               precision    recall  f1-score   support

      setosa       1.00      1.00      1.00        10
  versicolor       0.83      1.00      0.91        10
   virginica       1.00      0.80      0.89        10

    accuracy                           0.93        30
   macro avg       0.94      0.93      0.93        30
weighted avg       0.94      0.93      0.93        30

Confusion Matrix:
 [[10  0  0]
 [ 0 10  0]
 [ 0  2  8]]
