In [1]:
# Q1(i) - Step-by-step implementation of Gaussian Naive Bayes on Iris dataset

import numpy as np
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# Fetch the Iris measurements (features) and species ids (targets)
iris = load_iris()
X = iris.data
y = iris.target

# Hold out 20% of the rows for testing; the fixed seed makes the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Per-class Naive Bayes parameters, each dict keyed by class label:
#   priors -> fraction of training rows that belong to the class
#   means  -> per-feature mean of the class's training rows
#   vars_  -> per-feature variance of the class's training rows
classes = np.unique(y_train)
priors, means, vars_ = {}, {}, {}

for label in classes:
    members = X_train[y_train == label]
    priors[label] = len(members) / len(X_train)
    means[label] = np.mean(members, axis=0)
    vars_[label] = np.var(members, axis=0)

# Gaussian probability density function
def gaussian_prob(x, mean, var, eps=1e-6):
    """Evaluate the univariate normal density N(mean, var + eps) at x.

    All arguments broadcast element-wise, so a whole feature vector can be
    scored against per-feature means and variances in a single call.

    Parameters
    ----------
    x : float or array-like
        Point(s) at which the density is evaluated.
    mean : float or array-like
        Mean of the distribution.
    var : float or array-like
        Variance of the distribution (not the standard deviation).
    eps : float, default 1e-6
        Smoothing term added to the variance so that a zero-variance
        feature does not cause a division by zero.

    Returns
    -------
    numpy.float64 or numpy.ndarray
        Density value(s), with the broadcast shape of the inputs.
    """
    # Smooth the variance once and use the same value in both the
    # normalising coefficient and the exponent; adding eps to each factor
    # separately would give them different effective variances.
    smoothed_var = np.asarray(var, dtype=float) + eps
    coeff = 1.0 / np.sqrt(2.0 * np.pi * smoothed_var)
    exponent = np.exp(-((x - mean) ** 2) / (2.0 * smoothed_var))
    return coeff * exponent

# Predict function
def predict(X):
    """Predict a class label for every row of X with Gaussian Naive Bayes.

    Reads the fitted parameters from the module-level names ``classes``,
    ``priors``, ``means`` and ``vars_`` (the last three are dicts keyed by
    class label).

    Parameters
    ----------
    X : iterable of array-like
        Samples to classify, one feature vector per row.

    Returns
    -------
    numpy.ndarray
        The predicted class label (an element of ``classes``) for each row.
    """
    eps = 1e-6  # variance smoothing; keeps zero-variance features finite

    # Everything in a class score that does not depend on the sample:
    # log prior plus the Gaussian log-normaliser summed over the features.
    smoothed_vars = {c: np.asarray(vars_[c], dtype=float) + eps for c in classes}
    offsets = {
        c: np.log(priors[c]) - 0.5 * np.sum(np.log(2.0 * np.pi * smoothed_vars[c]))
        for c in classes
    }

    y_pred = []
    for sample in X:
        # Work with log-densities directly: exponentiating first and taking
        # the log afterwards underflows to log(0) = -inf for samples far
        # from a class mean, which makes the classes indistinguishable.
        log_posteriors = [
            offsets[c] - 0.5 * np.sum((sample - means[c]) ** 2 / smoothed_vars[c])
            for c in classes
        ]
        # argmax yields a position in `classes`; translate it to the label so
        # the result is right even when labels are not 0..k-1.
        y_pred.append(classes[np.argmax(log_posteriors)])
    return np.array(y_pred)

# Score the hand-written classifier on the held-out test rows
y_pred = predict(X_test)
print("# Q1(i) Result:")
print("Accuracy:", accuracy_score(y_true=y_test, y_pred=y_pred))


# Q1(i) Result:
Accuracy: 1.0


In [2]:
# Q1(ii) - In-built Gaussian Naive Bayes on Iris dataset

from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score, classification_report

# Reload Iris and reproduce the same seeded 80/20 split
iris = load_iris()
X = iris.data
y = iris.target
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Fit scikit-learn's Gaussian Naive Bayes (fit() returns the estimator itself)
# and label the held-out rows
gnb = GaussianNB().fit(X_train, y_train)
y_pred = gnb.predict(X_test)

# Report overall accuracy followed by the per-species precision/recall/F1 table
print("# Q1(ii) Result:")
print("Accuracy:", accuracy_score(y_true=y_test, y_pred=y_pred))
print(
    "\nClassification Report:\n",
    classification_report(y_test, y_pred, target_names=iris.target_names),
)


# Q1(ii) Result:
Accuracy: 1.0

Classification Report:
               precision    recall  f1-score   support

      setosa       1.00      1.00      1.00        10
  versicolor       1.00      1.00      1.00         9
   virginica       1.00      1.00      1.00        11

    accuracy                           1.00        30
   macro avg       1.00      1.00      1.00        30
weighted avg       1.00      1.00      1.00        30



In [3]:
# Q2 - Using GridSearchCV to find best K for KNN Classifier

from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score

from sklearn.pipeline import Pipeline

# Load dataset
iris = load_iris()
X, y = iris.data, iris.target

# Hold out 20% for the final test. The features stay unscaled here because
# scaling happens inside the pipeline below.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Chain scaling and KNN into one estimator. During cross-validation the
# scaler is then re-fitted on each fold's training part only; fitting it on
# the whole training set beforehand would leak the validation folds'
# statistics into model selection.
scaler = StandardScaler()
knn = KNeighborsClassifier()
pipeline = Pipeline([("scaler", scaler), ("knn", knn)])

# Pipeline parameters are addressed as "<step name>__<parameter>"
param_grid = {'knn__n_neighbors': list(range(1, 21))}

# Apply GridSearchCV (5-fold) over the whole pipeline
grid = GridSearchCV(pipeline, param_grid, cv=5)
grid.fit(X_train, y_train)

# Best parameters; best_estimator_ is the full scaler+KNN pipeline refitted
# on the complete training split
best_k = grid.best_params_['knn__n_neighbors']
best_model = grid.best_estimator_

# Evaluate on the raw test rows; the pipeline applies the fitted scaling itself
y_pred = best_model.predict(X_test)
print("# Q2 Result:")
print("Best K value:", best_k)
print("Accuracy:", accuracy_score(y_test, y_pred))


# Q2 Result:
Best K value: 3
Accuracy: 1.0
