In [2]:
#Q1
from sklearn.datasets import load_iris
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

In [10]:
# Show which fields the object returned by load_iris() exposes.
load_iris().keys()

dict_keys(['data', 'target', 'frame', 'target_names', 'DESCR', 'feature_names', 'filename', 'data_module'])

In [12]:
# Load the dataset once and unpack the feature matrix and the label vector
# together, instead of calling load_iris() twice (once per attribute).
x, y = load_iris(return_X_y=True)

In [18]:
# Keep 30% of the rows aside for evaluation; the fixed random_state makes the
# split repeatable between runs.
X_train, X_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.3,
    random_state=42,
)

In [19]:
def gaussian_prob(x, mean, var, eps=1e-6):
    """Evaluate the Gaussian (normal) probability density at ``x``.

    The variance is smoothed by ``eps`` before use, so the value returned is
    the density of N(mean, var + eps). Smoothing the variance itself (rather
    than adding ``eps`` separately to each denominator) keeps the normalising
    coefficient and the exponent consistent, so the result is a proper
    density that integrates to 1 even when ``var`` is 0.

    Parameters
    ----------
    x : float or array-like
        Point(s) at which to evaluate the density.
    mean : float or array-like
        Mean of the distribution; must broadcast against ``x``.
    var : float or array-like
        Variance of the distribution; must broadcast against ``x``.
    eps : float, default 1e-6
        Non-negative amount added to ``var`` to avoid division by zero.

    Returns
    -------
    numpy.float64 or numpy.ndarray
        Density value(s), with the broadcast shape of the inputs.
    """
    x = np.asarray(x, dtype=float)
    mean = np.asarray(mean, dtype=float)
    # One smoothed variance shared by both terms below.
    smoothed_var = np.asarray(var, dtype=float) + eps
    coeff = 1.0 / np.sqrt(2.0 * np.pi * smoothed_var)
    exponent = np.exp(-((x - mean) ** 2) / (2.0 * smoothed_var))
    return coeff * exponent

In [20]:
class GaussianNB_Manual:
    """Gaussian Naive Bayes classifier written directly on top of NumPy.

    Each feature is modelled, per class, as an independent normal
    distribution. Prediction picks the class with the largest log-posterior,
    i.e. ``log(prior) + sum(log(feature densities))``.

    The per-feature log-density is computed analytically instead of taking
    ``np.log`` of an evaluated density. For samples far from every class mean
    the density underflows to 0, its log becomes ``-inf`` for all classes, and
    ``argmax`` would then silently return the first class.

    Parameters
    ----------
    var_smoothing : float, default 1e-6
        Value added to every per-feature variance at prediction time so that
        constant features (variance 0) do not cause a division by zero.

    Attributes set by ``fit``
    -------------------------
    classes : numpy.ndarray
        Sorted unique labels seen in ``y``.
    mean, var : dict
        Per-class arrays of feature means and (unsmoothed) variances.
    priors : dict
        Per-class fraction of the training rows.
    """

    def __init__(self, var_smoothing=1e-6):
        self.var_smoothing = var_smoothing

    def fit(self, X, y):
        """Estimate per-class feature means, variances and priors.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
        y : array-like of shape (n_samples,)

        Returns
        -------
        GaussianNB_Manual
            The fitted instance, so calls can be chained.
        """
        X = np.asarray(X, dtype=float)
        y = np.asarray(y)

        self.classes = np.unique(y)
        self.mean = {}
        self.var = {}
        self.priors = {}

        for c in self.classes:
            X_c = X[y == c]
            self.mean[c] = np.mean(X_c, axis=0)
            self.var[c] = np.var(X_c, axis=0)
            self.priors[c] = X_c.shape[0] / X.shape[0]
        return self

    def predict(self, X):
        """Return the predicted class label for every row of ``X``."""
        X = np.asarray(X, dtype=float)
        return np.array([self._predict(x) for x in X])

    def _predict(self, x):
        """Return the label with the highest log-posterior for one sample."""
        posteriors = [
            np.log(self.priors[c]) + self._log_likelihood(x, c)
            for c in self.classes
        ]
        return self.classes[np.argmax(posteriors)]

    def _log_likelihood(self, x, c):
        """Sum of per-feature Gaussian log-densities of ``x`` under class ``c``."""
        var = self.var[c] + self.var_smoothing
        # log N(x | mean, var) = -0.5 * log(2*pi*var) - (x - mean)^2 / (2*var)
        per_feature = -0.5 * np.log(2.0 * np.pi * var) - (x - self.mean[c]) ** 2 / (2.0 * var)
        return np.sum(per_feature)


In [21]:
# Fit the hand-written classifier on the training split, then predict the
# held-out rows.
gnb_manual = GaussianNB_Manual()
gnb_manual.fit(X_train, y_train)
y_pred_manual = gnb_manual.predict(X_test)

# Score the predictions against the held-out labels.
manual_accuracy = accuracy_score(y_test, y_pred_manual)
manual_confusion = confusion_matrix(y_test, y_pred_manual)
manual_report = classification_report(y_test, y_pred_manual)

print("Accuracy (Manual Implementation):", manual_accuracy)
print("\nConfusion Matrix:\n", manual_confusion)
print("\nClassification Report:\n", manual_report)


Accuracy (Manual Implementation): 0.9777777777777777

Confusion Matrix:
 [[19  0  0]
 [ 0 12  1]
 [ 0  0 13]]

Classification Report:
               precision    recall  f1-score   support

           0       1.00      1.00      1.00        19
           1       1.00      0.92      0.96        13
           2       0.93      1.00      0.96        13

    accuracy                           0.98        45
   macro avg       0.98      0.97      0.97        45
weighted avg       0.98      0.98      0.98        45



In [25]:
from sklearn.naive_bayes import GaussianNB

# Fit scikit-learn's GaussianNB on the same training split and predict the
# same held-out rows, so the numbers can be compared with the manual model.
gnb = GaussianNB()
gnb.fit(X_train, y_train)
y_pred_builtin = gnb.predict(X_test)

# Score the predictions against the held-out labels.
builtin_accuracy = accuracy_score(y_test, y_pred_builtin)
builtin_confusion = confusion_matrix(y_test, y_pred_builtin)
builtin_report = classification_report(y_test, y_pred_builtin)

print("Accuracy:", builtin_accuracy)
print("\nConfusion Matrix:\n", builtin_confusion)
print("\nClassification Report:\n", builtin_report)


Accuracy: 0.9777777777777777

Confusion Matrix:
 [[19  0  0]
 [ 0 12  1]
 [ 0  0 13]]

Classification Report:
               precision    recall  f1-score   support

           0       1.00      1.00      1.00        19
           1       1.00      0.92      0.96        13
           2       0.93      1.00      0.96        13

    accuracy                           0.98        45
   macro avg       0.98      0.97      0.97        45
weighted avg       0.98      0.98      0.98        45



In [6]:
#Q2
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import GridSearchCV
# Load the iris features and labels.
iris = load_iris()
X, y = iris.data, iris.target

# Keep 30% of the rows aside for the final evaluation (fixed seed).
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42
)

# Search n_neighbors = 1..10 with cv=5, scored on accuracy, using only the
# training split.
knn = KNeighborsClassifier()
param_grid = {'n_neighbors': list(range(1, 11))}
grid_search = GridSearchCV(
    estimator=knn,
    param_grid=param_grid,
    scoring='accuracy',
    cv=5,
)
grid_search.fit(X_train, y_train)

print("Best Parameters:", grid_search.best_params_)
print("Best Cross-Validation Accuracy:", grid_search.best_score_)

# Tabulate the mean cross-validation score obtained for every candidate K.
results = pd.DataFrame(grid_search.cv_results_)
k_scores = results[['param_n_neighbors', 'mean_test_score']]
print("\nAccuracy for each value of K:")
print(k_scores)

# Evaluate the best estimator found by the search on the held-out rows.
best_knn = grid_search.best_estimator_
y_pred = best_knn.predict(X_test)

test_accuracy = accuracy_score(y_test, y_pred)
test_confusion = confusion_matrix(y_test, y_pred)
test_report = classification_report(y_test, y_pred)

print("\nTest Accuracy:", test_accuracy)
print("\nConfusion Matrix:\n", test_confusion)
print("\nClassification Report:\n", test_report)


Best Parameters: {'n_neighbors': 1}
Best Cross-Validation Accuracy: 0.9523809523809523

Accuracy for each value of K:
   param_n_neighbors  mean_test_score
0                  1         0.952381
1                  2         0.933333
2                  3         0.933333
3                  4         0.933333
4                  5         0.942857
5                  6         0.942857
6                  7         0.952381
7                  8         0.952381
8                  9         0.942857
9                 10         0.933333

Test Accuracy: 1.0

Confusion Matrix:
 [[19  0  0]
 [ 0 13  0]
 [ 0  0 13]]

Classification Report:
               precision    recall  f1-score   support

           0       1.00      1.00      1.00        19
           1       1.00      1.00      1.00        13
           2       1.00      1.00      1.00        13

    accuracy                           1.00        45
   macro avg       1.00      1.00      1.00        45
weighted avg       1.00      1.00   