In [1]:
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
from sklearn.naive_bayes import GaussianNB
import numpy as np

# Load the iris measurements and hold out a stratified 25% test split.
data = load_iris()
X, y = data.data, data.target
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.25,
    random_state=42,
    stratify=y,
)

# Per-class Gaussian naive Bayes parameters, estimated from the training split only.
classes = np.unique(y_train)
priors, means, vars_ = {}, {}, {}

for c in classes:
    X_c = X_train[y_train == c]  # training rows labelled with class c
    # Prior: fraction of the training samples that belong to this class.
    priors[c] = len(X_c) / len(X_train)
    # Feature-wise mean and variance; the tiny additive constant keeps every
    # variance strictly positive so dividing by it / taking its log stays finite.
    means[c] = np.mean(X_c, axis=0)
    vars_[c] = np.var(X_c, axis=0) + 1e-9

def gaussian_log_likelihood(x, mean, var):
    """Log-density of ``x`` under a Gaussian with independent features.

    The per-feature normal log-densities are summed over the last (feature)
    axis only, so the function works for one sample or for a batch.

    Parameters
    ----------
    x : array_like
        A single sample of shape ``(n_features,)`` or a batch of shape
        ``(..., n_features)``.
    mean : array_like
        Per-feature means, shape ``(n_features,)`` (or broadcastable to ``x``).
    var : array_like
        Per-feature variances, same layout as ``mean``. Must be strictly
        positive, otherwise the log / division produce ``inf`` or ``nan``.

    Returns
    -------
    numpy.float64 or numpy.ndarray
        A scalar for a single sample; for a batch, one value per sample.
    """
    # atleast_1d keeps plain scalars working with the axis=-1 reductions below.
    x = np.atleast_1d(x)
    mean = np.atleast_1d(mean)
    var = np.atleast_1d(var)
    # Reduce over the feature axis only: a 2-D batch then yields one value per
    # row instead of a single number accumulated over every sample.
    log_norm = -0.5 * np.sum(np.log(2 * np.pi * var), axis=-1)
    quad = -0.5 * np.sum(((x - mean) ** 2) / var, axis=-1)
    return log_norm + quad

def predict_manual(X):
    """Predict a class label for every row of ``X`` with the hand-built model.

    For each sample, the unnormalised log-posterior
    ``log(prior) + log-likelihood`` is evaluated for every entry of the
    module-level ``classes`` array (using ``priors``, ``means`` and ``vars_``),
    and the class with the highest score is chosen.

    Parameters
    ----------
    X : iterable of array_like
        Samples to classify, one feature vector per item.

    Returns
    -------
    numpy.ndarray
        Predicted labels drawn from ``classes``, one per sample.
    """
    preds = []
    for x in X:
        log_posteriors = [
            np.log(priors[c]) + gaussian_log_likelihood(x, means[c], vars_[c])
            for c in classes
        ]
        # argmax yields a position within `classes`, not a label; map it back
        # so predictions stay correct when labels are not exactly 0..k-1.
        preds.append(classes[np.argmax(log_posteriors)])
    # Pin the dtype so an empty input still returns an array of label type.
    return np.array(preds, dtype=classes.dtype)

# Predict the held-out split with the hand-written classifier and with
# scikit-learn's GaussianNB, then print the same three reports for each.
y_pred_manual = predict_manual(X_test)

gnb = GaussianNB()
gnb.fit(X_train, y_train)
y_pred_builtin = gnb.predict(X_test)

for accuracy_label, predictions in (
    ("Manual Accuracy:", y_pred_manual),
    ("Built-in Accuracy:", y_pred_builtin),
):
    print(accuracy_label, accuracy_score(y_test, predictions))
    print(confusion_matrix(y_test, predictions))
    print(classification_report(y_test, predictions))


Manual Accuracy: 0.9210526315789473
[[12  0  0]
 [ 0 12  1]
 [ 0  2 11]]
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        12
           1       0.86      0.92      0.89        13
           2       0.92      0.85      0.88        13

    accuracy                           0.92        38
   macro avg       0.92      0.92      0.92        38
weighted avg       0.92      0.92      0.92        38

Built-in Accuracy: 0.9210526315789473
[[12  0  0]
 [ 0 12  1]
 [ 0  2 11]]
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        12
           1       0.86      0.92      0.89        13
           2       0.92      0.85      0.88        13

    accuracy                           0.92        38
   macro avg       0.92      0.92      0.92        38
weighted avg       0.92      0.92      0.92        38



In [3]:
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score

# Reload iris and rebuild the stratified 75/25 split inside this cell.
data = load_iris()
X = data.data
y = data.target

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=42, stratify=y)

# Candidate neighbourhood sizes 1..20, scored with 5-fold cross-validation on
# the training split only; the test split is not used for model selection.
params = {'n_neighbors': list(range(1, 21))}

knn = KNeighborsClassifier()

# refit=True (the default, spelled out here) makes the search retrain the
# winning configuration on the whole training split once the CV is done.
grid = GridSearchCV(knn, params, cv=5, refit=True)
grid.fit(X_train, y_train)

best_k = grid.best_params_['n_neighbors']

# Reuse the estimator the search already refit instead of constructing and
# fitting an identical KNeighborsClassifier a second time.
model = grid.best_estimator_
y_pred = model.predict(X_test)

print("Best K:", best_k)
print("Accuracy:", accuracy_score(y_test, y_pred))


Best K: 10
Accuracy: 0.9736842105263158
