In [1]:
import numpy as np
import pandas as pd
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report


In [3]:
# Load the Iris dataset; `iris` is kept around because later cells read
# `iris.target_names` for their classification reports.
iris = load_iris()
X, y = iris.data, iris.target

# Hold out 30% of the samples for testing. Passing `stratify=y` asks the
# splitter to preserve the class proportions in both partitions, and the
# fixed `random_state` makes the split reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    stratify=y,
    random_state=42,
)

In [4]:
from sklearn.naive_bayes import GaussianNB

# Baseline model: Gaussian Naive Bayes trained on the training split.
# `fit` returns the estimator itself, so construction and fitting are chained.
gnb = GaussianNB().fit(X_train, y_train)
y_pred_sklearn = gnb.predict(X_test)

# Report held-out performance: overall accuracy, the confusion matrix, and
# per-class precision/recall/F1 labelled with the species names.
print("=== sklearn GaussianNB ===")
print(
    "Accuracy:",
    accuracy_score(y_test, y_pred_sklearn),
)
print(
    "\nConfusion matrix:\n",
    confusion_matrix(y_test, y_pred_sklearn),
)
print(
    "\nClassification report:\n",
    classification_report(
        y_test,
        y_pred_sklearn,
        target_names=iris.target_names,
    ),
)


=== sklearn GaussianNB ===
Accuracy: 0.9111111111111111

Confusion matrix:
 [[15  0  0]
 [ 0 14  1]
 [ 0  3 12]]

Classification report:
               precision    recall  f1-score   support

      setosa       1.00      1.00      1.00        15
  versicolor       0.82      0.93      0.88        15
   virginica       0.92      0.80      0.86        15

    accuracy                           0.91        45
   macro avg       0.92      0.91      0.91        45
weighted avg       0.92      0.91      0.91        45



In [5]:
from sklearn.model_selection import GridSearchCV
from sklearn.neighbors import KNeighborsClassifier
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler

# Scaling and KNN are combined into a single estimator. With the scaler inside
# the pipeline, GridSearchCV re-fits it on the training portion of every CV
# fold, so the validation portion never contributes to the scaling statistics
# (fitting the scaler on all of X_train up front would leak that information
# into the cross-validation scores).
# The pipeline also scales internally, so X_train / X_test are NOT overwritten
# here: earlier cells keep seeing the raw features when they are re-run.
scaler = StandardScaler()
knn = KNeighborsClassifier()
knn_pipeline = Pipeline([("scaler", scaler), ("knn", knn)])

# Try different values of K (1 to 20). The "knn__" prefix routes the
# parameter to the pipeline step named "knn".
param_grid = {"knn__n_neighbors": np.arange(1, 21)}

# 5-fold cross-validated grid search, scored by accuracy.
grid = GridSearchCV(knn_pipeline, param_grid, cv=5, scoring="accuracy")
grid.fit(X_train, y_train)

# Check best parameters and accuracy. The dict is rebuilt with the plain
# `n_neighbors` key so the printed line keeps its previous format.
best_k = grid.best_params_["knn__n_neighbors"]
print("Best K value:", {"n_neighbors": best_k})
print("Best Cross-validation Accuracy:", round(grid.best_score_, 4))

# Evaluate on the test data. `best_estimator_` is the whole pipeline refit on
# all of X_train with the best K, so it takes the raw (unscaled) test features.
best_knn = grid.best_estimator_
# GridSearchCV works on clones, so rebind `scaler` to the fitted step in case
# anything downstream wants to transform data with it.
scaler = best_knn.named_steps["scaler"]
y_pred = best_knn.predict(X_test)

print("\nTest Set Accuracy:", round(accuracy_score(y_test, y_pred), 4))
print("\nClassification Report:\n", classification_report(y_test, y_pred, target_names=iris.target_names))

Best K value: {'n_neighbors': np.int64(14)}
Best Cross-validation Accuracy: 0.9714

Test Set Accuracy: 0.9556

Classification Report:
               precision    recall  f1-score   support

      setosa       1.00      1.00      1.00        15
  versicolor       0.88      1.00      0.94        15
   virginica       1.00      0.87      0.93        15

    accuracy                           0.96        45
   macro avg       0.96      0.96      0.96        45
weighted avg       0.96      0.96      0.96        45

