In [1]:
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split, GridSearchCV, RandomizedSearchCV
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import classification_report, accuracy_score
import numpy as np


In [3]:
# Fetch the bundled Iris data and unpack it into features (X) and targets (y)
iris = load_iris()
X, y = iris.data, iris.target

# Hold out 20% of the samples for final evaluation; the fixed seed keeps the
# split identical from run to run.
# NOTE(review): the split is not stratified, so class counts in the test set
# are whatever the shuffle produces.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)


In [7]:
# Reference model: KNN with scikit-learn's default hyperparameters.
# fit() returns the fitted estimator, so construction and training are chained.
knn_baseline = KNeighborsClassifier().fit(X_train, y_train)
y_pred_base = knn_baseline.predict(X_test)

# Per-class precision / recall / F1 on the held-out test split
print("Baseline KNN Performance")
print(classification_report(y_test, y_pred_base))


Baseline KNN Performance
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        10
           1       1.00      1.00      1.00         9
           2       1.00      1.00      1.00        11

    accuracy                           1.00        30
   macro avg       1.00      1.00      1.00        30
weighted avg       1.00      1.00      1.00        30



In [11]:
# Hyperparameter grid for the exhaustive search.
# 'minkowski' is deliberately not listed: with KNeighborsClassifier's default
# p=2 it is the same distance as 'euclidean', so including it only re-fits
# duplicate candidates (8 extra settings x 5 folds) without exploring
# anything new.
param_grid = {
    'n_neighbors': [3, 5, 7, 9],
    'weights': ['uniform', 'distance'],
    'metric': ['euclidean', 'manhattan'],
}

# Try every combination in the grid, scoring each by accuracy under 5-fold
# cross-validation on the training split only (the test split is not touched
# during the search).
grid_search = GridSearchCV(KNeighborsClassifier(), param_grid, cv=5, scoring='accuracy')
grid_search.fit(X_train, y_train)

# best_estimator_ is the winning configuration refit on the whole training
# split (GridSearchCV's default refit=True); evaluate it on the held-out test set.
best_knn_grid = grid_search.best_estimator_
y_pred_grid = best_knn_grid.predict(X_test)
print("GridSearchCV KNN Performance")
print(f"Best Parameters: {grid_search.best_params_}")
print(classification_report(y_test, y_pred_grid))


GridSearchCV KNN Performance
Best Parameters: {'metric': 'euclidean', 'n_neighbors': 3, 'weights': 'uniform'}
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        10
           1       1.00      1.00      1.00         9
           2       1.00      1.00      1.00        11

    accuracy                           1.00        30
   macro avg       1.00      1.00      1.00        30
weighted avg       1.00      1.00      1.00        30



In [15]:
# Search space for the randomized search; n_neighbors covers 1 through 30.
# NOTE(review): with the default p=2, 'minkowski' is the same distance as
# 'euclidean', so those two entries are duplicate candidates. Left unchanged
# here because resizing the search space would change which candidates
# random_state=42 draws.
param_dist = dict(
    n_neighbors=np.arange(1, 31),
    weights=['uniform', 'distance'],
    metric=['euclidean', 'manhattan', 'minkowski'],
)

# Draw 10 candidate settings from the space and score each by accuracy under
# 5-fold cross-validation on the training split; the seed makes the draw
# reproducible.
random_search = RandomizedSearchCV(
    KNeighborsClassifier(),
    param_distributions=param_dist,
    n_iter=10,
    cv=5,
    scoring='accuracy',
    random_state=42,
)
random_search.fit(X_train, y_train)

# Score the best sampled configuration on the held-out test split
best_knn_random = random_search.best_estimator_
y_pred_random = best_knn_random.predict(X_test)
print("RandomizedSearchCV KNN Performance")
print(f"Best Parameters: {random_search.best_params_}")
print(classification_report(y_test, y_pred_random))


RandomizedSearchCV KNN Performance
Best Parameters: {'weights': 'distance', 'n_neighbors': 17, 'metric': 'minkowski'}
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        10
           1       1.00      1.00      1.00         9
           2       1.00      1.00      1.00        11

    accuracy                           1.00        30
   macro avg       1.00      1.00      1.00        30
weighted avg       1.00      1.00      1.00        30

