In [1]:
from sklearn.datasets import load_iris
from sklearn.model_selection import GridSearchCV
from sklearn.neighbors import KNeighborsClassifier
import numpy as np

# --- Data ---------------------------------------------------------------
# Iris bunch: `data.data` is the feature matrix, `data.target` the labels.
data = load_iris()
X = data.data
y = data.target

# --- Estimator ----------------------------------------------------------
# Bare KNN classifier; the search below fills in its hyperparameters.
model = KNeighborsClassifier()

# --- Search space -------------------------------------------------------
# 20 values of k x 2 weighting schemes x 3 metrics = 120 candidates.
# NOTE: with the classifier's default p=2, 'minkowski' computes the same
# distance as 'euclidean', so those candidates score identically.
param_grid = dict(
    n_neighbors=np.arange(1, 21),                    # k = 1 .. 20 inclusive
    weights=['uniform', 'distance'],                 # equal vote vs. closer-counts-more
    metric=['euclidean', 'manhattan', 'minkowski'],  # distance function
)

# --- Exhaustive search ---------------------------------------------------
# Every candidate is scored by accuracy over 5 cross-validation folds;
# n_jobs=-1 spreads the fits across all available cores.
grid_search = GridSearchCV(
    estimator=model,
    param_grid=param_grid,
    cv=5,
    scoring='accuracy',
    n_jobs=-1,
)

# Run the search on the full dataset.
grid_search.fit(X, y)

# --- Report ---------------------------------------------------------------
# Winning combination and its mean cross-validated accuracy.
print("Best hyperparameters:", grid_search.best_params_)
print("Best cross-validated accuracy:", grid_search.best_score_)


Best hyperparameters: {'metric': 'euclidean', 'n_neighbors': 10, 'weights': 'distance'}
Best cross-validated accuracy: 0.9866666666666667


* Dataset: Uses the iris dataset.
* Model: Sets up a KNN model (KNeighborsClassifier).
* Parameter Grid: Defines a grid for testing different n_neighbors values, weights, and metric options.
* This program tests:
* n_neighbors: The number of neighbors to consider (1 to 20).
* weights: Either uniform (all neighbors contribute equally) or distance (closer neighbors contribute more).
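* metric: Different distance metrics (euclidean, manhattan, minkowski) to measure distances. Note that minkowski with scikit-learn's default p=2 is identical to euclidean, so it only adds distinct candidates if p is tuned as well.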
* Grid Search with Cross-Validation: GridSearchCV evaluates every combination in the parameter grid using 5-fold (stratified) cross-validation, scoring each
  combination by its accuracy.
* Best Parameters: After fitting, grid_search.best_params_ gives the best hyperparameter combination found in the grid, and grid_search.best_score_ gives its
  mean cross-validated accuracy across the 5 folds.