In [1]:
!pip install scikit-optimize

Collecting scikit-optimize
  Downloading scikit_optimize-0.10.2-py2.py3-none-any.whl.metadata (9.7 kB)
Collecting pyaml>=16.9 (from scikit-optimize)
  Downloading pyaml-25.7.0-py3-none-any.whl.metadata (12 kB)
Downloading scikit_optimize-0.10.2-py2.py3-none-any.whl (107 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m107.8/107.8 kB[0m [31m2.0 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading pyaml-25.7.0-py3-none-any.whl (26 kB)
Installing collected packages: pyaml, scikit-optimize
Successfully installed pyaml-25.7.0 scikit-optimize-0.10.2


In [18]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import confusion_matrix,accuracy_score,classification_report
from sklearn.model_selection import train_test_split, GridSearchCV, RandomizedSearchCV, cross_val_score
from scipy.stats import randint
from skopt.space import Integer, Categorical, Real
from skopt import BayesSearchCV

#Creating Dataset

In [3]:
x,y = make_classification(
    n_samples=1000,
    n_features=3,
     n_redundant=1,
    n_classes=2,
    random_state=999
)

In [4]:
X_train, X_test, y_train, y_test = train_test_split(x,y,test_size=0.33, random_state=42)

#KNN Classifier

In [5]:
knc=KNeighborsClassifier(n_neighbors=5,algorithm='auto')

In [6]:
knc.fit(X_train,y_train)

In [7]:
y_pred=knc.predict(X_test)

In [8]:
print("Confusion Matrix")
print(confusion_matrix(y_pred,y_test))
print("Accuracy : ",accuracy_score(y_pred,y_test))
print(classification_report(y_pred,y_test))

Confusion Matrix
[[158  20]
 [ 11 141]]
Accuracy :  0.906060606060606
              precision    recall  f1-score   support

           0       0.93      0.89      0.91       178
           1       0.88      0.93      0.90       152

    accuracy                           0.91       330
   macro avg       0.91      0.91      0.91       330
weighted avg       0.91      0.91      0.91       330



#Manual Hyperparameter Search (Brute Force Method)

In [9]:
best_score = 0
best_params = {}

for n in [3,5,7,9,11]:
    for w in ['uniform', 'distance']:
        for p in [1,2]:
            knn = KNeighborsClassifier(n_neighbors=n, weights=w, p=p)
            scores = cross_val_score(knn, X_train, y_train, cv=5)
            mean_score = scores.mean()
            if mean_score > best_score:
                best_score = mean_score
                best_params = {'n_neighbors': n, 'weights': w, 'p': p}

print("Best Params (Manual):", best_params)
print("Best CV Score (Manual):", best_score)

Best Params (Manual): {'n_neighbors': 9, 'weights': 'uniform', 'p': 2}
Best CV Score (Manual): 0.9029850746268657


#Grid Search CV

In [10]:
param_grid = {
    'n_neighbors': [3, 5, 7, 9, 11, 13, 15],
    'weights': ['uniform', 'distance'],
    'metric': ['euclidean', 'manhattan', 'minkowski'],
    'p': [1, 2]
}

In [11]:
grid = GridSearchCV(
    estimator=KNeighborsClassifier(),
    param_grid=param_grid,
    cv=5,
    scoring='accuracy',
    n_jobs=-1
)

In [12]:
grid.fit(X_train,y_train)

In [13]:
print("Best Params (GridSearch):", grid.best_params_)
print("Best Score (GridSearch):", grid.best_score_)

Best Params (GridSearch): {'metric': 'euclidean', 'n_neighbors': 9, 'p': 1, 'weights': 'uniform'}
Best Score (GridSearch): 0.9029850746268657


In [14]:
y_pred = grid.best_estimator_.predict(X_test)
print("Test Accuracy:", accuracy_score(y_test, y_pred))

Test Accuracy: 0.9121212121212121


#Randomized Search CV

In [15]:
param_dist = {
    'n_neighbors': randint(1, 30),
    'weights': ['uniform', 'distance'],
    'p': randint(1, 3),
    'metric': ['euclidean', 'manhattan', 'minkowski']
}

In [16]:
random_search = RandomizedSearchCV(
    estimator=KNeighborsClassifier(),
    param_distributions=param_dist,
    n_iter=20,
    scoring='accuracy',
    cv=5,
    random_state=42,
    n_jobs=-1
)

In [17]:
random_search.fit(X_train, y_train)
print("Best Params (RandomizedSearch):", random_search.best_params_)
print("Best Score (RandomizedSearch):", random_search.best_score_)

Best Params (RandomizedSearch): {'metric': 'minkowski', 'n_neighbors': 11, 'p': 2, 'weights': 'uniform'}
Best Score (RandomizedSearch): 0.9014925373134328


# Bayes Search CV

In [19]:
param_space = {
    'n_neighbors': Integer(1, 30),
    'weights': Categorical(['uniform', 'distance']),
    'algorithm': Categorical(['auto', 'ball_tree', 'kd_tree', 'brute']),
    'p': Integer(1, 2),      # 1 = Manhattan, 2 = Euclidean
    'leaf_size': Integer(10, 50)
}

In [20]:
bayes_search = BayesSearchCV(estimator=knc,search_spaces=param_space,n_iter=32,cv=5,n_jobs=-1,verbose=2,random_state=42)

In [21]:
bayes_search.fit(X_train,y_train)

Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fi

In [22]:
print("Best Hyperparameters:", bayes_search.best_params_)
print("Best CV Accuracy:", bayes_search.best_score_)

Best Hyperparameters: OrderedDict({'algorithm': 'auto', 'leaf_size': 11, 'n_neighbors': 14, 'p': 1, 'weights': 'uniform'})
Best CV Accuracy: 0.9014925373134328


In [23]:
y_pred = bayes_search.predict(X_test)
print("Test Accuracy:", accuracy_score(y_test, y_pred))

Test Accuracy: 0.9030303030303031
