In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

In [2]:
from sklearn.datasets import make_classification
X,y=make_classification(n_samples=1000,n_features=3,n_redundant=1,n_classes=2,random_state=999)

In [3]:
X

array([[-0.33504974,  0.02852654,  1.16193084],
       [-1.37746253, -0.4058213 ,  0.44359618],
       [-1.04520026, -0.72334759, -3.10470423],
       ...,
       [-0.75602574, -0.51816111, -2.20382324],
       [ 0.56066316, -0.07335845, -2.15660348],
       [-1.87521902, -1.11380394, -4.04620773]])

In [4]:
from sklearn.model_selection import train_test_split

In [5]:
X_train,X_test,y_train,y_test=train_test_split(X,y,test_size=0.33, random_state=42)

In [8]:
from sklearn.neighbors import KNeighborsClassifier

In [9]:
classifier=KNeighborsClassifier(n_neighbors=5,algorithm='auto')
classifier.fit(X_train,y_train)

In [10]:
y_pred=classifier.predict(X_test)

In [11]:
from sklearn.metrics import confusion_matrix,accuracy_score,classification_report

In [12]:
print(confusion_matrix(y_pred,y_test))
print(accuracy_score(y_pred,y_test))
print(classification_report(y_pred,y_test))

[[158  20]
 [ 11 141]]
0.906060606060606
              precision    recall  f1-score   support

           0       0.93      0.89      0.91       178
           1       0.88      0.93      0.90       152

    accuracy                           0.91       330
   macro avg       0.91      0.91      0.91       330
weighted avg       0.91      0.91      0.91       330



## Hyperparameter tuning

In [14]:
from sklearn.model_selection import GridSearchCV

In [15]:
param_grid={'n_neighbors':[1,2,3,4,5,6,7,8,9],
            'p':[1,2],
            'weights':['uniform','distance']
    
}

In [16]:
grid=GridSearchCV(KNeighborsClassifier(),param_grid,refit=True,verbose=3)

In [17]:
grid.fit(X_train,y_train)

Fitting 5 folds for each of 36 candidates, totalling 180 fits
[CV 1/5] END n_neighbors=1, p=1, weights=uniform;, score=0.881 total time=   0.0s
[CV 2/5] END n_neighbors=1, p=1, weights=uniform;, score=0.896 total time=   0.0s
[CV 3/5] END n_neighbors=1, p=1, weights=uniform;, score=0.881 total time=   0.0s
[CV 4/5] END n_neighbors=1, p=1, weights=uniform;, score=0.881 total time=   0.0s
[CV 5/5] END n_neighbors=1, p=1, weights=uniform;, score=0.881 total time=   0.0s
[CV 1/5] END n_neighbors=1, p=1, weights=distance;, score=0.881 total time=   0.0s
[CV 2/5] END n_neighbors=1, p=1, weights=distance;, score=0.896 total time=   0.0s
[CV 3/5] END n_neighbors=1, p=1, weights=distance;, score=0.881 total time=   0.0s
[CV 4/5] END n_neighbors=1, p=1, weights=distance;, score=0.881 total time=   0.0s
[CV 5/5] END n_neighbors=1, p=1, weights=distance;, score=0.881 total time=   0.0s
[CV 1/5] END n_neighbors=1, p=2, weights=uniform;, score=0.873 total time=   0.0s
[CV 2/5] END n_neighbors=1, p=2

[CV 3/5] END n_neighbors=9, p=2, weights=uniform;, score=0.896 total time=   0.0s
[CV 4/5] END n_neighbors=9, p=2, weights=uniform;, score=0.903 total time=   0.0s
[CV 5/5] END n_neighbors=9, p=2, weights=uniform;, score=0.896 total time=   0.0s
[CV 1/5] END n_neighbors=9, p=2, weights=distance;, score=0.925 total time=   0.0s
[CV 2/5] END n_neighbors=9, p=2, weights=distance;, score=0.873 total time=   0.0s
[CV 3/5] END n_neighbors=9, p=2, weights=distance;, score=0.888 total time=   0.0s
[CV 4/5] END n_neighbors=9, p=2, weights=distance;, score=0.858 total time=   0.0s
[CV 5/5] END n_neighbors=9, p=2, weights=distance;, score=0.888 total time=   0.0s


In [18]:
grid.best_params_

{'n_neighbors': 9, 'p': 2, 'weights': 'uniform'}

In [20]:
grid_pred=grid.predict(X_test)

In [23]:
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
print(accuracy_score(grid_pred,y_test))
print(classification_report(grid_pred,y_test))
print(confusion_matrix(grid_pred,y_test))

0.9121212121212121
              precision    recall  f1-score   support

           0       0.92      0.91      0.91       172
           1       0.90      0.92      0.91       158

    accuracy                           0.91       330
   macro avg       0.91      0.91      0.91       330
weighted avg       0.91      0.91      0.91       330

[[156  16]
 [ 13 145]]
