In [28]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt 
%matplotlib inline 
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import accuracy_score,confusion_matrix,classification_report

In [3]:
X,y = make_classification(
    n_samples = 1000,
    n_features= 3,
    n_redundant= 1,
    n_classes= 2,
    random_state= 999
)

In [6]:
X_train,X_test,y_train,y_test = train_test_split(X,y,random_state=42,test_size=0.3)

In [8]:
X

array([[-0.33504974,  0.02852654,  1.16193084],
       [-1.37746253, -0.4058213 ,  0.44359618],
       [-1.04520026, -0.72334759, -3.10470423],
       ...,
       [-0.75602574, -0.51816111, -2.20382324],
       [ 0.56066316, -0.07335845, -2.15660348],
       [-1.87521902, -1.11380394, -4.04620773]])

In [9]:
y

array([0, 0, 1, 1, 1, 0, 1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 0, 0, 1, 0,
       0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 1,
       1, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0,
       1, 0, 0, 0, 1, 1, 1, 0, 1, 0, 0, 1, 0, 1, 0, 0, 1, 1, 1, 1, 1, 1,
       1, 1, 0, 0, 1, 0, 0, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1,
       0, 1, 1, 0, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1,
       0, 0, 0, 1, 1, 0, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1,
       0, 1, 1, 1, 0, 1, 1, 1, 1, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 1, 1, 1,
       1, 0, 1, 0, 1, 0, 1, 0, 1, 1, 1, 0, 1, 0, 1, 1, 1, 0, 0, 1, 1, 1,
       0, 1, 1, 1, 1, 1, 0, 1, 0, 1, 0, 0, 1, 0, 1, 1, 1, 0, 0, 0, 0, 1,
       0, 1, 0, 1, 0, 1, 0, 1, 1, 1, 0, 1, 0, 0, 1, 1, 0, 1, 1, 1, 0, 0,
       1, 0, 0, 1, 1, 0, 1, 0, 1, 1, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0,
       0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 1,
       0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 1, 1, 0, 0, 0,

In [11]:
knc = KNeighborsClassifier()

In [30]:
param_grid = {
    'n_neighbors': [i for i in range(5, 10)],
    'algorithm': ['auto','kd_tree','ball_tree','brute'],
    'leaf_size': [30, 40, 50],
    'p': [1, 2],
    'n_jobs': [-1]
}


In [31]:
grid = GridSearchCV(knc,param_grid,cv =5)

In [32]:
grid.fit(X_train,y_train)

In [34]:
grid.best_params_

{'algorithm': 'auto', 'leaf_size': 30, 'n_jobs': -1, 'n_neighbors': 9, 'p': 1}

In [35]:
grid.best_score_

0.9014285714285715

In [36]:
y_pred = grid.predict(X_test)

In [37]:
print(accuracy_score(y_pred,y_test))
print(confusion_matrix(y_pred,y_test))
print(classification_report(y_pred,y_test))

0.9133333333333333
[[143  15]
 [ 11 131]]
              precision    recall  f1-score   support

           0       0.93      0.91      0.92       158
           1       0.90      0.92      0.91       142

    accuracy                           0.91       300
   macro avg       0.91      0.91      0.91       300
weighted avg       0.91      0.91      0.91       300



### Key Insights

* K-Nearest Neighbour(KNN) model acheived an accuracy of 91.33% on the test set , showing overall strong performance

* The Confusion matrix revelas that the model classifies both classes (0 and 1) accurately with only a small number of missclassifications:
    * False Positives : 15
    * False Negatives : 11
* Precision and Recall are both 90% for both classes, indicating the model is  balanced and not biased towards any particualr class.
* F1-Score is around 0.91 for both classes.The model classifies both classes fairly.
* The hyperparameters were tuned using GridSearchCV, improving the model's ability to generalize unseen data.