In [1]:
# KNN using Pipeline

In [70]:
import pandas as pd
import numpy as np

In [71]:
# Load the dataset. The path is relative, so 'KNN_Project_Data' (no file
# extension) must sit in the notebook's working directory; it is parsed as CSV.
df = pd.read_csv('KNN_Project_Data')

In [72]:
# Preview the first five rows: ten numeric feature columns followed by the
# binary 'TARGET CLASS' label.
df.head()

Unnamed: 0,XVPM,GWYH,TRAT,TLLZ,IGGA,HYKR,EDFS,GUUB,MGJM,JHZC,TARGET CLASS
0,1636.670614,817.988525,2565.995189,358.347163,550.417491,1618.870897,2147.641254,330.727893,1494.878631,845.136088,0
1,1013.40276,577.587332,2644.141273,280.428203,1161.873391,2084.107872,853.404981,447.157619,1193.032521,861.081809,1
2,1300.035501,820.518697,2025.854469,525.562292,922.206261,2552.355407,818.676686,845.491492,1968.367513,1647.186291,1
3,1059.347542,1066.866418,612.000041,480.827789,419.467495,685.666983,852.86781,341.664784,1154.391368,1450.935357,0
4,1018.340526,1313.679056,950.622661,724.742174,843.065903,1370.554164,905.469453,658.118202,539.45935,1899.850792,0


In [73]:
from sklearn.model_selection import train_test_split,GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import KNeighborsClassifier
from sklearn.pipeline import Pipeline
from sklearn.metrics import confusion_matrix,classification_report

In [74]:
# Split the frame into the label (y) and the remaining feature columns (X).
y = df['TARGET CLASS']
X = df.drop(columns='TARGET CLASS')

# Hold out 30% of the rows for testing; the fixed random_state makes the
# split repeatable across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=101,
)

In [75]:
# Creating a Pipeline

In [76]:
# Two-step estimator: standardise the features, then classify with KNN.
# The step names matter: 'knn' is the prefix used by the 'knn__n_neighbors'
# key of the parameter grid.
pipe = Pipeline(
    steps=[
        ('scaler', StandardScaler()),
        # n_neighbors=30 is only the initial value; the grid search sets
        # n_neighbors from param_grid for every candidate it evaluates.
        ('knn', KNeighborsClassifier(n_neighbors=30)),
    ]
)

In [77]:
# Define the parameter grid.
# Keys use the '<step name>__<parameter>' form, so 'knn__n_neighbors' targets
# the n_neighbors argument of the pipeline's 'knn' step.
param_grid = {
    'knn__n_neighbors': range(1, 40),  # Candidate k values 1 through 39 (range excludes the stop value 40)
}

In [78]:
# Exhaustive search over param_grid using 5-fold cross-validation, with
# candidates ranked by classification accuracy.
grid_search = GridSearchCV(
    estimator=pipe,
    param_grid=param_grid,
    cv=5,
    scoring='accuracy',
)

In [79]:
# Run the search on the training split only; X_test / y_test are not touched
# here and are used solely for the final evaluation.
grid_search.fit(X_train,y_train)

In [80]:
# Show the parameter setting that achieved the best mean cross-validated
# accuracy during the search.
grid_search.best_params_

{'knn__n_neighbors': 32}

In [81]:
# Keep a handle on the winning pipeline. No `refit` argument was passed to
# GridSearchCV, so this relies on its default (refit=True), under which
# best_estimator_ is refitted on the full training split with the best parameters.
best_model = grid_search.best_estimator_

In [82]:
# Bare expression as the last line of the cell, so the notebook displays the
# selected pipeline and its steps.
best_model

In [83]:
# Predict labels for the held-out test rows; X_test passes through the
# pipeline's scaler step before reaching the KNN classifier.
predictions = best_model.predict(X_test)

In [84]:
# Confusion matrix on the test split: rows are the true classes and columns
# the predicted classes, so off-diagonal entries are misclassifications.
print(confusion_matrix(y_test,predictions))

[[125  27]
 [ 25 123]]


In [85]:
# Per-class precision, recall, F1 and support on the test split, plus overall
# accuracy and the macro / weighted averages.
print(classification_report(y_test,predictions))

              precision    recall  f1-score   support

           0       0.83      0.82      0.83       152
           1       0.82      0.83      0.83       148

    accuracy                           0.83       300
   macro avg       0.83      0.83      0.83       300
weighted avg       0.83      0.83      0.83       300

