# KNN 

In [33]:
import numpy as np 
import pandas as pd 
import seaborn as sns
%matplotlib inline
from matplotlib import pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.datasets import make_classification # for classification dataset
from sklearn.neighbors import KNeighborsClassifier # Classification Algo
from sklearn.metrics import accuracy_score , confusion_matrix, classification_report # to check accuracy of KNeighborsClassifier
from sklearn.model_selection import GridSearchCV # for model selection
from sklearn.datasets import make_regression # for Regression dataset
from sklearn.neighbors import KNeighborsRegressor # # Regression Algo
from sklearn.metrics import r2_score  # to check accuracy of KNeighborsRegressor
from sklearn.metrics import mean_absolute_error , mean_squared_error # to check error's 

## KNN Classifier

In [2]:
X , y = make_classification(
    n_samples=1000,
    n_features=3,
    n_redundant=1,
    n_classes=2,
    random_state=0
)

In [3]:
# train test split
X_train, X_test, y_train, y_test = train_test_split(
     X, y, test_size=0.33, random_state=0)

In [4]:
knnClf = KNeighborsClassifier(n_neighbors=3,algorithm='auto',p=2 ,# this will use manhattan distance & I would set it 1 this would use euclidean distance
                              n_jobs=-1,weights='uniform'
                              )
knnClf

In [5]:
knnClf.fit(X_train,y_train)

In [6]:
y_pred = knnClf.predict(X_test)

In [7]:
accuracy_score(y_true=y_test,y_pred=y_pred)

0.9454545454545454

In [8]:
confusion_matrix(y_test,y_pred)

array([[150,   6],
       [ 12, 162]], dtype=int64)

In [9]:
print(classification_report(y_test,y_pred))

              precision    recall  f1-score   support

           0       0.93      0.96      0.94       156
           1       0.96      0.93      0.95       174

    accuracy                           0.95       330
   macro avg       0.95      0.95      0.95       330
weighted avg       0.95      0.95      0.95       330



## **Hyper parameter tuning**

In [10]:
X_train1, X_test1, y_train1, y_test1 = train_test_split(
     X, y, test_size=0.33, random_state=0)

In [11]:
paramGrid = {
    'n_neighbors' : [2,3,4,5,6,7,8,10],
    'algorithm' : ('kd_tree','ball_tree'),
    'leaf_size' : [70,20,30,40,60] ,
    'p' : [1,2]
}

In [12]:
knnCV = GridSearchCV(estimator=KNeighborsClassifier(),param_grid=paramGrid,cv=10,verbose=3)

In [13]:
knnCV.fit(X_train1,y_train1)

Fitting 10 folds for each of 160 candidates, totalling 1600 fits
[CV 1/10] END algorithm=kd_tree, leaf_size=70, n_neighbors=2, p=1;, score=0.866 total time=   0.0s
[CV 2/10] END algorithm=kd_tree, leaf_size=70, n_neighbors=2, p=1;, score=0.955 total time=   0.0s
[CV 3/10] END algorithm=kd_tree, leaf_size=70, n_neighbors=2, p=1;, score=0.940 total time=   0.0s
[CV 4/10] END algorithm=kd_tree, leaf_size=70, n_neighbors=2, p=1;, score=0.970 total time=   0.0s
[CV 5/10] END algorithm=kd_tree, leaf_size=70, n_neighbors=2, p=1;, score=0.970 total time=   0.0s
[CV 6/10] END algorithm=kd_tree, leaf_size=70, n_neighbors=2, p=1;, score=0.940 total time=   0.0s
[CV 7/10] END algorithm=kd_tree, leaf_size=70, n_neighbors=2, p=1;, score=0.985 total time=   0.0s
[CV 8/10] END algorithm=kd_tree, leaf_size=70, n_neighbors=2, p=1;, score=0.925 total time=   0.0s
[CV 9/10] END algorithm=kd_tree, leaf_size=70, n_neighbors=2, p=1;, score=0.940 total time=   0.0s
[CV 10/10] END algorithm=kd_tree, leaf_size=

In [14]:
knnCV.best_params_

{'algorithm': 'kd_tree', 'leaf_size': 70, 'n_neighbors': 10, 'p': 2}

In [15]:
knnCVPred = KNeighborsClassifier(n_neighbors=10,leaf_size=70,algorithm= 'kd_tree',p=2)
knnCVPred.fit(X_train1,y_train1)
y_pred_knnCVPred = knnCVPred.predict(X_test1)

In [24]:
y_pred_knnCVPred

array([0, 1, 1, 0, 0, 1, 1, 0, 0, 0, 0, 1, 0, 1, 1, 0, 1, 1, 0, 1, 0, 1,
       1, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 0, 0, 1,
       0, 1, 0, 1, 0, 1, 0, 1, 1, 1, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0,
       0, 0, 1, 0, 1, 0, 0, 1, 0, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 0, 0,
       0, 0, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 1, 0,
       1, 1, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 1, 1, 0, 0, 0,
       1, 0, 1, 0, 0, 0, 1, 1, 0, 0, 1, 1, 0, 1, 1, 0, 1, 1, 1, 0, 0, 0,
       0, 0, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 0, 0, 1, 0, 1, 1, 0, 0, 1, 0,
       1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 1, 0, 0, 0, 0,
       0, 1, 1, 0, 0, 1, 1, 0, 1, 1, 1, 1, 0, 1, 0, 0, 1, 0, 1, 1, 1, 0,
       0, 0, 1, 1, 0, 0, 1, 0, 1, 0, 0, 0, 0, 1, 1, 0, 1, 0, 1, 1, 1, 0,
       1, 0, 0, 0, 1, 1, 1, 0, 0, 0, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 1,
       1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0, 0, 1, 1, 0, 1, 0, 1, 1, 1,
       1, 1, 0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 0, 0, 0,

In [16]:
print(accuracy_score(y_true=y_test1,y_pred=y_pred_knnCVPred))
print(confusion_matrix(y_test1,y_pred_knnCVPred))
print(classification_report(y_test,y_pred_knnCVPred))

0.9454545454545454
[[153   3]
 [ 15 159]]
              precision    recall  f1-score   support

           0       0.91      0.98      0.94       156
           1       0.98      0.91      0.95       174

    accuracy                           0.95       330
   macro avg       0.95      0.95      0.95       330
weighted avg       0.95      0.95      0.95       330



### Accuracy has not changed but confusion metrix is changed

# KNN Regresssor

In [17]:
X,y = make_regression(n_samples=1000,n_features=2,noise=10,random_state=0)

In [18]:
X_train2, X_test2, y_train2, y_test2 = train_test_split(
     X, y, test_size=0.33, random_state=0)

In [38]:
knnRegressor = KNeighborsRegressor(n_neighbors=7,n_jobs=-1,algorithm='ball_tree')
knnRegressor.fit(X_train2,y_train2)
y_pred_regressor = knnRegressor.predict(X_test2)

In [39]:
mse = mean_squared_error(y_test,y_pred_regressor)
mae = mean_absolute_error(y_test,y_pred_regressor)
rmse = np.sqrt(mse)
print(mse)
print(mae)
print(rmse)

2732.8745581570242
43.15244952318916
52.27690272153682


In [40]:
score_r2 = r2_score(y_test,y_pred)
score_r2

0.7811671087533156

In [41]:
# Ajdusted R Squared
score_Adjusted_r2 =1-(1-score_r2)*(len(y_test)-1)/(len(y_test)-X_test.shape[1]-1)
score_Adjusted_r2

0.7791533091406161