In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

In [27]:
from sklearn.datasets import make_classification

# Synthetic binary-classification data: 1000 samples, 3 features (1 of them redundant).
X, y = make_classification(
    n_samples=1000,
    n_features=3,
    n_redundant=1,
    n_classes=2,
    random_state=999,
)

In [3]:
from sklearn.model_selection import train_test_split

# Hold out a 0.33 fraction of the samples for evaluation; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.33,
    random_state=42,
)

In [4]:
from sklearn.neighbors import KNeighborsClassifier

In [5]:
# 5-nearest-neighbour classifier: fit on the training split, then label the held-out samples.
classifier = KNeighborsClassifier(algorithm='auto', n_neighbors=5).fit(X_train, y_train)
y_pred = classifier.predict(X_test)

In [6]:
from sklearn.metrics import confusion_matrix, accuracy_score, classification_report

# Report the confusion matrix, the accuracy and the per-class report for the
# held-out predictions, each section separated by a dashed rule.
print(
    confusion_matrix(y_test, y_pred),
    '--------------------------------------------------------',
    accuracy_score(y_test, y_pred),
    '-------------------------------------------------------',
    classification_report(y_test, y_pred),
    sep='\n',
)

[[158  11]
 [ 20 141]]
--------------------------------------------------------
0.906060606060606
-------------------------------------------------------
              precision    recall  f1-score   support

           0       0.89      0.93      0.91       169
           1       0.93      0.88      0.90       161

    accuracy                           0.91       330
   macro avg       0.91      0.91      0.91       330
weighted avg       0.91      0.91      0.91       330



# Regressor

In [7]:
from sklearn.datasets import make_regression

# Synthetic regression data: 1000 samples, 2 features, noise=10 on the target.
# Note that this rebinds the `X` and `y` names used by the classification cells.
X, y = make_regression(
    n_samples=1000,
    n_features=2,
    noise=10,
    random_state=42,
)

In [8]:
from sklearn.model_selection import train_test_split

# Same 0.33 hold-out fraction and seed as the classification split, applied to the regression data.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.33,
    random_state=42,
)

In [9]:
from sklearn.neighbors import KNeighborsRegressor

In [10]:
# 5-nearest-neighbour regressor: fit on the training split, then predict the held-out targets.
regressor = KNeighborsRegressor(algorithm='auto', n_neighbors=5).fit(X_train, y_train)
y_pred = regressor.predict(X_test)

In [11]:
from sklearn.metrics import r2_score, mean_absolute_error, mean_squared_error

# Print R^2, mean absolute error and mean squared error for the held-out
# predictions, one value per line in that order.
print(
    r2_score(y_test, y_pred),
    mean_absolute_error(y_test, y_pred),
    mean_squared_error(y_test, y_pred),
    sep='\n',
)

0.9155927746462499
9.269174435861805
132.70133826064995


# Hyperparameter Tuning

In [28]:
from sklearn.model_selection import GridSearchCV

import warnings
# NOTE(review): with no category or module filter this silences every warning
# raised after this cell runs, not only those emitted during the grid search.
# Consider narrowing it to the specific warning class that is actually noisy.
warnings.filterwarnings('ignore')

In [29]:
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split

# Rebuild the classification dataset before splitting. The regression section
# above rebinds `X` and `y` to make_regression output, so on a top-to-bottom
# run the classifier grid search below would otherwise receive continuous
# targets and fail. The arguments mirror the classification cell at the top of
# the notebook so the tuned model is evaluated on the same data as the baseline.
X, y = make_classification(n_samples=1000, n_classes=2, n_redundant=1, n_features=3, random_state=999)

# Same 0.33 hold-out fraction and seed as the baseline classifier's split.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=42)

In [30]:
# Estimator created without explicit hyperparameters; it is handed to
# GridSearchCV below together with the parameter grid to search over.
classifier = KNeighborsClassifier()

In [31]:
# Search space for the KNN grid search: neighbour counts 1 through 10, three
# neighbour-search algorithms, and p in {1, 2}.
parameters = dict(
    n_neighbors=list(range(1, 11)),
    algorithm=('auto', 'ball_tree', 'kd_tree'),
    p=[1, 2],
)

In [35]:
# 5-fold cross-validated search over `parameters`, scoring each candidate by accuracy.
clf = GridSearchCV(
    estimator=classifier,
    param_grid=parameters,
    scoring='accuracy',
    cv=5,
)

clf.fit(X_train, y_train)

In [37]:
# Display the parameter combination selected by the fitted grid search.
clf.best_params_

{'algorithm': 'auto', 'n_neighbors': 9, 'p': 2}

In [38]:
# Build the final classifier from whatever the grid search selected rather than
# copying the values by hand from the previous cell's output; hard-coded values
# silently go stale if the grid, the data or the seed changes.
classifier = KNeighborsClassifier(**clf.best_params_)

classifier.fit(X_train, y_train)

# Predictions on the held-out split, used by the evaluation cell that follows.
y_pred = classifier.predict(X_test)

In [39]:
from sklearn.metrics import confusion_matrix, accuracy_score, classification_report

# Report the confusion matrix, the accuracy and the per-class report for the
# tuned classifier's held-out predictions, separated by dashed rules.
print(
    confusion_matrix(y_test, y_pred),
    '--------------------------------------------------------',
    accuracy_score(y_test, y_pred),
    '-------------------------------------------------------',
    classification_report(y_test, y_pred),
    sep='\n',
)

[[156  13]
 [ 16 145]]
--------------------------------------------------------
0.9121212121212121
-------------------------------------------------------
              precision    recall  f1-score   support

           0       0.91      0.92      0.91       169
           1       0.92      0.90      0.91       161

    accuracy                           0.91       330
   macro avg       0.91      0.91      0.91       330
weighted avg       0.91      0.91      0.91       330

