In [2]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np

In [3]:
from sklearn.datasets import make_classification

In [5]:
X, y = make_classification(
    n_samples=1000,
    n_features=3,
    n_redundant=1,
    n_classes=2,
    random_state=999
)

In [6]:
from sklearn.model_selection import train_test_split

In [7]:
X_train, X_test, y_train, y_test = train_test_split(X,y, test_size=0.33, random_state=42)

In [8]:
from sklearn.neighbors import KNeighborsClassifier

In [9]:
classifier = KNeighborsClassifier(n_neighbors=5, algorithm="auto")
classifier.fit(X_train, y_train)

In [10]:
y_pred = classifier.predict(X_test)

In [11]:
from sklearn.metrics import confusion_matrix, accuracy_score, classification_report

In [13]:
print(confusion_matrix(y_pred, y_test))
print(accuracy_score(y_pred, y_test))
print(classification_report(y_pred, y_test))

[[158  20]
 [ 11 141]]
0.906060606060606
              precision    recall  f1-score   support

           0       0.93      0.89      0.91       178
           1       0.88      0.93      0.90       152

    accuracy                           0.91       330
   macro avg       0.91      0.91      0.91       330
weighted avg       0.91      0.91      0.91       330



## Task 
- Find best k value applying a for loop

In [32]:
k = [5,6,7,8,9,10,11,12,13,14,15]
accuracy = {}
for i in k:
    classifier = KNeighborsClassifier(n_neighbors=i, weights="uniform", algorithm="auto", p=2)
    classifier.fit(X_train, y_train)
    y_pred = classifier.predict(X_test)
    accuracy[i] = accuracy_score(y_pred, y_test)
print(accuracy)

{5: 0.906060606060606, 6: 0.896969696969697, 7: 0.9151515151515152, 8: 0.9090909090909091, 9: 0.9121212121212121, 10: 0.906060606060606, 11: 0.9030303030303031, 12: 0.9030303030303031, 13: 0.9030303030303031, 14: 0.9030303030303031, 15: 0.906060606060606}


In [33]:
from sklearn.model_selection import GridSearchCV

In [46]:
params = {
    "n_neighbors" : [5,6,7,8,9,10,11,12,13,14,15],
    "weights" : ["uniform", "distance"],
    "algorithm" : ["auto"],
    "p" : [1,2]
}

In [47]:
hyperPT = GridSearchCV(estimator=KNeighborsClassifier(), param_grid=params, cv = 3, scoring="accuracy")

In [48]:
hyperPT.fit(X_train, y_train)

In [49]:
hyperPT.best_params_

{'algorithm': 'auto', 'n_neighbors': 15, 'p': 2, 'weights': 'uniform'}

In [50]:
y_pred1 = hyperPT.predict(X_test)

In [51]:
accuracy_score(y_pred1, y_test)

0.906060606060606

## KNN Regressor

In [59]:
from sklearn.datasets import make_regression

In [64]:
X,y = make_regression(n_samples=1000, n_features=2, noise=10, random_state=42)

In [68]:
X_train, X_test, y_train, y_test = train_test_split(X,y, test_size=0.33, random_state=42)

In [69]:
from sklearn.neighbors import KNeighborsRegressor

In [70]:
regressor = KNeighborsRegressor(n_neighbors=6)
regressor.fit(X_train, y_train)

In [71]:
y_pred = regressor.predict(X_test)

In [72]:
from sklearn.metrics import r2_score, mean_absolute_error, mean_squared_error

In [77]:
r2_score(y_test, y_pred)

0.9189275159979495

In [81]:
params = {
    "n_neighbors" : list(range(5,20)),
    "weights" : ["uniform", "distance"],
    "algorithm" : ["auto"],
    "p" : [1,2]
}

In [82]:
HPT = GridSearchCV(estimator=KNeighborsRegressor(), param_grid=params, scoring="accuracy", cv=5)

In [83]:
HPT.fit(X_train, y_train)

Traceback (most recent call last):
  File "C:\Users\theha\AppData\Local\Programs\Python\Python312\Lib\site-packages\sklearn\model_selection\_validation.py", line 977, in _score
    scores = scorer(estimator, X_test, y_test, **score_params)
             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\theha\AppData\Local\Programs\Python\Python312\Lib\site-packages\sklearn\metrics\_scorer.py", line 253, in __call__
    return self._score(partial(_cached_call, None), estimator, X, y_true, **_kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\theha\AppData\Local\Programs\Python\Python312\Lib\site-packages\sklearn\metrics\_scorer.py", line 350, in _score
    return self._sign * self._score_func(y_true, y_pred, **scoring_kwargs)
                        ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\theha\AppData\Local\Programs\Python\Python312\Lib\site-packages\sklearn\utils\_param_validation.py

In [85]:
HPT.best_params_

{'algorithm': 'auto', 'n_neighbors': 5, 'p': 1, 'weights': 'uniform'}

In [87]:
y_pred = HPT.predict(X_test)

In [88]:
r2_score(y_test, y_pred)

0.9151879125172033