# Machine Learning: kNN Algorithm
- kNN - K Nearest Neighbours Algorithm.

In [99]:
import pandas as pd
from sklearn.metrics import precision_score, accuracy_score, recall_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import GridSearchCV

## Load & Read the Dataset:

In [100]:
# Parse heart.csv (path is relative to the kernel's working directory) into a DataFrame.
heart_df = pd.read_csv(filepath_or_buffer="heart.csv")
# Preview the first five rows as the cell output.
heart_df.head(n=5)

Unnamed: 0,age,sex,cp,trestbps,chol,fbs,restecg,thalach,exang,oldpeak,slope,ca,thal,target
0,63,1,3,145,233,1,0,150,0,2.3,0,0,1,1
1,37,1,2,130,250,0,1,187,0,3.5,0,0,2,1
2,41,0,1,130,204,0,0,172,0,1.4,2,0,2,1
3,56,1,1,120,236,0,1,178,0,0.8,2,0,2,1
4,57,0,0,120,354,0,1,163,1,0.6,2,0,2,1


## Split Data:

In [101]:
# Target vector: the "target" column on its own.
y = heart_df["target"]
# Feature matrix: every remaining column.
X = heart_df.drop(columns=["target"])

In [102]:
# Jupyter renders only a cell's final expression automatically, so a bare `X`
# line above `y` is evaluated and silently discarded. Show the features
# explicitly (first rows only), then let `y` render as the cell output.
display(X.head())
y

0      1
1      1
2      1
3      1
4      1
      ..
298    0
299    0
300    0
301    0
302    0
Name: target, Length: 303, dtype: int64

## Train/Test Split:

In [103]:
# Hold out 20% of the rows for testing; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

## Scaling Data using StandardScaler:

In [104]:
# Learn the per-column scaling statistics from the training rows only.
scaler = StandardScaler().fit(X_train)

# Apply those training statistics to both partitions, so the test rows
# never influence how the features are scaled.
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [105]:
# Inspect the standardised training features (a NumPy array, as the output below shows).
X_train_scaled

array([[-1.35679832,  0.72250438,  0.00809909, ...,  0.95390513,
        -0.68970073, -0.50904773],
       [ 0.38508599,  0.72250438, -0.97189094, ...,  0.95390513,
        -0.68970073,  1.17848036],
       [-0.92132724,  0.72250438,  0.98808912, ..., -0.69498803,
        -0.68970073, -0.50904773],
       ...,
       [ 1.58263146,  0.72250438,  1.96807914, ..., -0.69498803,
         0.32186034, -0.50904773],
       [-0.92132724,  0.72250438, -0.97189094, ...,  0.95390513,
        -0.68970073,  1.17848036],
       [ 0.92942484, -1.38407465,  0.00809909, ...,  0.95390513,
         1.33342142, -0.50904773]])

## Creating a kNN Model for Scaled Data:

In [106]:
# Baseline classifier: each prediction is a vote among the 3 nearest training points.
knn_classifier = KNeighborsClassifier(n_neighbors=3)
# Fit on the standardised training features; as the cell's last expression,
# the fitted estimator is also what the cell displays.
knn_classifier.fit(X_train_scaled, y_train)

## Calculating Predictions Based on the Fitted Model:

In [107]:
# Predict class labels for the held-out test rows using the fitted classifier.
y_pred = knn_classifier.predict(X_test_scaled)

## Evaluation Metrics:

In [108]:
# Store the scores under names distinct from the metric functions.
# Rebinding `precision_score`, `accuracy_score` or `recall_score` to their
# float results would replace the imported sklearn functions, so re-running
# this cell or calling them later would raise "TypeError: ... is not callable".
precision = precision_score(y_test, y_pred)
accuracy = accuracy_score(y_test, y_pred)
recall = recall_score(y_test, y_pred)

# NOTE(review): the first label misspells "Precision"; the text is left
# unchanged so that it still matches the recorded output.
print(f"Precoison Score: {precision}")
print(f"Accuracy Score: {accuracy}")
print(f"Recall Score: {recall}")

Precoison Score: 0.9259259259259259
Accuracy Score: 0.8524590163934426
Recall Score: 0.78125


## Making Predictions for a Different k Value (k = 7):

In [109]:
# Re-bind the metric functions and the estimator class for this cell.
from sklearn.metrics import precision_score, accuracy_score, recall_score
from sklearn.neighbors import KNeighborsClassifier

# Same workflow as the baseline, now voting over the 7 nearest neighbours.
# fit() returns the estimator itself, so construction and fitting can be chained.
knn_classifier = KNeighborsClassifier(n_neighbors=7).fit(X_train_scaled, y_train)
y_pred = knn_classifier.predict(X_test_scaled)

# Score the test-set predictions with each metric in turn.
precision, accuracy, recall = (
    metric(y_test, y_pred)
    for metric in (precision_score, accuracy_score, recall_score)
)

# One line of output per metric.
print(
    f"Precision Score: {precision}",
    f"Accuracy Score: {accuracy}",
    f"Recall Score: {recall}",
    sep="\n",
)


Precision Score: 0.9354838709677419
Accuracy Score: 0.9180327868852459
Recall Score: 0.90625


## Using Cross Validation: GridSearchCV
- For hyperparameter tuning.

In [110]:
# Candidate neighbourhood sizes for the search.
param_grid = dict(n_neighbors=[3, 5, 7, 9])

# Unfitted estimator with default settings; the search supplies n_neighbors for each candidate.
classifier = KNeighborsClassifier()

# 5-fold cross-validated search over the grid above.
classifierCV_model = GridSearchCV(estimator=classifier, param_grid=param_grid, cv=5)

## Fitting the GridSearchCV Model:

In [111]:
# Run the cross-validated search on the standardised training data;
# as the cell's last expression, the fitted search object is displayed.
classifierCV_model.fit(X_train_scaled, y_train)

## Calculating Predictions Based on the Fitted Model:

In [112]:
# Predict on the held-out test rows. GridSearchCV forwards predict() to the
# best estimator it refit after the search (refit is left at its default here).
# This rebinds y_pred, replacing the predictions from the k=7 model.
y_pred = classifierCV_model.predict(X_test_scaled)

## Evaluation Metrics:

In [113]:
# Store the scores under names distinct from the metric functions.
# Rebinding `precision_score`, `accuracy_score` or `recall_score` to their
# float results would replace the imported sklearn functions, so re-running
# this cell or calling them later would raise "TypeError: ... is not callable".
cv_precision = precision_score(y_test, y_pred)
cv_accuracy = accuracy_score(y_test, y_pred)
cv_recall = recall_score(y_test, y_pred)

# NOTE(review): the first label misspells "Precision"; the text is left
# unchanged so that it still matches the recorded output.
print(f"Precoison Score: {cv_precision}")
print(f"Accuracy Score: {cv_accuracy}")
print(f"Recall Score: {cv_recall}")

Precoison Score: 0.9333333333333333
Accuracy Score: 0.9016393442622951
Recall Score: 0.875


## Best Parameter & Results:

In [114]:
# Collect the per-candidate cross-validation results into a DataFrame.
res = pd.DataFrame(classifierCV_model.cv_results_)
# End the cell with the frame itself so Jupyter renders it as a table;
# print() falls back to plain text that wraps the columns across several blocks.
res

   mean_fit_time  std_fit_time  mean_score_time  std_score_time  \
0       0.002364      0.000780         0.007305        0.001825   
1       0.001716      0.000094         0.005852        0.000178   
2       0.001408      0.000233         0.004864        0.000602   
3       0.001268      0.000090         0.004655        0.000313   

   param_n_neighbors              params  split0_test_score  \
0                  3  {'n_neighbors': 3}           0.816327   
1                  5  {'n_neighbors': 5}           0.775510   
2                  7  {'n_neighbors': 7}           0.755102   
3                  9  {'n_neighbors': 9}           0.734694   

   split1_test_score  split2_test_score  split3_test_score  split4_test_score  \
0           0.795918           0.854167           0.750000           0.812500   
1           0.836735           0.833333           0.770833           0.854167   
2           0.857143           0.812500           0.750000           0.833333   
3           0.836735    