# Machine Learning: kNN Algorithm
- kNN: the k-Nearest Neighbours algorithm.

In [177]:
import pandas as pd
from sklearn.metrics import precision_score, accuracy_score, recall_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import GridSearchCV
from sklearn.pipeline import Pipeline

## Load & Read the Dataset:

In [178]:
# Load the dataset from heart.csv (path is relative to the notebook's working directory).
heart_df = pd.read_csv("heart.csv")
# Preview the first rows to check the columns and value ranges.
heart_df.head()

Unnamed: 0,age,sex,cp,trestbps,chol,fbs,restecg,thalach,exang,oldpeak,slope,ca,thal,target
0,63,1,3,145,233,1,0,150,0,2.3,0,0,1,1
1,37,1,2,130,250,0,1,187,0,3.5,0,0,2,1
2,41,0,1,130,204,0,0,172,0,1.4,2,0,2,1
3,56,1,1,120,236,0,1,178,0,0.8,2,0,2,1
4,57,0,0,120,354,0,1,163,1,0.6,2,0,2,1


## Split Data:

In [179]:
# Features: every column except the label. Label: the "target" column.
X = heart_df.drop(columns=["target"])
y = heart_df.loc[:, "target"]

In [180]:
# Only a cell's last expression is rendered: `y` is displayed, while the bare
# `X` below is evaluated without being shown.
X
y

0      1
1      1
2      1
3      1
4      1
      ..
298    0
299    0
300    0
301    0
302    0
Name: target, Length: 303, dtype: int64

## Train/Test Split:

In [181]:
# Hold out 20% of the rows for testing; the fixed random_state keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

## Scaling Data using StandardScaler:

In [182]:
# Learn the scaling statistics from the training rows only, then apply the
# same fitted transformation to both partitions.
scaler = StandardScaler().fit(X_train)

X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [183]:
# Inspect the standardized training features (rendered as a NumPy array).
X_train_scaled

array([[-1.35679832,  0.72250438,  0.00809909, ...,  0.95390513,
        -0.68970073, -0.50904773],
       [ 0.38508599,  0.72250438, -0.97189094, ...,  0.95390513,
        -0.68970073,  1.17848036],
       [-0.92132724,  0.72250438,  0.98808912, ..., -0.69498803,
        -0.68970073, -0.50904773],
       ...,
       [ 1.58263146,  0.72250438,  1.96807914, ..., -0.69498803,
         0.32186034, -0.50904773],
       [-0.92132724,  0.72250438, -0.97189094, ...,  0.95390513,
        -0.68970073,  1.17848036],
       [ 0.92942484, -1.38407465,  0.00809909, ...,  0.95390513,
         1.33342142, -0.50904773]])

## Creating a kNN Model for Scaled Data:

In [184]:
# Baseline kNN classifier with k = 3, fitted on the standardized training features.
knn_classifier = KNeighborsClassifier(n_neighbors=3)
knn_classifier.fit(X_train_scaled, y_train)

## Calculating Predictions Based on the Fitted Model:

In [185]:
# Predict labels for the standardized test features.
y_pred = knn_classifier.predict(X_test_scaled)

## Evaluation Metrics:

In [186]:
# Score the current predictions (y_pred) against the held-out labels.
# The results are stored under names that differ from the imported sklearn
# metric functions, so the functions stay callable and the cell can be re-run.
precision = precision_score(y_test, y_pred)
accuracy = accuracy_score(y_test, y_pred)
recall = recall_score(y_test, y_pred)

print(f"Precision Score: {precision}")
print(f"Accuracy Score: {accuracy}")
print(f"Recall Score: {recall}")

Precoison Score: 0.9259259259259259
Accuracy Score: 0.8524590163934426
Recall Score: 0.78125


## Making Predictions for Another k Value (k = 7):

In [187]:
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import precision_score, accuracy_score, recall_score

# Refit the classifier with k = 7 and predict on the standardized test set.
knn_classifier = KNeighborsClassifier(n_neighbors=7).fit(X_train_scaled, y_train)
y_pred = knn_classifier.predict(X_test_scaled)

# Evaluate the three metrics in one pass, in the order they are reported below.
precision, accuracy, recall = (
    metric(y_test, y_pred)
    for metric in (precision_score, accuracy_score, recall_score)
)

print(f"Precision Score: {precision}")
print(f"Accuracy Score: {accuracy}")
print(f"Recall Score: {recall}")


Precision Score: 0.9354838709677419
Accuracy Score: 0.9180327868852459
Recall Score: 0.90625


## Using Cross Validation: GridSearchCV
- For Hyperparameter Tuning.

In [188]:
# Candidate neighbourhood sizes; each one is scored by 5-fold cross-validated recall.
param_grid = {"n_neighbors": [3, 5, 7, 9]}
classifier = KNeighborsClassifier()

classifierCV_model = GridSearchCV(estimator=classifier, param_grid=param_grid, scoring="recall", cv=5)

## Fitting the GridSearchCV Model:

In [189]:
# Run the grid search on the already-standardized training data.
# NOTE(review): X_train_scaled was scaled with statistics fitted on the whole
# training set, so the rows of every validation fold contributed to the
# scaling used during cross-validation.
classifierCV_model.fit(X_train_scaled, y_train)

## Calculating Predictions Based on the Fitted Model:

In [190]:
# Predict on the standardized test features through the fitted search object.
# No `refit` argument was passed, so this presumably uses the best candidate
# refit on the full training data (sklearn's documented default) - confirm.
y_pred = classifierCV_model.predict(X_test_scaled)

## Evaluation Metrics:

In [191]:
# Score the grid-search predictions (y_pred) against the held-out labels.
# The results are stored under names that differ from the imported sklearn
# metric functions, so the functions stay callable and the cell can be re-run.
precision = precision_score(y_test, y_pred)
accuracy = accuracy_score(y_test, y_pred)
recall = recall_score(y_test, y_pred)

print(f"Precision Score: {precision}")
print(f"Accuracy Score: {accuracy}")
print(f"Recall Score: {recall}")

Precoison Score: 0.9354838709677419
Accuracy Score: 0.9180327868852459
Recall Score: 0.90625


## Best Parameter & Results:

In [192]:
# Collect the per-candidate cross-validation results (timings, per-split
# scores, mean/std scores) into a DataFrame.
res = pd.DataFrame(classifierCV_model.cv_results_)
# Leave the frame as the cell's last expression so the notebook renders it as
# a table; print() wraps the wide frame into hard-to-read text chunks.
res

   mean_fit_time  std_fit_time  mean_score_time  std_score_time  \
0       0.001565      0.000201         0.006072        0.000295   
1       0.001369      0.000221         0.005481        0.000183   
2       0.001306      0.000094         0.005833        0.000303   
3       0.001015      0.000031         0.005316        0.000151   

   param_n_neighbors              params  split0_test_score  \
0                  3  {'n_neighbors': 3}           0.851852   
1                  5  {'n_neighbors': 5}           0.777778   
2                  7  {'n_neighbors': 7}           0.814815   
3                  9  {'n_neighbors': 9}           0.777778   

   split1_test_score  split2_test_score  split3_test_score  split4_test_score  \
0           0.814815           0.962963           0.884615           0.807692   
1           0.814815           0.925926           0.923077           0.846154   
2           0.925926           0.925926           0.846154           0.846154   
3           0.888889    

In [193]:
# List the n_neighbors value tried for each grid-search candidate.
print(res["param_n_neighbors"])

0    3
1    5
2    7
3    9
Name: param_n_neighbors, dtype: int64


In [194]:
# Show each candidate k next to its mean cross-validated score.
print(res.loc[:, ["param_n_neighbors", "mean_test_score"]])

   param_n_neighbors  mean_test_score
0                  3         0.864387
1                  5         0.857550
2                  7         0.871795
3                  9         0.856980


In [195]:
# Report the parameter setting that the search selected (scored by recall).
print(classifierCV_model.best_params_)

{'n_neighbors': 7}


## Using Pipeline:
- A sequence of data transformers with an optional final predictor.
- Pipeline allows you to sequentially apply a list of transformers to preprocess the data and, if desired, conclude the sequence with a final predictor for predictive modeling.

## Train/Test Split:

In [196]:
# Re-create the split of the raw (unscaled) features for the pipeline workflow.
# test_size and random_state match the earlier split, so this yields the same
# partition as long as X and y have not been modified in between.
X_train, X_test, y_train, y_test = train_test_split(
    X, y,
    test_size = 0.2,
    random_state = 42
)

## Creating a Pipeline (Scaler + kNN) for the Split Data:

In [197]:
# Chain standardization and the kNN classifier so both are fitted together.
# The step names ("scaler", "knn") are the prefixes used to address step parameters.
pipe = Pipeline(
    steps=[
        ("scaler", StandardScaler()),
        ("knn", KNeighborsClassifier()),
    ]
)

In [198]:
# Display the pipeline to check its steps.
pipe

In [203]:
# Tune k on the whole pipeline; the "knn__" prefix routes the parameter to the
# step named "knn".
param_grid = {"knn__n_neighbors": [3, 5, 7, 9]}

classifierCV_model = GridSearchCV(estimator=pipe, param_grid=param_grid, scoring="recall", cv=5)

# Raw (unscaled) features go in; scaling happens inside the pipeline.
classifierCV_model.fit(X_train, y_train)

In [204]:
# Predict on the raw test features; scaling is handled by the pipeline inside the search object.
y_pred = classifierCV_model.predict(X_test)

In [205]:
# Re-import so the metric names refer to the sklearn functions even if an
# earlier cell rebound them to scalar results.
from sklearn.metrics import precision_score, accuracy_score, recall_score

# Score the pipeline's predictions (y_pred) against the held-out labels.
# The results are stored under names that differ from the metric functions,
# so the functions stay callable and the cell can be re-run.
precision = precision_score(y_test, y_pred)
accuracy = accuracy_score(y_test, y_pred)
recall = recall_score(y_test, y_pred)

print(f"Precision Score: {precision}")
print(f"Accuracy Score: {accuracy}")
print(f"Recall Score: {recall}")

Precoison Score: 0.9354838709677419
Accuracy Score: 0.9180327868852459
Recall Score: 0.90625


In [206]:
# Report the parameter setting selected by the pipeline grid search (scored by recall).
print(classifierCV_model.best_params_)

{'knn__n_neighbors': 7}
