In [1]:
import pandas as pd
from sklearn.metrics import accuracy_score, precision_score, recall_score
from sklearn.model_selection import GridSearchCV, train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler


In [2]:
# Load heart.csv from the working directory and preview its first five rows.
heart_df = pd.read_csv("heart.csv")
heart_df.head(n=5)

Unnamed: 0,age,sex,cp,trestbps,chol,fbs,restecg,thalach,exang,oldpeak,slope,ca,thal,target
0,63,1,3,145,233,1,0,150,0,2.3,0,0,1,1
1,37,1,2,130,250,0,1,187,0,3.5,0,0,2,1
2,41,0,1,130,204,0,0,172,0,1.4,2,0,2,1
3,56,1,1,120,236,0,1,178,0,0.8,2,0,2,1
4,57,0,0,120,354,0,1,163,1,0.6,2,0,2,1


In [3]:
# Separate the label column from the predictor columns.
# NOTE(review): the preview of heart_df lists an "Unnamed: 0" column. If that is
# a row index saved into the CSV (rather than a display artifact), it is being
# used as a feature here -- confirm and exclude it if so.
y = heart_df["target"]
X = heart_df.drop(columns = "target")

# Hold out 20% of the rows for testing; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size = 0.2,
    random_state = 42,
)

# Learn the scaling statistics from the training rows only, then apply the
# same transformation to both partitions.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [4]:
# Hyperparameter tuning: 5-fold cross-validated grid search over the number of
# neighbours used by a KNN classifier.
#
# NOTE(review): the training data passed in below was standardised with a scaler
# fitted on the whole training split, so each CV validation fold has already
# contributed to the scaling statistics. The mean_test_score values may
# therefore be slightly optimistic; putting the scaler and the classifier in a
# Pipeline keeps the scaling inside each fold.

classifier = KNeighborsClassifier()
param_grid = {"n_neighbors" : [3, 5, 7, 9]}

classifierCV = GridSearchCV(
    classifier,
    param_grid,
    cv = 5
)

classifierCV.fit(X_train_scaled, y_train)

# Evaluation on the held-out test split. predict() uses the estimator that
# GridSearchCV refits with the best parameters (its default refit=True).
y_pred = classifierCV.predict(X_test_scaled)
print("Recall score: ",recall_score(y_test, y_pred))
print("Accuracy Score : ",accuracy_score(y_test, y_pred))
print("precision Score : ",precision_score(y_test, y_pred))


# Per-candidate cross-validation results: one row per n_neighbors value.
res = pd.DataFrame(classifierCV.cv_results_)

print(res[["param_n_neighbors", "mean_test_score"]])

# The n_neighbors value with the highest mean cross-validated score.
print(classifierCV.best_params_)

Recall score:  0.875
Accuracy Score :  0.9016393442622951
precision Score :  0.9333333333333333
   param_n_neighbors  mean_test_score
0                  3         0.805782
1                  5         0.814116
2                  7         0.801616
3                  9         0.801786
{'n_neighbors': 5}


In [7]:
# Pipeline version of the grid search: scaling and KNN are tuned as one
# estimator, so within each CV fold the scaler is fitted on that fold's training
# portion only and the raw (unscaled) features are passed in.
#
# The X_train / X_test / y_train / y_test split created earlier is reused here;
# re-splitting with the same X, y, test_size and random_state would produce
# identical partitions.

pipeline = Pipeline([
    ('scaler', StandardScaler()),
    ('knn', KNeighborsClassifier())
])

# "<step name>__<parameter>" addresses a parameter of a step inside the pipeline.
param_grid = {"knn__n_neighbors" : [3, 5, 7, 9]}

classifierCV = GridSearchCV(
    pipeline,
    param_grid,
    cv = 5
)

classifierCV.fit(X_train, y_train)

# Evaluation on the held-out test split; the fitted pipeline scales X_test
# itself before predicting.
y_pred = classifierCV.predict(X_test)
print("Recall score: ",recall_score(y_test, y_pred))
print("Accuracy Score : ",accuracy_score(y_test, y_pred))
print("precision Score : ",precision_score(y_test, y_pred))


# The knn__n_neighbors value with the highest mean cross-validated score.
print(classifierCV.best_params_)


Recall score:  0.875
Accuracy Score :  0.9016393442622951
precision Score :  0.9333333333333333
{'knn__n_neighbors': 5}
