In [11]:
import pandas as pd
from sklearn.metrics import accuracy_score,precision_score, confusion_matrix,recall_score,f1_score
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import StandardScaler
# Load the dataset from ./1-heart.csv; the "target" column is the label and
# every other column is used as a feature.
heart_df = pd.read_csv("./1-heart.csv")
X = heart_df.drop("target", axis=1)
Y = heart_df["target"]

# Hold out 20% of the rows for evaluation; the fixed random_state keeps the
# split identical between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X, Y, test_size=0.2, random_state=42)

# Fit the scaler on the training rows only, then apply that same fitted
# transform to the test rows so no test statistics enter the scaling.
scaler = StandardScaler()
X_train_scale = scaler.fit_transform(X_train)
X_test_scaler = scaler.transform(X_test)

# First candidate: k-nearest-neighbours with k=3.
model = KNeighborsClassifier(n_neighbors=3)
model.fit(X_train_scale, y_train)
y_pred = model.predict(X_test_scaler)

# Report the test-split metrics. The f1 line was present in the recorded
# output of this cell but missing from its code; it is restored here so a
# fresh run reproduces that output (and f1_score is no longer an unused import).
print("recall  score ", recall_score(y_test, y_pred))
print("accuracy score ", accuracy_score(y_test, y_pred))
print("precision score ", precision_score(y_test, y_pred))
print("f1 score ", f1_score(y_test, y_pred))


recall  score  0.78125
accuracy score  0.8524590163934426
precision score  0.9259259259259259
f1 score  0.847457627118644


In [12]:
# Second candidate: k=5, trained and evaluated on the same scaled split.
model = KNeighborsClassifier(n_neighbors=5)
model.fit(X=X_train_scale, y=y_train)
y_pred = model.predict(X=X_test_scaler)

# Test-split metrics for k=5.
print("recall  score ", recall_score(y_true=y_test, y_pred=y_pred))
print("accuracy score ", accuracy_score(y_true=y_test, y_pred=y_pred))
print("precision score ", precision_score(y_true=y_test, y_pred=y_pred))

recall  score  0.875
accuracy score  0.9016393442622951
precision score  0.9333333333333333


In [13]:
# Third candidate: k=7. Among the k values tried by hand in this notebook
# (3, 5, 7, 9) it gives the highest recall, accuracy and precision on the
# test split. NOTE: that ranking is based on test-split scores, not on
# cross-validation.
model = KNeighborsClassifier(n_neighbors=7)
model.fit(X=X_train_scale, y=y_train)
y_pred = model.predict(X=X_test_scaler)

# Test-split metrics for k=7.
print("recall  score ", recall_score(y_true=y_test, y_pred=y_pred))
print("accuracy score ", accuracy_score(y_true=y_test, y_pred=y_pred))
print("precision score ", precision_score(y_true=y_test, y_pred=y_pred))

recall  score  0.90625
accuracy score  0.9180327868852459
precision score  0.9354838709677419


In [14]:
# Fourth candidate: k=9, trained and evaluated on the same scaled split.
model = KNeighborsClassifier(n_neighbors=9)
model.fit(X=X_train_scale, y=y_train)
y_pred = model.predict(X=X_test_scaler)

# Test-split metrics for k=9.
print("recall  score ", recall_score(y_true=y_test, y_pred=y_pred))
print("accuracy score ", accuracy_score(y_true=y_test, y_pred=y_pred))
print("precision score ", precision_score(y_true=y_test, y_pred=y_pred))

recall  score  0.875
accuracy score  0.9016393442622951
precision score  0.9333333333333333


In [24]:
# Hyperparameter tuning: 5-fold cross-validated grid search over n_neighbors.
# No `scoring` argument is passed, so candidates are ranked by the
# classifier's default score.
from sklearn.model_selection import GridSearchCV

param_grid = {"n_neighbors": [3, 5, 7, 9]}
classifier = KNeighborsClassifier()

# NOTE(review): X_train_scale was standardised on the whole training split
# before the folds are made, so every validation fold has already contributed
# to the scaler's statistics. Scaling inside a Pipeline would avoid that.
classifierCV = GridSearchCV(estimator=classifier, param_grid=param_grid, cv=5)
classifierCV.fit(X_train_scale, y_train)

# Evaluate the selected configuration on the held-out test split.
y_pred = classifierCV.predict(X_test_scaler)
print("recall  score ", recall_score(y_true=y_test, y_pred=y_pred))
print("accuracy score ", accuracy_score(y_true=y_test, y_pred=y_pred))
print("precision score ", precision_score(y_true=y_test, y_pred=y_pred))

# Per-candidate mean cross-validation score, followed by the winner.
res = pd.DataFrame(classifierCV.cv_results_)
print(res.loc[:, ["param_n_neighbors", "mean_test_score"]])
print("Best Parameters:", classifierCV.best_params_)
print("Best Score:", classifierCV.best_score_)

# In the recorded run this search selected n_neighbors=5 (mean CV score ~0.814).


recall  score  0.875
accuracy score  0.9016393442622951
precision score  0.9333333333333333
   param_n_neighbors  mean_test_score
0                  3         0.805782
1                  5         0.814116
2                  7         0.801616
3                  9         0.801786
Best Parameters: {'n_neighbors': 5}
Best Score: 0.8141156462585034


In [30]:
# Repeat the grid search, this time choosing n_neighbors by recall
# (the previous search ranked candidates by accuracy).
from sklearn.model_selection import GridSearchCV

param_grid = {"n_neighbors": [3, 5, 7, 9]}
classifier = KNeighborsClassifier()

classifierCV = GridSearchCV(
    estimator=classifier,
    param_grid=param_grid,
    scoring="recall",
    cv=5,
)
classifierCV.fit(X_train_scale, y_train)

# Evaluate the recall-selected configuration on the held-out test split.
y_pred = classifierCV.predict(X_test_scaler)
print("recall  score ", recall_score(y_true=y_test, y_pred=y_pred))
print("accuracy score ", accuracy_score(y_true=y_test, y_pred=y_pred))
print("precision score ", precision_score(y_true=y_test, y_pred=y_pred))

# Per-candidate mean cross-validation recall, followed by the winner.
res = pd.DataFrame(classifierCV.cv_results_)
print(res.loc[:, ["param_n_neighbors", "mean_test_score"]])
print("Best Parameters is :", classifierCV.best_params_)
print("Best Score is :", classifierCV.best_score_)

recall  score  0.90625
accuracy score  0.9180327868852459
precision score  0.9354838709677419
   param_n_neighbors  mean_test_score
0                  3         0.864387
1                  5         0.857550
2                  7         0.871795
3                  9         0.856980
Best Parameters is : {'n_neighbors': 7}
Best Score is : 0.8717948717948717


In [27]:
# Using a Pipeline: the scaler and the classifier are tuned as one estimator,
# so the grid search is fitted on the unscaled training features.
from sklearn.pipeline import Pipeline

# Same split arguments as the first cell (test_size=0.2, random_state=42).
X_train, X_test, y_train, y_test = train_test_split(
    X, Y, random_state=42, test_size=0.2)

pipeline = Pipeline(steps=[
    ("scaler", StandardScaler()),
    ("knn", KNeighborsClassifier()),
])

# The "knn__" prefix routes the parameter to the pipeline step named "knn".
param_grid = {"knn__n_neighbors": [3, 5, 7, 9]}

classifierCV = GridSearchCV(
    estimator=pipeline,
    param_grid=param_grid,
    scoring="recall",
    cv=5,
)
classifierCV.fit(X_train, y_train)

# Evaluate on the raw test features; the pipeline applies the scaling itself.
y_pred = classifierCV.predict(X_test)
print("recall  score ", recall_score(y_true=y_test, y_pred=y_pred))
print("accuracy score ", accuracy_score(y_true=y_test, y_pred=y_pred))
print("precision score ", precision_score(y_true=y_test, y_pred=y_pred))


recall  score  0.90625
accuracy score  0.9180327868852459
precision score  0.9354838709677419
