In [40]:
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score,precision_score,recall_score,f1_score,confusion_matrix
from sklearn.neighbors import KNeighborsClassifier

In [41]:
# Load heart.csv into a DataFrame.
data = pd.read_csv(filepath_or_buffer='/content/heart.csv')

In [42]:
# Preview the first five rows to check which columns were loaded.
data.head(n=5)

Unnamed: 0,age,sex,cp,trestbps,chol,fbs,restecg,thalach,exang,oldpeak,slope,ca,thal,target
0,63,1,3,145,233,1,0,150,0,2.3,0,0,1,1
1,37,1,2,130,250,0,1,187,0,3.5,0,0,2,1
2,41,0,1,130,204,0,0,172,0,1.4,2,0,2,1
3,56,1,1,120,236,0,1,178,0,0.8,2,0,2,1
4,57,0,0,120,354,0,1,163,1,0.6,2,0,2,1


In [43]:
# Build the feature matrix and the label vector.
# 'Unnamed: 0' is the row index that was saved into the CSV (see the data.head()
# preview); it is not a measurement. If the file is ordered by target -- the
# first rows shown all have target=1 -- keeping it would leak the label into a
# distance-based model, so it is excluded from the features.
# errors='ignore' keeps this cell working if the CSV is later read with index_col=0.
# NOTE: metrics saved in earlier outputs were computed with that column included;
# re-run the notebook to refresh them.
x=data.drop(columns=['target']).drop(columns=['Unnamed: 0'],errors='ignore')
y=data['target']

# Hold out 20% of the rows for evaluation; random_state makes the split repeatable.
x_train,x_test,y_train,y_test=train_test_split(x,y,test_size=0.2,random_state=42)

# Fit the scaler on the training rows only, then apply the same statistics to
# the test rows so the test split does not influence preprocessing.
scaler=StandardScaler()
x_train=scaler.fit_transform(x_train)
x_test=scaler.transform(x_test)

In [44]:
# k-nearest-neighbours with k=3, trained on the scaled training split.
knnclassifier = KNeighborsClassifier(n_neighbors=3).fit(x_train, y_train)
y_pred = knnclassifier.predict(x_test)

# Report each metric on the held-out split.
for metric_label, metric_fn in (
    ("accuracy :", accuracy_score),
    ("precision :", precision_score),
    (" recall :", recall_score),
    ("f1-score :", f1_score),
):
    print(metric_label, metric_fn(y_test, y_pred))

accuracy : 0.8524590163934426
precision : 0.9259259259259259
 recall : 0.78125
f1-score : 0.847457627118644


In [45]:
# Same experiment with k=5.
knnclassifier = KNeighborsClassifier(n_neighbors=5).fit(x_train, y_train)
y_pred = knnclassifier.predict(x_test)

# Report each metric on the held-out split.
for metric_label, metric_fn in (
    ("accuracy :", accuracy_score),
    ("precision :", precision_score),
    (" recall :", recall_score),
    ("f1-score :", f1_score),
):
    print(metric_label, metric_fn(y_test, y_pred))

accuracy : 0.9016393442622951
precision : 0.9333333333333333
 recall : 0.875
f1-score : 0.9032258064516129


In [46]:
# Same experiment with k=7.
knnclassifier = KNeighborsClassifier(n_neighbors=7).fit(x_train, y_train)
y_pred = knnclassifier.predict(x_test)

# Report each metric on the held-out split.
for metric_label, metric_fn in (
    ("accuracy :", accuracy_score),
    ("precision :", precision_score),
    (" recall :", recall_score),
    ("f1-score :", f1_score),
):
    print(metric_label, metric_fn(y_test, y_pred))

# k=7 gives the best scores of the k values tried up to this point.

accuracy : 0.9180327868852459
precision : 0.9354838709677419
 recall : 0.90625
f1-score : 0.9206349206349206


In [47]:
# Confusion matrix for the current y_pred (the k=7 model when cells run in order).
# Rows are the true classes, columns are the predicted classes.
cm = confusion_matrix(y_true=y_test, y_pred=y_pred)
print(cm)

[[27  2]
 [ 3 29]]


In [48]:
# Same experiment with k=9.
knnclassifier = KNeighborsClassifier(n_neighbors=9).fit(x_train, y_train)
y_pred = knnclassifier.predict(x_test)

# Report each metric on the held-out split.
for metric_label, metric_fn in (
    ("accuracy :", accuracy_score),
    ("precision :", precision_score),
    (" recall :", recall_score),
    ("f1-score :", f1_score),
):
    print(metric_label, metric_fn(y_test, y_pred))

# Not an improvement: accuracy decreases compared with k=7.

accuracy : 0.9016393442622951
precision : 0.9333333333333333
 recall : 0.875
f1-score : 0.9032258064516129


## CROSS VALIDATION TO FIND BEST VALUE OF K

In [49]:
from sklearn.model_selection import GridSearchCV

# Search the candidate neighbour counts with 5-fold cross-validation on the
# training split, ranking candidates by recall.
# NOTE: x_train was standardised earlier using statistics from the whole
# training split, so every validation fold has already influenced the scaling
# it is evaluated under.
param_grid = {'n_neighbors': [3, 5, 7, 9]}
classifier = KNeighborsClassifier()
classifier_cv = GridSearchCV(
    estimator=classifier,
    param_grid=param_grid,
    scoring='recall',
    cv=5,
)
classifier_cv.fit(x_train, y_train)

# predict() delegates to the estimator refit with the best parameters found.
y_pred = classifier_cv.predict(x_test)

# Report each metric on the held-out split.
for metric_label, metric_fn in (
    ("accuracy :", accuracy_score),
    ("precision :", precision_score),
    (" recall :", recall_score),
    ("f1-score :", f1_score),
):
    print(metric_label, metric_fn(y_test, y_pred))


accuracy : 0.9180327868852459
precision : 0.9354838709677419
 recall : 0.90625
f1-score : 0.9206349206349206


In [50]:
# Collect the per-candidate cross-validation results in a DataFrame and print it.
res = pd.DataFrame(data=classifier_cv.cv_results_)
print(res)

   mean_fit_time  std_fit_time  mean_score_time  std_score_time  \
0       0.001768      0.000448         0.005471        0.001691   
1       0.001431      0.000097         0.004211        0.000090   
2       0.001340      0.000047         0.004197        0.000049   
3       0.001541      0.000347         0.004772        0.000403   

   param_n_neighbors              params  split0_test_score  \
0                  3  {'n_neighbors': 3}           0.851852   
1                  5  {'n_neighbors': 5}           0.777778   
2                  7  {'n_neighbors': 7}           0.814815   
3                  9  {'n_neighbors': 9}           0.777778   

   split1_test_score  split2_test_score  split3_test_score  split4_test_score  \
0           0.814815           0.962963           0.884615           0.807692   
1           0.814815           0.925926           0.923077           0.846154   
2           0.925926           0.925926           0.846154           0.846154   
3           0.888889    

In [51]:
# Show only the candidate k and its mean cross-validated score.
print(res.loc[:, ["param_n_neighbors", "mean_test_score"]])

   param_n_neighbors  mean_test_score
0                  3         0.864387
1                  5         0.857550
2                  7         0.871795
3                  9         0.856980


In [52]:
# Parameter setting with the highest mean cross-validated score.
best_knn_params = classifier_cv.best_params_
print(best_knn_params)

{'n_neighbors': 7}


In [53]:
# Read heart.csv again into a separate DataFrame (data1).
data1 = pd.read_csv(filepath_or_buffer='/content/heart.csv')

In [54]:
# Unscaled features and labels for the pipeline-based search.
# Read from data1, the frame loaded in the preceding cell (it was otherwise
# unused), so this section does not depend on the earlier `data` variable.
# 'Unnamed: 0' is the row index that was saved into the CSV, not a measurement;
# it is dropped so it cannot act as a feature. errors='ignore' keeps this cell
# working if the CSV is later read with index_col=0.
X=data1.drop(columns=['target']).drop(columns=['Unnamed: 0'],errors='ignore')
Y=data1['target']

In [55]:
from sklearn.pipeline import Pipeline

# Split the unscaled features again; scaling now happens inside the pipeline.
x_train, x_test, y_train, y_test = train_test_split(X, Y, test_size=0.2, random_state=42)

# Scaler and classifier bundled as one estimator, so the scaler is fit as part
# of each cross-validation fit rather than once up front.
pipe = Pipeline(steps=[
    ('scaler', StandardScaler()),
    ('knn', KNeighborsClassifier()),
])

# The 'knn__' prefix routes the parameter to the pipeline step named 'knn'.
param_grid = {'knn__n_neighbors': [3, 5, 7, 9]}
classifier_cv = GridSearchCV(
    estimator=pipe,
    param_grid=param_grid,
    scoring='recall',
    cv=5,
)
classifier_cv.fit(x_train, y_train)

# predict() delegates to the pipeline refit with the best parameters found.
y_pred = classifier_cv.predict(x_test)

# Report each metric on the held-out split.
for metric_label, metric_fn in (
    ("accuracy :", accuracy_score),
    ("precision :", precision_score),
    (" recall :", recall_score),
    ("f1-score :", f1_score),
):
    print(metric_label, metric_fn(y_test, y_pred))


accuracy : 0.9180327868852459
precision : 0.9354838709677419
 recall : 0.90625
f1-score : 0.9206349206349206


In [56]:
# Collect the pipeline search's per-candidate results in a DataFrame and print it.
res = pd.DataFrame(data=classifier_cv.cv_results_)
print(res)

   mean_fit_time  std_fit_time  mean_score_time  std_score_time  \
0       0.005319      0.001201         0.006690        0.001158   
1       0.004824      0.000542         0.006520        0.000905   
2       0.004738      0.000293         0.005969        0.000102   
3       0.004425      0.000139         0.005772        0.000070   

   param_knn__n_neighbors                   params  split0_test_score  \
0                       3  {'knn__n_neighbors': 3}           0.851852   
1                       5  {'knn__n_neighbors': 5}           0.777778   
2                       7  {'knn__n_neighbors': 7}           0.851852   
3                       9  {'knn__n_neighbors': 9}           0.851852   

   split1_test_score  split2_test_score  split3_test_score  split4_test_score  \
0           0.777778           0.925926           0.884615           0.807692   
1           0.814815           0.925926           0.884615           0.846154   
2           0.888889           0.925926           0.884

In [62]:
# Show only the candidate k and its mean cross-validated score.
print(res.loc[:, ["param_knn__n_neighbors", "mean_test_score"]])

   param_knn__n_neighbors  mean_test_score
0                       3         0.849573
1                       5         0.849858
2                       7         0.871795
3                       9         0.856410


In [59]:
# Parameter setting with the highest mean cross-validated score for the pipeline.
best_pipeline_params = classifier_cv.best_params_
print(best_pipeline_params)

{'knn__n_neighbors': 7}
