In [1]:
import seaborn as sns
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split

In [2]:
df = pd.read_csv('https://talentcocomedia.s3.amazonaws.com/ml-assets/heart-disease.csv')

In [3]:
y = df['target']

In [4]:
x = df.drop('target',axis=1)

In [5]:
x_train,x_test,y_train,y_test=train_test_split(x,y,test_size=0.3)

**GridSearchCV**

Logistic Regression

In [7]:
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import accuracy_score
lr_model = LogisticRegression(penalty='l2')
val = list(np.arange(1, 10, 1))
parameters = {'solver':('newton-cg', 'liblinear', 'sag', 'saga'), 'C':val}
clf_lr = GridSearchCV(lr_model, parameters)
clf_lr.fit(x_train, y_train)



In [8]:
print(clf_lr.best_params_)

{'C': 2, 'solver': 'liblinear'}


In [9]:
test_pred = clf_lr.predict(x_test)
acc_lr_gs = accuracy_score(test_pred,y_test)
acc_lr_gs

0.7912087912087912

Previously we had got an accuracy of 80.21% which is now improved to 79.12%

KNeighborsClassifier

In [10]:
from sklearn.neighbors import KNeighborsClassifier

neigh = KNeighborsClassifier()

val = list(np.arange(1,30, 1))

parameters = {'n_neighbors':val}

clf_knn = GridSearchCV(neigh, parameters)
clf_knn.fit(x_train, y_train)

In [11]:
print(clf_knn.best_params_)

{'n_neighbors': 7}


In [12]:
test_pred_knn = clf_knn.predict(x_test)
accuracy_knn_gs = accuracy_score(test_pred_knn, y_test)
accuracy_knn_gs

0.6263736263736264

previously we had got an accuracy score of 62.63% which is now improved to 62.6% using GridSearchCV

DecisionTreeClassifier

In [13]:
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split
from sklearn import metrics

In [14]:
clf = DecisionTreeClassifier(random_state=1024)

param_grid = {
              'max_features': ['auto', 'sqrt', 'log2'],
              'ccp_alpha': [0.1, .01, .001],
              'max_depth' : np.arange(5,10,1),
              'criterion' :['gini', 'entropy']
             }

clf_dtc = GridSearchCV(clf, param_grid=param_grid, cv=5, verbose=True)

clf_dtc.fit(x_train, y_train)
     
print(clf_dtc.best_params_)

Fitting 5 folds for each of 90 candidates, totalling 450 fits




{'ccp_alpha': 0.01, 'criterion': 'gini', 'max_depth': 6, 'max_features': 'auto'}




In [15]:
test_pred_dtc = clf_dtc.predict(x_test)
     

accuracy_dtc_gs = metrics.accuracy_score(y_test, test_pred_dtc)
accuracy_dtc_gs

0.7362637362637363

In [None]:
previously we had got an accuracy score of 64.83% which is improved to 73.62% using GridSearchCV

**CONFUSION MATRIX**

since we got a accuracy of 84% using Linear-Regression, we use its predicted accuracy to find the confusion matrix

Logistic Regression

In [16]:
from sklearn.metrics import confusion_matrix
confusion_matrix(test_pred,y_test)

array([[28,  9],
       [10, 44]])

In [17]:
from sklearn.metrics import classification_report

print(classification_report(y_test,test_pred))

              precision    recall  f1-score   support

           0       0.76      0.74      0.75        38
           1       0.81      0.83      0.82        53

    accuracy                           0.79        91
   macro avg       0.79      0.78      0.78        91
weighted avg       0.79      0.79      0.79        91



KNeighborsClassifier

In [18]:
confusion_matrix(test_pred_knn,y_test)

array([[23, 19],
       [15, 34]])

In [19]:
print(classification_report(y_test,test_pred_knn))

              precision    recall  f1-score   support

           0       0.55      0.61      0.57        38
           1       0.69      0.64      0.67        53

    accuracy                           0.63        91
   macro avg       0.62      0.62      0.62        91
weighted avg       0.63      0.63      0.63        91



DecisionTreeClassifier

In [20]:
confusion_matrix(test_pred_dtc,y_test)

array([[28, 14],
       [10, 39]])

In [21]:
print(classification_report(y_test,test_pred_dtc))

              precision    recall  f1-score   support

           0       0.67      0.74      0.70        38
           1       0.80      0.74      0.76        53

    accuracy                           0.74        91
   macro avg       0.73      0.74      0.73        91
weighted avg       0.74      0.74      0.74        91



DecisionTreeClassifier provides a better Recall along with accuracy