In [1]:
import seaborn as sns
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split

In [2]:
df = pd.read_csv('https://talentcocomedia.s3.amazonaws.com/ml-assets/heart-disease.csv')


In [3]:
y = df['target']


In [4]:
x = df.drop('target',axis=1)


In [5]:
x_train,x_test,y_train,y_test=train_test_split(x,y,test_size=0.3)


#Logistic Regression - GridSearchCV


In [6]:
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import accuracy_score

lr_model = LogisticRegression(penalty='l2')

val = list(np.arange(1, 10, 1))

parameters = {'solver':('newton-cg', 'liblinear', 'sag', 'saga'), 'C':val}

clf_lr = GridSearchCV(lr_model, parameters)
clf_lr.fit(x_train, y_train)



In [7]:
print(clf_lr.best_params_)


{'C': 1, 'solver': 'liblinear'}


In [8]:
test_pred = clf_lr.predict(x_test)


In [9]:
acc_lr_gs = accuracy_score(test_pred,y_test)
acc_lr_gs

0.8901098901098901

Previously we had got an accuracy of 72.52%

but now it is improved to 84.61%

#KNeighborsClassifier - GridSearchCV


In [10]:
from sklearn.neighbors import KNeighborsClassifier

neigh = KNeighborsClassifier()

val = list(np.arange(1,30, 1))

parameters = {'n_neighbors':val}

clf_knn = GridSearchCV(neigh, parameters)
clf_knn.fit(x_train, y_train)

In [11]:
print(clf_knn.best_params_)


{'n_neighbors': 25}


In [12]:
test_pred_knn = clf_knn.predict(x_test)


In [13]:
accuracy_knn_gs = accuracy_score(test_pred_knn, y_test)
accuracy_knn_gs

0.6923076923076923

#DecisionTreeClassifier - GridSearchCV


In [14]:
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split
from sklearn import metrics
     

In [15]:
clf = DecisionTreeClassifier(random_state=1024)

param_grid = {
              'max_features': ['auto', 'sqrt', 'log2'],
              'ccp_alpha': [0.1, .01, .001],
              'max_depth' : np.arange(5,10,1),
              'criterion' :['gini', 'entropy']
             }

clf_dtc = GridSearchCV(clf, param_grid=param_grid, cv=5, verbose=True)

clf_dtc.fit(x_train, y_train)

Fitting 5 folds for each of 90 candidates, totalling 450 fits




In [16]:
print(clf_dtc.best_params_)


{'ccp_alpha': 0.01, 'criterion': 'gini', 'max_depth': 5, 'max_features': 'auto'}


In [17]:
test_pred_dtc = clf_dtc.predict(x_test)


In [18]:
accuracy_dtc_gs = metrics.accuracy_score(y_test, test_pred_dtc)
accuracy_dtc_gs

0.7912087912087912

#CONFUSION MATRIX


now we had got a accuracy of 84.61% using Linear-Regression, we use its predicted accuracy to find the confusion matrix

###Logistic Regression


In [19]:
from sklearn.metrics import confusion_matrix
confusion_matrix(test_pred,y_test)

array([[35,  4],
       [ 6, 46]])

In [20]:
from sklearn.metrics import classification_report

print(classification_report(y_test,test_pred))

              precision    recall  f1-score   support

           0       0.90      0.85      0.88        41
           1       0.88      0.92      0.90        50

    accuracy                           0.89        91
   macro avg       0.89      0.89      0.89        91
weighted avg       0.89      0.89      0.89        91



###KNeighborsClassifier


In [21]:
confusion_matrix(test_pred_knn,y_test)

array([[27, 14],
       [14, 36]])

In [22]:
print(classification_report(y_test,test_pred_knn))


              precision    recall  f1-score   support

           0       0.66      0.66      0.66        41
           1       0.72      0.72      0.72        50

    accuracy                           0.69        91
   macro avg       0.69      0.69      0.69        91
weighted avg       0.69      0.69      0.69        91



###DecisionTreeClassifier


In [23]:
confusion_matrix(test_pred_dtc,y_test)


array([[30,  8],
       [11, 42]])

In [24]:
print(classification_report(y_test,test_pred_dtc))


              precision    recall  f1-score   support

           0       0.79      0.73      0.76        41
           1       0.79      0.84      0.82        50

    accuracy                           0.79        91
   macro avg       0.79      0.79      0.79        91
weighted avg       0.79      0.79      0.79        91



By looking at comparisions we can conclusion that DecisionTreeClassifier makes a better Recall along with accuracy when compared to other Modals