## Importing Required Modules

In [None]:
import pandas as pd

from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier

from sklearn.metrics import accuracy_score,confusion_matrix
from sklearn.metrics import recall_score,precision_score,f1_score

## Loading Dataset

In [3]:
# Read the heart-disease CSV from the Kaggle input directory, then preview
# the first five rows to check that the columns parsed as expected.
df = pd.read_csv(filepath_or_buffer='/kaggle/input/heart-disease-uci/heart.csv')
df.head(n=5)

Unnamed: 0,age,sex,cp,trestbps,chol,fbs,restecg,thalach,exang,oldpeak,slope,ca,thal,target
0,63,1,3,145,233,1,0,150,0,2.3,0,0,1,1
1,37,1,2,130,250,0,1,187,0,3.5,0,0,2,1
2,41,0,1,130,204,0,0,172,0,1.4,2,0,2,1
3,56,1,1,120,236,0,1,178,0,0.8,2,0,2,1
4,57,0,0,120,354,0,1,163,1,0.6,2,0,2,1


## Train Test Split

In [4]:
# Features are every column except the last; the last column is the label.
# 20% of the rows are held out for testing, with a fixed seed for the shuffle.
X_train, X_test, y_train, y_test = train_test_split(
    df.iloc[:, :-1],
    df.iloc[:, -1],
    test_size=0.2,
    random_state=2,
)

## Training and Prediction
- Selecting between LogisticRegression and DecisionTreeClassifier using classification metrics

In [6]:
# Two candidate classifiers, trained on the same split so that their metrics
# can be compared directly.
clf1 = LogisticRegression()
# Seed the tree: scikit-learn randomly permutes the features at each split, so
# an unseeded tree (and every metric derived from it) can change from run to
# run. The seed matches the one used for the train/test split.
clf2 = DecisionTreeClassifier(random_state=2)

clf1.fit(X_train, y_train)
clf2.fit(X_train, y_train)

In [8]:
# Predict the held-out rows with each fitted model:
# y_pred1 comes from logistic regression, y_pred2 from the decision tree.
y_pred1, y_pred2 = (model.predict(X_test) for model in (clf1, clf2))

## **Accuracy Score**

In [9]:
# Accuracy = fraction of test rows whose predicted label matches the true label.
print(
    "Accuracy of Logistic Regression",
    accuracy_score(y_true=y_test, y_pred=y_pred1),
)
print(
    "Accuracy of Decision Trees",
    accuracy_score(y_true=y_test, y_pred=y_pred2),
)

Accuracy of Logistic Regression 0.9016393442622951
Accuracy of Decision Trees 0.8360655737704918


## **Confusion Matrix**

In [11]:
# scikit-learn's convention: rows are the actual classes, columns the predicted ones.
print("Logistic Regression Confusion Matrix\n")
pd.DataFrame(
    data=confusion_matrix(y_true=y_test, y_pred=y_pred1),
    columns=[0, 1],
)

Logistic Regression Confusion Matrix



Unnamed: 0,0,1
0,26,6
1,0,29


In [12]:
# scikit-learn's convention: rows are the actual classes, columns the predicted ones.
print("Decision Tree Confusion Matrix\n")
pd.DataFrame(
    data=confusion_matrix(y_true=y_test, y_pred=y_pred2),
    columns=[0, 1],
)

Decision Tree Confusion Matrix



Unnamed: 0,0,1
0,24,8
1,2,27


## Predictions from both models

In [14]:
# Put the true labels next to each model's predictions. The frame takes its
# index from y_test, so each row keeps its label from the original dataset.
result = pd.DataFrame({
    'Actual Label': y_test,
    'Logistic Regression Prediction': y_pred1,
    'Decision Tree Prediction': y_pred2,
})

# Show ten randomly chosen test rows (unseeded, so the selection varies per run).
result.sample(n=10)

Unnamed: 0,Actual Label,Logistic Regression Prediction,Decision Tree Prediction
257,0,0,0
65,1,1,1
164,1,1,1
29,1,1,1
296,0,1,0
184,0,0,0
161,1,1,1
292,0,0,0
53,1,1,1
147,1,1,1


## **precision_score, recall_score & f1_score**

In [16]:
# Text report for logistic regression: confusion matrix followed by precision,
# recall and F1. With the default averaging these three scores describe the
# positive class (label 1) only.
print("For Logistic regression Model", "-"*50, sep="\n")
cdf = pd.DataFrame(
    data=confusion_matrix(y_true=y_test, y_pred=y_pred1),
    columns=[0, 1],
)
print(cdf, "-"*50, sep="\n")
print("Precision - ", precision_score(y_true=y_test, y_pred=y_pred1))
print("Recall - ", recall_score(y_true=y_test, y_pred=y_pred1))
print("F1 score - ", f1_score(y_true=y_test, y_pred=y_pred1))

For Logistic regression Model
--------------------------------------------------
    0   1
0  26   6
1   0  29
--------------------------------------------------
Precision -  0.8285714285714286
Recall -  1.0
F1 score -  0.90625


In [17]:
# Text report for the decision tree: confusion matrix followed by precision,
# recall and F1. With the default averaging these three scores describe the
# positive class (label 1) only.
print("For DT Model", "-"*50, sep="\n")
cdf = pd.DataFrame(
    data=confusion_matrix(y_true=y_test, y_pred=y_pred2),
    columns=[0, 1],
)
print(cdf, "-"*50, sep="\n")
print("Precision - ", precision_score(y_true=y_test, y_pred=y_pred2))
print("Recall - ", recall_score(y_true=y_test, y_pred=y_pred2))
print("F1 score - ", f1_score(y_true=y_test, y_pred=y_pred2))

For DT Model
--------------------------------------------------
    0   1
0  24   8
1   2  27
--------------------------------------------------
Precision -  0.7714285714285715
Recall -  0.9310344827586207
F1 score -  0.8437500000000001


## Precision and Recall
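Behind the scenes, precision and recall are calculated for each class (both "no" = 0 and "yes" = 1). Passing `average=None` returns one score per class instead of only the score for the positive class.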

In [18]:
# Per-class precision for logistic regression: one value per label, in label order.
precision_score(y_true=y_test, y_pred=y_pred1, average=None)

array([1.        , 0.82857143])

In [19]:
# Per-class precision for the decision tree: one value per label, in label order.
precision_score(y_true=y_test, y_pred=y_pred2, average=None)

array([0.92307692, 0.77142857])

In [21]:
# Per-class recall for the decision tree: one value per label, in label order.
recall_score(y_true=y_test, y_pred=y_pred2, average=None)

array([0.75      , 0.93103448])