In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score

In [2]:
heart_data = pd.read_csv('/content/heart_disease_data.csv')

In [3]:
heart_data.head()

Unnamed: 0,age,sex,cp,trestbps,chol,fbs,restecg,thalach,exang,oldpeak,slope,ca,thal,target
0,63,1,3,145,233,1,0,150,0,2.3,0,0,1,1
1,37,1,2,130,250,0,1,187,0,3.5,0,0,2,1
2,41,0,1,130,204,0,0,172,0,1.4,2,0,2,1
3,56,1,1,120,236,0,1,178,0,0.8,2,0,2,1
4,57,0,0,120,354,0,1,163,1,0.6,2,0,2,1


In [5]:
heart_data['target'].value_counts()

Unnamed: 0_level_0,count
target,Unnamed: 1_level_1
1,165
0,138


In [6]:
# 1 - defective
# 0 - healthy

In [7]:
X = heart_data.drop(columns='target', axis=1)
Y = heart_data['target']

In [8]:
print (Y)

0      1
1      1
2      1
3      1
4      1
      ..
298    0
299    0
300    0
301    0
302    0
Name: target, Length: 303, dtype: int64


In [9]:
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, stratify=Y, random_state=2)

In [10]:
print(X.shape, X_train.shape, X_test.shape)

(303, 13) (242, 13) (61, 13)


Model Training

In [11]:
model = LogisticRegression(max_iter=1000)

In [12]:
model.fit(X_train, Y_train)

Model Evaluation

In [13]:
#Accuracy Score on Training Data
from sklearn.metrics import accuracy_score

X_train_prediction = model.predict(X_train)
training_data_accuracy = accuracy_score(X_train_prediction, Y_train)
print('Accuracy on Training data : ', training_data_accuracy)

Accuracy on Training data :  0.8553719008264463


In [14]:
#Accuracy score on Test Data
X_test_prediction = model.predict(X_test)
test_data_accuracy = accuracy_score(X_test_prediction, Y_test)
print('Accuracy on Test data : ', test_data_accuracy)

Accuracy on Test data :  0.8032786885245902


In [15]:
#since there is not a major diffrence between the accuracy of test and training data , there is no issue of underfitting or an over fitting

**PRECISION**

In [16]:
#Precision is the ratio of number of True positive to the total number of Predicted Positive. It measure, out of the total predicted positive , how many are actually positive.

In [17]:
from sklearn.metrics import precision_score

In [18]:
precision_train = precision_score(X_train_prediction, Y_train)
precision_test = precision_score(X_test_prediction, Y_test)

print('Precision on Training data : ', precision_train)
print("------------------------------------------------")
print('Precision on Test data : ', precision_test)

Precision on Training data :  0.9242424242424242
------------------------------------------------
Precision on Test data :  0.8181818181818182


**Recall**

In [19]:
#Recall is the ratio of number of True positive to the total number of actual positive. It measures, out of the total actual positive, how many are predicted as True positive.

In [20]:
from sklearn.metrics import recall_score

In [21]:
recall_train = recall_score(X_train_prediction, Y_train)
recall_test = recall_score(X_test_prediction, Y_test)

print('Recall on Training data : ', recall_train)
print("------------------------------------------------")
print('Recall on Test data : ', recall_test)


Recall on Training data :  0.8299319727891157
------------------------------------------------
Recall on Test data :  0.8181818181818182


**F1_SCORE**

In [22]:
#F1 Score is an important evaluation metric for binary classification that combines Precision and Recall , F1 Score is the harmonin mean of Precision and Recall

In [23]:
from sklearn.metrics import f1_score

In [24]:
f1_score_train = f1_score(X_train_prediction, Y_train)
f1_score_test = f1_score(X_test_prediction, Y_test)

print('F1 Score on Training data : ', f1_score_train)
print("------------------------------------------------")
print('F1 Score on Test data : ', f1_score_test)


F1 Score on Training data :  0.8745519713261649
------------------------------------------------
F1 Score on Test data :  0.8181818181818182


Precision, Recall , & F1_Score FUNCTION

In [25]:
def precision_recall_f1_score(true_labels, pred_labels):
    precision = precision_score(true_labels, pred_labels)
    recall = recall_score(true_labels, pred_labels)
    f1 = f1_score(true_labels, pred_labels)

    print("Precision: {:.2f}".format(precision))
    print("Recall: {:.2f}".format(recall))
    print("F1 Score: {:.2f}".format(f1))

In [26]:
precision_recall_f1_score(X_train_prediction, Y_train)

Precision: 0.92
Recall: 0.83
F1 Score: 0.87


In [27]:
precision_recall_f1_score(X_test_prediction, Y_test)

Precision: 0.82
Recall: 0.82
F1 Score: 0.82
