In [2]:
import pandas as pd
import numpy as np

from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import KFold
from sklearn.metrics import accuracy_score, classification_report

import time
import joblib

# Full dataset: feature matrix X and label vector y, used by the K-fold loop.
df = pd.read_csv('heart_disease_data.csv')
y = df['target']
X = df.drop(columns=['target'])

# Train / test split that was written out beforehand as two separate CSV files.
df_train = pd.read_csv('TrainingSet_df.csv')
df_test = pd.read_csv('TestingSet_df.csv')

X_train, y_train = df_train.drop(columns=['target']), df_train['target']
X_test, y_test = df_test.drop(columns=['target']), df_test['target']

# Ten shuffled folds; the fixed random_state keeps the fold assignment repeatable.
kfold = KFold(n_splits=10, shuffle=True, random_state=0)

In [3]:
from sklearn.preprocessing import StandardScaler

# Fit the scaler on the training split only, then apply that same fitted
# transformation to both splits so the test split never influences the fit.
# NOTE: this rebinds X_train / X_test to the scaled arrays.
sc = StandardScaler().fit(X_train)
X_train = sc.transform(X_train)
X_test = sc.transform(X_test)

In [4]:
print("Logistic Regression w/o KFold\n")

# time.perf_counter() is the clock intended for measuring short durations;
# time.time() can be too coarse for intervals of a few milliseconds.
str_time = time.perf_counter()
lr = LogisticRegression()
lr.fit(X_train, y_train)
etr_time = time.perf_counter()
print("Training Time: " + str(etr_time - str_time))

# Time inference only. Metric computation, report formatting and printing are
# kept outside the timed region so they do not inflate the reported figure.
stt_time = time.perf_counter()
y_pred = lr.predict(X_test)
ett_time = time.perf_counter()

# Reuse y_pred instead of lr.score(), which would run a second prediction pass.
print('Accuracy:', accuracy_score(y_test, y_pred))
print(classification_report(y_test, y_pred))
print("Testing Time: " + str(ett_time - stt_time))

Logistic Regression w/o KFold

Training Time: 0.007181644439697266
Accuracy: 0.8128205128205128
              precision    recall  f1-score   support

           0       0.80      0.78      0.79       176
           1       0.82      0.84      0.83       214

    accuracy                           0.81       390
   macro avg       0.81      0.81      0.81       390
weighted avg       0.81      0.81      0.81       390

Testing Time: 0.016127586364746094


In [5]:
print("Logistic Regression\n")
# max_iter is raised above the default to remove the MAX_ITER (convergence)
# warnings; X here is the raw feature frame read from the CSV.
lr = LogisticRegression(max_iter = 1000)
scores = []
# Fold-local names are used on purpose: reusing X_train / X_test / y_train /
# y_test / y_pred here would overwrite the hold-out split prepared by the
# earlier cells, so re-running those cells afterwards would silently operate
# on the last fold's data instead.
for i, (train_index, test_index) in enumerate(kfold.split(X)):
    X_fold_train, X_fold_test = X.iloc[train_index], X.iloc[test_index]
    y_fold_train, y_fold_test = y.iloc[train_index], y.iloc[test_index]
    lr.fit(X_fold_train, y_fold_train)
    y_fold_pred = lr.predict(X_fold_test)
    score = accuracy_score(y_fold_test, y_fold_pred)
    scores.append(score)
    print('Accuracy',str(i+1),'=',score)
    print('Classification Report',str(i+1),':\n',classification_report(y_fold_test, y_fold_pred))

print('Average Accuracy:', sum(scores) / len(scores))
# NOTE(review): the estimator saved here is the one left over from the final
# loop iteration, i.e. fitted on the last fold's training portion rather than
# on all of X. Confirm that this is the model intended for deployment.
joblib.dump(lr,'LogisticReg_Model.joblib')

Logistic Regression

Accuracy 1 = 0.7899159663865546
Classification Report 1 :
               precision    recall  f1-score   support

           0       0.75      0.82      0.78        55
           1       0.83      0.77      0.80        64

    accuracy                           0.79       119
   macro avg       0.79      0.79      0.79       119
weighted avg       0.79      0.79      0.79       119

Accuracy 2 = 0.8571428571428571
Classification Report 2 :
               precision    recall  f1-score   support

           0       0.86      0.81      0.84        54
           1       0.85      0.89      0.87        65

    accuracy                           0.86       119
   macro avg       0.86      0.85      0.86       119
weighted avg       0.86      0.86      0.86       119

Accuracy 3 = 0.8067226890756303
Classification Report 3 :
               precision    recall  f1-score   support

           0       0.77      0.78      0.78        51
           1       0.84      0.82      

['LogisticReg_Model.joblib']

In [6]:
# Reload the estimator that the K-fold cell wrote to disk with joblib.dump;
# predictHeartDisease() reads it through this module-level name.
t_model = joblib.load('LogisticReg_Model.joblib')
def _prompt_int(prompt, is_valid):
    """Read one integer from the user and return it.

    Raises:
        ValueError: if the text is not an integer or is_valid(value) is false.
    """
    value = int(input(prompt))  # int() raises ValueError on non-numeric text
    if not is_valid(value):
        raise ValueError("rejected value: " + str(value))
    return value


def _prompt_float(prompt, low, high):
    """Read one float in the closed range [low, high] and return it.

    Raises:
        ValueError: if the text is not a number or lies outside the range.
    """
    value = float(input(prompt))
    # Written as a positive range test so that NaN (which fails every
    # comparison) is rejected instead of slipping through.
    if not (low <= value <= high):
        raise ValueError("rejected value: " + str(value))
    return value


def _prompt_choice(prompt, codes):
    """Read a letter answer and return the numeric code mapped to it.

    The answer is stripped and lower-cased before it is looked up in `codes`.

    Raises:
        ValueError: if the answer is not one of the keys of `codes`.
    """
    answer = input(prompt).strip().lower()
    if answer not in codes:
        raise ValueError("rejected answer: " + answer)
    return codes[answer]


def predictHeartDisease():
    """Interactively collect eleven patient attributes and print a prediction.

    Each answer is validated as soon as it is entered. On the first invalid
    answer (wrong type, unknown code or out-of-range value) the function
    prints "INVALID INPUT!" and returns without asking further questions.

    The collected values are placed in a one-row DataFrame whose columns are
    renamed to match the module-level ``X`` and passed to the module-level
    ``t_model``. The outcome is printed; the function always returns None.
    """
    print("Enter the Following Details:")
    try:
        age = _prompt_int("Age: ", lambda v: 0 < v < 100)
        sex = _prompt_choice("Sex (M/F): ", {'m': 1, 'f': 0})
        print("Chest Pain Type Codes:")
        print("1.Typical Angina\n2.Atypical Angina\n3.Non-Anginal Pain\n4.Asymptomatic")
        cpt = _prompt_int("Chest Pain Type (1/2/3/4): ", lambda v: v in (1, 2, 3, 4))
        rbps = _prompt_int("Resting Blood Pressure (in mm Hg): ", lambda v: 50 < v < 250)
        chol = _prompt_int("Serum Cholesterol (in mg/dl): ", lambda v: 0 <= v < 2000)
        bsugar = _prompt_int("Fasting Blood Sugar (in mg/dl): ", lambda v: v >= 0)
        # The feature is a flag: 1 when the reading is above 120, otherwise 0.
        bsl = 0 if bsugar <= 120 else 1
        print("Resting Electrocardiogram Result Codes:")
        print("0.Normal\n1.ST-T Wave Abnormality\n2.Left Ventricular")
        rer = _prompt_int("Resting Electrocardiogram Result (0/1/2): ", lambda v: v in (0, 1, 2))
        # NOTE(review): the prompt advertises 71-202 but values 65..205 are
        # accepted; confirm which range is intended.
        maxhr = _prompt_int("Maximum Heart Rate (71-202): ", lambda v: 65 <= v <= 205)
        exeg = _prompt_choice("Exercise Induced Angina (Y/N): ", {'y': 1, 'n': 0})
        op = _prompt_float("Oldpeak - ST: ", -10, 10)
        print("Slope of Peak Exercise ST Segment:")
        print("1.Upsloping\n2.Flat\n3.Downsloping")
        slst = _prompt_int("Slope of Peak Exercise ST Segment (1/2/3): ", lambda v: v in (1, 2, 3))
    except ValueError:
        # Raised by the helpers for both unparsable and out-of-range answers.
        print("INVALID INPUT!")
        return

    # Prediction is kept outside the try block so that a ValueError raised by
    # pandas or the model is not misreported as invalid user input.
    test_data = {'1':[age], '2':[sex], '3':[cpt], '4':[rbps], '5':[chol], '6':[bsl], '7':[rer], '8':[maxhr], '9':[exeg], '10':[op], '11':[slst]}
    test = pd.DataFrame(test_data)
    # Placeholder keys are replaced with the training column names; this
    # assumes X has exactly these eleven feature columns in this order.
    test.columns = X.columns
    # predict() returns one label per row; take the single row's label.
    classRes = t_model.predict(test)[0]
    if classRes == 1:
        print("\nPatient has Heart Disease!")
    elif classRes == 0:
        print("\nPatient is Normal")
    else:
        print("\nSome Anomaly Occured While Processing...")
    return

In [7]:
# Interactive run: the function reads its answers from input() prompts.
predictHeartDisease()

Enter the Following Details:
Age: 0
INVALID INPUT!
