In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import confusion_matrix, classification_report

In [2]:
# Load the Framingham CSV from the working directory into a DataFrame,
# then display its first five rows as the cell output.
heart = pd.read_csv(filepath_or_buffer='framingham.csv')
heart.head(n=5)

Unnamed: 0,male,age,education,currentSmoker,cigsPerDay,BPMeds,prevalentStroke,prevalentHyp,diabetes,totChol,sysBP,diaBP,BMI,heartRate,glucose,TenYearCHD
0,1,39,4.0,0,0.0,0.0,0,0,0,195.0,106.0,70.0,26.97,80.0,77.0,0
1,0,46,2.0,0,0.0,0.0,0,0,0,250.0,121.0,81.0,28.73,95.0,76.0,0
2,1,48,1.0,1,20.0,0.0,0,0,0,245.0,127.5,80.0,25.34,75.0,70.0,0
3,0,61,3.0,1,30.0,0.0,0,1,0,225.0,150.0,95.0,28.58,65.0,103.0,1
4,0,46,3.0,1,23.0,0.0,0,0,0,285.0,130.0,84.0,23.1,85.0,85.0,0


In [3]:
# Discard every row that contains at least one missing value in any column.
# The name `heart` is rebound to the filtered frame.
heart = heart.dropna(axis=0, how='any')

In [4]:
# Feature matrix: the five predictor columns used by the model.
X = heart[['age', 'currentSmoker', 'totChol', 'BMI', 'heartRate']]
# Target vector: the TenYearCHD column.
Y = heart['TenYearCHD']

# Hold out 20% of the rows for evaluation.
# random_state pins the shuffle so that a fresh "Restart Kernel -> Run All"
# yields the same partition, and therefore the same downstream metrics.
# Without it every run produces a different split and different results.
X_train, X_test, Y_train, Y_test = train_test_split(
    X, Y, test_size=0.2, random_state=42
)

In [7]:
# Fit a logistic regression classifier (default settings) on the training split.
logit_md = LogisticRegression()
logit_md.fit(X_train, Y_train)

# Score the test split: predict_proba returns one column per class;
# column index 1 is kept as the predicted probability.
test_proba = logit_md.predict_proba(X_test)
logit_pred = test_proba[:, 1]
print(logit_pred)

# Turn probabilities into hard labels: below the cutoff -> 0, otherwise -> 1.
cutoff = .25
logit_label = np.where(logit_pred < cutoff, 0, 1)
print(logit_label)

[0.17143375 0.06376488 0.17797851 0.11187937 0.05980697 0.47384848
 0.05782981 0.04547949 0.06480492 0.10561954 0.04686762 0.20042632
 0.05384309 0.24517088 0.09617257 0.06354917 0.2132576  0.22966433
 0.05346649 0.14681548 0.12570814 0.10097184 0.07242684 0.06929242
 0.18590444 0.07438256 0.28989511 0.10756095 0.1686409  0.22649028
 0.0304228  0.0392927  0.20452149 0.0746653  0.1716945  0.12598792
 0.15103858 0.0692716  0.12877571 0.28276013 0.1258476  0.23165741
 0.0861352  0.1551017  0.08161641 0.1729931  0.18090168 0.05169135
 0.11902479 0.102377   0.27113905 0.0312669  0.11576708 0.20457117
 0.11299628 0.25329403 0.06405781 0.07584767 0.24198822 0.05602348
 0.12314536 0.06089094 0.1293156  0.07608858 0.10291241 0.22731285
 0.05528134 0.2237354  0.18821749 0.3167566  0.16619888 0.06086079
 0.20884332 0.2417454  0.03339242 0.06482258 0.3811771  0.38912797
 0.04383139 0.18425855 0.08238504 0.15303984 0.13634456 0.3269469
 0.14027661 0.06271022 0.17558384 0.14787977 0.26904755 0.26035

In [8]:
# Confusion matrix of the thresholded predictions against the true test labels
# (scikit-learn convention: rows are true classes, columns are predicted classes).
confusion_matrix(y_true=Y_test, y_pred=logit_label)

array([[533,  91],
       [ 64,  44]], dtype=int64)

In [9]:
# Build the per-class precision / recall / F1 / support summary for the
# test-set predictions, then print it as plain text.
report_text = classification_report(y_true=Y_test, y_pred=logit_label)
print(report_text)

              precision    recall  f1-score   support

           0       0.89      0.85      0.87       624
           1       0.33      0.41      0.36       108

    accuracy                           0.79       732
   macro avg       0.61      0.63      0.62       732
weighted avg       0.81      0.79      0.80       732

