In [1]:
# Initial imports.
import numpy as np
import pandas as pd
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, accuracy_score, classification_report
from collections import Counter

In [2]:
# Read the source CSV into a DataFrame and show the number of non-null
# values per column, which reveals the columns that have missing entries.
csv_path = "../Data/framinghamid.csv"
heart_df = pd.read_csv(csv_path)
heart_df.count()

patientid          4119
male               4119
age                4119
education          4016
currentsmoker      4119
cigsperday         4090
bpmeds             4119
prevalentstroke    4119
prevalenthyp       4119
diabetes           4119
totchol            4119
sysbp              4119
diabp              4119
bmi                4119
heartrate          4119
glucose            3776
tenyearchd         4119
dtype: int64

In [3]:
# Clean the imported file for machine learning.
# Remove the columns that are excluded from the model, then discard every row
# that still contains a missing value.
# errors="ignore" keeps this cell re-runnable: heart_df is reassigned here, so
# on a second execution the columns are already gone and a plain drop() would
# raise KeyError.
heart_df = heart_df.drop(
    columns=["education", "glucose", "patientid"], errors="ignore"
).dropna()
heart_df.count()

male               4090
age                4090
currentsmoker      4090
cigsperday         4090
bpmeds             4090
prevalentstroke    4090
prevalenthyp       4090
diabetes           4090
totchol            4090
sysbp              4090
diabp              4090
bmi                4090
heartrate          4090
tenyearchd         4090
dtype: int64

In [4]:
# Separate the target (y) from the features (X).
target_col = "tenyearchd"
y = heart_df[target_col]
X = heart_df.drop(columns=[target_col])

In [5]:
# Split the features and target into training and test partitions; the fixed
# random_state makes the split reproducible.
# NOTE(review): the target is imbalanced in the recorded run (far fewer 1s than
# 0s in y_train) and the split is not stratified -- consider stratify=y.
split_parts = train_test_split(X, y, random_state=1)
X_train, X_test, y_train, y_test = split_parts

# Class counts of the training target.
Counter(y_train)

Counter({1: 465, 0: 2602})

In [6]:
# Standardize the features. The scaler is fitted on the training partition
# only and then applied to both the training and the test partition.
scaler = StandardScaler()
X_scaler = scaler.fit(X_train)

# Apply the fitted scaler to each partition, in (train, test) order.
X_train_scaled, X_test_scaled = (
    X_scaler.transform(partition) for partition in (X_train, X_test)
)

In [7]:
# Create the logistic regression model and train it on the scaled training
# features; fit() hands back the fitted estimator, so the calls are chained.
# NOTE(review): the recorded classification report shows very low recall for
# class 1 -- consider class_weight="balanced" or resampling; confirm the goal.
classifier = LogisticRegression(solver="lbfgs", random_state=1).fit(
    X_train_scaled, y_train
)
classifier

LogisticRegression(random_state=1)

In [8]:
# Predict on the scaled test features and tabulate the confusion matrix
# with labelled rows (actual class) and columns (predicted class).
predictions = classifier.predict(X_test_scaled)
cm = confusion_matrix(y_test, predictions)

row_labels = ["Actual 0", "Actual 1"]
col_labels = ["Predicted 0", "Predicted 1"]
cm_df = pd.DataFrame(cm, index=row_labels, columns=col_labels)
display(cm_df)

Unnamed: 0,Predicted 0,Predicted 1
Actual 0,869,8
Actual 1,134,12


In [9]:
# Print a heading followed by the per-class precision / recall / f1 report
# for the test-set predictions.
report_text = classification_report(y_test, predictions)
print("Classification Report", report_text, sep="\n")

Classification Report
              precision    recall  f1-score   support

           0       0.87      0.99      0.92       877
           1       0.60      0.08      0.14       146

    accuracy                           0.86      1023
   macro avg       0.73      0.54      0.53      1023
weighted avg       0.83      0.86      0.81      1023

