In [5]:
import ipywidgets as widgets
from IPython.display import display, clear_output
import matplotlib.pyplot as plt
import pandas as pd
import plotly.express as px
import plotly.io as pio
import seaborn as sns
from sklearn import preprocessing, linear_model, metrics, model_selection
import warnings
warnings.filterwarnings("ignore")
pio.renderers.default = "notebook"

In [6]:
# Enable the ipywidgets notebook extension and the Voila server extension.
# --sys-prefix writes the configuration into the current Python environment.
!jupyter nbextension enable --py widgetsnbextension --sys-prefix
!jupyter serverextension enable voila --sys-prefix

Enabling notebook extension jupyter-js-widgets/extension...
      - Validating: ok
Enabling: voila
- Writing config: C:\Users\brady\AppData\Local\Programs\Python\Python310\etc\jupyter
    - Validating...
      voila 0.4.0 ok


In [7]:
# Load the raw dataset; the relative path is resolved against the current working directory.
df = pd.read_csv("heart.csv")

def prepare_dataset(df):
    """Return a one-hot encoded copy of `df` with `HeartDisease` as the last column.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing a ``HeartDisease`` column. Columns of ``object`` dtype
        are treated as categorical and expanded into one indicator column per
        distinct value (no category is dropped).

    Returns
    -------
    pandas.DataFrame
        A new frame; the frame passed in is left untouched.

    Raises
    ------
    KeyError
        If the encoded frame has no ``HeartDisease`` column.
    """
    string_cols = list(df.select_dtypes(include="object").columns)

    # Turning all object values to a string. `astype` returns a new frame, so
    # the caller's DataFrame keeps its original dtypes.
    df_typed = df.astype({col: "string" for col in string_cols})

    # Converting categorical data into dummy values. A frame without text
    # columns (e.g. an already-encoded row) is passed through unchanged.
    if string_cols:
        df_preped = pd.get_dummies(df_typed, columns=string_cols, drop_first=False)
    else:
        df_preped = df_typed

    # Setting the target heart disease column to the end
    feature_cols = [col for col in df_preped.columns if col != "HeartDisease"]
    return df_preped[feature_cols + ["HeartDisease"]]

# Encoded version of the dataset (HeartDisease as the last column); this is the frame the model is trained on.
df_preped = prepare_dataset(df)

In [8]:
def create_button(description, function, clear_output=True):
    """Display a button that runs `function` and shows its output underneath.

    Parameters
    ----------
    description : str
        Label shown on the button.
    function : callable
        Called without arguments on click; everything it prints or displays
        is captured by the Output widget placed below the button.
    clear_output : bool, default True
        Decides what a click does when output is already showing:
        True clears the output (the button acts as a show/hide toggle),
        False clears the output and runs `function` again.
        Note that inside this function the name shadows
        `IPython.display.clear_output`; it is kept for existing callers.
    """
    layout = widgets.Layout(width="auto", height="40px")
    button = widgets.Button(description=description, layout=layout, button_style="primary")
    output = widgets.Output()

    def run_function():
        # Disable the button while `function` runs. The `finally` re-enables
        # it even when `function` raises, so a failing callback cannot leave
        # the button permanently greyed out.
        button.disabled = True
        try:
            function()
        finally:
            button.disabled = False

    def on_button_click(b):
        with output:
            if len(output.outputs) == 0:
                # Nothing shown yet: first click (or click after a toggle-off)
                run_function()
            elif clear_output:
                output.clear_output()
            else:
                output.clear_output()
                run_function()

    button.on_click(on_button_click)

    display(button)
    display(output)

# Heart Disease Prediction Model Based on Logistic Regression

<br>
<br>

## Plot showing how each data point correlates to the existence of heart disease
Values near 1 indicate a strong positive correlation between two columns, values near -1 indicate a strong negative (inverse) correlation, and values near 0 indicate little or no linear correlation between them

In [9]:

def correlation_plot():
    """Show a heatmap of the pairwise correlations between the numeric columns of `df`."""
    # Select the numeric columns explicitly: since pandas 2.0 `DataFrame.corr`
    # no longer drops text columns silently and raises on them instead.
    numeric_df = df.select_dtypes(include="number")
    fig = px.imshow(numeric_df.corr(), title="Correlation Plot of the Heart Disease Prediction")
    fig.show()

create_button("Show Correlation Plot", correlation_plot)

Button(description='Show Correlation Plot', layout=Layout(height='40px', width='auto'), style=ButtonStyle())

Output()

<br>

## Heart disease distribution between male and female

In [None]:
def heart_disease_distribution_plot():
    """Show a grouped histogram of the `HeartDisease` column, coloured by `Sex`."""
    histogram_options = {
        "x": "HeartDisease",
        "color": "Sex",
        "hover_data": df.columns,
        "title": "Heart Disease Distribution by Sex",
        "barmode": "group",
    }
    px.histogram(df, **histogram_options).show()


create_button(
    "Show Heart Disease Distribution Plot",
    heart_disease_distribution_plot,
)

<br>

## Ratio of male to female participants in the dataset

In [None]:
def male_female_ratio_plot():
    """Show a histogram counting the rows of `df` per value of `Sex`."""
    histogram_options = {
        "x": "Sex",
        "hover_data": df.columns,
        "title": "Sex Ratio Data",
    }
    px.histogram(df, **histogram_options).show()


create_button(
    "Show Male to Female Ratio Plot",
    male_female_ratio_plot,
)

<br>

## Distribution of chest pain types within the dataset

In [None]:
def chest_pain_types_plot():
    """Show a histogram of `ChestPainType`, coloured by `Sex`."""
    histogram_options = {
        "x": "ChestPainType",
        "color": "Sex",
        "hover_data": df.columns,
        "title": "Chest Pain Type Distribution",
    }
    px.histogram(df, **histogram_options).show()


create_button(
    "Show Chest Pain Types Plot",
    chest_pain_types_plot,
)

<br>

## Distribution of max heart rate within the dataset

In [None]:
def max_heart_rate_plot():
    """Show a histogram of `MaxHR`, coloured by `Sex`, with a gap between bars."""
    histogram_options = {
        "x": "MaxHR",
        "color": "Sex",
        "hover_data": df.columns,
        "title": "Max Heart Rate Levels Distribution",
    }
    # update_layout returns the figure, so the calls can be chained
    px.histogram(df, **histogram_options).update_layout(bargap=0.2).show()


create_button(
    "Show Max Heart Rate Plot",
    max_heart_rate_plot,
)

<br>

## Distribution of cholesterol levels within the dataset

In [None]:
def cholesterol_levels_plot():
    """Show a histogram of `Cholesterol`, coloured by `Sex`, with a gap between bars."""
    histogram_options = {
        "x": "Cholesterol",
        "color": "Sex",
        "hover_data": df.columns,
        "title": "Cholesterol Levels Distribution",
    }
    # update_layout returns the figure, so the calls can be chained
    px.histogram(df, **histogram_options).update_layout(bargap=0.2).show()


create_button(
    "Show Cholesterol Levels Plot",
    cholesterol_levels_plot,
)

<br>

## Distribution of resting blood pressure levels within the dataset

In [None]:
def resting_blood_pressure_plot():
    """Show a histogram of `RestingBP`, coloured by `Sex`."""
    histogram_options = {
        "x": "RestingBP",
        "color": "Sex",
        "hover_data": df.columns,
        "title": "Resting Blood Pressure Levels Distribution",
    }
    px.histogram(df, **histogram_options).show()


create_button(
    "Show Resting Blood Pressure Plot",
    resting_blood_pressure_plot,
)

<br>

## Relationships between every datapoint in the dataset

In [None]:
def data_relationships_plot():
    """Show a seaborn pair plot of `df`, coloured by `HeartDisease`."""
    hue = "HeartDisease"
    # pairplot creates its own figure; a preceding plt.figure() call would
    # only add a second, empty figure to the output.
    g = sns.pairplot(df, hue=hue, palette="husl")
    # Title the whole grid instead of whichever subplot happens to be current
    g.figure.suptitle("Looking for Insights in Data", y=1.02)
    # Reposition the legend pairplot already drew rather than building a
    # second one from the grid's private `_legend_data`
    sns.move_legend(g, "lower right", title=hue)
    plt.show()

create_button("Show Data Relationships Plot", data_relationships_plot)

<br>

## Frequency polygon histogram describing the distributions for every datapoint

In [None]:
def data_distribution_plot():
    """Show one histogram (with a KDE curve) per column of `df` in a three-column grid."""
    columns_per_row = 3
    # Ceiling division: derive the number of rows from the data instead of
    # assuming exactly 12 columns (12 columns still give the 4x3 grid).
    row_count = max(1, -(-len(df.columns) // columns_per_row))

    # 2.5 inches per row keeps the 15x10 figure for the 4-row case
    plt.figure(figsize=(15, 2.5 * row_count))
    for i, col in enumerate(df.columns, 1):
        plt.subplot(row_count, columns_per_row, i)
        plt.title(f"Distribution of {col} Data")
        sns.histplot(df[col], kde=True)
    # Lay out once, after every subplot exists, instead of on each iteration
    plt.tight_layout()
    plt.show()

create_button("Show Data Distribution Plot", data_distribution_plot)

In [12]:
# Default answers for the questionnaire. Each widget cell below registers a
# callback that overwrites the matching global when the user changes the widget;
# get_user_input() reads these globals when a prediction is requested.
age = 50
sex = "M"
chest_pain_type = "ATA"
resting_bp = 100
cholesterol = 100
fasting_bs = 0
resting_ecg = "Normal"
max_hr = 100
exercise_angina = "N"
oldpeak = 0
st_slope = "Flat"

# Number of train/evaluate repetitions that heart_disease_prediction() averages over
number_of_folds = 10

<br>

## Here you can enter values for each datapoint and receive a heart disease prediction based on the information you entered

In [None]:
print("What sex were you assigned at birth?")
# Start the dropdown at the current global value so the displayed choice is
# always the one the model will use (also when this cell is run again).
w = widgets.Dropdown(
    options=[("Male", "M"), ("Female", "F")],
    value=sex,
)


def on_change(change):
    """Store the newly selected sex code in the global `sex`."""
    global sex
    sex = change["new"]


w.observe(on_change, names="value")
display(w)

In [None]:
print("How old are you?")
# Without an explicit value the box would show 0 while the model keeps using
# the default age until the user edits the field.
w = widgets.BoundedIntText(
    value=age,
    min=0,
    max=125,
    step=1,
)


def on_change(change):
    """Store the newly entered age in the global `age`."""
    global age
    age = change["new"]


w.observe(on_change, names="value")
display(w)

In [None]:
print("Which of the following types of chest pain do you have?")
# Without an explicit value the dropdown would show its first option
# ("Typical Angina") while the model keeps using the default chest pain type.
w = widgets.Dropdown(
    options=[
        ("Typical Angina", "TA"),
        ("Atypical Angina", "ATA"),
        ("Non-Anginal Pain", "NAP"),
        ("Asymptomatic", "ASY"),
    ],
    value=chest_pain_type,
)


def on_change(change):
    """Store the newly selected chest pain code in the global `chest_pain_type`."""
    global chest_pain_type
    chest_pain_type = change["new"]


w.observe(on_change, names="value")
display(w)

In [None]:
# Blood pressure is measured in millimetres of mercury ("mm Hg", not "mm/Hg")
print("What is your resting blood pressure? (mm Hg)")
# Without an explicit value the box would show 0 while the model keeps using
# the default resting blood pressure.
w = widgets.BoundedIntText(
    value=resting_bp,
    min=0,
    max=400,
    step=1,
)


def on_change(change):
    """Store the newly entered resting blood pressure in the global `resting_bp`."""
    global resting_bp
    resting_bp = change["new"]


w.observe(on_change, names="value")
display(w)

In [None]:
# Serum cholesterol is a concentration in milligrams per decilitre (mg/dl)
print("What are your serum cholesterol levels? (mg/dl)")
# Without an explicit value the box would show 0 while the model keeps using
# the default cholesterol level.
w = widgets.BoundedIntText(
    value=cholesterol,
    min=0,
    max=1000,
    step=1,
)


def on_change(change):
    """Store the newly entered cholesterol level in the global `cholesterol`."""
    global cholesterol
    cholesterol = change["new"]


w.observe(on_change, names="value")
display(w)

In [None]:
# Blood sugar is a concentration in milligrams per decilitre (mg/dl)
print("Check the box if your fasting blood sugar is greater than 120 mg/dl")
# Show the checkbox in the state that matches the current global value
w = widgets.Checkbox(value=bool(fasting_bs))


def on_change(change):
    """Store the checkbox state as 0/1, the same form as the default `fasting_bs`."""
    global fasting_bs
    fasting_bs = int(change["new"])


w.observe(on_change, names="value")
display(w)

In [None]:
print("What are your resting electrocardiogram results?")
# Start the dropdown at the current global value so the displayed choice is
# always the one the model will use (also when this cell is run again).
w = widgets.Dropdown(
    options=[
        ("Normal", "Normal"),
        ("Having ST-T wave abnormality", "ST"),
        ("Showing probable or definite left ventricular hypertrophy by Estes' criteria", "LVH"),
    ],
    value=resting_ecg,
)


def on_change(change):
    """Store the newly selected ECG result code in the global `resting_ecg`."""
    global resting_ecg
    resting_ecg = change["new"]


w.observe(on_change, names="value")
display(w)

In [None]:
print("What is the maximum heart rate you have achieved?")
# Without an explicit value the box would show its minimum (60) while the
# model keeps using the default maximum heart rate.
w = widgets.BoundedIntText(
    value=max_hr,
    min=60,
    max=202,
    step=1,
)


def on_change(change):
    """Store the newly entered maximum heart rate in the global `max_hr`."""
    global max_hr
    max_hr = change["new"]


w.observe(on_change, names="value")
display(w)

In [None]:
print("Check the box if you are diagnosed with exercise-induced angina")
# Show the checkbox in the state that matches the current global value
w = widgets.Checkbox(value=exercise_angina in ("Y", True))


def on_change(change):
    """Store the checkbox state in the global `exercise_angina` as "Y" or "N".

    get_user_input() looks the value up by these codes; storing the raw
    True/False would match neither and leave both indicator columns at 0.
    """
    global exercise_angina
    exercise_angina = "Y" if change["new"] else "N"


w.observe(on_change, names="value")
display(w)

In [None]:
print("What was your oldpeak measurement during the stress test? This is the measurement of ST segment depression on the electrocardiogram")
# Start the field at the current global value so the displayed number is
# always the one the model will use (also when this cell is run again).
w = widgets.BoundedFloatText(
    value=oldpeak,
    min=-5.0,
    max=5.0,
    step=0.1,
)


def on_change(change):
    """Store the newly entered oldpeak value in the global `oldpeak`."""
    global oldpeak
    oldpeak = change["new"]


w.observe(on_change, names="value")
display(w)

In [None]:
print("During the stress test, what was the slope of the ST segment on the electrocardiogram?")
# Without an explicit value the dropdown would show its first option ("Up")
# while the model keeps using the default slope.
w = widgets.Dropdown(
    options=["Up", "Flat", "Down"],
    value=st_slope,
)


def on_change(change):
    """Store the newly selected slope in the global `st_slope`."""
    global st_slope
    st_slope = change["new"]


w.observe(on_change, names="value")
display(w)

<br>

### Click the button below to see the odds that you have heart disease
You will see that all provided metrics change every time you click the button.<br>
This is because the model is retrained on a different random split of training and test data each time the button is clicked.<br><br>

#### Here you can select the number of folds that the model will run through. The more folds there are, the more consistent your prediction will be,
#### though it may take longer to receive your prediction

In [None]:
# Start the slider at the current global value; a hard-coded start of 100
# would be displayed while the model actually ran with the default fold count.
w = widgets.IntSlider(
    value=number_of_folds,
    min=1,
    max=1000,
    step=1,
)


def on_change(change):
    """Store the newly selected repetition count in the global `number_of_folds`."""
    global number_of_folds
    number_of_folds = change["new"]


w.observe(on_change, names="value")
display(w)

In [None]:
def get_user_input():
    """Build a one-row DataFrame from the questionnaire globals.

    The row uses the column layout of `df_preped`: numeric answers are copied
    as they are and every categorical answer is expanded into its indicator
    columns (1 for the selected code, 0 for the others). An answer that
    matches none of the known codes leaves its whole group at 0.

    Returns
    -------
    pandas.DataFrame
        Single row whose ``HeartDisease`` value is the filler -1.
    """
    # The angina checkbox delivers a bool while the default value is the code
    # "N"; accept both forms so a ticked box is not silently ignored.
    angina_code = exercise_angina
    if isinstance(angina_code, bool):
        angina_code = "Y" if angina_code else "N"

    # Start every model column at 0 so that no column is left as NaN
    row = dict.fromkeys(df_preped.columns, 0)

    # Asigning user values to the row
    row.update({
        "Age": age,
        "RestingBP": resting_bp,
        "Cholesterol": cholesterol,
        "FastingBS": int(fasting_bs),  # the checkbox may deliver True/False
        "MaxHR": max_hr,
        "Oldpeak": oldpeak,
        "HeartDisease": -1,  # -1 is just a filler value representing no known diagnosis
    })

    # column prefix -> (selected code, codes that have an indicator column)
    one_hot_answers = {
        "Sex": (sex, ("M", "F")),
        "ChestPainType": (chest_pain_type, ("ASY", "ATA", "NAP", "TA")),
        "RestingECG": (resting_ecg, ("LVH", "Normal", "ST")),
        "ExerciseAngina": (angina_code, ("N", "Y")),
        "ST_Slope": (st_slope, ("Down", "Flat", "Up")),
    }
    for prefix, (selected, known_codes) in one_hot_answers.items():
        for code in known_codes:
            row[f"{prefix}_{code}"] = int(selected == code)

    return pd.DataFrame([row])

In [15]:
def heart_disease_prediction():
    """Train and evaluate the model `number_of_folds` times and print the averages.

    Each repetition draws a new random 70/30 train/test split of the min-max
    scaled dataset, fits a logistic regression, scores it on the test part and
    predicts the label for the row built by `get_user_input()`. The printed
    "chance" is the percentage of repetitions that predicted heart disease;
    accuracy, precision, recall and F1 are means over all repetitions.

    Raises
    ------
    ValueError
        If `number_of_folds` is smaller than 1.
    """
    if number_of_folds < 1:
        raise ValueError("number_of_folds must be at least 1")

    df_input = get_user_input()

    # Scaling does not depend on the random split, so it is done once here
    # instead of once per repetition.
    # NOTE(review): the scaler is fitted on the full dataset before splitting,
    # so the test rows influence the scaling; kept as in the original design.
    scaler = preprocessing.MinMaxScaler()
    scaler.fit(df_preped)
    df_minmax = pd.DataFrame(scaler.transform(df_preped), columns=df_preped.columns)

    # Assigning dependent and independent data (the target is the last column)
    x = df_minmax.iloc[:, :-1]
    y = df_minmax.iloc[:, -1]

    # Prepping and applying minmax scaling on user inputted data
    df_new = prepare_dataset(df_input)
    user_minmax = pd.DataFrame(scaler.transform(df_new), columns=df_new.columns)
    user_features = user_minmax.drop(columns=["HeartDisease"])

    accuracy = 0
    precision = 0
    recall = 0
    f1_total = 0
    positive_votes = 0

    for _ in range(number_of_folds):
        x_train, x_test, y_train, y_test = model_selection.train_test_split(x, y, test_size=0.3)

        # Fitting Logistic Regression Model
        logistic_regression_model = linear_model.LogisticRegression(max_iter=1000)
        logistic_regression_model.fit(x_train, y_train)
        test_prediction = logistic_regression_model.predict(x_test)

        accuracy += metrics.accuracy_score(y_test, test_prediction) * 100
        precision += metrics.precision_score(y_test, test_prediction) * 100
        recall += metrics.recall_score(y_test, test_prediction) * 100
        f1_total += metrics.f1_score(y_test, test_prediction) * 100

        # predict() returns a one-element array for the single user row; take
        # the element explicitly (int() on an array is deprecated in NumPy).
        positive_votes += int(logistic_regression_model.predict(user_features)[0])

    accuracy = round(accuracy / number_of_folds, 2)
    precision = round(precision / number_of_folds, 2)
    recall = round(recall / number_of_folds, 2)
    f1_mean = round(f1_total / number_of_folds, 2)

    prediction = round((positive_votes / number_of_folds) * 100, 2)

    print(f"The model predicts that you have an {prediction}% chance to have heart disease with an accuracy of {accuracy}%, a precision of {precision}"
          f"%, a recall of {recall}%, and an f1 score of {f1_mean}%")

create_button("View Prediction", heart_disease_prediction, False)

Click the button below to see the odds that you have heart disease


Button(description='View Prediction', layout=Layout(height='40px', width='auto'), style=ButtonStyle())

Output()

### How are my results measured?
(Note that each result is the average across all folds that were run; thus, using fewer folds will produce less consistent results.)
#### Chance of Heart Disease
This number is the percentage of folds in which the retrained model predicted heart disease for the values you entered.
#### Accuracy
Accuracy measures the percentage of correctly predicted labels among all the labels in the test dataset.
#### Precision
Precision measures the percentage of correctly predicted positive labels (true positives) among all the predicted positive labels (true positives and false positives).
#### Recall
Recall measures the percentage of correctly predicted positive labels (true positives) among all the actual positive labels (true positives and false negatives).
#### F1 Score
F1 score is the harmonic mean of precision and recall. It is a measure of the balance between precision and recall.

In [1]:
!pip freeze > requirements.txt