# Heart Disease Prediction

In [2]:
#importing all the essential libraries
import numpy as np
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import OneHotEncoder
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline, make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.metrics import mean_squared_error
import ipywidgets as widgets
from IPython.display import display

# Loading and preparation of data

In [4]:
# Loading all the dataset
Heart_data=pd.read_csv("heart.csv")

In [5]:
Heart_data.head()

Unnamed: 0,Age,Sex,ChestPainType,RestingBP,Cholesterol,FastingBS,RestingECG,MaxHR,ExerciseAngina,Oldpeak,ST_Slope,HeartDisease
0,40,M,ATA,140,289,0,Normal,172,N,0.0,Up,0
1,49,F,NAP,160,180,0,Normal,156,N,1.0,Flat,1
2,37,M,ATA,130,283,0,ST,98,N,0.0,Up,0
3,48,F,ASY,138,214,0,Normal,108,Y,1.5,Flat,1
4,54,M,NAP,150,195,0,Normal,122,N,0.0,Up,0


In [6]:
Heart_data['Age'].min()

28

In [7]:
Heart_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 918 entries, 0 to 917
Data columns (total 12 columns):
 #   Column          Non-Null Count  Dtype  
---  ------          --------------  -----  
 0   Age             918 non-null    int64  
 1   Sex             918 non-null    object 
 2   ChestPainType   918 non-null    object 
 3   RestingBP       918 non-null    int64  
 4   Cholesterol     918 non-null    int64  
 5   FastingBS       918 non-null    int64  
 6   RestingECG      918 non-null    object 
 7   MaxHR           918 non-null    int64  
 8   ExerciseAngina  918 non-null    object 
 9   Oldpeak         918 non-null    float64
 10  ST_Slope        918 non-null    object 
 11  HeartDisease    918 non-null    int64  
dtypes: float64(1), int64(6), object(5)
memory usage: 86.2+ KB


In [8]:
Heart_data.shape

(918, 12)

In [9]:
Heart_data.select_dtypes("object").head()

Unnamed: 0,Sex,ChestPainType,RestingECG,ExerciseAngina,ST_Slope
0,M,ATA,Normal,N,Up
1,F,NAP,Normal,N,Flat
2,M,ATA,ST,N,Up
3,F,ASY,Normal,Y,Flat
4,M,NAP,Normal,N,Up


In [10]:
Heart_data.nunique()

Age                50
Sex                 2
ChestPainType       4
RestingBP          67
Cholesterol       222
FastingBS           2
RestingECG          3
MaxHR             119
ExerciseAngina      2
Oldpeak            53
ST_Slope            3
HeartDisease        2
dtype: int64

# Conversion of categorical data into numerical data

In [12]:
Label_encoder=LabelEncoder()
Heart_data["ExerciseAngina"]=Label_encoder.fit_transform(Heart_data["ExerciseAngina"])

In [13]:
# coverting categorical data into numerical data
Label_encoder=LabelEncoder()
Heart_data["Sex"]=Label_encoder.fit_transform(Heart_data["Sex"])

In [14]:
One_hot_encoder=OneHotEncoder(handle_unknown='ignore',sparse_output=False).set_output(transform = 'pandas')
ohe_transform=One_hot_encoder.fit_transform(Heart_data[["ChestPainType","RestingECG","ST_Slope"]])

In [15]:
Heart_Data= pd.concat([Heart_data , ohe_transform], axis=1).drop(columns=["ChestPainType","RestingECG","ST_Slope"])

In [16]:
Heart_Data.shape

(918, 19)

# Inspecting correlation between features

In [18]:
Correlation=Heart_Data.corr()
Correlation["HeartDisease"].drop(columns=["HeartDisease"])

Age                  0.282039
Sex                  0.305445
RestingBP            0.107589
Cholesterol         -0.232741
FastingBS            0.267291
MaxHR               -0.400421
ExerciseAngina       0.494282
Oldpeak              0.403951
HeartDisease         1.000000
ChestPainType_ASY    0.516716
ChestPainType_ATA   -0.401924
ChestPainType_NAP   -0.212964
ChestPainType_TA    -0.054790
RestingECG_LVH       0.010670
RestingECG_Normal   -0.091580
RestingECG_ST        0.102527
ST_Slope_Down        0.122527
ST_Slope_Flat        0.554134
ST_Slope_Up         -0.622164
Name: HeartDisease, dtype: float64

# Splitting data into training and testing set

In [20]:
Heart_Data_train, Heart_Data_test=train_test_split(Heart_Data,test_size=0.2, random_state=42)

In [21]:
target="HeartDisease"
Heart_Data_Label=Heart_Data_train[target]
Heart_Data_Train=Heart_Data_train.drop(columns=["HeartDisease"])


In [22]:
Heart_Data_Train["FastingBS"].max()

1

In [23]:
Heart_Data_Train.head()

Unnamed: 0,Age,Sex,RestingBP,Cholesterol,FastingBS,MaxHR,ExerciseAngina,Oldpeak,ChestPainType_ASY,ChestPainType_ATA,ChestPainType_NAP,ChestPainType_TA,RestingECG_LVH,RestingECG_Normal,RestingECG_ST,ST_Slope_Down,ST_Slope_Flat,ST_Slope_Up
795,42,1,120,240,1,194,0,0.8,0.0,0.0,1.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0
25,36,1,130,209,0,178,0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0
84,56,1,150,213,1,125,1,1.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0
10,37,0,130,211,0,142,0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0
344,51,1,120,0,1,104,0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0


In [24]:
Target="HeartDisease"
Heart_Test_Label=Heart_Data_test[Target]
Heart_Data_Test=Heart_Data_test.drop(columns=["HeartDisease"])

# Model Building

# Logistic Regression

In [27]:
model=make_pipeline(
  StandardScaler(),
  LogisticRegression(solver='liblinear',C=10, random_state=42)

)


In [28]:
model.fit(Heart_Data_Train, Heart_Data_Label )

In [29]:
some_data=Heart_Data.drop(columns=["HeartDisease"]).iloc[:10]
some_data_label=Heart_Data["HeartDisease"].iloc[:10]
list(some_data_label)

[0, 1, 0, 1, 0, 0, 0, 0, 1, 0]

In [30]:
p_prediction=model.predict_proba(some_data)
prediction=model.predict(some_data)
score=model.score(Heart_Data_Train, Heart_Data_Label)
Confusion_matrix=confusion_matrix(some_data_label, prediction)
report=classification_report(some_data_label, prediction)
prediction

array([0, 0, 0, 1, 0, 0, 0, 0, 1, 0], dtype=int64)

In [31]:
score

0.8719346049046321

In [32]:
Confusion_matrix

array([[7, 0],
       [1, 2]], dtype=int64)

In [33]:
prediction=model.predict(Heart_Data_Test)
score=model.score(Heart_Data_Test, Heart_Test_Label)
Confusion_matrix=confusion_matrix(Heart_Test_Label, prediction)
report=classification_report(Heart_Test_Label, prediction)
prediction

array([0, 0, 1, 1, 0, 1, 1, 0, 1, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1,
       0, 1, 1, 1, 0, 1, 0, 0, 1, 0, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0,
       0, 1, 0, 1, 1, 0, 0, 0, 1, 1, 1, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 1,
       1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 0, 1, 0, 0, 0, 1, 1, 0, 1, 1, 0, 1,
       0, 0, 0, 1, 1, 0, 1, 0, 1, 0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 1, 1, 0,
       1, 1, 0, 0, 1, 0, 1, 0, 1, 1, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1,
       1, 0, 1, 0, 1, 1, 0, 1, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 0,
       1, 0, 0, 0, 0, 1, 0, 0, 1, 0, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 0, 1,
       1, 1, 0, 0, 1, 1, 0, 1], dtype=int64)

In [34]:
score

0.8532608695652174

In [35]:
Confusion_matrix

array([[67, 10],
       [17, 90]], dtype=int64)

# Knn

In [37]:
knn=make_pipeline(
  StandardScaler(),
  KNeighborsClassifier(n_neighbors=3)

)


In [38]:
knn.fit(Heart_Data_Train, Heart_Data_Label )

In [39]:
prediction=knn.predict(Heart_Data_Test)
score=knn.score(Heart_Data_Test, Heart_Test_Label)
Confusion_matrix=confusion_matrix(Heart_Test_Label, prediction)
report=classification_report(Heart_Test_Label, prediction)
prediction

array([0, 1, 1, 1, 0, 1, 1, 0, 1, 1, 0, 0, 0, 0, 1, 1, 0, 1, 1, 0, 0, 1,
       0, 1, 1, 1, 0, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 0,
       0, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 1,
       1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 1, 0, 1, 1, 0, 0,
       0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 1, 1, 0, 1, 1, 0, 0, 1, 0, 1, 0, 0,
       1, 1, 0, 0, 1, 0, 1, 0, 1, 1, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1,
       1, 0, 1, 0, 1, 1, 0, 1, 0, 0, 1, 0, 1, 1, 1, 0, 0, 0, 0, 1, 1, 0,
       1, 0, 0, 0, 0, 1, 0, 0, 1, 0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 1, 0, 1,
       0, 1, 0, 0, 1, 1, 0, 1], dtype=int64)

In [40]:
score

0.8315217391304348

In [41]:
Confusion_matrix

array([[69,  8],
       [23, 84]], dtype=int64)

# Communication of result

In [43]:
# Styling the interactive Widgets
age_slider = widgets.IntSlider(value=30, min=1, max=100, description='Age:')
sex_dropdown = widgets.Dropdown(options=['M', 'F'], value='M', description='Sex:')
resting_bp_slider = widgets.IntSlider(value=120, min=0, max=200, description='Resting BP:')
cholesterol_slider = widgets.IntSlider(value=200, min=0, max=400, description='Cholesterol:')
fasting_bs_checkbox = widgets.Checkbox(value=False, description='Fasting BS (1 if > 120 mg/dl):')
max_hr_slider = widgets.IntSlider(value=150, min=60, max=200, description='Max HR:')
exercise_angina_checkbox = widgets.Checkbox(value=False, description='Exercise Angina:')
oldpeak_slider = widgets.FloatSlider(value=0.0, min=-2.0, max=6.0, description='Oldpeak:')

# Chest Pain Type dropdown
chest_pain_type = widgets.Dropdown(options=['ASY', 'ATA', 'NAP', 'TA'], value='ASY', description='Chest Pain Type:')

# Resting ECG dropdown
resting_ecg = widgets.Dropdown(options=['Normal', 'ST', 'LVH'], value='Normal', description='Resting ECG:')

# ST Slope dropdown
st_slope = widgets.Dropdown(options=['Up', 'Flat', 'Down'], value='Flat', description='ST Slope:')

# Prediction Button
predict_button = widgets.Button(description='Predict')

# Display of results
output = widgets.Output()

def on_predict_button_click(b):
    print("Predict button clicked.")
    try:
        # Preparation of input data
        input_data = pd.DataFrame({
            'Age': [age_slider.value],
            'Sex': [1 if sex_dropdown.value == 'M' else 0],  # Change this to 'Sex'
            'RestingBP': [resting_bp_slider.value],
            'Cholesterol': [cholesterol_slider.value],
            'FastingBS': [1 if fasting_bs_checkbox.value else 0],
            'MaxHR': [max_hr_slider.value],
            'ExerciseAngina': [1 if exercise_angina_checkbox.value else 0],
            'Oldpeak': [oldpeak_slider.value],
            # Chest Pain Type features
            'ChestPainType_ASY': [1 if chest_pain_type.value == 'ASY' else 0],
            'ChestPainType_ATA': [1 if chest_pain_type.value == 'ATA' else 0],
            'ChestPainType_NAP': [1 if chest_pain_type.value == 'NAP' else 0],
            'ChestPainType_TA': [1 if chest_pain_type.value == 'TA' else 0],
            # Resting ECG features
            'RestingECG_LVH': [1 if resting_ecg.value == 'LVH' else 0],
            'RestingECG_Normal': [1 if resting_ecg.value == 'Normal' else 0],
            'RestingECG_ST': [1 if resting_ecg.value == 'ST' else 0],
            # ST Slope features
            'ST_Slope_Down': [1 if st_slope.value == 'Down' else 0],
            'ST_Slope_Flat': [1 if st_slope.value == 'Flat' else 0],
            'ST_Slope_Up': [1 if st_slope.value == 'Up' else 0]
        })
        
        # Print input data to verify correctness
        print("Putting data for prediction:", input_data)

        # Make prediction
        prediction = model.predict(input_data)
        
        # Output the result
        with output:
            output.clear_output()  # Clear previous outputs
            print(f'Heart Disease is predicted: {"Yes" if prediction[0] == 1 else "No"}')
    except Exception as e:
        with output:
            output.clear_output()
            print(f"An error has been occurred: {e}")

# click the button to check whether the heart disease is present or not
predict_button.on_click(on_predict_button_click)

# Output display
display(widgets.VBox([
    age_slider,
    sex_dropdown,
    resting_bp_slider,
    cholesterol_slider,
    fasting_bs_checkbox,
    max_hr_slider,
    exercise_angina_checkbox,
    oldpeak_slider,
    chest_pain_type,
    resting_ecg,
    st_slope,
    predict_button,
    output  
]))


VBox(children=(IntSlider(value=30, description='Age:', min=1), Dropdown(description='Sex:', options=('M', 'F')…