In [1]:
# Import Dependencies
import pandas as pd
from path import Path

In [2]:
# Read the Data and Store it into a Pandas DataFrame
file_path = Path("Resources/heart_2020_cleaned.csv")
heart_df = pd.read_csv(file_path)
heart_df.head()

Unnamed: 0,HeartDisease,BMI,Smoking,AlcoholDrinking,Stroke,PhysicalHealth,MentalHealth,DiffWalking,Sex,AgeCategory,Race,Diabetic,PhysicalActivity,GenHealth,SleepTime,Asthma,KidneyDisease,SkinCancer
0,No,16.6,Yes,No,No,3,30,No,Female,55-59,White,Yes,Yes,Very good,5,Yes,No,Yes
1,No,20.34,No,No,Yes,0,0,No,Female,80 or older,White,No,Yes,Very good,7,No,No,No
2,No,26.58,Yes,No,No,20,30,No,Male,65-69,White,Yes,Yes,Fair,8,Yes,No,No
3,No,24.21,No,No,No,0,0,No,Female,75-79,White,No,No,Good,6,No,No,Yes
4,No,23.71,No,No,No,28,0,Yes,Female,40-44,White,No,Yes,Very good,8,No,No,No


In [3]:
# Rename Index Column as Respondent_ID
heart_df.index.name = 'Respondent_ID'
heart_df.head()

Unnamed: 0_level_0,HeartDisease,BMI,Smoking,AlcoholDrinking,Stroke,PhysicalHealth,MentalHealth,DiffWalking,Sex,AgeCategory,Race,Diabetic,PhysicalActivity,GenHealth,SleepTime,Asthma,KidneyDisease,SkinCancer
Respondent_ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
0,No,16.6,Yes,No,No,3,30,No,Female,55-59,White,Yes,Yes,Very good,5,Yes,No,Yes
1,No,20.34,No,No,Yes,0,0,No,Female,80 or older,White,No,Yes,Very good,7,No,No,No
2,No,26.58,Yes,No,No,20,30,No,Male,65-69,White,Yes,Yes,Fair,8,Yes,No,No
3,No,24.21,No,No,No,0,0,No,Female,75-79,White,No,No,Good,6,No,No,Yes
4,No,23.71,No,No,No,28,0,Yes,Female,40-44,White,No,Yes,Very good,8,No,No,No


In [4]:
# Set index to a Column
heart_df.reset_index(inplace = True)
heart_df.head()

Unnamed: 0,Respondent_ID,HeartDisease,BMI,Smoking,AlcoholDrinking,Stroke,PhysicalHealth,MentalHealth,DiffWalking,Sex,AgeCategory,Race,Diabetic,PhysicalActivity,GenHealth,SleepTime,Asthma,KidneyDisease,SkinCancer
0,0,No,16.6,Yes,No,No,3,30,No,Female,55-59,White,Yes,Yes,Very good,5,Yes,No,Yes
1,1,No,20.34,No,No,Yes,0,0,No,Female,80 or older,White,No,Yes,Very good,7,No,No,No
2,2,No,26.58,Yes,No,No,20,30,No,Male,65-69,White,Yes,Yes,Fair,8,Yes,No,No
3,3,No,24.21,No,No,No,0,0,No,Female,75-79,White,No,No,Good,6,No,No,Yes
4,4,No,23.71,No,No,No,28,0,Yes,Female,40-44,White,No,Yes,Very good,8,No,No,No


In [5]:
# Create the Demographics Table
demographics_df = heart_df[['Respondent_ID', 'HeartDisease', 'Sex', 'AgeCategory', 'Race']].copy()
demographics_df.head()

Unnamed: 0,Respondent_ID,HeartDisease,Sex,AgeCategory,Race
0,0,No,Female,55-59,White
1,1,No,Female,80 or older,White
2,2,No,Male,65-69,White
3,3,No,Female,75-79,White
4,4,No,Female,40-44,White


In [6]:
# Rename the Columns for the Demographics Table
demographics_renamed_df = demographics_df.rename(columns = {'HeartDisease': 'Heart_Disease', 'AgeCategory': 'Age_Range'})
demographics_renamed_df.head()

Unnamed: 0,Respondent_ID,Heart_Disease,Sex,Age_Range,Race
0,0,No,Female,55-59,White
1,1,No,Female,80 or older,White
2,2,No,Male,65-69,White
3,3,No,Female,75-79,White
4,4,No,Female,40-44,White


In [7]:
# Create the Health Metrics Table
health_metrics_df = heart_df[['Respondent_ID', 'HeartDisease', 'BMI', 'Smoking', 'AlcoholDrinking', 'PhysicalHealth', 'MentalHealth', 'PhysicalActivity', 'GenHealth', 'SleepTime']]
health_metrics_df.head()

Unnamed: 0,Respondent_ID,HeartDisease,BMI,Smoking,AlcoholDrinking,PhysicalHealth,MentalHealth,PhysicalActivity,GenHealth,SleepTime
0,0,No,16.6,Yes,No,3,30,Yes,Very good,5
1,1,No,20.34,No,No,0,0,Yes,Very good,7
2,2,No,26.58,Yes,No,20,30,Yes,Fair,8
3,3,No,24.21,No,No,0,0,No,Good,6
4,4,No,23.71,No,No,28,0,Yes,Very good,8


In [8]:
health_metrics_renamed_df = health_metrics_df.rename(columns = {'HeartDisease': 'Heart_Disease', 
                                                                'AlcoholDrinking': 'Alcohol_Drinking',
                                                               'PhysicalHealth': 'Healthy_Physical_Days',
                                                               'MentalHealth': 'Healthy_Mental_Days',
                                                               'PhysicalActivity': 'Physical_Activity',
                                                               'GenHealth': 'General_Health',
                                                               'SleepTime': 'Sleep_Hours'})
health_metrics_renamed_df.head()

Unnamed: 0,Respondent_ID,Heart_Disease,BMI,Smoking,Alcohol_Drinking,Healthy_Physical_Days,Healthy_Mental_Days,Physical_Activity,General_Health,Sleep_Hours
0,0,No,16.6,Yes,No,3,30,Yes,Very good,5
1,1,No,20.34,No,No,0,0,Yes,Very good,7
2,2,No,26.58,Yes,No,20,30,Yes,Fair,8
3,3,No,24.21,No,No,0,0,No,Good,6
4,4,No,23.71,No,No,28,0,Yes,Very good,8


In [9]:
# Create the Morbidity Table
morbidity_df = heart_df[['Respondent_ID', 'HeartDisease', 'Stroke', 'DiffWalking', 'Diabetic', 'Asthma', 'KidneyDisease', 'SkinCancer']]
morbidity_df.head()

Unnamed: 0,Respondent_ID,HeartDisease,Stroke,DiffWalking,Diabetic,Asthma,KidneyDisease,SkinCancer
0,0,No,No,No,Yes,Yes,No,Yes
1,1,No,Yes,No,No,No,No,No
2,2,No,No,No,Yes,Yes,No,No
3,3,No,No,No,No,No,No,Yes
4,4,No,No,Yes,No,No,No,No


In [10]:
# Rename the columns for the Morbidity Table
morbidity_renamed_df = morbidity_df.rename(columns = {'HeartDisease': 'Heart_Disease', 'DiffWalking': 'Difficulty_Walking', 
                                                      'Diabetic': 'Diabetes', 'KidneyDisease': 'Kidney_Disease', 
                                                      'SkinCancer': 'Skin_Cancer'})
morbidity_renamed_df

Unnamed: 0,Respondent_ID,Heart_Disease,Stroke,Difficulty_Walking,Diabetes,Asthma,Kidney_Disease,Skin_Cancer
0,0,No,No,No,Yes,Yes,No,Yes
1,1,No,Yes,No,No,No,No,No
2,2,No,No,No,Yes,Yes,No,No
3,3,No,No,No,No,No,No,Yes
4,4,No,No,Yes,No,No,No,No
...,...,...,...,...,...,...,...,...
319790,319790,Yes,No,Yes,Yes,Yes,No,No
319791,319791,No,No,No,No,Yes,No,No
319792,319792,No,No,No,No,No,No,No
319793,319793,No,No,No,No,No,No,No
