In [1]:
# Dependencies and Setup
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np

In [2]:
# Load the raw heart-disease dataset; the path is relative to the notebook's
# working directory, so the kernel must be started next to `Resources/`.
data_df = pd.read_csv("Resources/Heart_disease_data.csv")
# Preview the first rows to sanity-check column names and integer codes.
data_df.head()

Unnamed: 0,AGE,GENDER,HEIGHT,WEIGHT,AP_HIGH,AP_LOW,CHOLESTEROL,GLUCOSE,SMOKE,ALCOHOL,PHYSICAL_ACTIVITY,CARDIO_DISEASE
0,50,2,168,62,110,80,1,1,0,0,1,0
1,55,1,156,85,140,90,3,1,0,0,1,1
2,52,1,165,64,130,70,3,1,0,0,0,1
3,48,2,169,82,150,100,1,1,0,0,1,1
4,48,1,156,56,100,60,1,1,0,0,0,0


In [3]:
# Non-null count per column: equal counts across columns mean no column has
# missing values that the others lack.
data_df.count()

AGE                  68783
GENDER               68783
HEIGHT               68783
WEIGHT               68783
AP_HIGH              68783
AP_LOW               68783
CHOLESTEROL          68783
GLUCOSE              68783
SMOKE                68783
ALCOHOL              68783
PHYSICAL_ACTIVITY    68783
CARDIO_DISEASE       68783
dtype: int64

In [4]:
# Human-readable labels for every integer-coded categorical column.
# Outer key = column name, inner dict = {raw code: display label}.
# NOTE(review): "Diesease" is misspelled, but the label is data that later
# cells/outputs may match on, so it is kept exactly as it was.
category_labels = {
    "GENDER": {1: "Female", 2: "Male"},
    "CHOLESTEROL": {1: "Normal", 2: "Above Normal", 3: "High"},
    "GLUCOSE": {1: "Normal", 2: "Above Normal", 3: "High"},
    "SMOKE": {0: "Non-Smoker", 1: "Smoker"},
    "ALCOHOL": {0: "Non-Drinker", 1: "Drinker"},
    "PHYSICAL_ACTIVITY": {0: "Does not Exercise", 1: "Does Exercise"},
    "CARDIO_DISEASE": {0: "No Cardio related Diesease", 1: "Evidence of Cardio Disease"},
}

# replace() is called without inplace, so data_df keeps its raw integer codes
# and values_entered is a separate, labelled frame.
values_entered = data_df.replace(category_labels)
values_entered

Unnamed: 0,AGE,GENDER,HEIGHT,WEIGHT,AP_HIGH,AP_LOW,CHOLESTEROL,GLUCOSE,SMOKE,ALCOHOL,PHYSICAL_ACTIVITY,CARDIO_DISEASE
0,50,Male,168,62,110,80,Normal,Normal,Non-Smoker,Non-Drinker,Does Exercise,No Cardio related Diesease
1,55,Female,156,85,140,90,High,Normal,Non-Smoker,Non-Drinker,Does Exercise,Evidence of Cardio Disease
2,52,Female,165,64,130,70,High,Normal,Non-Smoker,Non-Drinker,Does not Exercise,Evidence of Cardio Disease
3,48,Male,169,82,150,100,Normal,Normal,Non-Smoker,Non-Drinker,Does Exercise,Evidence of Cardio Disease
4,48,Female,156,56,100,60,Normal,Normal,Non-Smoker,Non-Drinker,Does not Exercise,No Cardio related Diesease
...,...,...,...,...,...,...,...,...,...,...,...,...
68778,53,Male,168,76,120,80,Normal,Normal,Smoker,Non-Drinker,Does Exercise,No Cardio related Diesease
68779,62,Female,158,126,140,90,Above Normal,Above Normal,Non-Smoker,Non-Drinker,Does Exercise,Evidence of Cardio Disease
68780,52,Male,183,105,180,90,High,Normal,Non-Smoker,Drinker,Does not Exercise,Evidence of Cardio Disease
68781,61,Female,163,72,135,80,Normal,Above Normal,Non-Smoker,Non-Drinker,Does not Exercise,Evidence of Cardio Disease


In [5]:
# Class balance of the CARDIO_DISEASE label: number of rows per outcome.
values_entered.value_counts("CARDIO_DISEASE")

CARDIO_DISEASE
No Cardio related Diesease    34742
Evidence of Cardio Disease    34041
dtype: int64

In [6]:
# Largest value in the AGE column (upper bound of the age range in the data).
values_entered["AGE"].max()

65

In [7]:
# Squared height used as the BMI denominator.
# NOTE(review): despite its name, `height` holds height**2 / 10000, i.e. the
# squared height in m^2 if HEIGHT is recorded in centimetres (assumed from the
# sample values - confirm against the data dictionary).
height_raw = values_entered["HEIGHT"]
height = height_raw.pow(2).div(10000)
height

0        2.8224
1        2.4336
2        2.7225
3        2.8561
4        2.4336
          ...  
68778    2.8224
68779    2.4964
68780    3.3489
68781    2.6569
68782    2.8900
Name: HEIGHT, Length: 68783, dtype: float64

In [8]:
# BMI = weight / height^2. `height` already holds the squared height computed
# in the cell above (assumes WEIGHT is in kilograms - TODO confirm).
# This adds the column to values_entered in place.
values_entered["BMI"] = values_entered["WEIGHT"].div(height)
values_entered["BMI"]

0        21.967120
1        34.927679
2        23.507805
3        28.710479
4        23.011177
           ...    
68778    26.927438
68779    50.472681
68780    31.353579
68781    27.099251
68782    24.913495
Name: BMI, Length: 68783, dtype: float64

In [9]:
# Show the frame to confirm the new BMI column was appended.
values_entered

Unnamed: 0,AGE,GENDER,HEIGHT,WEIGHT,AP_HIGH,AP_LOW,CHOLESTEROL,GLUCOSE,SMOKE,ALCOHOL,PHYSICAL_ACTIVITY,CARDIO_DISEASE,BMI
0,50,Male,168,62,110,80,Normal,Normal,Non-Smoker,Non-Drinker,Does Exercise,No Cardio related Diesease,21.967120
1,55,Female,156,85,140,90,High,Normal,Non-Smoker,Non-Drinker,Does Exercise,Evidence of Cardio Disease,34.927679
2,52,Female,165,64,130,70,High,Normal,Non-Smoker,Non-Drinker,Does not Exercise,Evidence of Cardio Disease,23.507805
3,48,Male,169,82,150,100,Normal,Normal,Non-Smoker,Non-Drinker,Does Exercise,Evidence of Cardio Disease,28.710479
4,48,Female,156,56,100,60,Normal,Normal,Non-Smoker,Non-Drinker,Does not Exercise,No Cardio related Diesease,23.011177
...,...,...,...,...,...,...,...,...,...,...,...,...,...
68778,53,Male,168,76,120,80,Normal,Normal,Smoker,Non-Drinker,Does Exercise,No Cardio related Diesease,26.927438
68779,62,Female,158,126,140,90,Above Normal,Above Normal,Non-Smoker,Non-Drinker,Does Exercise,Evidence of Cardio Disease,50.472681
68780,52,Male,183,105,180,90,High,Normal,Non-Smoker,Drinker,Does not Exercise,Evidence of Cardio Disease,31.353579
68781,61,Female,163,72,135,80,Normal,Above Normal,Non-Smoker,Non-Drinker,Does not Exercise,Evidence of Cardio Disease,27.099251


In [24]:
def ap_check(row):
    """Label one record's blood pressure as 'High', 'Low' or 'Normal'.

    Parameters
    ----------
    row : mapping-like (e.g. a DataFrame row)
        Must expose 'AP_HIGH' and 'AP_LOW' via ``row[...]``.

    Returns
    -------
    str
        'High'   when AP_HIGH is above 120 (checked first, so it wins even if
                 AP_LOW is below 80);
        'Low'    otherwise, when AP_LOW is below 80;
        'Normal' in every remaining case.

    NOTE(review): these cut-offs are taken as-is from the analysis; whether
    "AP_LOW < 80" is the intended definition of low pressure should be
    confirmed with the author.
    """
    # Systolic test takes precedence over the diastolic one.
    if row['AP_HIGH'] > 120:
        return 'High'
    if row['AP_LOW'] < 80:
        return 'Low'
    return 'Normal'

In [25]:
# Classify every record row-wise (axis=1 hands each row to ap_check) and store
# the label in a new 'Blood Pressure' column on values_entered.
values_entered['Blood Pressure'] = values_entered.apply(ap_check, axis=1)

In [26]:
# Show the frame with the new 'Blood Pressure' column.
# NOTE(review): the saved output below already contains BMI_range, but the cell
# that creates BMI_range appears further down in this notebook. The cells were
# executed out of file order; on "Restart & Run All" this display would not
# include BMI_range. Consider moving the BMI_range cells above this section.
values_entered

Unnamed: 0,AGE,GENDER,HEIGHT,WEIGHT,AP_HIGH,AP_LOW,CHOLESTEROL,GLUCOSE,SMOKE,ALCOHOL,PHYSICAL_ACTIVITY,CARDIO_DISEASE,BMI,BMI_range,Blood Pressure
0,50,Male,168,62,110,80,Normal,Normal,Non-Smoker,Non-Drinker,Does Exercise,No Cardio related Diesease,21.967120,Normal,Normal
1,55,Female,156,85,140,90,High,Normal,Non-Smoker,Non-Drinker,Does Exercise,Evidence of Cardio Disease,34.927679,Obese,High
2,52,Female,165,64,130,70,High,Normal,Non-Smoker,Non-Drinker,Does not Exercise,Evidence of Cardio Disease,23.507805,Normal,High
3,48,Male,169,82,150,100,Normal,Normal,Non-Smoker,Non-Drinker,Does Exercise,Evidence of Cardio Disease,28.710479,OverWeight,High
4,48,Female,156,56,100,60,Normal,Normal,Non-Smoker,Non-Drinker,Does not Exercise,No Cardio related Diesease,23.011177,Normal,Low
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
68778,53,Male,168,76,120,80,Normal,Normal,Smoker,Non-Drinker,Does Exercise,No Cardio related Diesease,26.927438,OverWeight,Normal
68779,62,Female,158,126,140,90,Above Normal,Above Normal,Non-Smoker,Non-Drinker,Does Exercise,Evidence of Cardio Disease,50.472681,Obese,High
68780,52,Male,183,105,180,90,High,Normal,Non-Smoker,Drinker,Does not Exercise,Evidence of Cardio Disease,31.353579,Obese,High
68781,61,Female,163,72,135,80,Normal,Above Normal,Non-Smoker,Non-Drinker,Does not Exercise,Evidence of Cardio Disease,27.099251,OverWeight,High


In [14]:
def bmi_category(row):
    """Map a single BMI value to a weight-category label.

    Parameters
    ----------
    row : float
        One scalar BMI value. Despite the parameter name this is not a
        DataFrame row; the name is kept so existing callers keep working.

    Returns
    -------
    str
        "Underweight"  for row <= 18.5
        "Normal"       for 18.5 < row < 25
        "OverWeight"   for 25 <= row < 30
        "Obese"        for row >= 30

    Notes
    -----
    The bands are contiguous: each test only needs an upper bound because the
    lower bound is implied by the previous test having failed. An earlier
    version tested ``row > 25 and row < 30``, which left a BMI of exactly 25
    unmatched and sent it to the final "Obese" branch.

    NaN fails every comparison and therefore also ends up as "Obese"; filter
    or handle missing BMI values before calling if that matters.

    NOTE(review): WHO/CDC tables place exactly 18.5 in the normal band; the
    inclusive "Underweight" bound is kept here to match the existing analysis.
    """
    if row <= 18.5:
        return "Underweight"
    if row < 25:
        return "Normal"
    if row < 30:
        return "OverWeight"
    return "Obese"

In [15]:
# Label each BMI value element-wise with its weight category and store the
# result in a new 'BMI_range' column on values_entered.
values_entered['BMI_range'] = values_entered['BMI'].apply(bmi_category)

values_entered

Unnamed: 0,AGE,GENDER,HEIGHT,WEIGHT,AP_HIGH,AP_LOW,CHOLESTEROL,GLUCOSE,SMOKE,ALCOHOL,PHYSICAL_ACTIVITY,CARDIO_DISEASE,BMI,BMI_range
0,50,Male,168,62,110,80,Normal,Normal,Non-Smoker,Non-Drinker,Does Exercise,No Cardio related Diesease,21.967120,Normal
1,55,Female,156,85,140,90,High,Normal,Non-Smoker,Non-Drinker,Does Exercise,Evidence of Cardio Disease,34.927679,Obese
2,52,Female,165,64,130,70,High,Normal,Non-Smoker,Non-Drinker,Does not Exercise,Evidence of Cardio Disease,23.507805,Normal
3,48,Male,169,82,150,100,Normal,Normal,Non-Smoker,Non-Drinker,Does Exercise,Evidence of Cardio Disease,28.710479,OverWeight
4,48,Female,156,56,100,60,Normal,Normal,Non-Smoker,Non-Drinker,Does not Exercise,No Cardio related Diesease,23.011177,Normal
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
68778,53,Male,168,76,120,80,Normal,Normal,Smoker,Non-Drinker,Does Exercise,No Cardio related Diesease,26.927438,OverWeight
68779,62,Female,158,126,140,90,Above Normal,Above Normal,Non-Smoker,Non-Drinker,Does Exercise,Evidence of Cardio Disease,50.472681,Obese
68780,52,Male,183,105,180,90,High,Normal,Non-Smoker,Drinker,Does not Exercise,Evidence of Cardio Disease,31.353579,Obese
68781,61,Female,163,72,135,80,Normal,Above Normal,Non-Smoker,Non-Drinker,Does not Exercise,Evidence of Cardio Disease,27.099251,OverWeight
