In [1]:
# Import our dependencies
import pandas as pd

import warnings
warnings.filterwarnings('ignore')

# Read the cardio_train.csv.
cardio_df = pd.read_csv("resources/cardio_train.csv", delimiter = ";")
cardio_df

Unnamed: 0,id,age,gender,height,weight,ap_hi,ap_lo,cholesterol,gluc,smoke,alco,active,cardio
0,0,18393,2,168,62.0,110,80,1,1,0,0,1,0
1,1,20228,1,156,85.0,140,90,3,1,0,0,1,1
2,2,18857,1,165,64.0,130,70,3,1,0,0,0,1
3,3,17623,2,169,82.0,150,100,1,1,0,0,1,1
4,4,17474,1,156,56.0,100,60,1,1,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
69995,99993,19240,2,168,76.0,120,80,1,1,1,0,1,0
69996,99995,22601,1,158,126.0,140,90,2,2,0,0,1,1
69997,99996,19066,2,183,105.0,180,90,3,1,0,1,0,1
69998,99998,22431,1,163,72.0,135,80,1,2,0,0,0,1


In [2]:
cardio_df.dtypes

id               int64
age              int64
gender           int64
height           int64
weight         float64
ap_hi            int64
ap_lo            int64
cholesterol      int64
gluc             int64
smoke            int64
alco             int64
active           int64
cardio           int64
dtype: object

In [3]:
# convert the age column from days to years
cardio_df['age'] = (cardio_df['age']/365).astype('int64')
cardio_df

Unnamed: 0,id,age,gender,height,weight,ap_hi,ap_lo,cholesterol,gluc,smoke,alco,active,cardio
0,0,50,2,168,62.0,110,80,1,1,0,0,1,0
1,1,55,1,156,85.0,140,90,3,1,0,0,1,1
2,2,51,1,165,64.0,130,70,3,1,0,0,0,1
3,3,48,2,169,82.0,150,100,1,1,0,0,1,1
4,4,47,1,156,56.0,100,60,1,1,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
69995,99993,52,2,168,76.0,120,80,1,1,1,0,1,0
69996,99995,61,1,158,126.0,140,90,2,2,0,0,1,1
69997,99996,52,2,183,105.0,180,90,3,1,0,1,0,1
69998,99998,61,1,163,72.0,135,80,1,2,0,0,0,1


In [4]:
# Calculating BMI (Body Mass Index). Also converting height to inches
cardio_df["BMI"] = cardio_df["weight"]/((cardio_df["height"]/100)**2)
cardio_df["BMI"] = cardio_df["BMI"].astype("int64")
cardio_df

Unnamed: 0,id,age,gender,height,weight,ap_hi,ap_lo,cholesterol,gluc,smoke,alco,active,cardio,BMI
0,0,50,2,168,62.0,110,80,1,1,0,0,1,0,21
1,1,55,1,156,85.0,140,90,3,1,0,0,1,1,34
2,2,51,1,165,64.0,130,70,3,1,0,0,0,1,23
3,3,48,2,169,82.0,150,100,1,1,0,0,1,1,28
4,4,47,1,156,56.0,100,60,1,1,0,0,0,0,23
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
69995,99993,52,2,168,76.0,120,80,1,1,1,0,1,0,26
69996,99995,61,1,158,126.0,140,90,2,2,0,0,1,1,50
69997,99996,52,2,183,105.0,180,90,3,1,0,1,0,1,31
69998,99998,61,1,163,72.0,135,80,1,2,0,0,0,1,27


In [5]:
# Calculating Blood Pressure
def BPCatogories(x,y):
    if x<=120 and y<=80:
        return 0
    elif x<=129 and y<=80:
        return 1
    elif x<=139 or y<=89:
        return 2
    elif x<=140 or y<=90:
        return 3
    else:
        return 4
    
cardio_df.insert(1,"bp_cat", cardio_df.apply(lambda row: BPCatogories(row["ap_hi"], row["ap_lo"]), axis=1))
cardio_df['bp_cat'].astype("int64").value_counts()

0    39057
2    15392
3    10644
4     4486
1      421
Name: bp_cat, dtype: int64

In [6]:
cardio_df

Unnamed: 0,id,bp_cat,age,gender,height,weight,ap_hi,ap_lo,cholesterol,gluc,smoke,alco,active,cardio,BMI
0,0,0,50,2,168,62.0,110,80,1,1,0,0,1,0,21
1,1,3,55,1,156,85.0,140,90,3,1,0,0,1,1,34
2,2,2,51,1,165,64.0,130,70,3,1,0,0,0,1,23
3,3,4,48,2,169,82.0,150,100,1,1,0,0,1,1,28
4,4,0,47,1,156,56.0,100,60,1,1,0,0,0,0,23
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
69995,99993,0,52,2,168,76.0,120,80,1,1,1,0,1,0,26
69996,99995,3,61,1,158,126.0,140,90,2,2,0,0,1,1,50
69997,99996,3,52,2,183,105.0,180,90,3,1,0,1,0,1,31
69998,99998,2,61,1,163,72.0,135,80,1,2,0,0,0,1,27


In [7]:
cardio_df.columns

Index(['id', 'bp_cat', 'age', 'gender', 'height', 'weight', 'ap_hi', 'ap_lo',
       'cholesterol', 'gluc', 'smoke', 'alco', 'active', 'cardio', 'BMI'],
      dtype='object')

In [8]:
new_cardio = pd.DataFrame(cardio_df, columns=['id', 'bp_cat', 'age', 'gender', 'cholesterol', 'gluc', 'smoke', 'alco', 'active', 'cardio', 'BMI'])
new_cardio

Unnamed: 0,id,bp_cat,age,gender,cholesterol,gluc,smoke,alco,active,cardio,BMI
0,0,0,50,2,1,1,0,0,1,0,21
1,1,3,55,1,3,1,0,0,1,1,34
2,2,2,51,1,3,1,0,0,0,1,23
3,3,4,48,2,1,1,0,0,1,1,28
4,4,0,47,1,1,1,0,0,0,0,23
...,...,...,...,...,...,...,...,...,...,...,...
69995,99993,0,52,2,1,1,1,0,1,0,26
69996,99995,3,61,1,2,2,0,0,1,1,50
69997,99996,3,52,2,3,1,0,1,0,1,31
69998,99998,2,61,1,1,2,0,0,0,1,27
