In [None]:
import pandas as pd

In [None]:
# Read the semicolon-separated cardio dataset and print its first five rows.
cardio_csv = '/content/cardio_train.csv'
df = pd.read_csv(cardio_csv, sep=";")
print(df.head())

   id    age  gender  height  weight  ap_hi  ap_lo  cholesterol  gluc  smoke  \
0   0  18393       2     168    62.0    110     80            1     1      0   
1   1  20228       1     156    85.0    140     90            3     1      0   
2   2  18857       1     165    64.0    130     70            3     1      0   
3   3  17623       2     169    82.0    150    100            1     1      0   
4   4  17474       1     156    56.0    100     60            1     1      0   

   alco  active  cardio  
0     0       1       0  
1     0       1       1  
2     0       0       1  
3     0       1       1  
4     0       0       0  


In [None]:
# Dimensions of the loaded frame as (rows, columns)
df.shape

(70000, 13)

## Understanding Hypertension through Systolic and Diastolic Blood Pressure
Hypertension, commonly known as high blood pressure, is typically diagnosed based on two key measurements:

**Systolic Blood Pressure (SBP):**
*   The pressure in our arteries when the heart beats.
*   Measured in millimeters of mercury (mmHg).
*   Criteria for Hypertension: SBP ≥ 140 mmHg.

**Diastolic Blood Pressure (DBP):**
*   The pressure in our arteries when the heart rests between beats.
*   Also measured in mmHg.
*   Criteria for Hypertension: DBP ≥ 90 mmHg.


Systolic blood pressure (SBP) is generally the higher value, typically ranging from about 90 to 140 mmHg in normal to hypertensive individuals. It is the higher number in a blood pressure reading, stored as "ap_hi" in this dataset.

Diastolic blood pressure (DBP) is the lower value, typically ranging from about 60 to 90 mmHg. It is the lower number in a blood pressure reading, stored as "ap_lo" in this dataset.






In [None]:
# Flag each record as hypertensive (1) when SBP >= 140 or DBP >= 90, otherwise 0
high_systolic = df['ap_hi'] >= 140
high_diastolic = df['ap_lo'] >= 90
df['is_hypertensive'] = (high_systolic | high_diastolic).astype(int)
print(df[['ap_hi', 'ap_lo', 'is_hypertensive']].head(30))

    ap_hi  ap_lo  is_hypertensive
0     110     80                0
1     140     90                1
2     130     70                0
3     150    100                1
4     100     60                0
5     120     80                0
6     130     80                0
7     130     90                1
8     110     70                0
9     110     60                0
10    120     80                0
11    120     80                0
12    120     80                0
13    110     70                0
14    130     90                1
15    120     80                0
16    130     70                0
17    110     70                0
18    100     70                0
19    120     70                0
20    120     80                0
21    130     80                0
22    145     85                1
23    110     60                0
24    150     90                1
25    130    100                1
26    130     90                1
27    120     80                0
28    120     

(This dataset is already cleaned and preprocessed. So that we can demonstrate those steps ourselves, we first make the dataset more complex again by mapping its integer codes to categorical labels. The mapping is based on the dataset description.)

In [None]:
# Map the integer codes to readable categorical labels.
# Series.replace is used rather than Series.map so that the cell is safe to re-run:
# map() converts every value that is missing from the dict to NaN, which would erase
# the labels on a second execution, whereas replace() leaves unmatched values as they are.
# Consequence: a code that is not listed below stays as its integer instead of becoming NaN.
label_maps = {
    'gender': {1: 'Female', 2: 'Male'},
    'cholesterol': {1: 'Normal', 2: 'Above Normal', 3: 'Well Above Normal'},
    'gluc': {1: 'Normal', 2: 'Above Normal', 3: 'Well Above Normal'},
    'smoke': {0: 'Non-Smoker', 1: 'Smoker'},
    'alco': {0: 'Non-Drinker', 1: 'Drinker'},
    'active': {0: 'Inactive', 1: 'Active'},
    'cardio': {0: 'Negative', 1: 'Positive'},
    'is_hypertensive': {0: 'Negative', 1: 'Positive'},
}
for column, labels in label_maps.items():
    # Indexing df[column] still raises KeyError if an expected column is missing
    df[column] = df[column].replace(labels)

df.head()

Unnamed: 0,id,age,gender,height,weight,ap_hi,ap_lo,cholesterol,gluc,smoke,alco,active,cardio,is_hypertensive
0,0,18393,Male,168,62.0,110,80,Normal,Normal,Non-Smoker,Non-Drinker,Active,Negative,Negative
1,1,20228,Female,156,85.0,140,90,Well Above Normal,Normal,Non-Smoker,Non-Drinker,Active,Positive,Positive
2,2,18857,Female,165,64.0,130,70,Well Above Normal,Normal,Non-Smoker,Non-Drinker,Inactive,Positive,Negative
3,3,17623,Male,169,82.0,150,100,Normal,Normal,Non-Smoker,Non-Drinker,Active,Positive,Positive
4,4,17474,Female,156,56.0,100,60,Normal,Normal,Non-Smoker,Non-Drinker,Inactive,Negative,Negative


In [None]:
# Rename the columns to descriptive names (units included where applicable).
# rename() returns a new frame that is bound back to `df` instead of mutating in place;
# keys that are not present in the frame are ignored by default.
# NOTE(review): 'CDV_Risk' looks like a typo for 'CVD_Risk' — left unchanged so the
# resulting column names stay exactly as before; confirm before renaming.
df = df.rename(columns={
    'age': 'Age',
    'gender': 'Gender',
    'height': 'Height_cm',
    'weight': 'Weight_kg',
    'ap_hi': 'Systolic_BP',
    'ap_lo': 'Diastolic_BP',
    'cholesterol': 'Cholesterol',
    'gluc': 'Glucose',
    'smoke': 'Smoker',
    'alco': 'Alcohol_Use',
    'active': 'Physical_Activity',
    'cardio': 'CDV_Risk',
    'is_hypertensive': 'Hypertension'
})

In [None]:
# Convert Age from days to whole years; astype(int) drops the fractional part.
# A fixed 365-day year is used, so leap days are not accounted for.
# Run this cell once only: a second run would divide the already-converted years again.
DAYS_PER_YEAR = 365
age_in_years = df['Age'] / DAYS_PER_YEAR
df['Age'] = age_in_years.astype(int)

In [None]:
# Preview the first rows after the column renaming and the Age conversion
df.head()

Unnamed: 0,id,Age,Gender,Height_cm,Weight_kg,Systolic_BP,Diastolic_BP,Cholesterol,Glucose,Smoker,Alcohol_Use,Physical_Activity,CDV_Risk,Hypertension
0,0,50,Male,168,62.0,110,80,Normal,Normal,Non-Smoker,Non-Drinker,Active,Negative,Negative
1,1,55,Female,156,85.0,140,90,Well Above Normal,Normal,Non-Smoker,Non-Drinker,Active,Positive,Positive
2,2,51,Female,165,64.0,130,70,Well Above Normal,Normal,Non-Smoker,Non-Drinker,Inactive,Positive,Negative
3,3,48,Male,169,82.0,150,100,Normal,Normal,Non-Smoker,Non-Drinker,Active,Positive,Positive
4,4,47,Female,156,56.0,100,60,Normal,Normal,Non-Smoker,Non-Drinker,Inactive,Negative,Negative


In [None]:
# Optional export of the processed frame; left commented out so running the notebook writes no file.
#df.to_csv('/content/hyperten_train.csv', index=False)