# Cardiovascular Health Data Analytics
---
### This project delves into the world of individual health profiles, focusing on male and female subsets split into two age groups: 49 and below, and 50 and above. It aims to discern trends and relationships in key indicators such as systolic blood pressure (trestbps) and cholesterol level (chol).

### Dataset: https://www.kaggle.com/datasets/johnsmith88/heart-disease-dataset

In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

### Reminder

#### Sex = (M = 1 | F = 0) 

In [5]:
# Load only the columns this analysis works with, then preview the first rows.
columns_of_interest = ['age', 'sex', 'trestbps', 'chol']
heart = pd.read_csv('heart.csv', usecols=columns_of_interest)
heart.head()

Unnamed: 0,age,sex,trestbps,chol
0,52,1,125,212
1,53,1,140,203
2,70,1,145,174
3,61,1,148,203
4,62,0,138,294


In [6]:
# Show each column's dtype and non-null count, plus the frame's memory usage.
heart.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1025 entries, 0 to 1024
Data columns (total 4 columns):
 #   Column    Non-Null Count  Dtype
---  ------    --------------  -----
 0   age       1025 non-null   int64
 1   sex       1025 non-null   int64
 2   trestbps  1025 non-null   int64
 3   chol      1025 non-null   int64
dtypes: int64(4)
memory usage: 32.2 KB


In [7]:
# Summary statistics for the loaded columns, rounded to two decimals for display.
overall_summary = heart.describe()
overall_summary.round(2)

Unnamed: 0,age,sex,trestbps,chol
count,1025.0,1025.0,1025.0,1025.0
mean,54.43,0.7,131.61,246.0
std,9.07,0.46,17.52,51.59
min,29.0,0.0,94.0,126.0
25%,48.0,0.0,120.0,211.0
50%,56.0,1.0,130.0,240.0
75%,61.0,1.0,140.0,275.0
max,77.0,1.0,200.0,564.0


### Split the whole data frame into Males and Females

In [8]:
# Partition the records by the sex flag (1 = male, 0 = female).
is_male = heart['sex'] == 1
is_female = heart['sex'] == 0
male_data = heart.loc[is_male]
female_data = heart.loc[is_female]

# Male Data Analysis

In [9]:
# Unrounded summary statistics for the male subset.
male_summary = male_data.describe()
male_summary

Unnamed: 0,age,sex,trestbps,chol
count,713.0,713.0,713.0,713.0
mean,53.814867,1.0,130.697055,239.237027
std,8.875668,0.0,16.463723,43.155535
min,29.0,1.0,94.0,126.0
25%,47.0,1.0,120.0,208.0
50%,55.0,1.0,130.0,234.0
75%,60.0,1.0,140.0,269.0
max,77.0,1.0,192.0,353.0


In [10]:
# Keep only the extremes and the average of each column for the male subset.
male_data.agg(['min', 'mean', 'max']).round(2)

Unnamed: 0,age,sex,trestbps,chol
min,29.0,1.0,94.0,126.0
mean,53.81,1.0,130.7,239.24
max,77.0,1.0,192.0,353.0


---
### Split the data into two age brackets: 49 and below, and 50 and above

In [11]:
# Age 50 is the cut-off: rows under 50 form the younger bracket,
# rows at 50 or over form the older one.
male_is_50_plus = male_data['age'] >= 50
male_data_49_below = male_data[~male_is_50_plus]
male_data_50_above = male_data[male_is_50_plus]

### Males 49 and Below

In [12]:
# Summary statistics for males under 50, rounded to two decimals.
male_younger_summary = male_data_49_below.describe()
male_younger_summary.round(2)

Unnamed: 0,age,sex,trestbps,chol
count,218.0,218.0,218.0,218.0
mean,42.86,1.0,124.77,231.4
std,4.21,0.0,11.89,43.45
min,29.0,1.0,101.0,149.0
25%,41.0,1.0,118.0,198.0
50%,43.0,1.0,122.0,231.0
75%,46.0,1.0,130.0,256.0
max,49.0,1.0,152.0,321.0


In [13]:
# The two health indicators under study: systolic blood pressure (trestbps)
# and cholesterol level (chol).
selected_columns = ['trestbps', 'chol']

In [14]:
# Extremes and average of the selected indicators for males under 50.
male_younger_indicators = male_data_49_below[selected_columns]
min_mean_max = male_younger_indicators.agg(['min', 'mean', 'max']).round(2)
min_mean_max

Unnamed: 0,trestbps,chol
min,101.0,149.0
mean,124.77,231.4
max,152.0,321.0


### Males 50 and above

In [15]:
# Summary statistics for males aged 50 and over.
# NOTE: this cell rounds to three decimals, whereas the other summaries use two.
male_older_summary = male_data_50_above.describe()
male_older_summary.round(3)

Unnamed: 0,age,sex,trestbps,chol
count,495.0,495.0,495.0,495.0
mean,58.638,1.0,133.309,242.687
std,5.432,0.0,17.498,42.613
min,50.0,1.0,94.0,126.0
25%,54.0,1.0,120.0,212.0
50%,58.0,1.0,130.0,237.0
75%,62.0,1.0,142.0,273.5
max,77.0,1.0,192.0,353.0


In [16]:
# Extremes and average of each column for males aged 50 and over.
male_older_stat_rows = ['min', 'mean', 'max']
male_data_50_above.describe().loc[male_older_stat_rows].round(2)

Unnamed: 0,age,sex,trestbps,chol
min,50.0,1.0,94.0,126.0
mean,58.64,1.0,133.31,242.69
max,77.0,1.0,192.0,353.0


# Female Data Analysis

In [17]:
# Summary statistics for the female subset, rounded to two decimals.
female_summary = female_data.describe()
female_summary.round(2)

Unnamed: 0,age,sex,trestbps,chol
count,312.0,312.0,312.0,312.0
mean,55.85,0.0,133.7,261.46
std,9.37,0.0,19.58,64.47
min,34.0,0.0,94.0,141.0
25%,50.0,0.0,120.0,215.0
50%,57.0,0.0,132.0,254.0
75%,63.0,0.0,140.0,302.0
max,76.0,0.0,200.0,564.0


In [18]:
# Extremes and average of each column for the female subset.
female_stat_rows = ['min', 'mean', 'max']
female_data.describe().loc[female_stat_rows].round(2)

Unnamed: 0,age,sex,trestbps,chol
min,34.0,0.0,94.0,141.0
mean,55.85,0.0,133.7,261.46
max,76.0,0.0,200.0,564.0


In [19]:
# Same age-50 cut-off as the male split: under 50 vs. 50 and over.
female_is_50_plus = female_data['age'] >= 50
female_data_49_below = female_data[~female_is_50_plus]
female_data_50_above = female_data[female_is_50_plus]

### Females 49 and Below

In [20]:
# Summary statistics for females under 50, rounded to two decimals.
female_younger_summary = female_data_49_below.describe()
female_younger_summary.round(2)

Unnamed: 0,age,sex,trestbps,chol
count,76.0,76.0,76.0,76.0
mean,42.59,0.0,123.21,229.3
std,3.96,0.0,13.15,46.41
min,34.0,0.0,94.0,141.0
25%,41.0,0.0,112.0,202.75
50%,43.0,0.0,126.0,220.0
75%,45.0,0.0,134.0,265.75
max,49.0,0.0,142.0,341.0


In [21]:
# Extremes and average of each column for females under 50.
female_younger_stat_rows = ['min', 'mean', 'max']
female_data_49_below.describe().loc[female_younger_stat_rows].round(2)

Unnamed: 0,age,sex,trestbps,chol
min,34.0,0.0,94.0,141.0
mean,42.59,0.0,123.21,229.3
max,49.0,0.0,142.0,341.0


In [22]:
# Summary statistics for females aged 50 and over, rounded to two decimals.
female_older_summary = female_data_50_above.describe()
female_older_summary.round(2)

Unnamed: 0,age,sex,trestbps,chol
count,236.0,236.0,236.0,236.0
mean,60.12,0.0,137.08,271.81
std,6.0,0.0,20.13,66.11
min,50.0,0.0,100.0,149.0
25%,55.0,0.0,124.0,225.0
50%,60.0,0.0,135.0,265.0
75%,64.0,0.0,150.0,305.0
max,76.0,0.0,200.0,564.0


In [23]:
# Extremes and average of each column for females aged 50 and over.
female_older_stat_rows = ['min', 'mean', 'max']
female_data_50_above.describe().loc[female_older_stat_rows].round(2)

Unnamed: 0,age,sex,trestbps,chol
min,50.0,0.0,100.0,149.0
mean,60.12,0.0,137.08,271.81
max,76.0,0.0,200.0,564.0
