In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

%matplotlib inline
%reload_ext autoreload
%autoreload 2

In [None]:
# Read the examination records; the path is resolved relative to the
# kernel's working directory.
df = pd.read_csv(filepath_or_buffer='medical_examination.csv')
df

In [None]:
# Rescale height by a factor of 1/100 (centimetres -> metres, assuming the
# raw column is in centimetres -- TODO confirm against the data dictionary).
# CAUTION: this overwrites the column in place, so running the cell a second
# time without reloading `df` divides by 100 again.
df['height'] = df['height'].div(100)

In [None]:
# Flag rows whose weight / height**2 ratio exceeds 25 (1 = above, 0 = not).
# With weight in kg and height in metres this ratio is the body-mass index.
df['overweight'] = df['weight'].div(df['height'] ** 2).gt(25).astype(int)

In [None]:
# Inspect only the rows that were flagged as overweight.
df[df['overweight'].eq(1)]

In [None]:
# Collapse both columns to a binary flag: a value of exactly 1 becomes 0,
# every other value becomes 1.
# CAUTION: the columns are overwritten in place, so re-running this cell on
# the already-binarised data inverts the flags.
df['cholesterol'] = df['cholesterol'].ne(1).astype(int)
df['gluc'] = df['gluc'].ne(1).astype(int)
df

In [None]:
# Columns to unpivot into long form; `cardio` is kept as the identifier.
melt = ['cholesterol', 'gluc', 'smoke', 'alco', 'active', 'overweight']

# One output row per (original row, listed column) pair, with the column
# name in `variable` and its content in `value`.
df_cat = df.melt(
    id_vars=['cardio'],
    value_vars=melt,
    var_name='variable',
    value_name='value',
)
df_cat

In [None]:
# Spot check: long-form rows with cardio == 0 whose cholesterol flag is 1.
df_cat[
    df_cat['cardio'].eq(0)
    & df_cat['variable'].eq('cholesterol')
    & df_cat['value'].eq(1)
]

In [None]:
# Count the rows for every (cardio, variable, value) combination; the result
# has one row per combination with the count in `total`.
#
# The long-form frame is rebuilt from `df` and the `melt` column list instead
# of being read from the current `df_cat`. Aggregating `df_cat` onto itself
# made the cell non-idempotent: a second run grouped the already-aggregated
# frame and silently turned every `total` into 1.
df_cat = (
    df.melt(id_vars=['cardio'], value_vars=melt, var_name='variable', value_name='value')
    .groupby(['cardio', 'variable', 'value'])
    .size()
    .reset_index(name='total')
)
df_cat

In [None]:
# Bar chart of `total` per variable, coloured by `value`, with one panel per
# `cardio` level. `figure` and `fig` both name the object returned by
# `sns.catplot`.
fig = figure = sns.catplot(
    data=df_cat, kind='bar',
    x='variable', y='total',
    hue='value', col='cardio',
)
fig

In [None]:
# Preview the first five rows of the working frame.
df.head(n=5)

In [None]:
# Subset used for the correlation heatmap. A row is kept only when
#   * `ap_lo` does not exceed `ap_hi`, and
#   * `height` and `weight` each lie within the 2.5th-97.5th percentile range
#     (bounds inclusive, percentiles computed on the unfiltered `df`).
df_heat = df.loc[
    (df['ap_hi'] >= df['ap_lo'])
    & df['height'].between(df['height'].quantile(0.025), df['height'].quantile(0.975))
    & df['weight'].between(df['weight'].quantile(0.025), df['weight'].quantile(0.975))
]
df_heat

In [None]:
# Pairwise Pearson correlation between the columns of the filtered frame.
corr = df_heat.corr(method='pearson')

In [None]:
# Boolean mask that is True on and above the diagonal; it has the same shape
# as `corr` and is passed to the heatmap to hide the redundant upper triangle.
mask = np.triu(np.ones(corr.shape, dtype=bool))

In [None]:
# Close every open figure so the heatmap is drawn on a fresh canvas.
plt.close('all')
fig, ax = plt.subplots(figsize=(12, 10))

# Annotated correlation heatmap; cells where `mask` is True are not drawn.
sns.heatmap(
    corr,
    ax=ax,
    mask=mask,
    center=0,
    square=True,
    linewidths=0.5,
    annot=True,
    fmt='.1f',  # annotate each cell with one decimal place
    cbar_kws=dict(shrink=0.5),
)