## Prerequisites
---


In [None]:
!pip install ydata-profiling
!pip install plotly


In [None]:
!wget https://raw.githubusercontent.com/Lolimipsu/ML_Tutorials/refs/heads/main/freecodecamp/Medical%20Data%20Visualizer/medical_examination.csv

In [None]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np

import plotly.express as px
from ydata_profiling import ProfileReport

In [None]:
from IPython.display import display
# Never truncate the column list when a DataFrame is displayed.
pd.set_option('display.max_columns', None)

## Task
---

Import the data from medical_examination.csv and assign it to the df variable.

In [None]:
# Read the examination records from the CSV in the working directory into `df`.
df = pd.read_csv(filepath_or_buffer='medical_examination.csv')

Add an overweight column to the data. To determine if a person is overweight, first calculate their BMI by dividing their weight in kilograms by the square of their height in meters. If that value is > 25 then the person is overweight. Use the value 0 for NOT overweight and the value 1 for overweight.

In [None]:
# BMI = weight / height^2; the height column is divided by 100 before squaring
# (assumes height is stored in centimetres and weight in kilograms).
bmi = df['weight'].div(df['height'].div(100).pow(2))
# 1 when BMI is strictly above 25, otherwise 0.
df['overweight'] = bmi.gt(25).astype('int64')

Normalize data by making 0 always good and 1 always bad. If the value of cholesterol or gluc is 1, set the value to 0. If the value is more than 1, set the value to 1.

In [None]:
# A value of exactly 1 becomes 0 ("good"); every other value becomes 1 ("bad").
# NOTE: the columns are overwritten in place, so run this once per load of `df`:
# applying it again to the 0/1 result swaps the two labels.
df['cholesterol'] = df['cholesterol'].ne(1).astype('int64')
df['gluc'] = df['gluc'].ne(1).astype('int64')

Draw the Categorical Plot in the draw_cat_plot function.

In [None]:
# 4
def draw_cat_plot():
    """Draw bar charts of value counts for the categorical features, split by `cardio`.

    Reads the module-level `df`, saves the chart to 'catplot.png' and
    returns the matplotlib figure that contains it.
    """
    # 5 -- long format: one row per (cardio, variable, value) observation
    features = ['cholesterol', 'gluc', 'smoke', 'alco', 'active', 'overweight']
    long_form = df.melt(id_vars=['cardio'], value_vars=features)

    # 6 -- number of rows in each (cardio, variable, value) group, stored as 'total'
    totals = (
        long_form
        .groupby(['cardio', 'variable', 'value'])
        .size()
        .reset_index(name='total')
    )

    # 7 -- one panel per cardio value, bars coloured by the feature's value
    sns.catplot(data=totals, kind="bar", x="variable", y="total", hue='value', col="cardio")

    # 8 -- catplot builds its own figure; pick it up as pyplot's current figure
    fig = plt.gcf()

    # 9
    fig.savefig('catplot.png')
    return fig

In [None]:
# Trailing ';' keeps the returned figure from being echoed as the cell result,
# so the plot is rendered once instead of twice.
draw_cat_plot();

Draw the Heat Map in the draw_heat_map function.

In [None]:
# 10
def draw_heat_map():
    """Draw the lower-triangle correlation heat map of the cleaned data.

    Reads the module-level `df` and keeps only the rows where
      * ap_lo <= ap_hi, and
      * height and weight each lie within their own 2.5th-97.5th percentile range
        (bounds inclusive),
    then plots the Pearson correlation matrix, rounded to one decimal, with the
    upper triangle hidden. Saves the figure to 'heatmap.png' and returns it.
    """
    # 11 -- all percentiles come from the full, unfiltered `df` and the conditions
    # are combined into a single mask. Filtering one condition at a time would
    # compute the later percentiles on already-trimmed data and shift the cut-offs.
    height_lo, height_hi = df['height'].quantile([0.025, 0.975])
    weight_lo, weight_hi = df['weight'].quantile([0.025, 0.975])
    keep = (
        (df['ap_lo'] <= df['ap_hi'])
        & df['height'].between(height_lo, height_hi)
        & df['weight'].between(weight_lo, weight_hi)
    )
    df_heat = df[keep]

    # 12
    corr = df_heat.corr(method='pearson').round(1)

    # 13 -- True marks the cells to hide: the upper triangle, diagonal included
    mask = np.triu(np.ones_like(corr, dtype=bool))

    # 14
    fig, ax = plt.subplots(figsize=(10, 10))

    # 15 -- draw explicitly on the axes created above
    sns.heatmap(corr, annot=True, square=True, fmt='0.2', mask=mask, ax=ax)

    # 16
    fig.savefig('heatmap.png')
    return fig

In [None]:
# Trailing ';' keeps the returned figure from being echoed as the cell result,
# so the heat map is rendered once instead of twice.
draw_heat_map();

## Code Submission
---

In [None]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np

# 1 -- read the examination records from the CSV in the working directory
df = pd.read_csv(filepath_or_buffer='medical_examination.csv')

# 2 -- BMI = weight / height^2, with the height column divided by 100 first
# (assumes centimetres and kilograms); overweight is 1 when BMI > 25, else 0
bmi = df['weight'].div(df['height'].div(100).pow(2))
df['overweight'] = bmi.gt(25).astype('int64')

# 3 -- a value of exactly 1 becomes 0 ("good"); every other value becomes 1 ("bad")
df['cholesterol'] = df['cholesterol'].ne(1).astype('int64')
df['gluc'] = df['gluc'].ne(1).astype('int64')

# 4
def draw_cat_plot():
    """Draw bar charts of value counts for the categorical features, split by `cardio`.

    Reads the module-level `df`, saves the chart to 'catplot.png' and
    returns the matplotlib figure that contains it.
    """
    # 5 -- long format: one row per (cardio, variable, value) observation
    features = ['cholesterol', 'gluc', 'smoke', 'alco', 'active', 'overweight']
    long_form = df.melt(id_vars=['cardio'], value_vars=features)

    # 6 -- number of rows in each (cardio, variable, value) group, stored as 'total'
    totals = (
        long_form
        .groupby(['cardio', 'variable', 'value'])
        .size()
        .reset_index(name='total')
    )

    # 7 -- one panel per cardio value, bars coloured by the feature's value
    sns.catplot(data=totals, kind="bar", x="variable", y="total", hue='value', col="cardio")

    # 8 -- catplot builds its own figure; pick it up as pyplot's current figure
    fig = plt.gcf()

    # 9
    fig.savefig('catplot.png')
    return fig


# 10
def draw_heat_map():
    """Draw the lower-triangle correlation heat map of the cleaned data.

    Reads the module-level `df` and keeps only the rows where
      * ap_lo <= ap_hi, and
      * height and weight each lie within their own 2.5th-97.5th percentile range
        (bounds inclusive),
    then plots the Pearson correlation matrix, rounded to one decimal, with the
    upper triangle hidden. Saves the figure to 'heatmap.png' and returns it.
    """
    # 11 -- all percentiles come from the full, unfiltered `df` and the conditions
    # are combined into a single mask. Filtering one condition at a time would
    # compute the later percentiles on already-trimmed data and shift the cut-offs.
    height_lo, height_hi = df['height'].quantile([0.025, 0.975])
    weight_lo, weight_hi = df['weight'].quantile([0.025, 0.975])
    keep = (
        (df['ap_lo'] <= df['ap_hi'])
        & df['height'].between(height_lo, height_hi)
        & df['weight'].between(weight_lo, weight_hi)
    )
    df_heat = df[keep]

    # 12
    corr = df_heat.corr(method='pearson').round(1)

    # 13 -- True marks the cells to hide: the upper triangle, diagonal included
    mask = np.triu(np.ones_like(corr, dtype=bool))

    # 14
    fig, ax = plt.subplots(figsize=(10, 10))

    # 15 -- draw explicitly on the axes created above
    sns.heatmap(corr, annot=True, square=True, fmt='0.2', mask=mask, ax=ax)

    # 16
    fig.savefig('heatmap.png')
    return fig


## Dataset analysis
---

In [None]:
# Re-read the CSV so the profiling below works on the file as stored,
# not on `df`, whose columns were overwritten in the task cells.
profiling = pd.read_csv(filepath_or_buffer='medical_examination.csv')

In [None]:
# Print a summary of the freshly loaded frame (columns, non-null counts, dtypes).
profiling.info()

In [None]:
# Build the ydata-profiling report for the freshly loaded frame.
profile = ProfileReport(profiling, title="Pandas Profiling Report")
# Bare last expression: hands the report object to the notebook for display.
profile