In [4]:
%pip install seaborn

In [5]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns


def draw_cat_plot():
    """Draw bar charts of risk-factor counts, faceted by cardio outcome.

    Reads ``medical_examination.csv``, adds a binary ``overweight`` column
    derived from a BMI-style ratio, recodes ``cholesterol`` and ``gluc`` to
    0/1, reshapes the six risk factors to long format and counts the rows
    in every (cardio, variable, value) combination.

    The chart is written to ``catplot.png``.

    Returns:
        matplotlib.figure.Figure: the figure that holds the rendered plot.
    """
    # Import data
    df = pd.read_csv('medical_examination.csv')

    # Add 'overweight' column: weight / (height / 100)^2 above 25 -> 1, else 0.
    # NOTE(review): presumably weight is in kg and height in cm -- confirm.
    bmi = df['weight'] / ((df['height'] / 100) ** 2)
    df['overweight'] = (bmi > 25).astype(int)

    # Normalize so 0 is always good and 1 is always bad:
    # a value of exactly 1 becomes 0, every other value becomes 1.
    for column in ('cholesterol', 'gluc'):
        df[column] = df[column].ne(1).astype(int)

    # Long format: one row per (patient, risk factor), then count each
    # (cardio, variable, value) combination.
    df_cat = pd.melt(
        df,
        id_vars=['cardio'],
        value_vars=['cholesterol', 'gluc', 'smoke', 'alco', 'active', 'overweight'],
    )
    df_cat = (
        df_cat
        .groupby(['cardio', 'variable', 'value'])
        .size()
        .reset_index(name='total')
    )

    # catplot is figure-level: it builds its own FacetGrid/figure and does not
    # draw into a caller-supplied Axes. Keep the grid and use *its* figure, so
    # the saved/returned figure is the one that actually contains the bars.
    # Plotting 'variable' on x keeps the six risk factors separate instead of
    # averaging their counts into a single bar.
    grid = sns.catplot(
        data=df_cat,
        x='variable',
        y='total',
        hue='value',
        col='cardio',
        kind='bar',
    )
    fig = grid.figure

    fig.savefig('catplot.png')
    return fig


In [None]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np

def draw_cat_plot():
  """Draw one count plot per risk factor, split by cardio outcome.

  NOTE: this redefines the ``draw_cat_plot`` from the earlier cell; once this
  cell runs, this is the version that is called.

  Reads ``medical_examination.csv``, adds a binary ``overweight`` column,
  recodes ``cholesterol`` and ``gluc`` to 0/1, and draws a 3x2 grid of count
  plots (one per categorical feature, coloured by ``cardio``).

  The chart is written to ``catplot.png``.

  Returns:
    matplotlib.figure.Figure: the figure containing the grid of count plots.
  """
  # Import data
  df = pd.read_csv('medical_examination.csv')

  # Add an "overweight" column: weight / (height / 100)^2 above 25 -> 1, else 0.
  # NOTE(review): presumably weight is in kg and height in cm -- confirm.
  df['overweight'] = (df['weight'] / ((df['height'] / 100) ** 2) > 25).astype(int)

  # Normalize so 0 is always good and 1 is always bad:
  # values above 1 become 1, everything else becomes 0.
  df['cholesterol'] = (df['cholesterol'] > 1).astype(int)
  df['gluc'] = (df['gluc'] > 1).astype(int)

  # Define the categorical features to plot
  categorical_features = ['cholesterol', 'gluc', 'smoke', 'alco', 'active', 'overweight']

  # Create the figure and flatten the axes grid for easier iteration
  fig, axes = plt.subplots(3, 2, figsize=(10, 10))
  axes = axes.flatten()

  # countplot is the axes-level counterpart of catplot(kind='count'); unlike
  # catplot it draws into the Axes it is given, so the panels land in `fig`
  # rather than in throw-away figures.
  for ax, feature in zip(axes, categorical_features):
    sns.countplot(x=feature, hue='cardio', data=df, ax=ax)
    ax.set_title(f'Distribution of {feature} by Cardio')

  # Remove only the subplots that were genuinely left unused (none with the
  # current 3x2 grid and six features).
  for unused_ax in axes[len(categorical_features):]:
    fig.delaxes(unused_ax)

  # Adjust layout and save the plot
  fig.tight_layout()
  fig.savefig('catplot.png')
  return fig

def draw_heat_map():
  """Draw a lower-triangle correlation heatmap of the cleaned dataset.

  Reads ``medical_examination.csv`` and keeps only rows where ``ap_lo`` does
  not exceed ``ap_hi`` and where both ``height`` and ``weight`` lie between
  the 2.5th and 97.5th percentiles of the loaded data. It then plots the
  pairwise correlation matrix with the upper triangle (including the
  diagonal) masked out.

  The chart is written to ``heatmap.png``.

  Returns:
    matplotlib.figure.Figure: the figure containing the heatmap.
  """
  # Import data
  df = pd.read_csv('medical_examination.csv')

  # Compute every percentile cutoff on the full loaded data *before* filtering.
  # Filtering step by step would evaluate each later quantile on an already
  # trimmed frame, shifting the thresholds and discarding extra rows.
  height_lo, height_hi = df['height'].quantile([0.025, 0.975])
  weight_lo, weight_hi = df['weight'].quantile([0.025, 0.975])

  # Clean the data with a single combined mask (between() is inclusive on
  # both ends, matching the >= / <= comparisons).
  keep = (
    (df['ap_lo'] <= df['ap_hi'])
    & df['height'].between(height_lo, height_hi)
    & df['weight'].between(weight_lo, weight_hi)
  )
  df = df[keep]

  # Calculate the correlation matrix
  corr = df.corr()

  # Generate a mask for the upper triangle (diagonal included)
  mask = np.triu(np.ones_like(corr, dtype=bool))

  # Draw on an explicit figure so the heatmap never lands on a leftover
  # "current" pyplot figure from an earlier plot.
  fig, ax = plt.subplots(figsize=(12, 12))
  sns.heatmap(
    corr,
    mask=mask,
    annot=True,
    fmt=".1f",
    square=True,
    linewidths=.5,
    cbar_kws={"shrink": .5},
    ax=ax,
  )

  # Save the plot
  fig.savefig('heatmap.png')
  return fig
