In [None]:
import altair as alt
import pandas as pd
# Turn off Altair's row-count check so charts can be built directly from the
# full DataFrame loaded below, however many rows it has.
alt.data_transformers.disable_max_rows()


In [None]:
# Load the training split and keep only rows whose gender is not "Other".
diabetes = (
    pd.read_csv("data/diabetes_tidied_train.csv")
    .loc[lambda frame: frame["gender"] != "Other"]
)
diabetes

In [None]:
# Histogram of BMI (up to 20 bins, x-axis fixed to 10-80), coloured by
# diabetes status.
#
# "diabetes:N" explicitly marks the 0/1 outcome as nominal. Without a type
# suffix Altair infers the encoding type from the column dtype; for a numeric
# 0/1 column that yields a quantitative (gradient) colour scale rather than
# the two discrete blue/red classes used by the other charts in this notebook.
chart_bmi = alt.Chart(diabetes, title="Distribution of BMI").mark_bar().encode(
    x=alt.X("BMI", bin=alt.Bin(maxbins=20), scale=alt.Scale(domain=[10, 80])),
    y=alt.Y("count()", title="Count"),
    color=alt.Color(
        "diabetes:N",
        scale=alt.Scale(domain=[0, 1], range=["blue", "red"]),
        legend=alt.Legend(values=[0, 1], title="Diabetes (0=no, 1=yes)"),
    ),
)

In [None]:
# Render the BMI histogram as this cell's output.
chart_bmi

In [None]:
# Histogram of Age (up to 30 bins) with bars coloured by diabetes status
# (0 -> blue, 1 -> red).
chart_age = (
    alt.Chart(diabetes)
    .mark_bar()
    .encode(
        alt.X("Age", title="Age (years, binned)", bin=alt.Bin(maxbins=30)),
        alt.Y("count()", title="Count"),
        alt.Color(
            "diabetes:N",
            title="Diabetes Status",
            legend=alt.Legend(title="Diabetes (0=no, 1=yes)", values=[0, 1]),
            scale=alt.Scale(range=["blue", "red"], domain=[0, 1]),
        ),
    )
    .properties(title="Distribution of Age")
)

chart_age

In [None]:
# Histogram of blood glucose level (up to 20 bins) with bars coloured by
# diabetes status (0 -> blue, 1 -> red).
chart_glucose = (
    alt.Chart(diabetes)
    .mark_bar()
    .encode(
        alt.X(
            "blood_glucose_levels",
            title="Blood Glucose Level (random testing, mg/dl)",
            bin=alt.Bin(maxbins=20),
        ),
        alt.Y("count()", title="Count"),
        alt.Color(
            "diabetes:N",
            title="Diabetes Status",
            legend=alt.Legend(title="Diabetes (0=no, 1=yes)", values=[0, 1]),
            scale=alt.Scale(range=["blue", "red"], domain=[0, 1]),
        ),
    )
    .properties(title="Distribution of Blood Glucose Levels")
)


In [None]:
# Render the blood glucose histogram as this cell's output.
chart_glucose

In [None]:
# Bar chart of record counts per gender, with bars coloured by diabetes
# status (0 -> blue, 1 -> red).
chart_gender = (
    alt.Chart(diabetes)
    .mark_bar()
    .encode(
        alt.X("gender", title="Gender"),
        alt.Y("count()", title="Count"),
        alt.Color(
            "diabetes:N",
            title="Diabetes Status",
            legend=alt.Legend(title="Diabetes (0=no, 1=yes)", values=[0, 1]),
            scale=alt.Scale(range=["blue", "red"], domain=[0, 1]),
        ),
    )
    .properties(title="Gender Distribution")
)

chart_gender

In [None]:
# Bar chart of how many records fall in each diabetes class.
# The ":O" suffix tells Altair to treat the diabetes column as ordinal.
chart_diabetes = (
    alt.Chart(diabetes)
    .mark_bar()
    .encode(
        alt.X("diabetes:O", title="Diabetes (0 = no, 1 = yes)"),
        alt.Y("count()", title="Count"),
    )
    .properties(title="Diabetes Diagnosis Distribution")
)


In [None]:
# Render the diabetes class-count chart as this cell's output.
chart_diabetes

In [None]:
# Rows whose blood glucose reading is strictly above 220, and how those rows
# split across the diabetes classes.
high_glucose = diabetes.loc[diabetes["blood_glucose_levels"].gt(220)]
diabetes_status_counts = high_glucose["diabetes"].value_counts()


In [None]:
# Show the rows with blood glucose above 220.
high_glucose

In [None]:
# Show the per-class counts of diabetes status among the high-glucose rows.
diabetes_status_counts

In [None]:
# Number of rows that are both male and diabetic (diabetes == 1).
diabetic_males_count = len(
    diabetes.loc[diabetes["gender"].eq("Male") & diabetes["diabetes"].eq(1)]
)

# Number of rows that are both female and diabetic (diabetes == 1).
diabetic_females_count = len(
    diabetes.loc[diabetes["gender"].eq("Female") & diabetes["diabetes"].eq(1)]
)

# Display both counts together as (males, females).
diabetic_males_count, diabetic_females_count


In [None]:
# Lay the five charts out side by side. Each panel resolves its own colour
# scale instead of sharing one across the whole row.
combined_chart = alt.hconcat(
    chart_bmi,
    chart_age,
    chart_gender,
    chart_glucose,
    chart_diabetes,
).resolve_scale(color="independent")


In [None]:
# Render the side-by-side overview of all five charts.
combined_chart