In [41]:
import pandas as pd
import requests
import json
import matplotlib.pyplot as plt
import seaborn as sns
import altair as alt
import warnings
warnings.simplefilter(action = 'ignore', category = FutureWarning)
import plotly.express as px

In [2]:
# Load the obesity survey CSV (path is relative to the notebook's working
# directory) into the frame that every later cell reads from.
df = pd.read_csv(
    filepath_or_buffer='ObesityDataSet_raw_and_data_sinthetic.csv',
)
# Bare last expression: the notebook renders the frame as a rich table.
df

Unnamed: 0,Gender,Age,Height,Weight,family_history_with_overweight,FAVC,FCVC,NCP,CAEC,SMOKE,CH2O,SCC,FAF,TUE,CALC,MTRANS,NObeyesdad
0,Female,21.000000,1.620000,64.000000,yes,no,2.0,3.0,Sometimes,no,2.000000,no,0.000000,1.000000,no,Public_Transportation,Normal_Weight
1,Female,21.000000,1.520000,56.000000,yes,no,3.0,3.0,Sometimes,yes,3.000000,yes,3.000000,0.000000,Sometimes,Public_Transportation,Normal_Weight
2,Male,23.000000,1.800000,77.000000,yes,no,2.0,3.0,Sometimes,no,2.000000,no,2.000000,1.000000,Frequently,Public_Transportation,Normal_Weight
3,Male,27.000000,1.800000,87.000000,no,no,3.0,3.0,Sometimes,no,2.000000,no,2.000000,0.000000,Frequently,Walking,Overweight_Level_I
4,Male,22.000000,1.780000,89.800000,no,no,2.0,1.0,Sometimes,no,2.000000,no,0.000000,0.000000,Sometimes,Public_Transportation,Overweight_Level_II
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2106,Female,20.976842,1.710730,131.408528,yes,yes,3.0,3.0,Sometimes,no,1.728139,no,1.676269,0.906247,Sometimes,Public_Transportation,Obesity_Type_III
2107,Female,21.982942,1.748584,133.742943,yes,yes,3.0,3.0,Sometimes,no,2.005130,no,1.341390,0.599270,Sometimes,Public_Transportation,Obesity_Type_III
2108,Female,22.524036,1.752206,133.689352,yes,yes,3.0,3.0,Sometimes,no,2.054193,no,1.414209,0.646288,Sometimes,Public_Transportation,Obesity_Type_III
2109,Female,24.361936,1.739450,133.346641,yes,yes,3.0,3.0,Sometimes,no,2.852339,no,1.139107,0.586035,Sometimes,Public_Transportation,Obesity_Type_III


In [19]:
def shorten_type(weight_type):
    """Collapse a detailed weight label into one of four coarse categories.

    The label is checked for the substrings 'Normal', 'Overweight' and
    'Obesity', in that order; the first one found decides the result.

    Parameters
    ----------
    weight_type : str
        Detailed label, e.g. 'Overweight_Level_I' or 'Obesity_Type_III'.

    Returns
    -------
    str
        'Normal_Weight', 'Overweight' or 'Obesity' for a matching label;
        'Insufficient_Weight' for every label that contains none of the
        three keywords.
    """
    # Order matters: an earlier keyword wins when several are present.
    keyword_to_category = (
        ('Normal', 'Normal_Weight'),
        ('Overweight', 'Overweight'),
        ('Obesity', 'Obesity'),
    )
    for keyword, category in keyword_to_category:
        if keyword in weight_type:
            return category
    # Fallback for anything that matched no keyword.
    return 'Insufficient_Weight'
    
    
# Body-mass index: weight divided by the square of height
# (presumably kg and metres, judging by the displayed values -- confirm).
df['BMI'] = df['Weight'].div(df['Height'].pow(2))
# Coarse four-way category derived from the detailed 'NObeyesdad' label.
df['Short_Weight_Type'] = df['NObeyesdad'].map(shorten_type)

In [35]:
# Print the number of rows in each coarse weight category.
# Each pair is (message printed verbatim, value of 'Short_Weight_Type').
for message, category in (
    ('Normal weight sample size: ', 'Normal_Weight'),
    ('Insufficient weight sample size: ', 'Insufficient_Weight'),
    ('Overweight sample size: ', 'Overweight'),
    ('Obesity sample size: ', 'Obesity'),
):
    rows_in_category = df[df['Short_Weight_Type'] == category]
    print(message, len(rows_in_category))

Normal weight sample size:  287
Insufficient weight sample size:  272
Overweight sample size:  580
Obesity sample size:  972


In [47]:
# Distribution of physical-activity frequency ('FAF') for each coarse weight
# category, drawn as one violin per category with a box plot inside it.
fig = px.violin(
    data_frame=df,
    x='Short_Weight_Type',
    y='FAF',
    color='Short_Weight_Type',
    box=True,
    labels={
        'FAF': 'Physical Activity Frequency',
        'Short_Weight_Type': 'Weight Type',
    },
    title='Physical Activity Frequency vs. Obesity Level',
)
fig.show()

In [76]:
# Count rows for every (family history, coarse weight category) pair.
history_counts = (
    df.groupby(['family_history_with_overweight', 'Short_Weight_Type'])
    .size()
    .reset_index(name='count')
)

# Turn the raw counts into percentages within each family-history group, so
# the bars of one group sum to 100. The column keeps the name 'count'
# because the Altair cell further down reads 'count' from this frame.
# (The original referenced an undefined `df_counts` here and in `text=`,
# which raises NameError on a fresh kernel.)
history_totals = history_counts.groupby('family_history_with_overweight')['count'].transform('sum')
history_counts['count'] = history_counts['count'] / history_totals * 100

# Grouped bars: one cluster per family-history value, one bar per category,
# each labelled with its percentage rounded to one decimal.
px.bar(history_counts,
       x='family_history_with_overweight',
       y='count',
       color='Short_Weight_Type',
       title='Family Overweight History v.s Obesity Rate',
       labels={'family_history_with_overweight': 'Family Overweight History', 'count': 'Percentage (%)'},
       barmode='group',
       text=history_counts['count'].round(1))

In [105]:
# Faceted bar chart built from `history_counts`: one column panel per coarse
# weight category, bars split by family overweight history. The 'count'
# field is plotted on an axis titled 'Percentage (%)'.
chart = (
    alt.Chart(history_counts)
    .mark_bar()
    .encode(
        x=alt.X(
            'family_history_with_overweight:N',
            title='Family Overweight History',
            axis=alt.Axis(labelAngle=0),  # keep the category labels horizontal
        ),
        y=alt.Y('count:Q', title='Percentage (%)'),
        color=alt.Color('Short_Weight_Type:N', title='Obesity Level'),
        column=alt.Column('Short_Weight_Type:N', title='Obesity Level'),
        tooltip=[
            'family_history_with_overweight',
            'Short_Weight_Type',
            alt.Tooltip('count:Q', format='.2f'),
        ],
    )
    .properties(
        title='Family Overweight History vs. Weight Type',
        width=150,
        height=400,
    )
    .interactive()
)

chart.show()

In [54]:
# Box plot of BMI split (and coloured) by family overweight history.
fig = px.box(
    data_frame=df,
    x="family_history_with_overweight",
    y="BMI",
    color="family_history_with_overweight",
    labels={"family_history_with_overweight": "Family Overweight History"},
    title="BMI v.s Family Overweight History",
)

fig.show()

In [98]:
# Scatter of BMI (x) against Age (y), coloured by the detailed weight label.
# The figure is the cell's last expression, so the notebook renders it.
px.scatter(
    data_frame=df,
    x='BMI',
    y='Age',
    color='NObeyesdad',
    labels={'NObeyesdad': 'Weight Type'},
    title='BMI vs. Age',
)

In [99]:
# Dropdown entries for the detailed weight label. The first option is None,
# shown as 'All': with no value bound, the selection is empty and every point
# keeps its colour. (The original overwrote `options` in a chained assignment,
# so 'All' was sent as a literal value that matches no row and greyed out
# every point.) Sorting gives a stable order instead of set-iteration order.
weight_types = sorted(df['NObeyesdad'].dropna().unique())
options = [None] + weight_types
labels = ['All'] + weight_types

input_dropdown = alt.binding_select(options = options,
                                   labels = labels, 
                                   name = 'Weight Type: ')

# Point selection on 'NObeyesdad', driven by the dropdown.
selection = alt.selection_point(fields = ['NObeyesdad'], bind = input_dropdown)

alt.Chart(df).mark_point().encode(
    x='BMI:Q',  # Continuous axis for BMI
    y='Age:Q',  # Continuous axis for Age
    # Selected weight type keeps its colour; everything else is light gray.
    color=alt.condition(selection, alt.Color('NObeyesdad:N'), alt.value('lightgray'))
).add_params(
    selection
)

In [101]:
# Dropdown over the four coarse weight categories; the None option is shown
# as 'All'.
input_dropdown = alt.binding_select(
    name='Weight Type: ',
    options=[None, 'Normal_Weight', 'Insufficient_Weight', 'Overweight', 'Obesity'],
    labels=['All', 'Normal_Weight', 'Insufficient_Weight', 'Overweight', 'Obesity'],
)

# Point selection on 'Short_Weight_Type', driven by the dropdown.
selection = alt.selection_point(bind=input_dropdown, fields=['Short_Weight_Type'])

# BMI (x) against Age (y); points matching the selection keep their category
# colour, the rest are drawn in light gray.
(
    alt.Chart(df)
    .mark_point()
    .encode(
        x='BMI:Q',
        y='Age:Q',
        color=alt.condition(
            selection,
            alt.Color('Short_Weight_Type:N'),
            alt.value('lightgray'),
        ),
    )
    .add_params(selection)
)

In [110]:
# Dropdown over the four coarse weight categories; the None option is shown
# as 'All'.
input_dropdown = alt.binding_select(
    name='Weight Type: ',
    options=[None, 'Normal_Weight', 'Insufficient_Weight', 'Overweight', 'Obesity'],
    labels=['All', 'Normal_Weight', 'Insufficient_Weight', 'Overweight', 'Obesity'],
)

# Point selection on 'Short_Weight_Type', driven by the dropdown.
selection = alt.selection_point(bind=input_dropdown, fields=['Short_Weight_Type'])

# 'SCC' on an ordinal x axis against 'NCP' on a quantitative y axis; marks
# matching the selection keep their category colour, the rest are light gray.
(
    alt.Chart(df)
    .mark_circle()
    .encode(
        x='SCC:O',
        y='NCP:Q',
        color=alt.condition(
            selection,
            alt.Color('Short_Weight_Type:N'),
            alt.value('lightgray'),
        ),
    )
    .add_params(selection)
)