In [1]:
#Import dependencies
import numpy as np
import pandas as pd
import plotly.express as px 
from dash import Dash, html, dcc, Input, Output

In [2]:
#Behavior dataFrame
behavior = pd.read_csv('cdcbehavior.csv')

In [3]:
#Dropping data
behavior.drop(['YearEnd','Datasource','Class','Data_Value_Unit','Data_Value_Type','Data_Value_Alt',
               'ClassID','TopicID','QuestionID','DataValueTypeID','LocationID',
               'Data_Value_Footnote_Symbol','Data_Value_Footnote',
               'GeoLocation','Age(years)', 'Education', 'Gender', 'Income','Race/Ethnicity',
               'StratificationCategory1', 'Stratification1','StratificationID1','StratificationCategoryId1'],
              axis=1,inplace=True)

behavior= behavior[behavior['LocationAbbr']!= 'PR'] 
behavior= behavior[behavior['LocationAbbr']!= 'GU']
behavior= behavior[behavior['LocationAbbr']!= 'VI']
behavior= behavior[behavior['LocationAbbr']!= 'DC'] #can remove or not remove

behavior= behavior[behavior['Total']=='Total']

#Rename and replace questions to decrease length of text
behavior.rename(columns={'YearStart':'Year','High_Confidence_Limit ':'High_Confidence_Limit'},inplace=True)
behavior['Topic'].replace(to_replace='Physical Activity - Behavior',value='Physical Activity',inplace=True)
behavior['Topic'].replace(to_replace='Obesity / Weight Status',value='Obesity',inplace=True)
behavior['Topic'].replace(to_replace='Fruits and Vegetables - Behavior',value='Nutrition',inplace=True)

behavior['Question'].replace(to_replace='Percent of adults who achieve at least 150 minutes a week of moderate-intensity aerobic physical activity or 75 minutes a week of vigorous-intensity aerobic activity (or an equivalent combination)',
                             value='% of adults with >= 150 min/wk of moderate aerobic or 75 min/wk of vigorous aerobic activity (or an = mix)',inplace=True)
behavior['Question'].replace(to_replace='Percent of adults who achieve at least 300 minutes a week of moderate-intensity aerobic physical activity or 150 minutes a week of vigorous-intensity aerobic activity (or an equivalent combination)',
                             value='% of adults with >= 300 min/wk of moderate aerobic or 150 min/wk of vigorous aerobic activity (or an = mix)',inplace=True)
behavior['Question'].replace(to_replace='Percent of adults who engage in no leisure-time physical activity',
                             value='% of adults with no leisure-time physical activity',inplace=True)
behavior['Question'].replace(to_replace='Percent of adults aged 18 years and older who have obesity',
                             value='% of adults aged >= 18 years with obesity',inplace=True)
behavior['Question'].replace(to_replace='Percent of adults aged 18 years and older who have an overweight classification',
                             value='% of adults aged >= 18 years with an overweight classification',inplace=True)
behavior['Question'].replace(to_replace='Percent of adults who engage in muscle-strengthening activities on 2 or more days a week',
                             value='% of adults with muscle-strengthening activities >= 2 days/wk',inplace=True)
behavior['Question'].replace(to_replace='Percent of adults who achieve at least 150 minutes a week of moderate-intensity aerobic physical activity or 75 minutes a week of vigorous-intensity aerobic physical activity and engage in muscle-strengthening activities on 2 or more days a week',
                             value='% of adults with >= 150 min/wk of moderate aerobic or 75 min/wk of vigorous aerobic physical activity & muscle-strengthening activities on >= 2 days/wk',inplace=True)
behavior['Question'].replace(to_replace='Percent of adults who report consuming fruit less than one time daily',
                             value='% of adults who report consuming fruit < once/day',inplace=True)
behavior['Question'].replace(to_replace='Percent of adults who report consuming vegetables less than one time daily',
                             value='% of adults who report consuming vegetables < once/day',inplace=True)

#Drop
#behavior = behavior[behavior['Year']<2018] #get the year range
behavior.drop('Total',axis=1,inplace=True) #finished selecting total as total

#Organize 
behavior.sort_values(by=['Year','LocationAbbr','Topic'],inplace=True)
behavior.reset_index(drop=True,inplace=True)
behavior = behavior[['Year','LocationAbbr','LocationDesc','Topic','Question','Low_Confidence_Limit','Data_Value','High_Confidence_Limit',
                     'Sample_Size']]

In [4]:
#Check behavior dataframe size and columns 
print(behavior.shape)
behavior.columns

(2745, 9)


Index(['Year', 'LocationAbbr', 'LocationDesc', 'Topic', 'Question',
       'Low_Confidence_Limit', 'Data_Value', 'High_Confidence_Limit',
       'Sample_Size'],
      dtype='object')

In [5]:
#View the dataframe of behavior
behavior.head()

Unnamed: 0,Year,LocationAbbr,LocationDesc,Topic,Question,Low_Confidence_Limit,Data_Value,High_Confidence_Limit,Sample_Size
0,2011,AK,Alaska,Obesity,% of adults aged >= 18 years with obesity,25.3,27.4,29.7,3336
1,2011,AK,Alaska,Obesity,% of adults aged >= 18 years with an overweigh...,36.5,38.9,41.3,3336
2,2011,AK,Alaska,Physical Activity,% of adults with muscle-strengthening activiti...,31.5,33.8,36.3,3294
3,2011,AK,Alaska,Physical Activity,% of adults with >= 300 min/wk of moderate aer...,35.3,37.7,40.1,3186
4,2011,AK,Alaska,Physical Activity,% of adults with >= 150 min/wk of moderate aer...,55.4,57.9,60.4,3212


In [6]:
#Check for Na
behavior.isna().sum() #no na

Year                     0
LocationAbbr             0
LocationDesc             0
Topic                    0
Question                 0
Low_Confidence_Limit     0
Data_Value               0
High_Confidence_Limit    0
Sample_Size              0
dtype: int64

In [7]:
#Find unique years
behavior.Year.unique()

array([2011, 2012, 2013, 2014, 2015, 2016, 2017, 2018, 2019, 2020],
      dtype=int64)

In [8]:
#Find number of unique locations
print(behavior['LocationAbbr'].nunique()) #51: 50 States + US(National) (+ DC optional and removed)

51


In [9]:
#Isolate the national level information of US 
behaviorUS = behavior[behavior['LocationAbbr']=='US']
behaviorUS.reset_index(drop=True,inplace=True)

In [10]:
#Display the US dataframe
print(behaviorUS.shape)
behaviorUS.head()

(54, 9)


Unnamed: 0,Year,LocationAbbr,LocationDesc,Topic,Question,Low_Confidence_Limit,Data_Value,High_Confidence_Limit,Sample_Size
0,2011,US,National,Obesity,% of adults aged >= 18 years with obesity,27.2,27.4,27.7,470700
1,2011,US,National,Obesity,% of adults aged >= 18 years with an overweigh...,35.5,35.8,36.1,470531
2,2011,US,National,Physical Activity,% of adults with >= 150 min/wk of moderate aer...,51.3,51.6,51.9,458088
3,2011,US,National,Physical Activity,% of adults with >= 150 min/wk of moderate aer...,20.3,20.6,20.8,453721
4,2011,US,National,Physical Activity,% of adults with >= 300 min/wk of moderate aer...,31.5,31.8,32.1,454894


In [11]:
#Isolate all the state information in a dataframe called behavior
behavior= behavior[behavior['LocationAbbr']!= 'US']
behavior.shape

(2691, 9)

In [12]:
#Number of unique locations not including the national US and DC options, leaving 50 states
print(behavior['LocationAbbr'].nunique()) #50: 50 States (+ DC optional and removed) 

50


In [13]:
#The topic options
behavior['Topic'].unique()

array(['Obesity', 'Physical Activity', 'Nutrition'], dtype=object)

In [14]:
#The number of unique questions and their text
print(behavior['Question'].nunique())
behavior['Question'].unique()

9


array(['% of adults aged >= 18 years with obesity',
       '% of adults aged >= 18 years with an overweight classification',
       '% of adults with muscle-strengthening activities >= 2 days/wk',
       '% of adults with >= 300 min/wk of moderate aerobic or 150 min/wk of vigorous aerobic activity (or an = mix)',
       '% of adults with >= 150 min/wk of moderate aerobic or 75 min/wk of vigorous aerobic activity (or an = mix)',
       '% of adults with >= 150 min/wk of moderate aerobic or 75 min/wk of vigorous aerobic physical activity & muscle-strengthening activities on >= 2 days/wk',
       '% of adults with no leisure-time physical activity',
       '% of adults who report consuming fruit < once/day',
       '% of adults who report consuming vegetables < once/day'],
      dtype=object)

In [15]:
#Saving the behavior data as behavior.csv
behavior.to_csv('behavior.csv',header=True,index=False)

In [16]:
#load policy data
policy = pd.read_csv('cdcpolicy.csv')

In [17]:
#Dropping unused columns
policy.drop(['Quarter','DataSource','Title','Comments','GeoLocation','Citation','StatusAltValue','DataType','EnactedDate','EffectiveDate',
             'DisplayOrder','PolicyTypeID','HealthTopicID','PolicyTopicID','SettingID','ProvisionID'],axis=1,inplace=True)
policy = policy[policy['LocationAbbr']!='DC']
#Organizing the columns
policy.sort_values(['Year','LocationAbbr','LocationDesc','HealthTopic'],inplace=True)
policy.reset_index(drop=True,inplace=True)

In [18]:
#View the policy data
policy.head()

Unnamed: 0,Year,LocationAbbr,LocationDesc,HealthTopic,PolicyTopic,Setting,Status
0,2001,AL,Alabama,Nutrition,Access to Healthy Foods,Community,Enacted
1,2001,AL,Alabama,Nutrition,Food Security,Community,Enacted
2,2001,AL,Alabama,Nutrition,Farm Direct Foods,Community,Enacted
3,2001,AL,Alabama,Nutrition,Farmers Markets,Community,Enacted
4,2001,AL,Alabama,Nutrition,Appropriations,Community,Enacted


In [19]:
#Check the number of unique locations within policy dataframe
print(policy['LocationAbbr'].nunique())
#Check the unique values of location within the policy dataframe
policy['LocationAbbr'].unique()

50


array(['AL', 'AR', 'AZ', 'CA', 'CT', 'DE', 'GA', 'HI', 'IA', 'IL', 'LA',
       'MA', 'MD', 'ME', 'MN', 'MS', 'MT', 'NC', 'NH', 'NY', 'OH', 'OK',
       'PA', 'RI', 'TN', 'TX', 'UT', 'VT', 'WA', 'CO', 'FL', 'ID', 'NJ',
       'NM', 'SC', 'SD', 'VA', 'WV', 'AK', 'ND', 'NV', 'OR', 'WY', 'KY',
       'MI', 'MO', 'IN', 'KS', 'WI', 'NE'], dtype=object)

In [20]:
#Check options of health topics within policy
policy['HealthTopic'].unique()

array(['Nutrition', 'Obesity', 'Physical Activity'], dtype=object)

In [21]:
#Check the options of policy status
policy.Status.unique()

array(['Enacted', 'Dead', 'Vetoed', 'Introduced'], dtype=object)

In [22]:
#Create policyEnacted
#copy to prevent assigning and selecting at once and only select enacted
policyEnacted = policy.loc[policy.Status == 'Enacted',:].copy()
#Remove the status because we already selected the enacted, and remove location in full spelling because
#the abbreviations are already saved
policyEnacted.drop(['Status','LocationDesc'],axis=1,inplace=True)
#Resorting the columns
policyEnacted.sort_values(['Year','LocationAbbr','HealthTopic'],inplace=True)
#Reset the index count
policyEnacted.reset_index(drop=True,inplace=True)

In [23]:
#Check the number of unique years
print(policyEnacted.Year.nunique())
#Check the exact years
policyEnacted.Year.unique()


17


array([2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011,
       2012, 2013, 2014, 2015, 2016, 2017], dtype=int64)

In [24]:
#View the dataframe of enacted policies
policyEnacted.head()

Unnamed: 0,Year,LocationAbbr,HealthTopic,PolicyTopic,Setting
0,2001,AL,Nutrition,Access to Healthy Foods,Community
1,2001,AL,Nutrition,Food Security,Community
2,2001,AL,Nutrition,Farm Direct Foods,Community
3,2001,AL,Nutrition,Farmers Markets,Community
4,2001,AL,Nutrition,Appropriations,Community


In [25]:
#Remove the PolicyTopic and Setting columns 
policyEnacted.set_index(['Year','LocationAbbr','HealthTopic'],inplace=True)

In [26]:
#Group the data in a groupby
policyEnactedgb = policyEnacted.groupby(['Year','LocationAbbr','HealthTopic'])

In [27]:
#View the groupby count
policyEnactedgb.PolicyTopic.count()

Year  LocationAbbr  HealthTopic      
2001  AL            Nutrition             8
      AR            Nutrition             7
                    Obesity               1
      AZ            Nutrition             4
                    Obesity               2
                                         ..
2017  TN            Nutrition             3
                    Physical Activity     1
      UT            Nutrition             6
      VT            Nutrition            14
      WY            Nutrition             2
Name: PolicyTopic, Length: 1273, dtype: int64

In [28]:
#View the groupby dataframe
policyEnacted

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,PolicyTopic,Setting
Year,LocationAbbr,HealthTopic,Unnamed: 3_level_1,Unnamed: 4_level_1
2001,AL,Nutrition,Access to Healthy Foods,Community
2001,AL,Nutrition,Food Security,Community
2001,AL,Nutrition,Farm Direct Foods,Community
2001,AL,Nutrition,Farmers Markets,Community
2001,AL,Nutrition,Appropriations,Community
...,...,...,...,...
2017,VT,Nutrition,Agriculture and Farming,School/After School
2017,VT,Nutrition,Access to Healthy Foods,Community
2017,VT,Nutrition,Farm Direct Foods,School/After School
2017,WY,Nutrition,Agriculture and Farming,Community


In [29]:
#Copy the count to retrieve the number of enacted policies by states
policyEnacted['NumPolicies']= policyEnactedgb.PolicyTopic.count().copy()

In [30]:
#View first 20
policyEnacted.head(20)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,PolicyTopic,Setting,NumPolicies
Year,LocationAbbr,HealthTopic,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2001,AL,Nutrition,Access to Healthy Foods,Community,8
2001,AL,Nutrition,Food Security,Community,8
2001,AL,Nutrition,Farm Direct Foods,Community,8
2001,AL,Nutrition,Farmers Markets,Community,8
2001,AL,Nutrition,Appropriations,Community,8
2001,AL,Nutrition,Disparities/Equity,Community,8
2001,AL,Nutrition,Food Assistance Programs,Community,8
2001,AL,Nutrition,Agriculture and Farming,Community,8
2001,AR,Nutrition,Appropriations,Community,7
2001,AR,Nutrition,Appropriations,Community,7


In [31]:
#Reset the index
policyEnacted.reset_index(inplace=True)

In [32]:
#View again
policyEnacted

Unnamed: 0,Year,LocationAbbr,HealthTopic,PolicyTopic,Setting,NumPolicies
0,2001,AL,Nutrition,Access to Healthy Foods,Community,8
1,2001,AL,Nutrition,Food Security,Community,8
2,2001,AL,Nutrition,Farm Direct Foods,Community,8
3,2001,AL,Nutrition,Farmers Markets,Community,8
4,2001,AL,Nutrition,Appropriations,Community,8
...,...,...,...,...,...,...
11245,2017,VT,Nutrition,Agriculture and Farming,School/After School,14
11246,2017,VT,Nutrition,Access to Healthy Foods,Community,14
11247,2017,VT,Nutrition,Farm Direct Foods,School/After School,14
11248,2017,WY,Nutrition,Agriculture and Farming,Community,2


In [33]:
#Save the policy enacted data
policyEnacted.to_csv('policyEnacted.csv')

In [34]:
#Merge the behavior and hte policy enacted data into a dataframe called cdcdata
cdcdata = behavior.merge(policyEnacted,how='left',left_on=['Year','LocationAbbr','Topic'],right_on=['Year','LocationAbbr','HealthTopic'])

In [35]:
#View the combined dataframe
cdcdata

Unnamed: 0,Year,LocationAbbr,LocationDesc,Topic,Question,Low_Confidence_Limit,Data_Value,High_Confidence_Limit,Sample_Size,HealthTopic,PolicyTopic,Setting,NumPolicies
0,2011,AK,Alaska,Obesity,% of adults aged >= 18 years with obesity,25.3,27.4,29.7,3336,Obesity,Appropriations,Community,1.0
1,2011,AK,Alaska,Obesity,% of adults aged >= 18 years with an overweigh...,36.5,38.9,41.3,3336,Obesity,Appropriations,Community,1.0
2,2011,AK,Alaska,Physical Activity,% of adults with muscle-strengthening activiti...,31.5,33.8,36.3,3294,Physical Activity,Appropriations,Community,6.0
3,2011,AK,Alaska,Physical Activity,% of adults with muscle-strengthening activiti...,31.5,33.8,36.3,3294,Physical Activity,Built Environment and Street-Scale Design,Community,6.0
4,2011,AK,Alaska,Physical Activity,% of adults with muscle-strengthening activiti...,31.5,33.8,36.3,3294,Physical Activity,Bicycling,Community,6.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
7088,2020,WV,West Virginia,Obesity,% of adults aged >= 18 years with obesity,37.5,39.1,40.7,5467,,,,
7089,2020,WV,West Virginia,Physical Activity,% of adults with no leisure-time physical acti...,28.3,29.7,31.1,5868,,,,
7090,2020,WY,Wyoming,Obesity,% of adults aged >= 18 years with obesity,28.7,30.7,32.7,4410,,,,
7091,2020,WY,Wyoming,Obesity,% of adults aged >= 18 years with an overweigh...,34.6,36.6,38.7,4410,,,,


In [36]:
#Replace Nans with N/A for Number of Policies
cdcdata['NumPolicies'].replace(to_replace=np.nan,value='Missing',inplace=True)

In [37]:
#Drop the following columns to remove redundant or unused information
cdcdata.drop(['HealthTopic','PolicyTopic','Setting'],axis=1,inplace=True)

In [38]:
#Confirm the change from NaN to Missing
cdcdata

Unnamed: 0,Year,LocationAbbr,LocationDesc,Topic,Question,Low_Confidence_Limit,Data_Value,High_Confidence_Limit,Sample_Size,NumPolicies
0,2011,AK,Alaska,Obesity,% of adults aged >= 18 years with obesity,25.3,27.4,29.7,3336,1.0
1,2011,AK,Alaska,Obesity,% of adults aged >= 18 years with an overweigh...,36.5,38.9,41.3,3336,1.0
2,2011,AK,Alaska,Physical Activity,% of adults with muscle-strengthening activiti...,31.5,33.8,36.3,3294,6.0
3,2011,AK,Alaska,Physical Activity,% of adults with muscle-strengthening activiti...,31.5,33.8,36.3,3294,6.0
4,2011,AK,Alaska,Physical Activity,% of adults with muscle-strengthening activiti...,31.5,33.8,36.3,3294,6.0
...,...,...,...,...,...,...,...,...,...,...
7088,2020,WV,West Virginia,Obesity,% of adults aged >= 18 years with obesity,37.5,39.1,40.7,5467,Missing
7089,2020,WV,West Virginia,Physical Activity,% of adults with no leisure-time physical acti...,28.3,29.7,31.1,5868,Missing
7090,2020,WY,Wyoming,Obesity,% of adults aged >= 18 years with obesity,28.7,30.7,32.7,4410,Missing
7091,2020,WY,Wyoming,Obesity,% of adults aged >= 18 years with an overweigh...,34.6,36.6,38.7,4410,Missing


In [39]:
#Save the cdcdata dataframe
cdcdata.to_csv('cdcdata.csv',header=True)

In [40]:
#Read to cdcdata the dataframe
cdcdata=pd.read_csv('cdcdata.csv',index_col=0)

In [None]:
app = Dash(__name__)

app.layout = html.Div(children=[
    html.H1(children='Health Behavior within the United States',
            style={'text-align':'center','color': '#004C54'}),
    html.Div(children='CDC Adult Data on Physical Activity, Nutrition, and Obesity (2011-2020) and related Policy Data (2011-2017)',
             style={'text-align':'center','color': '#004C54'}),
    
    html.Hr(),
    html.Br(),
    
    dcc.Slider(
        min=cdcdata['Year'].min(),
        max=cdcdata['Year'].max(),
        step=None,
        value=cdcdata['Year'].min(),
        marks={str(year):str(year) for year in cdcdata['Year'].unique()},
        id='year_slider'),
    
    html.Br(),
    
    html.Div(children='Topic:',style={'text-align':'left','color': '#004C54'}),
    dcc.Dropdown(options=[{'label':s,'value':s} for s in (cdcdata['Topic'].unique())],
                 value='Physical Activity',
                 id='topic_dropdown',
                placeholder='Select Topic',
                clearable=False),
    
    html.Br(),
    
    html.Div(children='Question:',style={'text-align':'left','color': '#004C54'}),
    dcc.Dropdown(id='question_dropdown',
                placeholder='Select Question',
                clearable=False),
    
    html.Br(),
    html.Hr(),
    
    dcc.Graph(id='my_map',figure={})
])

@app.callback(Output('question_dropdown','options'),
             Input('topic_dropdown','value'))
def set_question_options(selected_topic):
    dff = cdcdata[cdcdata['Topic']==selected_topic]
    return [{'label': i,'value': i} for i in (dff['Question'].unique())]


@app.callback(Output('question_dropdown','value'),
             Input('question_dropdown','options'))
def set_question_values(available_options):
    return [k['value'] for k in available_options][0]


@app.callback(
    Output('my_map','figure'),
    [Input('year_slider','value'),
    Input('topic_dropdown','value'),
    Input('question_dropdown','value')]
)
def update_map_by_slider(selected_year,selected_topic,selected_question):
    dff = cdcdata.copy()
    dff = dff[dff['Year']==selected_year]
    dff = dff[dff['Topic']==selected_topic]
    dff = dff[dff['Question']==selected_question]
    
    fig = px.choropleth(data_frame=dff,locationmode='USA-states',locations='LocationAbbr',
                        scope='usa',color='Data_Value',
                        hover_data=['High_Confidence_Limit','Data_Value','Low_Confidence_Limit','Sample_Size','NumPolicies'],
                        labels={'LocationAbbr':'Location',
                               'Low_Confidence_Limit':'Low Confidence Limit (%)',
                               'Data_Value':'Value in Question (%)',
                               'High_Confidence_Limit':'High Confidence Limit (%)',
                               'Sample_Size':'Sample Size',
                               'NumPolicies':'Number of Enacted Policies'},
                       template='plotly_dark')
    return fig


if __name__ == '__main__':
    app.run_server()
    
#viewer.show(app)