# Missouri State Dashboard
---

## Python Setup

In [None]:
# import packages for data manipulation
from sqlalchemy import create_engine
import pandas as pd
import numpy as np
import geopandas as gpd
from collections import OrderedDict

# packages for visualizations
import matplotlib.pyplot as plt
import seaborn as sns

# for interactivity
# The star import is what brings HBox, VBox, Label, Layout and Output into the
# notebook namespace; later cells use those names unqualified.
from ipywidgets import *
import ipywidgets as widgets
# IPython magic: select the interactive "notebook" matplotlib backend
%matplotlib notebook

import warnings
# Suppress every warning for the rest of the session (this also hides
# deprecation warnings raised by the libraries imported above).
warnings.filterwarnings('ignore')

# set Seaborn context/style
sns.set_context(context='notebook')

In [None]:
# Shared SQLAlchemy engine used by every query in this notebook.
# The URL names only the host and database; it carries no username or password.
db_url = 'postgresql://@10.10.2.10/appliedda'
engine = create_engine(db_url)

## NAICS Codes and Descriptions

In [None]:
# Load the NAICS rows whose code is either exactly two characters long or
# contains a hyphen (a code range), returning them as (code, title) columns.
query = '''
SELECT naics_us_code as code, naics_us_title as title 
FROM naics_2012 
WHERE char_length(naics_us_code) = 2 OR naics_us_code like '%%-%%'
'''
naics = pd.read_sql(sql=query, con=engine)

In [None]:
# Dropdown labels of the form "<code>: <title>", built from the codes as they
# were loaded (i.e. before the range rewriting below).
code_title = ["{}: {}".format(str(code), title)
              for code, title in zip(naics['code'], naics['title'])]

# Turn hyphenated ranges into comma lists and spell out 31-33 as 31,32,33,
# then map each label to its comma-separated code string.
naics['code'] = [code.replace(r'-', ',').replace(r'31,33', '31,32,33')
                 for code in naics['code']]
naics_dict = dict(zip(code_title, naics['code']))

# Flatten every individual code into one list, then render it as a quoted,
# comma-separated string by stripping the brackets from the list repr.
industries_all = [code
                  for entry in naics_dict.values()
                  for code in str(entry).split(',')]
industries_all = str(industries_all).replace(r'[', '').replace(r']', '')

# Add the catch-all entry and order all entries alphabetically by label.
naics_dict['All'] = 'All'
naics_od = OrderedDict(sorted(naics_dict.items(), key=lambda pair: pair[0]))

## Define User Interface (UI) Components

### Toggle

In [None]:
# Two-way switch between a single-quarter map ('Level') and a
# change-between-two-quarters map ('Variation'); 'Variation' is preselected.
toggle_settings = dict(
    options=['Level', 'Variation'],
    value='Variation',
    description='',
    disabled=False,
    button_style='',
    tooltips=['Display absolute counts', 'Display variation over time'],
)
plot_toggle = widgets.ToggleButtons(**toggle_settings)

### Sliders

**Time Range:**

In [None]:
# One (label, (quarter, year)) pair per quarter, Q1 2006 through Q4 2015,
# ordered by year and then by quarter.
options = [
    (" Q{} {} ".format(quarter, yr), (quarter, yr))
    for yr in range(2006, 2016)
    for quarter in range(1, 5)
]

# Single-quarter picker; created hidden.
time_slider = widgets.SelectionSlider(
    options=options,
    description='Quarter',
    disabled=False,
    continuous_update=False,
    layout=Layout(width='60%', visibility='hidden'),
)

# Start/end picker; initially spans the first through the last quarter.
last_index = len(options) - 1
time_range_slider = widgets.SelectionRangeSlider(
    options=options,
    index=(0, last_index),
    description='Time Range',
    disabled=False,
    layout=Layout(width='60%'),
)

**Wage Range:**

In [None]:
# Lower and upper bounds for the wage filter; both boxes step by 1000.
min_wage_text = widgets.IntText(value=0, step=1000, description='Min', disabled=False)
max_wage_text = widgets.IntText(value=10000000, step=1000, description='Max', disabled=False)

### Dropdowns

**Plotted Metric:**

In [None]:
# Metric shown on the map. The options are given as an ordered list of
# (label, value) pairs rather than a dict: a dict literal has no guaranteed
# order before Python 3.7 (so the default, first-listed selection could vary),
# and ipywidgets 7 deprecates Mapping-type options.
indicator_dropdown = widgets.Dropdown(
    options=[
        ('Total Jobs', 'Total Jobs'),
        ('Average Quarterly Earnings', 'Average Quarterly Earnings'),
    ],
    description='Indicator'
)

**Optional QWI Metrics Subset:**

In [None]:
# Not yet implemented: placeholder for a QWI-based indicator dropdown.
# Every label below still maps to the same value ('Total Jobs'), and enabling
# this cell as-is would rebind the existing `indicator_dropdown` name.
# indicator_dropdown = widgets.Dropdown(
#     options={'Beginning of Quarter Employment': 'Total Jobs',
#              'End of Quarter Employment': 'Total Jobs',
#              'Full Quarter Employment' : 'Total Jobs',
#              'Accessions': 'Total Jobs',
#              'Accessions to Consecutive Quarter Status': 'Total Jobs',
#              'Accessions to Full Quarter Status': 'Total Jobs',
#              'Separations': 'Total Jobs',
#              'New Hires': 'Total Jobs',
#              'Recalls': 'Total Jobs'},
#     description='Indicator'
# )

**Industry Subset Dropdown:**

In [None]:
# Industry filter: labels come from the sorted NAICS mapping, and the
# catch-all 'All' entry is preselected.
industry_settings = dict(options=naics_od, value='All', description='Industry')
industry_dropdown = widgets.Dropdown(**industry_settings)

### Button

In [None]:
# Button that triggers a redraw of the map.
button_settings = dict(description='Generate Plot', disabled=False, tooltip='Generate Plot')
generate_button = widgets.Button(**button_settings)

## Build Panel Layout

In [None]:
# Control-panel rows, top to bottom: plot-type toggle, time sliders,
# selectors next to the wage bounds, and the generate button.
toggle_row = HBox([plot_toggle])
slider_rows = VBox([time_slider, time_range_slider])
selector_column = VBox([Label(''), indicator_dropdown, industry_dropdown])
wage_column = VBox([Label('        Quarterly Earnings Range'), min_wage_text, max_wage_text])
button_row = HBox([generate_button])

panel_items = [
    toggle_row,
    slider_rows,
    HBox([selector_column, wage_column]),
    button_row,
]

# Stack the rows vertically; `out` is the output area the plot is drawn into.
input_panel = VBox(panel_items)
out = Output()

In [None]:
# Observer that swaps which time slider is visible
def on_value_change(change):
    """Show the time slider matching the newly selected plot type, hide the other.

    `change` is the change dictionary passed by the observer; only its 'new'
    entry is read. Values other than 'Level' and 'Variation' are ignored.
    """
    # plot type -> (slider to hide, slider to show)
    sliders_by_mode = {
        'Level': (time_range_slider, time_slider),
        'Variation': (time_slider, time_range_slider),
    }
    selected = change['new']
    if selected in sliders_by_mode:
        to_hide, to_show = sliders_by_mode[selected]
        to_hide.layout.visibility = 'hidden'
        to_show.layout.visibility = 'visible'
plot_toggle.observe(on_value_change, names='value')

## Geographic Components with `geopandas`

### Get County Shapes

In [None]:
# County shapes for state FIPS '29' (MO) and '17' (IL), with geometries
# transformed to SRID 102698 by the database.
qry = """
SELECT statefp, countyfp, name,
    ST_Transform(geom, 102698) geom 
FROM tl_2016_us_county 
WHERE statefp = '29' OR statefp = '17'
"""
counties = gpd.read_postgis(sql=qry, con=engine, geom_col='geom')

# Store, per county, the first coordinate tuple of a point guaranteed to lie
# inside the shape.
counties['coords'] = counties.geometry.apply(
    lambda shape: shape.representative_point().coords[0]
)

### Get State Borders

In [None]:
# Outline of the state with FIPS code '29', transformed to SRID 102698 by the
# database.
qry = """
SELECT statefp, ST_Transform(geom, 102698) geom 
FROM tl_2016_states 
WHERE statefp = '29'
"""
state = gpd.read_postgis(sql=qry, con=engine, geom_col='geom')

# Store the first coordinate tuple of a point guaranteed to lie inside the shape.
state['coords'] = state.geometry.apply(
    lambda shape: shape.representative_point().coords[0]
)

## Dashboard Functions

In [None]:
def run_query():
    """Query the wage-bucket tables for each state using the current UI selections.

    Reads the plot-type toggle, both time sliders, the wage bounds and the
    industry dropdown, fills in the SQL template ``level_buckets.sql`` or
    ``variation_buckets.sql`` (opened relative to the working directory) once
    per state table, and stacks the per-state results.

    Returns
    -------
    pandas.DataFrame
        The rows returned for the 'mo' table followed by those for the 'il'
        table, with an added ``statefp`` column ('29' and '17' respectively).

    Raises
    ------
    ValueError
        If the toggle holds a plot type other than 'Level' or 'Variation'.
    """
    # Grab all of the values from the UI components
    plot_type = plot_toggle.value
    qtr, year = time_slider.value
    (start_qtr, start_year), (end_qtr, end_year) = time_range_slider.value
    min_wage = min_wage_text.value
    max_wage = max_wage_text.value
    industry = industry_dropdown.value

    # Build the industry filter fragment: a parenthesised list of quoted codes.
    # 'All' additionally admits rows whose naics is null.
    if industry == 'All':
        industry = "({}) or naics is null".format(industries_all)
    else:
        quoted_codes = ",".join("'{}'".format(code) for code in industry.split(','))
        industry = "({})".format(quoted_codes)

    # Placeholders common to both templates, plus the time placeholders that
    # depend on the plot type.
    params = dict(geography='cnty',
                  min_wage=min_wage,
                  max_wage=max_wage,
                  industries=industry)
    if plot_type == 'Level':
        params.update(year=year, qtr=qtr)
    elif plot_type == 'Variation':
        params.update(start_year=start_year, start_qtr=start_qtr,
                      end_year=end_year, end_qtr=end_qtr)
    else:
        raise ValueError("Unsupported plot type: {!r}".format(plot_type))

    # Read the template once (it does not depend on the state) and close the
    # file promptly. The text is used as-is: its whitespace separates SQL tokens.
    with open('{}_buckets.sql'.format(plot_type.lower()), 'r') as sql_file:
        template = sql_file.read()

    # NOTE: the SQL is assembled with str.format. The substituted values come
    # from fixed dropdown entries and integer text boxes, not free-form text.
    state_fips = [('mo', '29'), ('il', '17')]
    frames = []
    for abbrev, fips in state_fips:
        query = template.format(
            data_table="dashboard_wage_buckets_{}".format(abbrev), **params)
        state_df = pd.read_sql(query, engine)
        state_df['statefp'] = fips
        frames.append(state_df)

    return pd.concat(frames)

In [None]:
def generate_plot(button_obj):
    """Redraw the county map inside ``out`` from the current UI selections.

    Clears the output area, runs ``run_query()``, joins the result onto the
    county shapes and draws a choropleth over a hatched gray basemap, with the
    state outline on top. If the join yields no plottable values, only the
    basemap and outline are drawn and a short message is printed.

    Parameters
    ----------
    button_obj
        The argument supplied by the button's click callback; not used.
    """
    out.clear_output()
    with out:
        # Query and merge data. The default inner join keeps only counties
        # that appear in the query result.
        df = run_query()
        cnty_df = counties.merge(df, left_on=['countyfp', 'statefp'],
                                 right_on=['cnty', 'statefp'])

        # Choose the column to map from the plot type and the indicator.
        plot_type = plot_toggle.value
        show_wages = indicator_dropdown.value == 'Average Quarterly Earnings'
        if plot_type == 'Level':
            column = 'avg_wage' if show_wages else 'jobs'
        elif plot_type == 'Variation':
            column = 'change_in_avg_wage_pct' if show_wages else 'change_in_jobs_pct'
        else:
            column = None
        has_data = (column is not None
                    and not cnty_df.empty
                    and cnty_df[column].notnull().any())

        # Configure plot settings
        sns.set_style('white')
        f, ax = plt.subplots(1, figsize=(10,8))

        # Plot basemap so counties with no data appear gray
        counties.plot(ax=ax, edgecolor='black', color='lightgray', hatch='//')

        # Plot county data
        if has_data and plot_type == 'Level':
            colmap = sns.cubehelix_palette(8, start=2.9, rot=0, dark=.1, light=.95, as_cmap=True)
            cnty_df.plot(column, ax=ax, legend=True, edgecolor='black', cmap=colmap)
        elif has_data and plot_type == 'Variation':
            # Symmetric colour limits keep zero change at the palette's centre.
            bound = cnty_df[column].abs().max()
            colmap = sns.diverging_palette(10, 150, center='light', as_cmap=True)
            cnty_df.plot(column, ax=ax, legend=True, edgecolor='black', cmap=colmap,
                         vmax=bound, vmin=bound*-1)
        elif column is not None:
            # Nothing to colour: avoid NaN colour limits / plotting an empty frame.
            print('No data returned for the current selection.')

        # Plot MO state boundary (transparent fill, numeric line width)
        state.plot(ax=ax, edgecolor='black', linewidth=2, color=(0,0,0,0))
        ax.set_xbound(upper=4500000)
        ax.set_ybound(upper=1850000)
        ax.axis('off')
        plt.show()

In [None]:
# Run generate_plot each time the 'Generate Plot' button is clicked
generate_button.on_click(generate_plot)

In [None]:
# Render the control panel, followed by the output area that holds the plot
display(input_panel, out)