# Dashboard testing

- pan/zoom on a given graph
- multiple graphs updated with selection change (eg year updates map and chart highlight)

In [1]:
# import packages for data manipulation
from sqlalchemy import create_engine
import pandas as pd
import geopandas as gpd

# packages for visualizations
import matplotlib.pyplot as plt
import seaborn as sns

# set Seaborn context/style
# sns.set_context(context='notebook')
# sns.set_style('darkgrid')

# for interactivity
from ipywidgets import *

# to get zoom/pan option in plots
%matplotlib notebook

In [2]:
# database connection
DB = 'template_postgis'
USER = 'postgres'
HOST = 'localhost'
PORT = '5432'
PW = 'postgres'

DB_CONNECTION_STRING = 'postgresql://{username}:{password}@{host}:{port}/{database}'.format(
username=USER, password=PW, host=HOST, port=PORT, database=DB)

engine = create_engine(DB_CONNECTION_STRING)

### Functions to get data

In [3]:
# function to get data - cbsafp = '28140' is KCMO metro area
# returns the base, Block level data
def getWAC(seg='s000', jt='jt01', years=xrange(2002,2016), cbsafp = '28140'):
    # to pull data from database
    DFs = []
    # get data
    for year in years:
        qry = """
        SELECT *, left(w_geocode, 5) countyfp, {YR} AS year 
        FROM lodes.mo_wac_{SEG}_{JT}_{YR}
        WHERE left(w_geocode, 5) 
        IN (SELECT geoid FROM tl_2016_us_county WHERE cbsafp = '{CBSA}')
        """.format(SEG=seg, JT=jt, YR=year, CBSA=cbsafp)
        df = pd.read_sql(qry, engine)
#         df.drop(columns='createdate', inplace=True)
#         df['year'] = year
        DFs.append(df)
    # combine DFs into single dataframe
    df = pd.concat(DFs)
    return(df)

In [16]:
# get geographic data
# functions return pandas.DataFrame objects based on the counties in a given CBSA 

def _counties(cbsafp='28140', crs='102698'):
    # get counties
    qry = """
    SELECT geoid, name,
        ST_Transform(geom, {CRS}) geom 
    FROM tl_2016_us_county 
    WHERE cbsafp = '{CBSA}'
    """.format(CBSA=cbsafp, CRS=crs)

    counties = gpd.read_postgis(qry, engine, geom_col='geom')
    counties['coords'] = counties.geometry.apply(lambda x: x.representative_point().coords[0])
    return(counties)

def _blkgrps(cbsafp='28140', crs='102698'):
    # get BlockGroups for map

    qry = """
    SELECT blockgroup,
        ST_Transform(geom, {CRS}) geom 
    FROM tl_2016_29_blkgrp
    WHERE left(blockgroup, 5) 
        IN (SELECT geoid
            FROM tl_2016_us_county 
            WHERE cbsafp =  '{CBSA}')
    """.format(CBSA='28140', CRS=crs)

    blkgrps = gpd.read_postgis(qry, engine, geom_col='geom')
    blkgrps['coords'] = blkgrps.geometry.apply(lambda x: x.representative_point().coords[0])
    return(blkgrps)


# def get_geogs(geos = ['counties', 'blockgroups'], cbsafp='28140', crs='102698'):
#     # hold retrieved data:
#     GDFs = {}
#     # get data for each of the geos passed:
#     for g in geos:
#         GDFs[g] = geoFunc[g]
#     return(GDFs)
    

### find pan/zoom option

In [5]:
counties = _counties()
df = getWAC(years=range(2004,2012))

In [8]:
print(df.columns.tolist()[1:-3])

[u'c000', u'ca01', u'ca02', u'ca03', u'ce01', u'ce02', u'ce03', u'cns01', u'cns02', u'cns03', u'cns04', u'cns05', u'cns06', u'cns07', u'cns08', u'cns09', u'cns10', u'cns11', u'cns12', u'cns13', u'cns14', u'cns15', u'cns16', u'cns17', u'cns18', u'cns19', u'cns20', u'cr01', u'cr02', u'cr03', u'cr04', u'cr05', u'cr07', u'ct01', u'ct02', u'cd01', u'cd02', u'cd03', u'cd04', u'cs01', u'cs02', u'cfa01', u'cfa02', u'cfa03', u'cfa04', u'cfa05', u'cfs01', u'cfs02', u'cfs03', u'cfs04', u'cfs05']


In [9]:
# NO %matplotlib magic used, testing claim
# from https://stackoverflow.com/questions/10655217/ipython-notebook-pylab-inline-zooming-of-a-plot
# that %matplotlib notebook defaults to interactivity

# using @interact() in this way gives error 
# "TypeError: plot_cnty() takes exactly 2 arguments (1 given)"
# but seems to still work fine
@interact(plot_cnty,yr=df['year'].unique(), col=df.columns.tolist()[1:-3])
def plot_cnty(yr, col): 
    f, ax = plt.subplots(1, figsize=(12,8))
    # get specified data summarized by county
    cnty_df = pd.merge(counties,
                    df[df['year']==yr].groupby('countyfp')[col].sum().reset_index(), 
                    left_on='geoid', right_on='countyfp')
    colmap = sns.cubehelix_palette(8, start=2, rot=0, dark=0, light=.95, as_cmap=True)
    cnty_df.plot(col, ax=ax, legend=True, cmap=colmap) 
    
    for idx, row in cnty_df.iterrows():
        ax.annotate(row['name'], row['coords'],
                     horizontalalignment='left', size='medium', color='white', 
                     weight='semibold',
                   bbox=dict(boxstyle="round,pad=0.3", fc="grey", ec="k", lw=0.5))
    
    ax.tick_params(
        axis='both',          # changes apply to the x-axis
        which='both',      # both major and minor ticks are affected
        bottom='off',      # ticks along the bottom edge are off
        top='off',         # ticks along the top edge are off
        labelbottom='off', # labels along the bottom edge are off
        labelleft='off')
    
    f.suptitle('{} in {} by county, KCMO'.format(col, yr))

# interact(plot_cnty,yr=df['year'].unique(), col=df.columns.tolist()[1:])

TypeError: plot_cnty() takes exactly 2 arguments (1 given)

In [10]:
def plot_cnty(yr, col): 
    f, ax = plt.subplots(1, figsize=(12,8))
    # get specified data summarized by county
    cnty_df = pd.merge(counties,
                    df[df['year']==yr].groupby('countyfp')[col].sum().reset_index(), 
                    left_on='geoid', right_on='countyfp')
    colmap = sns.cubehelix_palette(8, start=2, rot=0, dark=0, light=.95, as_cmap=True)
    cnty_df.plot(col, ax=ax, legend=True, cmap=colmap) 
    
    for idx, row in cnty_df.iterrows():
        ax.annotate(row['name'], row['coords'],
                     horizontalalignment='left', size='medium', color='white', 
                     weight='semibold',
                   bbox=dict(boxstyle="round,pad=0.3", fc="grey", ec="k", lw=0.5))
    
    ax.tick_params(
        axis='both',          # changes apply to the x-axis
        which='both',      # both major and minor ticks are affected
        bottom='off',      # ticks along the bottom edge are off
        top='off',         # ticks along the top edge are off
        labelbottom='off', # labels along the bottom edge are off
        labelleft='off')
    
    f.suptitle('{} in {} by county, KCMO'.format(col, yr))

interact(plot_cnty,yr=df['year'].unique(), col=df.columns.tolist()[1:])

<function __main__.plot_cnty>

In [30]:
# plot counties
def plot_cnty(yr, col): 
    f, ax = plt.subplots(1)#, figsize=(8,8))
    # get specified data summarized by county
    cnty_df = pd.merge(counties,
                    df[df['year']==yr].groupby('countyfp')[col].sum().reset_index(), 
                    left_on='geoid', right_on='countyfp')
    colmap = sns.cubehelix_palette(8, start=2, rot=0, dark=0, light=.95, as_cmap=True)
    cnty_df.plot(col, ax=ax, legend=True, cmap=colmap) 
    
    for idx, row in cnty_df.iterrows():
        ax.annotate(row['name'], row['coords'],
                     horizontalalignment='left', size='medium', color='white', 
                     weight='semibold',
                   bbox=dict(boxstyle="round,pad=0.3", fc="grey", ec="k", lw=0.5))
    
    ax.tick_params(
        axis='both',          # changes apply to the x-axis
        which='both',      # both major and minor ticks are affected
        bottom='off',      # ticks along the bottom edge are off
        top='off',         # ticks along the top edge are off
        labelbottom='off', # labels along the bottom edge are off
        labelleft='off')
    
    f.suptitle('{} in {} by county, KCMO'.format(col, yr))

interact(plot_cnty,yr=df['year'].unique(), col=df.columns.tolist()[1:])

<function __main__.plot_cnty>

In [17]:
blocks = _blkgrps()

In [19]:
df['bg'] = df['w_geocode'].apply(lambda g: g[:12])

In [28]:
def plot_bg(yr, col): 
    sns.set_style('whitegrid')
    f, ax = plt.subplots(1)#, figsize=(6,6))
    # get specified data summarized by county
    block_df = pd.merge(blocks,
                    df[df['year']==yr].groupby('bg')[col].sum().reset_index(), 
                    left_on='blockgroup', right_on='bg')
    colmap = sns.cubehelix_palette(8, start=2, rot=0, dark=0, light=.95, as_cmap=True)
    counties.plot(ax=ax, color='white', edgecolor='grey')
    block_df.plot(col, ax=ax, legend=True, cmap=colmap) 
    
    for idx, row in counties.iterrows():
        ax.annotate(row['name'], row['coords'],
                     horizontalalignment='left', size='medium', color='white', 
                     weight='semibold',
                   bbox=dict(boxstyle="round,pad=0.3", fc="grey", ec="k", lw=0.5))
    
    ax.tick_params(
        axis='both',          # changes apply to the x-axis
        which='both',      # both major and minor ticks are affected
        bottom='off',      # ticks along the bottom edge are off
        top='off',         # ticks along the top edge are off
        labelbottom='off', # labels along the bottom edge are off
        labelleft='off')
    
    f.suptitle('{} in {} by Block Group, KCMO'.format(col, yr))

interact(plot_bg,yr=df['year'].unique(), col=df.columns.tolist()[1:-3])

<function __main__.plot_bg>

In [23]:
print(df.columns.tolist())

[u'w_geocode', u'c000', u'ca01', u'ca02', u'ca03', u'ce01', u'ce02', u'ce03', u'cns01', u'cns02', u'cns03', u'cns04', u'cns05', u'cns06', u'cns07', u'cns08', u'cns09', u'cns10', u'cns11', u'cns12', u'cns13', u'cns14', u'cns15', u'cns16', u'cns17', u'cns18', u'cns19', u'cns20', u'cr01', u'cr02', u'cr03', u'cr04', u'cr05', u'cr07', u'ct01', u'ct02', u'cd01', u'cd02', u'cd03', u'cd04', u'cs01', u'cs02', u'cfa01', u'cfa02', u'cfa03', u'cfa04', u'cfa05', u'cfs01', u'cfs02', u'cfs03', u'cfs04', u'cfs05', u'createdate', u'countyfp', u'year', 'bg']


In [24]:
# data prep for compare_cols()
dfpl = df.groupby('year')[df.columns.tolist()[1:-4]].sum().reset_index()
dfpl2 = pd.melt(dfpl, id_vars='year', value_vars=df.columns.tolist()[1:-4])

In [32]:
def compare_cols(col1, col2):
    
    f, (ax1, ax2) = plt.subplots(1, 2, figsize=(12,6))
    f.suptitle('Comparing {} and {} over time'.format(col1, col2))

    sns.barplot('year', 'value', 'variable', data=dfpl2[dfpl2['variable'].isin([col2, col1])], ax=ax1)
    ax1.set_ylabel('Count')
#     ax1.set_title('{} and {}'.format(col1, col2))
#     ax1.yaxis.set_major_formatter(FormatStrFormatter('%.0f'))

    ax2.plot(dfpl[col1], dfpl[col2], 'bo--')
#     sns.pointplot(x=col1, y=col2, hue='year', data=dfpl2, markers='o', linestyles='--', ax=ax2)
    ax2.set_ylabel(col2)
    ax2.set_xlabel(col1)

    for i, r in dfpl.iterrows():
        ax2.annotate(r['year'], (r[col1]+0.2, r[col2]+0.2),
                     horizontalalignment='left', size='medium', color='black', 
                     weight='semibold')
#         f.text(r['year'], r[col1]+0.2, r[col2]+0.2,
#                      horizontalalignment='left', size='medium', color='black', 
#                      weight='semibold')
#         ax2.arrow(r[col1], r[col2], r[col1]+0.5, r[col2]+0.5, shape='full',
#                   lw=0, length_includes_head=True, head_width=.5)
    # return f? or plt?
#     return(f) # no, duplicates image - maybe because %matplotlib magic?

interact(compare_cols, col1=df.columns.tolist()[1:-2], col2=df.columns.tolist()[1:-2])

<function __main__.compare_cols>