### Chicago community areas: socioeconomic indicators vs. causes of death

In [20]:
import requests
import os
import pandas as pd


from bokeh.plotting import figure, show 
from bokeh.layouts import gridplot
from ipywidgets import interact, interact_manual
from bokeh.io import output_notebook
output_notebook()

from bokeh.models.widgets import Panel, Tabs
from bokeh.models import HoverTool

In [54]:
# Directory the data files are read from. download_data() saves into the
# current working directory, so reading from that same directory keeps the
# download and load steps in sync and avoids a machine-specific absolute path.
path = os.getcwd()

# CSV export URL template for the City of Chicago data portal; '{}' is
# replaced by a dataset view id.
base_url = 'https://data.cityofchicago.org/api/views/{}/rows.csv?accessType=DOWNLOAD'

# (dataset view id, local file name) pairs. prep_df() treats the first entry
# as the socioeconomic table and the second as the cause-of-death table.
file_name = [('kn9c-c2s2', 'Chicago_SES.csv'),
            ('j6cj-r444','Chicago_Death.csv')]

# Fully expanded (url, local file name) pairs, derived from the template so the
# view ids are only written down once.
URLS = [(base_url.format(view_id), local_name) for view_id, local_name in file_name]

In [57]:
def download_data(url, filename):
    """Download ``url`` to ``filename`` unless that file already exists.

    Parameters
    ----------
    url : str
        Address fetched with an HTTP GET.
    filename : str
        Destination path. A bare file name is resolved against the current
        working directory.

    Raises
    ------
    requests.RequestException
        If the request fails or times out, or (as ``requests.HTTPError``) if
        the server answers with an error status. Nothing is written in that
        case, so an error page is never saved under the data file's name.
    """
    # os.path.exists also recognises names that carry a directory part, which
    # a membership test against os.listdir() cannot.
    if os.path.exists(filename):
        print('file is present, continuing without download')
        return

    response = requests.get(url, timeout=60)
    response.raise_for_status()
    # Write the raw bytes: no re-encoding with the platform default codec and
    # no newline translation of the downloaded payload.
    with open(filename, 'wb') as ofile:
        ofile.write(response.content)
     

def read_data(path, filename):
    """Load ``filename`` from directory ``path`` into a DataFrame.

    Parameters
    ----------
    path : str
        Directory that contains the file.
    filename : str
        File name; its extension (matched case-insensitively) selects the
        reader: ``.csv`` uses ``pd.read_csv``, ``.xls``/``.xlsx`` use
        ``pd.read_excel``.

    Returns
    -------
    pandas.DataFrame

    Raises
    ------
    ValueError
        If the extension is not one of the supported types. Raising here
        (instead of returning a message string) stops the caller from passing
        a non-DataFrame on to later steps.
    """
    full_path = os.path.join(path, filename)
    lowered = filename.lower()
    if lowered.endswith('.csv'):
        return pd.read_csv(full_path)
    if lowered.endswith(('.xls', '.xlsx')):
        return pd.read_excel(full_path)
    raise ValueError('unexpected file type in read_data: {!r}'.format(filename))

def build_urls(base_url, num):
    """Return ``base_url`` with its ``{}`` placeholder filled in by ``num``.

    Parameters
    ----------
    base_url : str
        Template containing a ``{}`` format field.
    num : object
        Value substituted into the template (the dataset id in this notebook).
    """
    filled_url = base_url.format(num)
    return filled_url
    
        
def parse_death(death_df):
    """Reshape the long cause-of-death table to one row per community area.

    The input is left untouched: the column rename is applied to a copy rather
    than in place, so the caller's frame keeps its original column names.

    Parameters
    ----------
    death_df : pandas.DataFrame
        Must contain the columns 'Community Area', 'Cause of Death' and
        'Average Adjusted Rate 2006 - 2010'.

    Returns
    -------
    pandas.DataFrame
        A 'Community Area Number' column followed by one column per cause of
        death holding the 'Average Adjusted Rate 2006 - 2010' value.

    Raises
    ------
    KeyError
        If there is no row for community area 0 to drop.
    """
    # Rename the key so it matches the join column used by merge_dfs().
    renamed = death_df.rename(columns={'Community Area': 'Community Area Number'})
    avg_an_death = renamed.pivot(index='Community Area Number',
                                 columns='Cause of Death',
                                 values='Average Adjusted Rate 2006 - 2010')
    # Community area 0 is the Chicago total, not an actual community area.
    return avg_an_death.drop(0, axis=0).reset_index()

def merge_dfs(SES_df, death_df):
    """Inner-join the two tables on 'Community Area Number'.

    Any column of the joined result that contains a missing value is removed
    (the original author's note says the columns dropped for this data are
    entirely empty).

    Returns
    -------
    pandas.DataFrame
        The joined frame without the columns that had missing values.
    """
    joined = SES_df.merge(death_df, how='inner', on='Community Area Number')
    return joined.dropna(axis=1)


def prep_df(base_url, file_name):
    """Download (when missing), load and merge the two source tables.

    Parameters
    ----------
    base_url : str
        URL template passed to build_urls().
    file_name : iterable of (id, local file name) pairs
        The first entry is used as the left side of the merge and the second
        as the right side. A file named 'Chicago_Death.csv' is reshaped with
        parse_death() before merging.

    Returns
    -------
    pandas.DataFrame
        Result of merge_dfs() on the first two loaded tables.

    Notes
    -----
    Files are read from the notebook-level ``path`` variable.
    """
    sources = [(build_urls(base_url, view_id), local_name)
               for view_id, local_name in file_name]

    # Fetch each file if it is not already present, then load it.
    frames = []
    for url, filename in sources:
        download_data(url, filename)
        loaded = read_data(path, filename)
        # Only the death table needs reshaping before the merge.
        needs_reshape = filename == 'Chicago_Death.csv'
        frames.append(parse_death(loaded) if needs_reshape else loaded)

    first_frame, second_frame = frames[0], frames[1]
    return merge_dfs(first_frame, second_frame)

# Download (if needed), load and merge the two datasets into the frame that the
# plotting cells read from.
use_df = prep_df(base_url, file_name)

file is present, continuing without download


In [58]:
# Preview the first rows of the merged frame.
use_df.head()

Unnamed: 0,Community Area Number,COMMUNITY AREA NAME,PERCENT OF HOUSING CROWDED,PERCENT HOUSEHOLDS BELOW POVERTY,PERCENT AGED 16+ UNEMPLOYED,PERCENT AGED 25+ WITHOUT HIGH SCHOOL DIPLOMA,PERCENT AGED UNDER 18 OR OVER 64,PER CAPITA INCOME,HARDSHIP INDEX,All Causes,...,Coronary heart disease,Diabetes-related,Firearm-related,"Injury, unintentional","Kidney disease (nephritis, nephrotic syndrome and nephrosis)",Liver disease and cirrhosis,Lung cancer,Prostate cancer in males,Stroke (cerebrovascular disease),Suicide (intentional self-harm)
0,1.0,Rogers Park,7.7,23.6,8.7,18.2,27.5,23939,39.0,895.2,...,204.6,31.4,4.8,25.7,21.3,11.5,41.7,26.3,32.3,8.4
1,2.0,West Ridge,7.8,17.2,8.8,20.8,38.5,23040,46.0,689.9,...,136.3,24.5,3.9,21.1,21.0,7.7,37.8,16.9,34.3,6.1
2,3.0,Uptown,3.8,24.0,8.9,11.8,22.2,35787,20.0,934.0,...,213.6,37.1,4.8,26.1,24.6,14.6,48.1,25.9,39.4,9.1
3,4.0,Lincoln Square,3.4,10.9,8.2,13.4,25.5,37524,17.0,698.7,...,137.9,20.3,2.9,23.9,25.5,10.4,42.9,23.7,30.6,6.1
4,5.0,North Center,0.3,7.5,5.2,4.5,26.2,57123,6.0,681.2,...,140.1,19.4,1.5,23.2,21.9,11.2,41.2,15.2,39.8,9.2


In [59]:
def point_plot(merged_df, x_val, y_val, x_lab, y_lab, plot_title):
    """Display a Bokeh scatter of column ``y_val`` against column ``x_val``.

    Parameters
    ----------
    merged_df : pandas.DataFrame
        Frame holding both columns.
    x_val, y_val : str
        Column names plotted on the x and y axes.
    x_lab, y_lab : str
        Axis labels; also used as the labels of the hover tooltip rows.
    plot_title : str
        Title shown above the figure.

    Returns
    -------
    None
        The figure is rendered with ``show()`` rather than returned.
    """
    # Hover tooltip pattern adapted from
    # https://towardsdatascience.com/data-visualization-with-bokeh-in-python-part-one-getting-started-a11655a467d4
    tooltip_rows = [(x_lab, '@x'), (y_lab, '@y')]
    hover_tool = HoverTool(tooltips=tooltip_rows)

    scatter_fig = figure(
        title=plot_title,
        x_axis_label=x_lab,
        y_axis_label=y_lab,
        tools=[hover_tool],
    )

    # Reference example:
    # https://github.com/bokeh/bokeh/blob/branch-2.3/examples/app/crossfilter/main.py
    x_values = merged_df[x_val]
    y_values = merged_df[y_val]
    scatter_fig.circle(x_values, y_values, line_width=4)

    show(scatter_fig)

# Hardship index against the coronary heart disease column, one point per
# community area. The title replaces the leftover 'A Descriptive Title'
# placeholder so the figure can be understood on its own.
point_plot(use_df,
           'HARDSHIP INDEX',
           'Coronary heart disease',
           'Hardship Index', 
           'Average Annual Deaths by Heart Disease', 
           'Coronary Heart Disease vs. Hardship Index by Chicago Community Area')

In [60]:
# Household poverty against the diabetes-related column, one point per
# community area. The title replaces the leftover 'A Descriptive Title'
# placeholder so the figure can be understood on its own.
point_plot(use_df,
           'PERCENT HOUSEHOLDS BELOW POVERTY', 
           'Diabetes-related',
           'Percent of Households in Poverty', 
           'Average Annual Diabetes-related Deaths', 
           'Diabetes-related Deaths vs. Household Poverty by Chicago Community Area')

In [43]:
# Column positions 9 onward of the merged frame are the cause-of-death columns
# (starting at 'All Causes'); they supply the dropdown options further down.
# `merged` exists only as a local inside prep_df(), so on a fresh kernel the
# notebook-level frame to read from is `use_df`.
cause_of_death = use_df.columns[9:29]
cause_of_death #intermediary output -don't forget to remove

Index(['All Causes', 'All causes in females', 'All causes in males',
       'Alzheimers disease', 'Assault (homicide)', 'Breast cancer in females',
       'Cancer (all sites)', 'Colorectal cancer', 'Coronary heart disease',
       'Diabetes-related', 'Firearm-related', 'Injury, unintentional',
       'Kidney disease (nephritis, nephrotic syndrome and nephrosis)',
       'Liver disease and cirrhosis', 'Lung cancer',
       'Prostate cancer in males', 'Stroke (cerebrovascular disease)',
       'Suicide (intentional self-harm)'],
      dtype='object')

In [47]:
@interact(cause_of_death=cause_of_death) #dropdown menu
def do_the_thing(cause_of_death=cause_of_death[0]):
    """Plot household poverty against the cause of death picked in the dropdown.

    Parameters
    ----------
    cause_of_death : str
        Name of the column of ``use_df`` to put on the y axis. Defaults to the
        first entry of the notebook-level ``cause_of_death`` index.
    """
    # `merged` is local to prep_df(); the notebook-level merged frame is
    # `use_df`, so use that to avoid a NameError on a fresh kernel.
    point_plot(use_df,
               'PERCENT HOUSEHOLDS BELOW POVERTY', 
               cause_of_death,
               'Percent of Households in Poverty',
               'Average Annual Deaths by {}'.format(cause_of_death),
               '{} vs. Household Poverty by Chicago Community Area'.format(cause_of_death))
    

interactive(children=(Dropdown(description='cause_of_death', options=('All Causes', 'All causes in females', '…