### Chicago Socioeconomic Indicators and Causes of Death by Community Area

In [20]:
import requests
import os
import pandas as pd


from bokeh.plotting import figure, show 
from bokeh.layouts import gridplot
from ipywidgets import interact, interact_manual
from bokeh.io import output_notebook
output_notebook()

from bokeh.models.widgets import Panel, Tabs
from bokeh.models import HoverTool

In [13]:
# Directory that read_data() loads the downloaded files from.
# NOTE(review): machine-specific absolute path. run()/download_data() check for and save
# files relative to the current working directory, so the two locations only agree when
# the notebook is launched from PATH -- confirm before running elsewhere.
PATH = '/Users/Sarah/Documents/GitHub/Sarah-Discussion-Notebooks/'
# (download URL, local filename) pairs. Order matters: the merge step takes the first
# entry as the SES table and the second as the death table.
URLS = [('https://data.cityofchicago.org/api/views/kn9c-c2s2/rows.csv?accessType=DOWNLOAD', 'Chicago_SES.csv'), 
        ('https://data.cityofchicago.org/api/views/j6cj-r444/rows.csv?accessType=DOWNLOAD', 'Chicago_Death.csv')]
        

In [22]:
def download_data(url, filename, timeout=60):
    """Download ``url`` and save the response body to ``filename``.

    The extension of ``filename`` is validated before any network traffic, so an
    unsupported target costs nothing. The body is written as raw bytes for both
    supported types, which keeps the server's encoding and line endings intact
    instead of round-tripping CSV text through the platform's default text encoding.

    Parameters
    ----------
    url : str
        Address to fetch with an HTTP GET.
    filename : str
        Destination path; must end in ``.csv`` or ``.xls``.
    timeout : float, optional
        Seconds ``requests`` waits on the server before giving up (default 60).

    Returns
    -------
    str or None
        ``'unexpected file type in download_data'`` when the extension is not
        supported (nothing is downloaded or written); otherwise ``None``.

    Raises
    ------
    requests.HTTPError
        If the server answers with a 4xx/5xx status. Raising before the file is
        opened means an error page is never saved under a data filename (where a
        later "file is present" check would mistake it for valid data).
    requests.exceptions.RequestException
        For connection failures and timeouts.
    """
    if not filename.endswith(('.csv', '.xls')):
        # Kept as a returned message rather than an exception so existing callers
        # that ignore the return value behave as before.
        return 'unexpected file type in download_data'

    response = requests.get(url, timeout=timeout)
    response.raise_for_status()

    with open(filename, 'wb') as ofile:
        ofile.write(response.content)

def read_data(path, filename):
    """Load ``filename`` from directory ``path`` with the pandas reader for its extension.

    ``.csv`` files are read with ``pd.read_csv`` and ``.xls`` files with
    ``pd.read_excel``. For any other extension nothing is read and the message
    ``'unexpected file type in read_data'`` is returned in place of a DataFrame.
    """
    readers = {'.csv': pd.read_csv, '.xls': pd.read_excel}
    for suffix, reader in readers.items():
        if filename.endswith(suffix):
            # Build the full path only once a supported extension is confirmed.
            return reader(os.path.join(path, filename))
    return 'unexpected file type in read_data'

def build_urls(base_url, num, file_name):
    """Fill the positional ``{}`` slots of ``base_url`` with ``num`` and ``file_name``.

    Parameters
    ----------
    base_url : str
        Template containing positional ``str.format`` fields.
    num
        Value substituted into the first field.
    file_name
        Value substituted into the second field.

    Returns
    -------
    str
        The formatted URL.
    """
    # The original passed the undefined name `file`, which raises NameError.
    return base_url.format(num, file_name)
    

def run(url, filename):
    """Download ``filename`` from ``url`` unless that file already exists.

    Existence is tested with ``os.path.isfile(filename)``. Unlike a membership test
    against ``os.listdir()``, this also works when ``filename`` contains a directory
    component, and it does not list the whole working directory for one lookup.

    Parameters
    ----------
    url : str
        Source address handed to ``download_data`` when a download is needed.
    filename : str
        Local file to look for and, if missing, to create. A bare name is resolved
        against the current working directory.
    """
    if os.path.isfile(filename):
        print('file is present, continuing without download')
    else:
        download_data(url, filename)

        
def parse_death(death_df):
    """Reshape the long cause-of-death table to one row per community area.

    The input is left untouched: the column rename happens on a copy produced by
    ``rename`` rather than in place, so re-running the cell (or reusing the raw
    frame elsewhere) sees the original column names.

    Parameters
    ----------
    death_df : pandas.DataFrame
        Long-format table with the columns 'Community Area', 'Cause of Death' and
        'Average Adjusted Rate 2006 - 2010'.

    Returns
    -------
    pandas.DataFrame
        One row per 'Community Area Number' (now a regular column) and one column
        per cause of death holding the average adjusted rate. The row for area 0
        is removed.

    Raises
    ------
    KeyError
        If no row with community area 0 exists to drop.
    ValueError
        Raised by ``pivot`` when an (area, cause) pair occurs more than once.
    """
    avg_an_death = (
        death_df
        .rename(columns={'Community Area': 'Community Area Number'})
        .pivot(index='Community Area Number', columns='Cause of Death',
               values='Average Adjusted Rate 2006 - 2010')
        .drop(0, axis=0)  # area 0 is the Chicago total, not a community area
        .reset_index()
    )
    return avg_an_death

def merge_dfs(SES_df, death_df):
    """Inner-join the two tables on 'Community Area Number' and drop NaN columns.

    Only community areas present in both inputs are kept. Afterwards every column
    that holds at least one NaN is removed; the original author noted that, for
    this dataset, the columns removed this way are completely empty.

    Returns the joined DataFrame; neither input is modified.
    """
    joined = SES_df.merge(death_df, how='inner', on='Community Area Number')
    return joined.dropna(axis=1)


# Fetch (when missing) and load every dataset in URLS order; the death table is
# reshaped to one row per community area as it is loaded.
# NOTE(review): run() looks for and saves files relative to the current working
# directory, while read_data() reads from PATH -- these agree only when the
# notebook is started from PATH.
df_contents = []
for url, filename in URLS:
    run(url, filename)
    df = read_data(PATH, filename)
    df_contents.append(parse_death(df) if filename == 'Chicago_Death.csv' else df)

# Position 0 is the SES table and position 1 the death table, following URLS.
merged = merge_dfs(df_contents[0], df_contents[1])

file is present, continuing without download
file is present, continuing without download


In [25]:
# Inspect which columns remain after the merge and the NaN-column drop.
merged.columns

Index(['Community Area Number', 'COMMUNITY AREA NAME',
       'PERCENT OF HOUSING CROWDED', 'PERCENT HOUSEHOLDS BELOW POVERTY',
       'PERCENT AGED 16+ UNEMPLOYED',
       'PERCENT AGED 25+ WITHOUT HIGH SCHOOL DIPLOMA',
       'PERCENT AGED UNDER 18 OR OVER 64', 'PER CAPITA INCOME ',
       'HARDSHIP INDEX', 'All Causes', 'All causes in females',
       'All causes in males', 'Alzheimers disease', 'Assault (homicide)',
       'Breast cancer in females', 'Cancer (all sites)', 'Colorectal cancer',
       'Coronary heart disease', 'Diabetes-related', 'Firearm-related',
       'Injury, unintentional',
       'Kidney disease (nephritis, nephrotic syndrome and nephrosis)',
       'Liver disease and cirrhosis', 'Lung cancer',
       'Prostate cancer in males', 'Stroke (cerebrovascular disease)',
       'Suicide (intentional self-harm)'],
      dtype='object')

In [36]:
def point_plot(merged_df, x_val, y_val, x_lab, y_lab, plot_title):
    """Display a Bokeh scatter of ``merged_df[y_val]`` against ``merged_df[x_val]``.

    Parameters
    ----------
    merged_df : indexable by column name (a DataFrame in this notebook)
        Source of the plotted values.
    x_val, y_val : str
        Column names used for the horizontal and vertical coordinates.
    x_lab, y_lab : str
        Axis labels; also used as the labels of the two hover-tooltip rows.
    plot_title : str
        Figure title.

    The figure is rendered with ``show``; nothing is returned.
    """
    # Hover tooltip pattern adapted from
    # https://towardsdatascience.com/data-visualization-with-bokeh-in-python-part-one-getting-started-a11655a467d4
    tooltip_rows = [(x_lab, '@x'), (y_lab, '@y')]
    hover_tool = HoverTool(tooltips=tooltip_rows)

    scatter_fig = figure(
        title=plot_title,
        x_axis_label=x_lab,
        y_axis_label=y_lab,
        tools=[hover_tool],
    )

    # See also https://github.com/bokeh/bokeh/blob/branch-2.3/examples/app/crossfilter/main.py
    x_data = merged_df[x_val]
    y_data = merged_df[y_val]
    scatter_fig.circle(x_data, y_data, line_width=4)

    show(scatter_fig)

# Hardship index vs. coronary heart disease across Chicago community areas.
# The plotted values come from the 'Average Adjusted Rate 2006 - 2010' column, so the
# y label names a rate (the previous label called it "Average Annual Deaths"), and the
# placeholder title is replaced with one that describes the figure.
point_plot(merged,
           'HARDSHIP INDEX',
           'Coronary heart disease',
           'Hardship Index',
           'Coronary Heart Disease Deaths (Average Adjusted Rate, 2006-2010)',
           'Coronary Heart Disease Mortality vs. Hardship Index by Chicago Community Area')

In [37]:
# Household poverty vs. diabetes-related deaths across Chicago community areas.
# The plotted values come from the 'Average Adjusted Rate 2006 - 2010' column, so the
# y label names a rate (the previous label called it "Average Annual ... Deaths"), and
# the placeholder title is replaced with one that describes the figure.
point_plot(merged,
           'PERCENT HOUSEHOLDS BELOW POVERTY',
           'Diabetes-related',
           'Percent of Households in Poverty',
           'Diabetes-related Deaths (Average Adjusted Rate, 2006-2010)',
           'Diabetes-related Mortality vs. Household Poverty by Chicago Community Area')

In [None]:
def my_plot(year, full_year):
    """Build a two-line Bokeh figure of complaints and officers hired by hiring year.

    Parameters
    ----------
    year : indexable by column name (presumably a DataFrame -- confirm with caller)
        Must provide 'Year_hired' and 'count'; drawn as the line labelled
        'complaints against officers hired this year'.
    full_year : same layout as ``year``
        Drawn in orange as the line labelled 'number of officers hired this year'.

    Returns
    -------
    The Bokeh figure. It is not displayed here; pass the result to ``show``.
    The original had a ``show(plot)`` after the ``return`` that could never run.
    """
    # Hover tooltip pattern adapted from
    # https://towardsdatascience.com/data-visualization-with-bokeh-in-python-part-one-getting-started-a11655a467d4
    hover = HoverTool(tooltips=[('year', '@x'), ('count', '@y')])

    plot = figure(title='Number of Complaints Compared to Number of Officers Hired',
                  x_axis_label='Year hired', y_axis_label='count', tools=[hover])

    plot.line(year['Year_hired'], year['count'], line_width=4,
              legend_label='complaints against officers hired this year')
    plot.line(full_year['Year_hired'], full_year['count'], line_width=4,
              legend_label='number of officers hired this year', color='orange')

    # If a second y-axis is ever needed, see
    # https://stackoverflow.com/questions/25199665/one-chart-with-two-different-y-axis-ranges-in-bokeh

    return plot