In [32]:
import pandas as pd
import geopandas as gpd
import json

import bokeh
from bokeh.io import output_notebook, show, push_notebook
from bokeh.models import CustomJS, ColumnDataSource
from bokeh.plotting import figure, output_file, save
from bokeh import plotting
from bokeh.models import FactorRange
from bokeh.transform import dodge
from bokeh.models import Legend
from bokeh.palettes import Category20c
from bokeh.models import Slider
from bokeh.io import show, output_file
from bokeh.io.doc import curdoc
from bokeh.layouts import column
from bokeh.models.widgets import Panel, Tabs
from bokeh.resources import CDN
from bokeh.embed import file_html

from bokeh.models import CategoricalColorMapper, HoverTool, ColumnDataSource, Panel
from bokeh.models.widgets import CheckboxGroup, Slider, RangeSlider, Tabs

from bokeh.layouts import column, row, WidgetBox
from bokeh.palettes import Category20_16

from bokeh.application.handlers import FunctionHandler
from bokeh.application import Application


from bokeh.io import save, show, output_file, output_notebook, reset_output, export_png
from bokeh.plotting import figure
from bokeh.io.doc import curdoc
from bokeh.models import (
    GeoJSONDataSource, ColumnDataSource, ColorBar, Slider, Spacer,
    HoverTool, TapTool, Panel, Tabs, Legend, Toggle, LegendItem, Button, Select
)
from bokeh.events import ButtonClick
from bokeh.palettes import brewer
from bokeh.models.callbacks import CustomJS
from bokeh.models.widgets import Div
from bokeh.layouts import widgetbox, row, column
from matplotlib import pyplot as plt
from matplotlib.colors import rgb2hex

# Data preparation

### Filter data set
We start out by doing an overall "cleanup" of the data set. First we add a datetime column with a proper format, and then we remove all entries that are not part of the focuscalls defined below. Finally, we remove all unnecessary columns. Please note that the data set must be downloaded in order for the following code to run. 

In [6]:
# Load the data set - can be found here: 
# https://data.sfgov.org/Public-Safety/Fire-Department-Calls-for-Service/nuek-vuh3
df = pd.read_csv('Fire.csv')

  interactivity=interactivity, compiler=compiler, result=result)


In [7]:
# Set up date format
format = '%m/%d/%Y %I:%M:%S %p'
df['Datetime'] = pd.to_datetime(df['Received DtTm'], format=format)
df = df.set_index(pd.DatetimeIndex(df['Datetime']))

In [8]:
# Define focus calls
focuscalls = ['Medical Incident', 'Structure Fire', 'Alarms', 'Traffic Collision', 
              'Citizen Assist / Service Call', 'Outside Fire', 'Water Rescue', 'Vehicle Fire', 
              'Gas Leak (Natural and LP Gases)', 'Electrical Hazard', 'Elevator / Escalator Rescue', 
              'Odor (Strange / Unknown)', 'Smoke Investigation (Outside)']

In [9]:
# Define neighborhoods
neighborhoods = df['Neighborhooods - Analysis Boundaries'].unique()
neighborhoods = neighborhoods[0:-1]

In [29]:
# Define districts
districts = ['Central', 'Southern', 'Bayview', 'Mission', 'Park', 'Richmond', 'Ingleside', 'Taraval',
            'Northern', 'Tenderloin']

In [10]:
# Filter the data set and drop nan's and unnecessary columns 
#cols = ['Call Number', 'Unit ID', 'Incident Number', 'Call Type', 'Call Final Disposition', 
#        'Address', 'City', 'Box', 'Final Priority', 'Call Type Group', 'Unit Type', 
#        'Neighborhooods - Analysis Boundaries',  'Location', 'Datetime', 'Supervisor Districts' ]

df = df[df['Call Type'].isin(focuscalls)]
df.dropna(how='any')
df = df[df['Neighborhooods - Analysis Boundaries'].isin(neighborhoods)]
df.dropna(how='any')

cols = ['Call Number', 'Call Type', 'Final Priority', 'Call Type Group',
        'Neighborhooods - Analysis Boundaries',  'Location', 'Datetime', 'Supervisor Districts' ]
df_fil = df[cols]
df_fil['year'] = df_fil.index.year

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  


### Dataframes for choropleth map
In this section we make 6 different dataframes for plotting on the choropleth map. Having individual dataframes is very helpful in this case, as we will be assigning e.g. bins and colors based on the values in the dataframes. 

In [11]:
# Create dataframe containing all calls grouped by neighborhood
nhood_all = df_fil.groupby(
    ['year', 'Neighborhooods - Analysis Boundaries']
).agg(
    {
    'Call Number': ['count']
    }
)

nhood_all = nhood_all.set_axis(nhood_all.columns.map('_'.join), axis=1, inplace=False)
nhood_all = nhood_all.reset_index(level=[0,1])

nhood_all.rename(columns={
    'Neighborhooods - Analysis Boundaries': 'Neighborhood',
    'Call Number_count': 'Calls',
}, inplace=True)

# save and show dataframe
nhood_all.to_csv('nhood_all.csv', index=False)
nhood_all.head()

Unnamed: 0,year,Neighborhood,Calls
0,2000,Bayview Hunters Point,9835
1,2000,Bernal Heights,3023
2,2000,Castro/Upper Market,3764
3,2000,Chinatown,3203
4,2000,Excelsior,3073


In [12]:
# Create dataframe containing all calls grouped by neighborhood and priority
nhood_priority = df_fil.groupby(
    ['year', 'Neighborhooods - Analysis Boundaries', 'Final Priority']
).agg(
    {
    'Call Number': ['count']
    }
)

nhood_priority = nhood_priority.set_axis(nhood_priority.columns.map('_'.join), axis=1, inplace=False)
nhood_priority = nhood_priority.reset_index(level=[0,1,2])

nhood_priority.rename(columns={
    'Neighborhooods - Analysis Boundaries': 'Neighborhood',
    'Call Number_count': 'Calls',
    'Final Priority': 'Priority',
}, inplace=True)

# save and show dataframe
nhood_priority.to_csv('nhood_priority.csv', index=False)
nhood_priority.head()

Unnamed: 0,year,Neighborhood,Priority,Calls
0,2000,Bayview Hunters Point,2,1199
1,2000,Bayview Hunters Point,3,8636
2,2000,Bernal Heights,2,440
3,2000,Bernal Heights,3,2583
4,2000,Castro/Upper Market,2,635


In [13]:
# Create dataframe containing all medical incident calls grouped by neighborhood
nhood_medical = df_fil.groupby(
    ['year', 'Neighborhooods - Analysis Boundaries', 'Call Type']
).agg(
    {
    'Call Number': ['count']
    }
)
nhood_medical = nhood_medical.set_axis(nhood_medical.columns.map('_'.join), axis=1, inplace=False)
nhood_medical = nhood_medical.reset_index(level=[0,1,2])

nhood_medical.rename(columns={
    'Neighborhooods - Analysis Boundaries': 'Neighborhood',
    'Call Number_count': 'Calls',
    'Call Type': 'Type'
}, inplace=True)

# save and show dataframe
nhood_medical = nhood_medical[nhood_medical['Type'] == 'Medical Incident']
nhood_medical.to_csv('nhood_medical.csv', index=False)
nhood_medical.head()

Unnamed: 0,year,Neighborhood,Type,Calls
5,2000,Bayview Hunters Point,Medical Incident,5681
16,2000,Bernal Heights,Medical Incident,2180
27,2000,Castro/Upper Market,Medical Incident,2643
38,2000,Chinatown,Medical Incident,1995
48,2000,Excelsior,Medical Incident,2017


In [14]:
# Create dataframe containing all calls grouped by district
district_all = df_fil.groupby(
    ['year', 'Supervisor Districts']
).agg(
    {
    'Call Number': ['count']
    }
)

district_all = district_all.set_axis(district_all.columns.map('_'.join), axis=1, inplace=False)
district_all = district_all.reset_index(level=[0,1])

district_all.rename(columns={
    'Supervisor Districts': 'District',
    'Call Number_count': 'Calls',
}, inplace=True)
district_all = district_all.astype({'District': 'object'})

# save and show dataframe
district_all.to_csv('district_all.csv', index=False)
district_all.head()

Unnamed: 0,year,District,Calls
0,2000,1,9599
1,2000,2,7483
2,2000,3,5457
3,2000,4,7342
4,2000,5,10294


In [15]:
# Create dataframe containing all calls grouped by district and priority
district_priority = df_fil.groupby(
    ['year', 'Supervisor Districts', 'Final Priority']
).agg(
    {
    'Call Number': ['count']
    }
)

district_priority = district_priority.set_axis(district_priority.columns.map('_'.join), axis=1, inplace=False)
district_priority = district_priority.reset_index(level=[0,1,2])

district_priority.rename(columns={
    'Supervisor Districts': 'District',
    'Call Number_count': 'Calls',
    'Final Priority': 'Priority'
}, inplace=True)
district_priority = district_priority.astype({'District': 'object'})

# save and show dataframe
district_priority.to_csv('district_priority.csv', index=False)
district_priority.head()

Unnamed: 0,year,District,Priority,Calls
0,2000,1,2,1038
1,2000,1,3,8561
2,2000,2,2,955
3,2000,2,3,6528
4,2000,3,2,743


In [16]:
# Create dataframe containing all medical incident calls grouped by district
district_medical = df_fil.groupby(
    ['year', 'Supervisor Districts', 'Call Type']
).agg(
    {
    'Call Number': ['count']
    }
)

district_medical = district_medical.set_axis(district_medical.columns.map('_'.join), axis=1, inplace=False)
district_medical = district_medical.reset_index(level=[0,1,2])

district_medical.rename(columns={
    'Supervisor Districts': 'District',
    'Call Number_count': 'Calls',
    'Call Type': 'Type'
}, inplace=True)
district_medical = district_medical.astype({'District': 'object'})
district_medical = district_medical[district_medical['Type'] == 'Medical Incident']

# save and show dataframe
district_medical.to_csv('district_medical.csv', index=False)
district_medical.head()

Unnamed: 0,year,District,Type,Calls
5,2000,1,Medical Incident,5267
17,2000,2,Medical Incident,5027
29,2000,3,Medical Incident,4079
41,2000,4,Medical Incident,4950
53,2000,5,Medical Incident,6813


### Dataframe for histogram
Next, we create the dataframe that we will be using for our interactive histogram.

In [17]:
cols = ['Call Type', 'Neighborhooods - Analysis Boundaries', 'Datetime']
df_hist = df[cols]
df_hist['year'] = df_hist.index.year
df_hist = df_hist.drop('Datetime', axis = 1)

# Take a sample of dataframe, as csv file will otherwise be too large 
df_hist = df_hist.sample(700000)

# Save and show dataframe
df_hist.to_csv('histdata.csv', index=False)
df_hist.head()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  This is separate from the ipykernel package so we can avoid doing imports until


Unnamed: 0_level_0,Call Type,Neighborhooods - Analysis Boundaries,year
Datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2006-02-12 02:38:52,Traffic Collision,Treasure Island,2006
2015-09-19 22:31:22,Medical Incident,Bayview Hunters Point,2015
2005-01-11 14:09:30,Structure Fire,Potrero Hill,2005
2009-06-19 20:16:05,Medical Incident,Mission Bay,2009
2007-12-11 01:16:12,Vehicle Fire,Inner Sunset,2007


# Choropleth map
In this section we present the code used to generate the choropleth map. We have found inspiration and help on the following sites:
- https://jimking100.github.io/2019-09-04-Post-3/ 
- https://cbouy.github.io/2019/06/09/interactive-map.html

Please note that all interactive elements only work if the code is run on a bokeh server. We have hosted the plot as an app on Heroku, and deployed it with github in order to display it on our website. The actual app can be found on https://firedepartment-sanfran.herokuapp.com/Map3.

In [36]:
nhood_all = pd.read_csv('nhood_all.csv')
nhood_priority = pd.read_csv('nhood_priority.csv')
nhood_medical = pd.read_csv('nhood_medical.csv')

district_all = pd.read_csv('district_all.csv')
district_priority = pd.read_csv('district_priority.csv')
district_medical = pd.read_csv('district_medical.csv')

In [37]:
district_all = district_all.astype({'District': 'object'})
district_priority = district_priority.astype({'District': 'object'})
district_medical = district_medical.astype({'District': 'object'})

In [38]:
nhood = gpd.read_file("Neighborhoods.geojson")
nhood = nhood.rename(columns={'nhood':'Neighborhood'}).set_geometry('geometry')
nhood.crs = {'init': 'epsg:4326'}

In [39]:
district = gpd.read_file("Districts.geojson")
district = district.rename(columns={'supervisor':'District'}).set_geometry('geometry')
district = district.drop(columns=['supname', 'supdist', 'numbertext', 'supdistpad'])
district.sort_values(by=["District"])
district = district.astype({'District': 'int32'})
district.sort_values(by=["District"], inplace=True)
district.crs = {'init': 'epsg:4326'}

In [40]:
district_all.sort_values(by=["District"], inplace=True)
district_priority2 = district_priority[district_priority["Priority"] == 2]
district_priority3 = district_priority[district_priority["Priority"] == 3]

In [41]:
# Create bins
bins = [0,10000,20000,30000,40000,60000,90000,100000]
# Create labels
bin_labels = [f'≤{bins[1]}'] + [f'{bins[i]}-{bins[i+1]}' for i in range(1,len(bins)-2)] + [f'>{bins[-2]}']

In [42]:
# Create bins
bins1 = [0,2000,5000,10000,15000,20000,30000,50000,76000]
# Create labels
bin_labels1 = [f'≤{bins1[1]}'] + [f'{bins1[i]}-{bins1[i+1]}' for i in range(1,len(bins1)-2)] + [f'>{bins1[-2]}']

In [43]:
# Assign each row to a bin
nhood_all['bin'] = pd.cut(
    nhood_all['Calls'], bins=bins1, right=True, include_lowest=True, precision=0, labels=bin_labels1,
).astype(str)

nhood_priority['bin'] = pd.cut(
    nhood_priority['Calls'], bins=bins1, right=True, include_lowest=True, precision=0, labels=bin_labels1,
).astype(str)

nhood_medical['bin'] = pd.cut(
    nhood_medical['Calls'], bins=bins1, right=True, include_lowest=True, precision=0, labels=bin_labels1,
).astype(str)

district_all['bin'] = pd.cut(
    district_all['Calls'], bins=bins, right=True, include_lowest=True, precision=0, labels=bin_labels,
).astype(str)

district_priority2['bin'] = pd.cut(
    district_priority2['Calls'], bins=bins1, right=True, include_lowest=True, precision=0, labels=bin_labels1,
).astype(str)

district_priority3['bin'] = pd.cut(
    district_priority3['Calls'], bins=bins, right=True, include_lowest=True, precision=0, labels=bin_labels,
).astype(str)

district_medical['bin'] = pd.cut(
    district_medical['Calls'], bins=bins1, right=True, include_lowest=True, precision=0, labels=bin_labels1,
).astype(str)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


In [44]:
# Merge the geographic data with fire data
nhood_a = pd.merge(nhood, nhood_all, on='Neighborhood', how='left')
nhood_p = pd.merge(nhood, nhood_priority, on='Neighborhood', how='left')
nhood_m = pd.merge(nhood, nhood_medical, on='Neighborhood', how='left')

district_a = pd.merge(district, district_all, on='District', how='left')
district_p2 = pd.merge(district, district_priority2, on='District', how='left')
district_p3 = pd.merge(district, district_priority3, on='District', how='left')
district_m = pd.merge(district, district_medical, on='District', how='left')

In [45]:
# Define a yellow to red color palette
palette = brewer['YlOrRd'][len(bins)-1]
# Reverse color order so that dark red corresponds to highest obesity
palette = palette[::-1]

palette1 = brewer['YlOrRd'][len(bins1)-1]
palette1 = palette1[::-1]

def val_to_color(value, nan_color='#d9d9d9'):
    if isinstance(value, str): return nan_color
    for i in range(1,len(bins)):
        if value <= bins[i]:
            return palette[i-1]
        
def val_to_color1(value, nan_color='#d9d9d9'):
    if isinstance(value, str): return nan_color
    for i in range(1,len(bins1)):
        if value <= bins1[i]:
            return palette1[i-1]

# Assign number of calls to a color
nhood_a['color'] = nhood_a['Calls'].apply(val_to_color1)
nhood_p['color'] = nhood_p['Calls'].apply(val_to_color1)
nhood_m['color'] = nhood_m['Calls'].apply(val_to_color1)

district_a['color'] = district_a['Calls'].apply(val_to_color)
district_m['color'] = district_m['Calls'].apply(val_to_color)
district_p2['color'] = district_p2['Calls'].apply(val_to_color1)
district_p3['color'] = district_p3['Calls'].apply(val_to_color)

In [46]:
# Assign x coordinates
def bin_to_cbar_x(value):
    if value == 'No data': return -2
    for i,b in enumerate(bin_labels):
        if value == b:
            return 5*(i+1)
        
def bin_to_cbar_x1(value):
    if value == 'No data': return -2
    for i,b in enumerate(bin_labels1):
        if value == b:
            return 5*(i+1)
        
nhood_a['cbar_x'] = nhood_a['bin'].apply(bin_to_cbar_x1)
nhood_p['cbar_x'] = nhood_p['bin'].apply(bin_to_cbar_x1)
nhood_m['cbar_x'] = nhood_m['bin'].apply(bin_to_cbar_x1)

district_a['cbar_x'] = district_a['bin'].apply(bin_to_cbar_x)
district_m['cbar_x'] = district_m['bin'].apply(bin_to_cbar_x1)
district_p2['cbar_x'] = district_p2['bin'].apply(bin_to_cbar_x1)
district_p3['cbar_x'] = district_p3['bin'].apply(bin_to_cbar_x)

# Assign width
nhood_a['cbar_w'] = nhood_a['Calls'].apply(lambda x: 5 if x == 'No data' else 4.7)
nhood_p['cbar_w'] = nhood_p['Calls'].apply(lambda x: 5 if x == 'No data' else 4.7)
nhood_m['cbar_w'] = nhood_m['Calls'].apply(lambda x: 5 if x == 'No data' else 4.7)

district_a['cbar_w'] = district_a['Calls'].apply(lambda x: 5 if x == 'No data' else 4.7)
district_m['cbar_w'] = district_m['Calls'].apply(lambda x: 5 if x == 'No data' else 4.7)
district_p2['cbar_w'] = district_p2['Calls'].apply(lambda x: 5 if x == 'No data' else 4.7)
district_p3['cbar_w'] = district_p3['Calls'].apply(lambda x: 5 if x == 'No data' else 4.7)

In [47]:
# Create a function the returns json_data for the year selected by the user
def json_data(selectedYear):
    yr = selectedYear
    cr = select.value
    
    # Pull selected year and priority
    if cr == "High priority calls":
        df = nhood_p[(nhood_p['year'] == yr) & (nhood_p['Priority'] == 3)]
    elif cr == "Low priority calls":
        df = nhood_p[(nhood_p['year'] == yr) & (nhood_p['Priority'] == 2)]
    elif cr == "Medical incident calls":
        df = nhood_m[nhood_m['year'] == yr]
    elif cr == "All calls":
        df = nhood_a[nhood_a['year'] == yr]

    # Convert to json
    df_json = json.loads(df.to_json())

    # Convert to json preferred string-like object 
    json_data = json.dumps(df_json)
    return json_data

In [48]:
# Create a function the returns json_data for the year selected by the user
def json_data1(selectedYear):
    yr = selectedYear
    cr = select1.value
    
    # Pull selected year and priority
    if cr == "High priority calls":
        df = district_p3[district_p3['year'] == yr]
    elif cr == "Low priority calls":
        df = district_p2[district_p2['year'] == yr]
    elif cr == "Medical incident calls":
        df = district_m[district_m['year'] == yr]
    elif cr == "All calls":
        df = district_a[district_a['year'] == yr]

    # Convert to json
    df_json = json.loads(df.to_json())

    # Convert to json preferred string-like object 
    json_data1 = json.dumps(df_json)
    return json_data1

In [49]:
# Hover tool for the map
map_hover = HoverTool(tooltips=[
    ('Neighborhood','@Neighborhood'),
    ('Number of calls', '@All calls')
])

select = Select(title='Select criteria', value='All calls', options=['All calls', 'High priority calls',
                                                                    'Low priority calls', 'Medical incident calls'])
select1 = Select(title='Select criteria', value='All calls', options=['All calls', 'High priority calls',
                                                                    'Low priority calls', 'Medical incident calls'])

In [50]:
# Input sources
nhood_a.sort_values(by=["Neighborhood","year"], inplace=True)
nhood_p.sort_values(by=["Neighborhood","year"], inplace=True)
nhood_m.sort_values(by=["Neighborhood","year"], inplace=True)

# Source that will contain all necessary data for the map
nhood_geo = GeoJSONDataSource(geojson=nhood_a.to_json())
nhood_src = GeoJSONDataSource(geojson=json_data(2010))
district_geo = GeoJSONDataSource(geojson=district_a.to_json())
district_src = GeoJSONDataSource(geojson=json_data1(2010))

In [51]:
# Create map with neighborhoods
def make_nmap():
    cr = select.value
    
    p = figure(
        title = cr + ' to fire department by neighborhood in San Francisco in 2010',
        plot_height=700 , plot_width=650,
        toolbar_location=None, tools="tap,pan,wheel_zoom,box_zoom,save,reset", toolbar_sticky=False,
        active_scroll="wheel_zoom",
    )
    p.title.text_font_size = '12pt'
    p.xgrid.grid_line_color = None
    p.ygrid.grid_line_color = None
    p.axis.visible = False
    
    map_hover = HoverTool(tooltips=[
    ('Neighborhood','@Neighborhood'),
    ('Number of calls', '@Calls')
    ])

    # Add hover tool
    p.add_tools(map_hover)

    # Add patches (countries) to the figure
    patches = p.patches(
        'xs','ys', source=nhood_src,
        fill_color='color',
        line_color='black', line_width=0.25, fill_alpha=1,
        hover_fill_color='color',
    )
    # outline when we hover over a country
    patches.hover_glyph.line_color = '#3bdd9d'
    patches.hover_glyph.line_width = 3
    patches.nonselection_glyph = None
    
    return p

In [52]:
# Create map with districts 
def make_dmap():
    cr = select1.value
    
    f = figure(
        title = cr + ' to fire department by district in San Francisco in 2010',
        plot_height=700 , plot_width=650,
        toolbar_location=None, tools="tap,pan,wheel_zoom,box_zoom,save,reset", toolbar_sticky=False,
        active_scroll="wheel_zoom",
    )
    f.title.text_font_size = '12pt'
    f.xgrid.grid_line_color = None
    f.ygrid.grid_line_color = None
    f.axis.visible = False
    
    map_hover = HoverTool(tooltips=[
    ('District','@District'),
    ('Number of calls', '@Calls')
    ])

    # Add hover tool
    f.add_tools(map_hover)

    # Add patches (countries) to the figure
    patches = f.patches(
        'xs','ys', source=district_src,
        fill_color='color',
        line_color='black', line_width=0.25, fill_alpha=1,
        hover_fill_color='color',
    )
    # outline when we hover over a country
    patches.hover_glyph.line_color = '#3bdd9d'
    patches.hover_glyph.line_width = 3
    patches.nonselection_glyph = None
    
    return f

In [53]:
# Create interactive colorbar for the neighborhood map
def make_nbar():
    p_bar = figure(
        title=None, plot_height=400 , plot_width=130,
        tools="tap", toolbar_location=None, match_aspect=True,
    )
    p_bar.xgrid.grid_line_color = None
    p_bar.ygrid.grid_line_color = None
    p_bar.outline_line_color = None
    p_bar.xaxis.visible = False 
    
    cr = select.value
    if cr == "All calls":
        df = nhood_a
    elif cr == "High priority calls" or cr == "Low priority calls":
        df = nhood_p
    elif cr == "Medical incident calls":
        df = nhood_m

    # set the title and ticks of the colorbar
    p_bar.yaxis.axis_label = "Number of calls"
    p_bar.yaxis.ticker = sorted(df['cbar_x'].unique())
    p_bar.yaxis.major_label_overrides = dict([(i[0],i[1]) for i in df.groupby(['cbar_x','bin']).describe().index])
    p_bar.yaxis.axis_label_text_font_size = "10pt"
    p_bar.yaxis.major_label_text_font_size = "8pt"

    # activate the hover but hide tooltips
    hover_bar = HoverTool(tooltips=None)
    p_bar.add_tools(hover_bar)

    # plot the rectangles for the colorbar
    cbar = p_bar.rect(x=0, y='cbar_x', width=1.7, height='cbar_w',
        color='color', source=nhood_src,
        hover_line_color='#3bdd9d', hover_fill_color='color')

    # outline when hovering over the colorbar legend
    cbar.hover_glyph.line_width = 4
    cbar.nonselection_glyph = None
    
    return p_bar

In [54]:
# Create interactive colorbar for the district map
def make_dbar():
    f_bar = figure(
        title=None, plot_height=400 , plot_width=130,
        tools="tap", toolbar_location=None, match_aspect=True,
    )
    f_bar.xgrid.grid_line_color = None
    f_bar.ygrid.grid_line_color = None
    f_bar.outline_line_color = None
    f_bar.xaxis.visible = False 
    
    cr = select1.value
    if cr == "All calls":
        df = district_a
    elif cr == "High priority calls":
        df = district_p3
    elif cr == "Low priority calls":
        df = district_p2
    elif cr == "Medical incident calls":
        df = district_m

    # set the title and ticks of the colorbar
    f_bar.yaxis.axis_label = "Number of calls"
    f_bar.yaxis.ticker = sorted(df['cbar_x'].unique())
    f_bar.yaxis.major_label_overrides = dict([(i[0],i[1]) for i in df.groupby(['cbar_x','bin']).describe().index])
    f_bar.yaxis.axis_label_text_font_size = "10pt"
    f_bar.yaxis.major_label_text_font_size = "8pt"

    # activate the hover but hide tooltips
    hover_bar = HoverTool(tooltips=None)
    f_bar.add_tools(hover_bar)

    # plot the rectangles for the colorbar
    cbar = f_bar.rect(x=0, y='cbar_x', width=1.7, height='cbar_w',
        color='color', source=district_src,
        hover_line_color='#3bdd9d', hover_fill_color='color')

    # outline when hovering over the colorbar legend
    cbar.hover_glyph.line_width = 4
    cbar.nonselection_glyph = None
    
    return f_bar

In [55]:
def callback_sli(attr, old, new):
    # The input yr is the year selected from the slider
    yr = slider.value
    cr = select.value

    new_data = json_data(yr)
    
    # Update the data
    p.title.text = cr + " to fire department by neighborhood in San Francisco in " + str(yr)
    nhood_src.geojson = new_data

In [56]:
def callback_sli1(attr, old, new):
    # The input yr is the year selected from the slider
    yr = slider1.value
    cr = select1.value

    new_data = json_data1(yr)
    
    # Update the data
    f.title.text = cr + " to fire department by district in San Francisco in " + str(yr)
    district_src.geojson = new_data

In [57]:
def callback_sel(attr, old, new):
    yr = slider.value
    cr = select.value

    new_data = json_data(yr)
    nhood_src.geojson = new_data

    p = make_nmap()
    p_bar = make_nbar()
    
    p.title.text = cr + " to fire department by neighborhood in San Francisco"

    tab_n = Panel(title="Neighborhoods",
    child=row(
        p_bar, 
        column(p, 
            row(widgetbox(slider),
            widgetbox(select)))
    ))

    tabs = Tabs(tabs=[ tab_n, tab_d ])
    
    footer = Div(text="""
    Data: <a href="https://data.sfgov.org/Public-Safety/Fire-Department-Calls-for-Service/nuek-vuh3"> 
    Fire Department Calls for Service</a>""")
    
    layout = column(tabs, footer)
    curdoc().clear()
    curdoc().add_root(layout)

In [58]:
def callback_sel1(attr, old, new):
    yr = slider1.value
    cr = select1.value

    new_data = json_data1(yr)
    district_src.geojson = new_data

    f = make_dmap()
    f_bar = make_dbar()
    
    f.title.text = cr + " to fire department by district in San Francisco"

    tab_d = Panel(title="Districts",
    child=row(
        f_bar, 
        column(f, 
            row(widgetbox(slider1),
            widgetbox(select1)))
    ))

    tabs = Tabs(tabs=[ tab_n, tab_d ])
    tabs.active=1
    
    footer = Div(text="""
    Data: <a href="https://data.sfgov.org/Public-Safety/Fire-Department-Calls-for-Service/nuek-vuh3"> 
    Fire Department Calls for Service</a>""")
    
    layout = column(tabs, footer)
    curdoc().clear()
    curdoc().add_root(layout)

In [59]:
nhood_geo = GeoJSONDataSource(geojson=nhood_a.to_json())
nhood_src = GeoJSONDataSource(geojson=json_data(2010))

district_geo = GeoJSONDataSource(geojson=district_a.to_json())
district_src = GeoJSONDataSource(geojson=json_data1(2010))

slider = Slider(start=2000, end=2019, value=2010, step=1, title="Year")
slider1 = Slider(start=2000, end=2019, value=2010, step=1, title="Year")

slider.on_change('value', callback_sli)
slider1.on_change('value', callback_sli1)

select.on_change('value', callback_sel)
select1.on_change('value', callback_sel1)

p = make_nmap()
p_bar = make_nbar()

tab_n = Panel(title="Neighborhoods",
    child=row(
        p_bar, 
        column(p,
            row(widgetbox(slider),
            widgetbox(select)))
    ))

f = make_dmap()
f_bar = make_dbar()

tab_d = Panel(title="Districts",
    child=row(
        f_bar, 
        column(f, 
            row(widgetbox(slider1),
            widgetbox(select1)))
    ))

tabs = Tabs(tabs=[ tab_n, tab_d])





In [60]:
# Add layout to current document
footer = Div(text="""
Data: <a href="https://data.sfgov.org/Public-Safety/Fire-Department-Calls-for-Service/nuek-vuh3"> Fire Department Calls for Service</a>
""")
layout = column(tabs, footer)
curdoc().add_root(layout)

# Interactive histogram
In this section we present the code used to generate the interactive histogram. We are aiming to make a plot similar to the one we made during week 8. 

Please note that the interactive elements only work if the code is run on a bokeh server. We have hosted the plot as an app on Heroku, and deployed it with github in order to display it on our website. The actual app can be found on https://histogram-sanfran.herokuapp.com/Final-histogram.

In [18]:
histdata = pd.read_csv('histdata.csv')

In [19]:
def make_dataset(df1, year = 2000):

    total = df1['Call Type'].value_counts()
    df_group = df1.groupby(['Neighborhooods - Analysis Boundaries'])

    l = []
    for g in df_group.groups:
        tmp = df_group.get_group(g)
        l.append(tmp["Call Type"].value_counts()/total)

    df_l =  pd.DataFrame(l)
    df_int = pd.DataFrame(columns = focuscalls)
    df_norm = pd.concat([df_int, df_l])
    df_norm.insert(loc=0, column='Neighborhoods', value=neighborhoods)

    return ColumnDataSource(df_norm)

In [20]:
def style(p):
    # Title 
    p.title.align = 'center'
    p.title.text_font_size = '12pt'
    p.title.text_font_style = 'bold'

    # Axis titles
    p.xaxis.axis_label_text_font_size = '10pt'
    p.xaxis.axis_label_text_font_style = 'bold'
    p.yaxis.axis_label_text_font_size = '10pt'
    p.yaxis.axis_label_text_font_style = 'bold'

    # Tick labels
    p.xaxis.major_label_text_font_size = '10pt'
    p.yaxis.major_label_text_font_size = '10pt'

    return p

In [21]:
def make_plot1(src):
    p1 = figure(title = "Calls per Neighborhood", plot_width=900, plot_height = 600, x_axis_label = "Neighborhood", 
                y_axis_label = "Relative frequency", x_range = FactorRange(factors=neighborhoods))
    p1.xaxis.major_label_orientation = 1

    bar_colors=Category20c[len(focuscalls)]
    bar = {}
    items = []
    for indx, i in enumerate(focuscalls):
        bar[i] = p1.vbar(x='Neighborhoods',  top = i, source = src, color = bar_colors[indx], 
                    width = 0.7, alpha = 0.8, muted_alpha = 0.03, muted=True) 
        items.append((i, [bar[i]]))

    p1 = style(p1)
    legend = Legend(items=items, click_policy="mute")
    p1.add_layout(legend, "left") 

    return p1

In [22]:
def make_plot2(df):
    histdata = df[df['Call Type'].isin(sorted(focuscalls))]
    
    # Count total number of each call type
    total = df['Call Type'].value_counts()
    df_group = histdata.groupby(['Neighborhooods - Analysis Boundaries'])

    l = []
    for g in df_group.groups:
        tmp = df_group.get_group(g)
        l.append(tmp["Call Type"].value_counts()/total)

    df_norm = pd.DataFrame(l)
    df_norm.insert(loc=0, column='Neighborhoods', value=neighborhoods)

    src = ColumnDataSource(df_norm)

    p2 = figure(title = "Calls per Neighborhood", plot_width=900, plot_height = 600, x_axis_label = "Neighborhood", 
                y_axis_label = "Relative frequency", x_range = FactorRange(factors=neighborhoods))
    p2.xaxis.major_label_orientation = 1

    bar_colors=Category20c[len(focuscalls)]
    bar = {} 
    items = []
    for indx,i in enumerate(focuscalls):
        bar[i] = p2.vbar(x='Neighborhoods', top = i, source = src, color = bar_colors[indx], 
                width = 0.7, muted_alpha = 0.03, alpha = 0.7, muted=True) 
        items.append((i, [bar[i]]))

    p2 = style(p2)
    legend = Legend(items=items, click_policy="mute")
    p2.add_layout(legend, "left")

    return p2

In [23]:
# Update function that accounts for all 3 controls

def update(attr, old, new):

    # Change year to selected value
    year = year_select.value

    df_go = histdata[histdatadf_fil['year'] == year]

    # Create new ColumnDataSource
    new_src = make_dataset(df_go, year = year)

    # Update the data on the plot
    src.data.update(new_src.data)

In [24]:
# Make slider
year_select = Slider(start=histdata['year'].min(), end=histdata['year'].max(), step=1, value=2000, title="Year")
year_select.on_change('value', update)
year_start = year_select.value

initial_df = histdata[histdata['year'] == year_select.value]
src = make_dataset(initial_df, year = year_start)

# Make plots
p1 = make_plot1(src)
p2 = make_plot2(histdata)

# Put controls in a single element
controls = bokeh.models.Column(year_select)

# Create a row layout
layout1 = column(p1, controls)
layout2 = column(p2)

# Make tabs with the layouts 
tab1 = Panel(child=layout1, title = 'Call types pr. year')
tab2 = Panel(child=layout2, title = 'Call types all time')
tabs = Tabs(tabs=[tab1, tab2])

In [25]:
# Add layout to the current document
layout = column(tabs)
curdoc().add_root(layout)