In [1]:
import pandas as pd
import geopandas as gpd
import json

import bokeh
from bokeh import plotting
from bokeh.models import FactorRange
from bokeh.palettes import Category20c
from bokeh.layouts import column, row, widgetbox
from bokeh.io import save, show, output_file, output_notebook, reset_output, export_png
from bokeh.plotting import figure
from bokeh.io.doc import curdoc
from bokeh.models import (
    GeoJSONDataSource, ColumnDataSource, ColorBar, Slider, Spacer,
    HoverTool, TapTool, Panel, Tabs, Legend, Toggle, LegendItem, Button, Select, CategoricalColorMapper
)
from bokeh.palettes import brewer
from bokeh.models.widgets import Div
from matplotlib import pyplot as plt
from matplotlib.colors import rgb2hex

# Interactive histogram
In this section we present the code used to generate the interactive histogram. The aim is a plot similar to the one we made during week 8: for each neighborhood, it shows that neighborhood's relative share of every call type.

Please note that the interactive elements only work when the code is run on a Bokeh server. We host the plot as an app on Heroku, deployed from GitHub, so that it can be displayed on our website. The live app can be found at https://histogram-sanfran.herokuapp.com/Final-histogram.

In [2]:
# Load histdata.csv and keep only the rows whose 'year' lies in 2001..2019
# (both ends inclusive).
histdata = (
    pd.read_csv('histdata.csv')
    .loc[lambda calls: calls['year'].between(2001, 2019)]
)

In [3]:
# Neighborhoods shown on the x-axis, in order of first appearance in the data.
# Missing values are dropped first: groupby() discards NaN keys by default, so
# a NaN entry could never be looked up with get_group(), and the categorical
# x-range of the plots is built from this list.
neighborhoods = (
    histdata['Neighborhooods - Analysis Boundaries'].dropna().unique().tolist()
)

# Call types that get their own bar series and legend entry in the plots.
focuscalls = ['Medical Incident', 'Structure Fire', 'Alarms', 'Traffic Collision', 
              'Citizen Assist / Service Call', 'Outside Fire', 'Water Rescue', 'Vehicle Fire', 
              'Gas Leak (Natural and LP Gases)', 'Electrical Hazard', 'Elevator / Escalator Rescue', 
              'Odor (Strange / Unknown)', 'Smoke Investigation (Outside)', 'Other']

In [4]:
def make_dataset(df1, year = 2000):
    """Build the plot data: each neighborhood's share of every call type.

    For every entry of the module-level ``neighborhoods`` list, the number of
    calls of each type in that neighborhood is divided by the total number of
    calls of that type in ``df1``.

    Parameters
    ----------
    df1 : pandas.DataFrame
        Rows to summarise. Must contain the columns 'Call Type' and
        'Neighborhooods - Analysis Boundaries'.
    year : int, optional
        Not used by the computation; kept so that existing callers passing
        ``year=...`` keep working.

    Returns
    -------
    ColumnDataSource
        One row per entry of ``neighborhoods`` (same order), with a
        'Neighborhoods' column followed by one column per call type. Every
        entry of ``focuscalls`` always has a column; call types or
        neighborhoods without data hold NaN.
    """
    hood_col = 'Neighborhooods - Analysis Boundaries'

    total = df1['Call Type'].value_counts()
    # Group by the plain column name (not a one-element list) so that scalar
    # keys remain valid for get_group() on recent pandas versions.
    calls_by_hood = df1.groupby(hood_col)['Call Type']
    no_calls = df1['Call Type'].iloc[:0]

    shares = []
    for hood in neighborhoods:
        # get_group() raises KeyError for a neighborhood that has no rows in
        # df1 (e.g. no calls in the selected year); use an empty selection
        # instead so that the neighborhood simply gets an all-NaN row.
        if hood in calls_by_hood.groups:
            calls = calls_by_hood.get_group(hood)
        else:
            calls = no_calls
        shares.append(calls.value_counts() / total)

    df_l = pd.DataFrame(shares)

    # Focus call types first (created as NaN columns when absent from df1, so
    # the plotted columns always exist), then any remaining call types.
    extra_calls = [call for call in df_l.columns if call not in focuscalls]
    df_norm = df_l.reindex(columns=list(focuscalls) + extra_calls)
    df_norm.insert(loc=0, column='Neighborhoods', value=neighborhoods)

    return ColumnDataSource(df_norm)

In [5]:
def style(p):
    """Apply the shared text styling to figure ``p`` and return the same object.

    The title is centred, bold and 12pt; axis titles are bold 10pt; tick
    labels are 10pt.
    """
    # Title
    heading = p.title
    heading.align = 'center'
    heading.text_font_size = '12pt'
    heading.text_font_style = 'bold'

    # Axis titles and tick labels, identical for both axes
    for axis in (p.xaxis, p.yaxis):
        axis.axis_label_text_font_size = '10pt'
        axis.axis_label_text_font_style = 'bold'
        axis.major_label_text_font_size = '10pt'

    return p

In [6]:
def make_plot1(src):
    """Build the bar chart that is driven by the data source ``src``.

    One bar series is drawn per entry of ``focuscalls``; the series reads its
    heights from the column of that name in ``src`` and its x positions from
    the 'Neighborhoods' column. All series are created muted, and the legend
    (placed to the left of the plot) uses the "mute" click policy.

    Returns the styled figure.
    """
    fig = figure(
        title="Calls per Neighborhood",
        plot_width=900,
        plot_height=600,
        x_axis_label="Neighborhood",
        y_axis_label="Relative frequency",
        x_range=FactorRange(factors=neighborhoods),
    )
    fig.xaxis.major_label_orientation = 1

    # The palette is picked by size, so it holds exactly one colour per call type.
    palette = Category20c[len(focuscalls)]

    legend_entries = []
    for call_type, shade in zip(focuscalls, palette):
        renderer = fig.vbar(
            x='Neighborhoods',
            top=call_type,
            source=src,
            color=shade,
            width=0.7,
            alpha=0.8,
            muted_alpha=0.03,
            muted=True,
        )
        legend_entries.append((call_type, [renderer]))

    fig = style(fig)
    fig.add_layout(Legend(items=legend_entries, click_policy="mute"), "left")

    return fig

In [7]:
def make_plot2(df):
    """Build the bar chart over all rows of ``df`` (no year selection).

    For every entry of the module-level ``neighborhoods`` list, the number of
    calls of each focus call type in that neighborhood is divided by the total
    number of calls of that type in ``df``. One bar series is drawn per entry
    of ``focuscalls``; all series are created muted and the legend (placed to
    the left of the plot) uses the "mute" click policy.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns 'Call Type' and
        'Neighborhooods - Analysis Boundaries'.

    Returns
    -------
    The styled figure.
    """
    hood_col = 'Neighborhooods - Analysis Boundaries'

    # Only the focus call types are counted per neighborhood ...
    focus_df = df[df['Call Type'].isin(focuscalls)]

    # ... while the totals per call type come from the full frame.
    total = df['Call Type'].value_counts()

    # Group by the plain column name (not a one-element list) so that scalar
    # keys remain valid for get_group() on recent pandas versions.
    calls_by_hood = focus_df.groupby(hood_col)['Call Type']
    no_calls = focus_df['Call Type'].iloc[:0]

    shares = []
    for hood in neighborhoods:
        # get_group() raises KeyError for a neighborhood without any focus
        # call; use an empty selection so it gets an all-NaN row instead.
        if hood in calls_by_hood.groups:
            calls = calls_by_hood.get_group(hood)
        else:
            calls = no_calls
        shares.append(calls.value_counts() / total)

    df_norm = pd.DataFrame(shares)

    # Guarantee a column for every focus call type (NaN when the type does not
    # occur in df), because each of them is referenced by a bar series below.
    extra_calls = [call for call in df_norm.columns if call not in focuscalls]
    df_norm = df_norm.reindex(columns=list(focuscalls) + extra_calls)
    df_norm.insert(loc=0, column='Neighborhoods', value=neighborhoods)

    src = ColumnDataSource(df_norm)

    p2 = figure(title = "Calls per Neighborhood", plot_width=900, plot_height = 600, x_axis_label = "Neighborhood", 
                y_axis_label = "Relative frequency", x_range = FactorRange(factors=neighborhoods))
    p2.xaxis.major_label_orientation = 1

    # The palette is picked by size, so it holds one colour per call type.
    bar_colors = Category20c[len(focuscalls)]
    items = []
    for indx, call_type in enumerate(focuscalls):
        renderer = p2.vbar(x='Neighborhoods', top = call_type, source = src, color = bar_colors[indx], 
                           width = 0.7, muted_alpha = 0.03, alpha = 0.7, muted=True)
        items.append((call_type, [renderer]))

    p2 = style(p2)
    legend = Legend(items=items, click_policy="mute")
    p2.add_layout(legend, "left")

    return p2

In [8]:
# Callback for the year slider: recompute the plot data for the selected year

def update(attr, old, new):
    """Refresh the shared data source ``src`` after the slider value changed.

    The arguments follow the signature used for ``on_change`` callbacks and are
    not read here; the selected year is taken from ``year_select`` directly.
    """
    selected_year = year_select.value

    # Rows of the selected year only
    calls_in_year = histdata.loc[histdata['year'] == selected_year]

    # Build a fresh ColumnDataSource and copy its columns into the one the
    # plot is already attached to
    refreshed = make_dataset(calls_in_year, year = selected_year)
    src.data.update(refreshed.data)

In [11]:
# Make slider. Both ends of the range and the initial value are taken from the
# years present in the data (as plain Python ints), so the slider cannot start
# on, or be dragged to, a year outside the data's range.
first_year = int(histdata['year'].min())
last_year = int(histdata['year'].max())
year_select = Slider(start=first_year, end=last_year, step=1, value=first_year, title="Year")
year_select.on_change('value', update)
year_start = year_select.value

# Data source shared by the per-year plot and the slider callback
initial_df = histdata[histdata['year'] == year_start]
src = make_dataset(initial_df, year = year_start)

# Make plots: p1 follows the slider, p2 covers all years at once
p1 = make_plot1(src)
p2 = make_plot2(histdata)

# Put controls in a single element
controls = bokeh.models.Column(year_select)

# Stack each plot with its controls (only the per-year plot has any)
layout1 = column(p1, controls)
layout2 = column(p2)

# Make tabs with the layouts 
tab1 = Panel(child=layout1, title = 'Call types pr. year')
tab2 = Panel(child=layout2, title = 'Call types all time')
tabs = Tabs(tabs=[tab1, tab2])

In [12]:
# Wrap the tabs in a column and register it as a root of the current document
layout = column(tabs)
document = curdoc()
document.add_root(layout)