In [1]:
import pandas as pd
import geopandas as gpd
import json

import bokeh
from bokeh import plotting
from bokeh.models import FactorRange
from bokeh.palettes import Category20c
from bokeh.layouts import column, row, widgetbox
from bokeh.io import save, show, output_file, output_notebook, reset_output, export_png
from bokeh.plotting import figure
from bokeh.io.doc import curdoc
from bokeh.models import (
    GeoJSONDataSource, ColumnDataSource, ColorBar, Slider, Spacer,
    HoverTool, TapTool, Panel, Tabs, Legend, Toggle, LegendItem, Button, Select, CategoricalColorMapper
)
from bokeh.palettes import brewer
from bokeh.models.widgets import Div
from matplotlib import pyplot as plt
from matplotlib.colors import rgb2hex

import warnings
from pandas.core.common import SettingWithCopyWarning

from bokeh.application.handlers import FunctionHandler
from bokeh.application import Application

# Configure Bokeh so that subsequent show() calls render inline in the notebook.
output_notebook()

# Interactive histogram
In this section we present the code used to generate the interactive histogram that shows the distribution of calls by hour of day, day of week, month and year. We aim to make a plot similar to the one we made during week 8. Please note that this code is very similar to the code we used to generate the interactive histogram that shows the distribution of call types across neighborhoods. However, because this plot does not use widgets such as a slider, it can simply be saved as an HTML file and uploaded to the website.

In [2]:
# Read the call records used by the time histograms from histdata.csv.
df_fil = pd.read_csv(filepath_or_buffer='histdata.csv')

In [3]:
# Keep only the rows whose year lies in 2001..2019 (both ends inclusive).
df_fil = df_fil[df_fil['year'].between(2001, 2019)]

In [4]:
# Distinct values of the neighborhood column, in order of first appearance.
# NOTE(review): the column name is spelled with three o's -- presumably this
# matches the CSV header exactly; confirm before "fixing" it.
neighborhoods = df_fil['Neighborhooods - Analysis Boundaries'].unique().tolist()

# Axis categories for the four histograms.
month = 'Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec'.split()
day = 'Mon Tue Wed Thu Fri Sat Sun'.split()
hour = ['{:02d}.00'.format(h) for h in range(24)]   # '00.00' ... '23.00'
year = [str(y) for y in range(2001, 2020)]          # '2001' ... '2019'

# Call types that get their own bar series (and legend entry) in every plot.
focuscalls = [
    'Medical Incident',
    'Structure Fire',
    'Alarms',
    'Traffic Collision',
    'Other',
    'Citizen Assist / Service Call',
    'Outside Fire',
    'Water Rescue',
    'Vehicle Fire',
    'Gas Leak (Natural and LP Gases)',
    'Electrical Hazard',
    'Elevator / Escalator Rescue',
    'Odor (Strange / Unknown)',
    'Smoke Investigation (Outside)',
]

In [5]:
def make_plot_hour(df1):
    """Build the "Calls per hour" bar chart.

    One bar series is drawn for each call type in ``focuscalls``. The height
    of a bar is the number of calls of that type in the given hour divided by
    the total number of calls of that type in ``df1``.

    Args:
        df1: DataFrame with at least the columns ``'hour'`` and ``'Call Type'``.

    Returns:
        The Bokeh figure, with a click-to-mute legend placed on the left.

    Raises:
        ValueError: If ``df1['hour']`` does not contain exactly ``len(hour)``
            distinct values; the axis labels are attached by position, so the
            counts must match.
    """
    # Total number of calls per call type: the denominator of the normalisation.
    total = df1['Call Type'].value_counts()

    # One Series of normalised counts per hour. groupby yields the groups in
    # sorted key order and the labels in `hour` are attached by position below.
    # NOTE(review): this assumes the sorted values of df1['hour'] line up with
    # the order of the labels in `hour` -- confirm against the data.
    rows = [grp['Call Type'].value_counts() / total
            for _, grp in df1.groupby('hour')]
    if len(rows) != len(hour):
        raise ValueError(
            "column 'hour' has {} distinct values but {} axis labels are defined"
            .format(len(rows), len(hour)))

    # Keep exactly the plotted call types, in legend order; a call type that
    # never occurs in df1 becomes an all-NaN column (no bars are drawn for it).
    df_norm = pd.DataFrame(rows).reindex(columns=focuscalls).reset_index(drop=True)
    df_norm.insert(loc=0, column='Hour', value=hour)

    src = ColumnDataSource(df_norm)

    p1 = figure(title="Calls per hour", plot_width=900, plot_height=600,
                x_axis_label="Hour of day", y_axis_label="Relative frequency",
                x_range=FactorRange(factors=hour))
    p1.xaxis.major_label_orientation = 1

    # One palette colour per call type.
    bar_colors = Category20c[len(focuscalls)]
    items = []
    for call_type, color in zip(focuscalls, bar_colors):
        # Every series starts muted (almost transparent); clicking its legend
        # entry toggles it.
        renderer = p1.vbar(x='Hour', top=call_type, source=src, color=color,
                           width=0.7, alpha=0.8, muted_alpha=0.03, muted=True)
        items.append((call_type, [renderer]))

    p1 = style(p1)

    legend = Legend(items=items, click_policy="mute")
    p1.add_layout(legend, "left")

    return p1

In [6]:
def make_plot_day(df1):
    """Build the "Calls per day" bar chart.

    One bar series is drawn for each call type in ``focuscalls``. The height
    of a bar is the number of calls of that type on the given weekday divided
    by the total number of calls of that type in ``df1``.

    Args:
        df1: DataFrame with at least the columns ``'day'`` and ``'Call Type'``.

    Returns:
        The Bokeh figure, with a click-to-mute legend placed on the left.

    Raises:
        ValueError: If ``df1['day']`` does not contain exactly ``len(day)``
            distinct values; the axis labels are attached by position, so the
            counts must match.
    """
    # Total number of calls per call type: the denominator of the normalisation.
    total = df1['Call Type'].value_counts()

    # One Series of normalised counts per weekday. groupby yields the groups in
    # sorted key order and the labels in `day` are attached by position below.
    # NOTE(review): this assumes the sorted values of df1['day'] correspond to
    # Mon..Sun in that order (e.g. a numeric 0..6 encoding). If the column holds
    # weekday names, alphabetical sorting would mislabel the bars -- confirm.
    rows = [grp['Call Type'].value_counts() / total
            for _, grp in df1.groupby('day')]
    if len(rows) != len(day):
        raise ValueError(
            "column 'day' has {} distinct values but {} axis labels are defined"
            .format(len(rows), len(day)))

    # Keep exactly the plotted call types, in legend order; a call type that
    # never occurs in df1 becomes an all-NaN column (no bars are drawn for it).
    df_norm = pd.DataFrame(rows).reindex(columns=focuscalls).reset_index(drop=True)
    df_norm.insert(loc=0, column='Day', value=day)

    src = ColumnDataSource(df_norm)

    p1 = figure(title="Calls per day", plot_width=900, plot_height=600,
                x_axis_label="Day of week", y_axis_label="Relative frequency",
                x_range=FactorRange(factors=day))
    p1.xaxis.major_label_orientation = 1

    # One palette colour per call type.
    bar_colors = Category20c[len(focuscalls)]
    items = []
    for call_type, color in zip(focuscalls, bar_colors):
        # Every series starts muted (almost transparent); clicking its legend
        # entry toggles it.
        renderer = p1.vbar(x='Day', top=call_type, source=src, color=color,
                           width=0.7, alpha=0.8, muted_alpha=0.03, muted=True)
        items.append((call_type, [renderer]))

    p1 = style(p1)

    legend = Legend(items=items, click_policy="mute")
    p1.add_layout(legend, "left")

    return p1

In [7]:
def make_plot_month(df1):
    """Build the "Calls per month" bar chart.

    One bar series is drawn for each call type in ``focuscalls``. The height
    of a bar is the number of calls of that type in the given month divided by
    the total number of calls of that type in ``df1``.

    Args:
        df1: DataFrame with at least the columns ``'month'`` and ``'Call Type'``.

    Returns:
        The Bokeh figure, with a click-to-mute legend placed on the left.

    Raises:
        ValueError: If ``df1['month']`` does not contain exactly ``len(month)``
            distinct values; the axis labels are attached by position, so the
            counts must match.
    """
    # Total number of calls per call type: the denominator of the normalisation.
    total = df1['Call Type'].value_counts()

    # One Series of normalised counts per month. groupby yields the groups in
    # sorted key order and the labels in `month` are attached by position below.
    # NOTE(review): this assumes the sorted values of df1['month'] correspond to
    # Jan..Dec in that order (e.g. a numeric 1..12 encoding) -- confirm.
    rows = [grp['Call Type'].value_counts() / total
            for _, grp in df1.groupby('month')]
    if len(rows) != len(month):
        raise ValueError(
            "column 'month' has {} distinct values but {} axis labels are defined"
            .format(len(rows), len(month)))

    # Keep exactly the plotted call types, in legend order; a call type that
    # never occurs in df1 becomes an all-NaN column (no bars are drawn for it).
    df_norm = pd.DataFrame(rows).reindex(columns=focuscalls).reset_index(drop=True)
    df_norm.insert(loc=0, column='Month', value=month)

    src = ColumnDataSource(df_norm)

    p1 = figure(title="Calls per month", plot_width=900, plot_height=600,
                x_axis_label="Month of year", y_axis_label="Relative frequency",
                x_range=FactorRange(factors=month))
    p1.xaxis.major_label_orientation = 1

    # One palette colour per call type.
    bar_colors = Category20c[len(focuscalls)]
    items = []
    for call_type, color in zip(focuscalls, bar_colors):
        # Every series starts muted (almost transparent); clicking its legend
        # entry toggles it.
        renderer = p1.vbar(x='Month', top=call_type, source=src, color=color,
                           width=0.7, alpha=0.8, muted_alpha=0.03, muted=True)
        items.append((call_type, [renderer]))

    p1 = style(p1)

    legend = Legend(items=items, click_policy="mute")
    p1.add_layout(legend, "left")

    return p1

In [8]:
def make_plot_year(df1):
    """Build the "Calls per year" bar chart.

    One bar series is drawn for each call type in ``focuscalls``. The height
    of a bar is the number of calls of that type in the given year divided by
    the total number of calls of that type in ``df1``.

    Args:
        df1: DataFrame with at least the columns ``'year'`` and ``'Call Type'``.

    Returns:
        The Bokeh figure, with a click-to-mute legend placed on the left.

    Raises:
        ValueError: If ``df1['year']`` does not contain exactly ``len(year)``
            distinct values; the axis labels are attached by position, so the
            counts must match.
    """
    # Total number of calls per call type: the denominator of the normalisation.
    total = df1['Call Type'].value_counts()

    # One Series of normalised counts per year. groupby yields the groups in
    # sorted key order and the labels in `year` are attached by position below.
    # NOTE(review): this assumes the sorted values of df1['year'] line up with
    # the order of the labels in `year` -- confirm against the data.
    rows = [grp['Call Type'].value_counts() / total
            for _, grp in df1.groupby('year')]
    if len(rows) != len(year):
        raise ValueError(
            "column 'year' has {} distinct values but {} axis labels are defined"
            .format(len(rows), len(year)))

    # Keep exactly the plotted call types, in legend order; a call type that
    # never occurs in df1 becomes an all-NaN column (no bars are drawn for it).
    df_norm = pd.DataFrame(rows).reindex(columns=focuscalls).reset_index(drop=True)
    df_norm.insert(loc=0, column='Year', value=year)

    src = ColumnDataSource(df_norm)

    p1 = figure(title="Calls per year", plot_width=900, plot_height=600,
                x_axis_label="Year", y_axis_label="Relative frequency",
                x_range=FactorRange(factors=year))
    p1.xaxis.major_label_orientation = 1

    # One palette colour per call type.
    bar_colors = Category20c[len(focuscalls)]
    items = []
    for call_type, color in zip(focuscalls, bar_colors):
        # Every series starts muted (almost transparent); clicking its legend
        # entry toggles it.
        renderer = p1.vbar(x='Year', top=call_type, source=src, color=color,
                           width=0.7, alpha=0.8, muted_alpha=0.03, muted=True)
        items.append((call_type, [renderer]))

    p1 = style(p1)

    legend = Legend(items=items, click_policy="mute")
    p1.add_layout(legend, "left")

    return p1

In [9]:
def style(p):
    """Apply the shared title/axis typography to a figure and return it.

    The figure is modified in place: the title is centred, bold and 12pt; on
    both axes the axis label is bold 10pt and the tick labels are 10pt.
    """
    heading = p.title
    heading.align = 'center'
    heading.text_font_size = '12pt'
    heading.text_font_style = 'bold'

    # Both axes share the same label and tick-label settings.
    for axis in (p.xaxis, p.yaxis):
        axis.axis_label_text_font_size = '10pt'
        axis.axis_label_text_font_style = 'bold'
        axis.major_label_text_font_size = '10pt'

    return p

In [10]:
# Build one figure per time granularity from the filtered call data.
p_hour, p_week, p_month, p_year = (
    make_plot(df_fil)
    for make_plot in (make_plot_hour, make_plot_day, make_plot_month, make_plot_year)
)

# Wrap each figure in its own column layout.
layout_hour, layout_day, layout_month, layout_year = map(
    column, (p_hour, p_week, p_month, p_year)
)

# One tab per layout, collected into a single Tabs widget.
tab_hour = Panel(title='Hour', child=layout_hour)
tab_day = Panel(title='Weekday', child=layout_day)
tab_month = Panel(title='Month', child=layout_month)
tab_year = Panel(title='Year', child=layout_year)
tabs = Tabs(tabs=[tab_hour, tab_day, tab_month, tab_year])


In [11]:
# Register TimeHistogram.html as an output target, then display the tabbed layout.
output_file("TimeHistogram.html")
layout = column(tabs)
show(layout)