## Imports

In [45]:
%reload_ext autoreload
%autoreload 2
%matplotlib inline
from bokeh.plotting import figure, show, output_notebook, save
from bokeh.models import HoverTool, value, LabelSet, Legend, ColumnDataSource
output_notebook()
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [46]:
from math import pi
import pandas as pd

from bokeh.io import show
from bokeh.models import (
    ColumnDataSource,
    HoverTool,
    LinearColorMapper,
    BasicTicker,
    PrintfTickFormatter,
    ColorBar,
)
from bokeh.plotting import figure

In [47]:
from bokeh.core.properties import value

In [48]:
import calendar

In [49]:
from bokeh.models import FactorRange

## Loading data

In [50]:
sf_crime = pd.read_csv('data/Monthly_Property_Crime_2005_to_2015.csv')

In [51]:
sf_crime.head(10)

Unnamed: 0,Date,Category,IncidntNum
0,02/01/2014 12:00:00 AM,BURGLARY,506
1,02/01/2007 12:00:00 AM,VANDALISM,531
2,07/01/2012 12:00:00 AM,BURGLARY,522
3,07/01/2013 12:00:00 AM,LARCENY/THEFT,3318
4,08/01/2010 12:00:00 AM,VANDALISM,694
5,10/01/2011 12:00:00 AM,BURGLARY,421
6,07/01/2006 12:00:00 AM,VEHICLE THEFT,673
7,08/01/2011 12:00:00 AM,LARCENY/THEFT,2148
8,12/01/2014 12:00:00 AM,STOLEN PROPERTY,90
9,12/01/2012 12:00:00 AM,STOLEN PROPERTY,81


In [52]:
sf_crime.dtypes

Date          object
Category      object
IncidntNum     int64
dtype: object

In [53]:
# Parse the timestamp strings (e.g. "02/01/2014 12:00:00 AM") with an explicit
# format so the month/day order is stated rather than inferred.
sf_crime['Date'] = pd.to_datetime(
    sf_crime['Date'], format='%m/%d/%Y %I:%M:%S %p')

# Store the crime type as a pandas categorical.
sf_crime['Category'] = sf_crime['Category'].astype('category')

sf_crime.shape

(792, 3)

In [54]:
# Derive calendar year and month-number columns from the parsed timestamp.
sf_crime['year'] = sf_crime['Date'].dt.year
sf_crime['month'] = sf_crime['Date'].dt.month

sf_crime.head()

Unnamed: 0,Date,Category,IncidntNum,year,month
0,2014-02-01,BURGLARY,506,2014,2
1,2007-02-01,VANDALISM,531,2007,2
2,2012-07-01,BURGLARY,522,2012,7
3,2013-07-01,LARCENY/THEFT,3318,2013,7
4,2010-08-01,VANDALISM,694,2010,8


In [55]:
sf_crime.year.describe()

count     792.000000
mean     2010.000000
std         3.164276
min      2005.000000
25%      2007.000000
50%      2010.000000
75%      2013.000000
max      2015.000000
Name: year, dtype: float64

In [56]:
years = sorted(list(sf_crime.year.unique()))

In [57]:
years

[2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013, 2014, 2015]

In [58]:
years = [str(x) for x in years]

In [59]:
years

['2005',
 '2006',
 '2007',
 '2008',
 '2009',
 '2010',
 '2011',
 '2012',
 '2013',
 '2014',
 '2015']

In [60]:
months = sorted(list(sf_crime.month.unique()))

In [61]:
# Build the month labels from the data rather than from the previous value of
# `months`, so re-running this cell never indexes calendar.month_abbr with the
# already-converted label strings.
months = [calendar.month_abbr[m] for m in sorted(sf_crime.month.unique())]

In [62]:
months

['Jan',
 'Feb',
 'Mar',
 'Apr',
 'May',
 'Jun',
 'Jul',
 'Aug',
 'Sep',
 'Oct',
 'Nov',
 'Dec']

In [63]:
sf_crime['year'] = sf_crime['year'].astype('str')

In [64]:
# Sum incidents over all categories for every (year, month) pair.
df1 = (
    sf_crime
    .groupby(['year', 'month'])[['IncidntNum']]
    .sum()
    .reset_index()
)

In [65]:
# Replace each month number with its abbreviated name from the calendar module.
df1['month'] = df1['month'].map(lambda month_no: calendar.month_abbr[month_no])

In [66]:
df1.head()

Unnamed: 0,year,month,IncidntNum
0,2005,Jan,4991
1,2005,Feb,4166
2,2005,Mar,4815
3,2005,Apr,4711
4,2005,May,5120


## Heatmap

In [67]:
# Palette taken from the original NYTimes plot.
colors = [
    "#75968f", "#a5bab7", "#c9d9d3", "#e2e2e2", "#dfccce", "#ddb7b1",
    "#cc7878", "#933b41", "#550b1d"
]

# Linear colour scale spanning the smallest to the largest monthly total.
mapper = LinearColorMapper(
    palette=colors,
    low=df1['IncidntNum'].min(),
    high=df1['IncidntNum'].max(),
)

source = ColumnDataSource(df1)

TOOLS = "hover,save,pan,box_zoom,reset,wheel_zoom"

# Years run along the x axis; the month labels are passed to the y range in
# reverse order.
p = figure(
    title="SF Crime rate over the years ({0} - {1})".format(years[0], years[-1]),
    x_range=years,
    y_range=list(reversed(months)),
    x_axis_location="above",
    plot_width=900,
    plot_height=900,
    tools=TOOLS,
    toolbar_location='below')

# Strip grid lines, axis lines and tick marks so only the coloured cells remain.
p.grid.grid_line_color = None
p.axis.axis_line_color = None
p.axis.major_tick_line_color = None
p.axis.major_label_text_font_size = "10pt"
p.axis.major_label_standoff = 0
p.xaxis.major_label_orientation = pi / 3

# One unit-sized rectangle per (year, month) row, filled according to its
# incident total.
p.rect(
    x="year",
    y="month",
    width=1,
    height=1,
    source=source,
    fill_color=dict(field='IncidntNum', transform=mapper),
    line_color=None)

# Colour legend placed to the right of the plot, one tick requested per
# palette entry.
color_bar = ColorBar(
    color_mapper=mapper,
    major_label_text_font_size="10pt",
    ticker=BasicTicker(desired_num_ticks=len(colors)),
    formatter=PrintfTickFormatter(),
    label_standoff=6,
    border_line_color=None,
    location=(0, 0))
p.add_layout(color_bar, 'right')

# Tooltip fields refer to columns of `source`.
p.select_one(HoverTool).tooltips = [
    ('Period', '@month @year'),
    ('Number of crimes', '@IncidntNum'),
]

show(p)  # show the plot

  elif np.issubdtype(type(obj), np.float):


## Bar chart

In [68]:
sf_crime.head()

Unnamed: 0,Date,Category,IncidntNum,year,month
0,2014-02-01,BURGLARY,506,2014,2
1,2007-02-01,VANDALISM,531,2007,2
2,2012-07-01,BURGLARY,522,2012,7
3,2013-07-01,LARCENY/THEFT,3318,2013,7
4,2010-08-01,VANDALISM,694,2010,8


In [69]:
# Keep the crime types in sorted order rather than first-appearance order
# (which is what `unique()` returns), so the list lines up with the row order
# of frames grouped or pivoted on `Category`.
crime_type = sorted(sf_crime.Category.unique())
years

['2005',
 '2006',
 '2007',
 '2008',
 '2009',
 '2010',
 '2011',
 '2012',
 '2013',
 '2014',
 '2015']

In [70]:
# Total incidents per (category, year) in long form ...
category_year_totals = (
    sf_crime.loc[sf_crime.year.isin(years)]
    .groupby(['Category', 'year'])[['IncidntNum']]
    .sum()
    .reset_index()
)

# ... reshaped to one row per category with one column per year.
df = category_year_totals.pivot(
    index='Category', columns='year', values='IncidntNum').reset_index()

In [71]:
# Grouped bar chart: one group per crime type, one bar per year.

# Years to plot, defined once so the labels and the counts cannot drift apart.
# With the 2005-2015 `years` list this is '2011' through '2015'.
bar_years = years[6:]

data = {'crime_type': list(df.Category)}
data.update({year: list(df[year]) for year in bar_years})

# Take the group labels from the same pivoted frame that supplies the counts.
# Its rows are ordered by category, which differs from the first-appearance
# order returned by `Category.unique()`; labelling from a separately built
# list would attach counts to the wrong crime type.
x = [(crime, year) for crime in data['crime_type'] for year in bar_years]

# Flatten crime by crime, year by year, so counts[i] belongs to x[i].
counts = [
    count
    for per_crime in zip(*(data[year] for year in bar_years))
    for count in per_crime
]
source = ColumnDataSource(data=dict(x=x, counts=counts))

TOOLS = "hover,save,pan,box_zoom,reset,wheel_zoom"
p = figure(
    x_range=FactorRange(*x),
    plot_height=600,
    plot_width=1000,
    title="Crime Counts by Type and Year",
    tools=TOOLS,
    toolbar_location='below')

p.vbar(x='x', top='counts', width=0.9, source=source)

p.y_range.start = 0
p.x_range.range_padding = 0.1
p.xaxis.major_label_orientation = 1
p.xgrid.grid_line_color = None

# Tooltip fields refer to columns of `source`.
p.select_one(HoverTool).tooltips = [
    ('Type|year', '@x'),
    ('Number of crimes', '@counts'),
]

show(p)

## Line chart

In [72]:
sf_crime.head()

Unnamed: 0,Date,Category,IncidntNum,year,month
0,2014-02-01,BURGLARY,506,2014,2
1,2007-02-01,VANDALISM,531,2007,2
2,2012-07-01,BURGLARY,522,2012,7
3,2013-07-01,LARCENY/THEFT,3318,2013,7
4,2010-08-01,VANDALISM,694,2010,8


In [73]:
sf_crime = sf_crime.sort_values(by='Date')

In [74]:
# Total incidents over all categories for each date.
df_grp_date = (
    sf_crime
    .groupby('Date')[['IncidntNum']]
    .sum()
    .reset_index()
)

In [75]:
df_grp_date.head()

Unnamed: 0,Date,IncidntNum
0,2005-01-01,4991
1,2005-02-01,4166
2,2005-03-01,4815
3,2005-04-01,4711
4,2005-05-01,5120


In [76]:
df_grp_date.dtypes

Date          datetime64[ns]
IncidntNum             int64
dtype: object

In [77]:
from bokeh.plotting import figure, output_file, show
TOOLS = "hover,save,pan,box_zoom,reset,wheel_zoom"

# Single-series trend of the per-date totals on a datetime x axis.
p = figure(
    title="Crime Counts Trend",
    x_axis_type='datetime',
    plot_width=1000,
    plot_height=400,
    tools=TOOLS,
    toolbar_location='below')
p.xaxis.axis_label = 'Date'
p.yaxis.axis_label = 'Count'

# add a line renderer
p.line(df_grp_date['Date'].dt.date, df_grp_date['IncidntNum'], line_width=2)

# Configure the hover tool requested via TOOLS: show the date of the hovered
# point and the y position under the cursor.
hover = p.select_one(HoverTool)
hover.tooltips = [
    ('Date', '@x{%F}'),
    ('Number of crimes', '$y{int}'),
]
hover.formatters = {'x': 'datetime'}

show(p)

## Multi-line chart

In [78]:
# Total incidents for every (date, category) pair, in long form.
grp_dt_crime = (
    sf_crime
    .groupby(['Date', 'Category'])[['IncidntNum']]
    .sum()
    .reset_index()
)

In [79]:
# Reshape to wide form: one row per date, one column per crime category.
df = grp_dt_crime.pivot(
    index='Date',
    columns='Category',
    values='IncidntNum',
)

In [80]:
df.columns

CategoricalIndex(['ARSON', 'BURGLARY', 'LARCENY/THEFT', 'STOLEN PROPERTY',
                  'VANDALISM', 'VEHICLE THEFT'],
                 categories=['ARSON', 'BURGLARY', 'LARCENY/THEFT', 'STOLEN PROPERTY', 'VANDALISM', 'VEHICLE THEFT'], ordered=False, name='Category', dtype='category')

In [81]:
# Swap the CategoricalIndex of column labels for a plain Index (presumably so
# that 'Date' can later be added as a column by reset_index -- confirm).
# The labels are taken from the frame itself instead of a hand-typed list, so
# they always match the columns actually present.
df.columns = list(df.columns)

In [82]:
df.reset_index()

Unnamed: 0,Date,ARSON,BURGLARY,LARCENY/THEFT,STOLEN PROPERTY,VANDALISM,VEHICLE THEFT
0,2005-01-01,21,698,2038,56,493,1685
1,2005-02-01,17,547,1734,40,462,1366
2,2005-03-01,19,654,2035,46,585,1476
3,2005-04-01,24,551,1920,41,581,1594
4,2005-05-01,13,622,2181,56,658,1590
5,2005-06-01,23,586,2253,40,622,1394
6,2005-07-01,23,691,2307,50,643,1429
7,2005-08-01,20,595,2406,43,616,1412
8,2005-09-01,17,531,2197,36,559,1430
9,2005-10-01,18,526,2169,49,655,1774


In [83]:
df.columns

Index(['ARSON', 'BURGLARY', 'LARCENY/THEFT', 'STOLEN PROPERTY', 'VANDALISM',
       'VEHICLE THEFT'],
      dtype='object')

In [84]:
# Move 'Date' from the index into a regular column. The guard keeps the cell
# safe to re-run: without it a second run would reset the index again and add
# a stray 'index' column.
if df.index.name == 'Date':
    df = df.reset_index()

In [85]:
colors = ['#000003', '#410967', '#932567', '#DC5039', '#FBA40A', '#FCFEA4']

In [86]:
from bokeh.plotting import figure, output_file, show
TOOLS = "hover,save,pan,box_zoom,reset,wheel_zoom"

# One trend line per crime category on a shared datetime x axis.
p = figure(
    title="Crime Counts Trend",
    x_axis_type='datetime',
    plot_width=1000,
    plot_height=600,
    tools=TOOLS,
    toolbar_location='below')
p.xaxis.axis_label = 'Date'
p.yaxis.axis_label = 'Count'

# (column in df, legend text) for each line; colours are taken from `colors`
# in the same order.
line_specs = [
    ('ARSON', 'Arson'),
    ('BURGLARY', 'Burglary'),
    ('LARCENY/THEFT', 'Larceny'),
    ('STOLEN PROPERTY', 'Stolen property'),
    ('VANDALISM', 'Vandalism'),
    ('VEHICLE THEFT', 'Vehicle theft'),
]

# add one line renderer per category
for (column, label), line_color in zip(line_specs, colors):
    p.line(df.Date.dt.date, df[column], line_width=2, color=line_color, legend=label)

# Configure the hover tool requested via TOOLS: show the date of the hovered
# point and the y position under the cursor.
hover = p.select_one(HoverTool)
hover.tooltips = [
    ('Date', '@x{%F}'),
    ('Number of crimes', '$y{int}'),
]
hover.formatters = {'x': 'datetime'}

p.legend.location = "top_left"

show(p)