In [57]:
import csv
import numpy as np
import pandas as pd
from math import radians, cos, sin, asin, sqrt
import matplotlib.pyplot as plt
import bokeh
from bokeh.models import ColumnDataSource, RangeTool
from bokeh.palettes import Category20_14
from bokeh.plotting import figure, show
from bokeh.transform import factor_cmap
from bokeh.models import FactorRange
from bokeh.models import Legend
from bokeh.layouts import column
from scipy.signal import savgol_filter # for smoothing a curve

In [2]:
data = pd.read_csv("Police_Department_Incident_Reports__Historical_2003_to_May_2018.csv") 

In [43]:
data['Date'] = pd.to_datetime(data['Date']) - pd.to_timedelta(7, unit='d')
data['Year'] = pd.DatetimeIndex(data['Date']).year
data = data.loc[(data['Year'] >= 2012) & (data['Year'] <= 2016)]

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  """Entry point for launching an IPython kernel.
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  


In [7]:
set(data.PdDistrict.unique()[:-1])

{'BAYVIEW',
 'CENTRAL',
 'INGLESIDE',
 'MISSION',
 'NORTHERN',
 'PARK',
 'RICHMOND',
 'SOUTHERN',
 'TARAVAL',
 'TENDERLOIN'}

In [111]:
focus_district = 'TENDERLOIN'
df = data.loc[(data['PdDistrict'] == focus_district)]
district = df.groupby([pd.Grouper(key='Date', freq='W')]).size().reset_index(name='counts') #df.groupby(['Date']).size().reset_index(name='counts')
district.head()

Unnamed: 0,Date,counts
0,2012-01-01,40
1,2012-01-08,262
2,2012-01-15,157
3,2012-01-22,263
4,2012-01-29,275


In [112]:
# We remove the first value, since it is an anomaly
district = district.tail(-1)
district.head()

Unnamed: 0,Date,counts
1,2012-01-08,262
2,2012-01-15,157
3,2012-01-22,263
4,2012-01-29,275
5,2012-02-05,217


In [113]:
district_smooth = pd.DataFrame(savgol_filter(district['counts'], 101, 3))
district_smooth['Date'] = district['Date'].to_list()
district_smooth.columns = ['counts', 'Date']
district_smooth.head()

Unnamed: 0,counts,Date
0,240.971302,2012-01-08
1,238.598897,2012-01-15
2,236.348453,2012-01-22
3,234.217673,2012-01-29
4,232.204265,2012-02-05


In [115]:
source = ColumnDataSource(data=district)
source_smooth = ColumnDataSource(data=district_smooth)
dates = np.array(district['Date'], dtype=np.datetime64)

p = figure(title=f"Crimes per Week in {focus_district}", height=150, width=300, tools="xpan", toolbar_location=None,
           x_axis_type="datetime", x_axis_location="above",
           background_fill_color="#efefef", x_range=(dates[0], dates[50]))

p.line('Date', 'counts', source=source)
p.line('Date', 'counts', source=source_smooth, line_color="#f46d43", line_width=3)
p.yaxis.axis_label = 'Crime counts'

select = figure(title="Drag the middle and edges of the \nselection box to change \nthe range above",
                height=125, width=300, y_range=p.y_range, 
                x_axis_type="datetime", y_axis_type=None,
                tools="", toolbar_location=None, background_fill_color="#efefef")

range_tool = RangeTool(x_range=p.x_range)
range_tool.overlay.fill_color = "navy"
range_tool.overlay.fill_alpha = 0.2

select.line('Date', 'counts', source=source)
select.ygrid.grid_line_color = None
select.add_tools(range_tool)

show(column(p, select))