In [65]:
# Install the following libraries if they are not already installed
#%pip install sodapy
#%pip install plotly

In [86]:
#Import libraries
from sodapy import Socrata
import plotly.offline as pyo
from plotly.offline import init_notebook_mode, iplot
import plotly.graph_objs as go
import ipywidgets as widgets
import pandas as pd
import numpy as np
from scipy import special

# Initialise plotly's offline notebook mode before any iplot() call below.
# NOTE(review): connected=True is documented to load plotly.js from a CDN rather
# than embedding it in the notebook — confirm network access is acceptable.
init_notebook_mode(connected=True)

In [67]:
# Client for the data.sfgov.org Socrata portal; no app token is supplied.
# NOTE(review): token-less requests are presumably rate-limited — confirm.
client = Socrata(domain="data.sfgov.org", app_token=None)



In [68]:
# Fetch at most 2000 records from dataset "wg3w-h783".
# NOTE(review): no explicit ordering is requested, so which 2000 rows come back
# is presumably up to the API — confirm before reading trends off this sample.
results = client.get(dataset_identifier="wg3w-h783", limit=2000)

In [69]:
# Build a DataFrame from the records returned by client.get
results_df = pd.DataFrame.from_records(data=results)

In [70]:
# List every column present in the fetched records
results_df.columns

Index(['incident_datetime', 'incident_date', 'incident_time', 'incident_year',
       'incident_day_of_week', 'report_datetime', 'row_id', 'incident_id',
       'incident_number', 'report_type_code', 'report_type_description',
       'filed_online', 'incident_code', 'incident_category',
       'incident_subcategory', 'incident_description', 'resolution',
       'police_district', 'cad_number', 'intersection', 'cnn',
       'analysis_neighborhood', 'supervisor_district', 'latitude', 'longitude',
       'point', ':@computed_region_6qbp_sg9q', ':@computed_region_qgnn_b9vv',
       ':@computed_region_26cr_cadq', ':@computed_region_ajp5_b2md',
       ':@computed_region_6pnf_4xz7', ':@computed_region_nqbw_i6c3',
       ':@computed_region_2dwj_jsy4', ':@computed_region_h4ep_8xdi',
       ':@computed_region_jg9y_a9du', ':@computed_region_y6ts_4iup'],
      dtype='object')

In [71]:
# Take a copy of the original dataframe so later edits leave results_df untouched
results_df_copy = results_df.copy(deep=True)

In [72]:
# Drop junk columns: every column whose name matches ':@computed_region'
results_df_copy = results_df_copy.drop(
    columns=results_df_copy.filter(regex=':@computed_region').columns
)

In [73]:
# Confirm the ':@computed_region' columns are gone
results_df_copy.columns

Index(['incident_datetime', 'incident_date', 'incident_time', 'incident_year',
       'incident_day_of_week', 'report_datetime', 'row_id', 'incident_id',
       'incident_number', 'report_type_code', 'report_type_description',
       'filed_online', 'incident_code', 'incident_category',
       'incident_subcategory', 'incident_description', 'resolution',
       'police_district', 'cad_number', 'intersection', 'cnn',
       'analysis_neighborhood', 'supervisor_district', 'latitude', 'longitude',
       'point'],
      dtype='object')

### Which days of the week have the highest and the lowest number of incidents?

In [74]:
# Aggregate by incident_day_of_week: count() tallies the non-null
# incident_description values for each day.
results_groupby = (
    results_df_copy
    .groupby(by='incident_day_of_week', as_index=False)['incident_description']
    .count()
)
results_groupby

Unnamed: 0,incident_day_of_week,incident_description
0,Friday,362
1,Monday,346
2,Saturday,249
3,Sunday,197
4,Thursday,239
5,Tuesday,333
6,Wednesday,274


In [75]:
# Re-order incident_day_of_week to calendar order (Monday .. Sunday) by turning
# the column into an ordered categorical; groupby then emits its rows in
# category order instead of alphabetically.
results_df_copy['incident_day_of_week'] = pd.Categorical(
    values=results_df_copy['incident_day_of_week'],
    categories=['Monday', 'Tuesday', 'Wednesday', 'Thursday',
                'Friday', 'Saturday', 'Sunday'],
    ordered=True)

# observed=False is spelled out on purpose: grouping on a categorical without it
# relies on a deprecated implicit default (FutureWarning in recent pandas), and
# the future default would drop any weekday that has no incidents. Keeping all
# seven categories means such a day shows up with a count of 0.
# count() tallies the non-null incident_description values for each day.
results_groupby = (
    results_df_copy
    .groupby(by='incident_day_of_week', as_index=False, observed=False)['incident_description']
    .count()
)
results_groupby

Unnamed: 0,incident_day_of_week,incident_description
0,Monday,346
1,Tuesday,333
2,Wednesday,274
3,Thursday,239
4,Friday,362
5,Saturday,249
6,Sunday,197


In [87]:
# Single line-and-marker trace: weekday on x, incident count on y
data = [
    go.Scatter(
        x=results_groupby['incident_day_of_week'],
        y=results_groupby['incident_description'],
        mode='markers+lines',
        marker={
            'size': 10,
            'color': 'rgb(51,204,153)',
            'symbol': 'circle',
            'line': {'width': 2},
        },
    )
]

# Titles for the figure and both axes
layout = go.Layout(
    title='Number of Incidents by Day of Week',
    xaxis={'title': 'Incident Day of Week'},
    yaxis={'title': 'Number of Incidents'},
    hovermode='closest',
)

# Assemble the figure and render it in the notebook
fig = go.Figure(data=data, layout=layout)
iplot(fig, filename='Day_of_Week_Incidents.html')

### Incident trend over more than a year


In [77]:
# Inspect the column dtypes: incident_date is still object here, so it has to be
# converted before it can serve as a date axis
results_df_copy.dtypes

incident_datetime            object
incident_date                object
incident_time                object
incident_year                object
incident_day_of_week       category
report_datetime              object
row_id                       object
incident_id                  object
incident_number              object
report_type_code             object
report_type_description      object
filed_online                 object
incident_code                object
incident_category            object
incident_subcategory         object
incident_description         object
resolution                   object
police_district              object
cad_number                   object
intersection                 object
cnn                          object
analysis_neighborhood        object
supervisor_district          object
latitude                     object
longitude                    object
point                        object
dtype: object

In [78]:
# Parse the object-typed incident_date column into datetime values
results_df_copy['incident_date'] = pd.to_datetime(
    arg=results_df_copy['incident_date']
)

In [79]:
# Incidents per date: count() tallies the non-null incident_code values
# recorded for each incident_date.
results_by_date = (
    results_df_copy
    .groupby(by='incident_date', as_index=False)['incident_code']
    .count()
)
results_by_date

Unnamed: 0,incident_date,incident_code
0,2018-03-15,1
1,2018-05-22,1
2,2018-08-01,2
3,2018-08-21,1
4,2018-10-10,2
...,...,...
133,2020-05-19,31
134,2020-05-20,35
135,2020-05-21,106
136,2020-05-22,237


In [88]:
# One line trace: incident date on x, per-date incident count on y
data_by_date = [
    go.Scatter(
        x=results_by_date['incident_date'],
        y=results_by_date['incident_code'],
        mode='lines',
    )
]

# Titles for the figure and both axes
layout = go.Layout(
    title="Daily Incidents Since Jan 2018",
    xaxis={'title': "Incident Date"},
    yaxis={'title': "Incident Count"},
    hovermode='closest',
)

# Assemble the figure and render it in the notebook
fig = go.Figure(data=data_by_date, layout=layout)
iplot(fig)