In [27]:
# Install the following libraries if they are not already installed
#%pip install sodapy
#%pip install plotly

In [28]:
#Import libraries
from sodapy import Socrata
import plotly.offline as pyo
import plotly.graph_objs as go
import ipywidgets as widgets
import pandas as pd
import numpy as np
from scipy import special

#pyo.init_notebook_mode(connected=True)

In [29]:
# Socrata client for the data.sfgov.org domain; no app token is supplied
client = Socrata(domain="data.sfgov.org", app_token=None)



In [30]:
# Dataset identifier and row cap used for the query
DATASET_ID = "wg3w-h783"
ROW_LIMIT = 2000

# NOTE(review): no explicit ordering is requested, so which rows come back is
# decided by the API — confirm this sample is what the analysis needs
results = client.get(DATASET_ID, limit=ROW_LIMIT)

In [31]:
# Load the fetched records into a DataFrame
results_df = pd.DataFrame.from_records(data=results)

In [32]:
# Inspect the column names of the freshly loaded frame
results_df.columns

Index(['incident_datetime', 'incident_date', 'incident_time', 'incident_year',
       'incident_day_of_week', 'report_datetime', 'row_id', 'incident_id',
       'incident_number', 'cad_number', 'report_type_code',
       'report_type_description', 'incident_code', 'incident_category',
       'incident_subcategory', 'incident_description', 'resolution',
       'intersection', 'cnn', 'police_district', 'analysis_neighborhood',
       'supervisor_district', 'latitude', 'longitude', 'point',
       ':@computed_region_6qbp_sg9q', ':@computed_region_qgnn_b9vv',
       ':@computed_region_26cr_cadq', ':@computed_region_ajp5_b2md',
       ':@computed_region_6pnf_4xz7', ':@computed_region_nqbw_i6c3',
       ':@computed_region_h4ep_8xdi', ':@computed_region_2dwj_jsy4',
       'filed_online', ':@computed_region_jg9y_a9du',
       ':@computed_region_y6ts_4iup'],
      dtype='object')

In [33]:
# Take a copy of the original dataframe so later edits leave results_df untouched
results_df_copy = results_df.copy(deep=True)

In [34]:
# Drop junk columns: every column whose name matches ':@computed_region'
junk_columns = results_df_copy.filter(regex=':@computed_region').columns
results_df_copy = results_df_copy.drop(columns=junk_columns)

In [35]:
# Check which columns remain in the working copy
results_df_copy.columns

Index(['incident_datetime', 'incident_date', 'incident_time', 'incident_year',
       'incident_day_of_week', 'report_datetime', 'row_id', 'incident_id',
       'incident_number', 'cad_number', 'report_type_code',
       'report_type_description', 'incident_code', 'incident_category',
       'incident_subcategory', 'incident_description', 'resolution',
       'intersection', 'cnn', 'police_district', 'analysis_neighborhood',
       'supervisor_district', 'latitude', 'longitude', 'point',
       'filed_online'],
      dtype='object')

### Which day of the week has the highest and the lowest number of incidents?

In [36]:
# Aggregate the dataframe by incident_day_of_week.
# count() tallies the non-null incident_description values in each group.

weekday_groups = results_df_copy.groupby(by='incident_day_of_week', as_index=False)
results_groupby = pd.DataFrame(weekday_groups['incident_description'].count())
results_groupby

Unnamed: 0,incident_day_of_week,incident_description
0,Friday,268
1,Monday,373
2,Saturday,363
3,Sunday,344
4,Thursday,130
5,Tuesday,317
6,Wednesday,205


In [37]:
# Re-order incident_day_of_week to calendar order (Monday ... Sunday) by
# converting the column to an ordered categorical.
WEEKDAY_ORDER = ['Monday', 'Tuesday', 'Wednesday', 'Thursday',
                 'Friday', 'Saturday', 'Sunday']
results_df_copy['incident_day_of_week'] = pd.Categorical(
    values=results_df_copy['incident_day_of_week'],
    categories=WEEKDAY_ORDER,
    ordered=True)

# Count non-null incident_description values per weekday.
# observed=False is spelled out so that all seven categories stay in the
# result (a weekday with no rows shows up as 0) and so the groupby does not
# rely on the implicit default for categorical keys, which newer pandas
# versions warn about. With as_index=False the result is already a DataFrame,
# so no extra pd.DataFrame(...) wrapping is needed.
results_groupby = results_df_copy.groupby(
    by='incident_day_of_week', as_index=False, observed=False
)['incident_description'].count()
results_groupby

Unnamed: 0,incident_day_of_week,incident_description
0,Monday,373
1,Tuesday,317
2,Wednesday,205
3,Thursday,130
4,Friday,268
5,Saturday,363
6,Sunday,344


In [61]:
# Marker styling for the weekday trace
weekday_marker = dict(
    size=10,
    color='rgb(51,204,153)',
    symbol='circle',
    line=dict(width=2),
)

# Single markers+lines trace: weekday on x, incident count on y
data = [
    go.Scatter(
        x=results_groupby['incident_day_of_week'],
        y=results_groupby['incident_description'],
        mode='markers+lines',
        marker=weekday_marker,
    )
]

layout = go.Layout(
    title='Number of Incidents by Day of Week',
    xaxis={'title': 'Incident Day of Week'},
    yaxis={'title': 'Number of Incidents'},
    hovermode='closest',
)

# Build the figure and write it to scatter+line.html via plotly's offline plot
fig = go.Figure(data=data, layout=layout)
pyo.plot(fig, filename='scatter+line.html')

'file:///Users/bharath/Desktop/Apps/Coursera/Github/SanFrancisco_Crime/scatter+line.html'

### Incident trend over more than a year


In [55]:
# Show the dtype of every column to find the datatype of incident_date
results_df_copy.dtypes

incident_datetime                  object
incident_date              datetime64[ns]
incident_time                      object
incident_year                      object
incident_day_of_week             category
report_datetime                    object
row_id                             object
incident_id                        object
incident_number                    object
cad_number                         object
report_type_code                   object
report_type_description            object
incident_code                      object
incident_category                  object
incident_subcategory               object
incident_description               object
resolution                         object
intersection                       object
cnn                                object
police_district                    object
analysis_neighborhood              object
supervisor_district                object
latitude                           object
longitude                         

In [51]:
# Parse incident_date into pandas datetimes and store it back on the frame
parsed_incident_dates = pd.to_datetime(results_df_copy['incident_date'])
results_df_copy['incident_date'] = parsed_incident_dates

In [53]:
# Group by date and count the non-null incident_code values for each day
date_groups = results_df_copy.groupby(by='incident_date', as_index=False)
results_by_date = pd.DataFrame(date_groups['incident_code'].count())
results_by_date

Unnamed: 0,incident_date,incident_code
0,2018-01-02,1
1,2018-03-15,1
2,2018-05-22,1
3,2018-06-20,1
4,2018-07-11,1
...,...,...
371,2020-05-20,15
372,2020-05-21,22
373,2020-05-22,92
374,2020-05-23,187


In [64]:
# Single line trace: incident date on x, per-day incident count on y
daily_trace = go.Scatter(
    x=results_by_date['incident_date'],
    y=results_by_date['incident_code'],
    mode='lines',
)
data_by_date = [daily_trace]

layout = go.Layout(
    title="Daily Incidents Since Jan 2018",
    xaxis={'title': "Incident Date"},
    yaxis={'title': "Incident Count"},
    hovermode='closest',
)

# Build the figure and write it to an HTML file via plotly's offline plot.
# NOTE(review): the filename reads 'DailIncidents' — possibly a typo for
# 'DailyIncidents'; left unchanged in case something else links to this file.
fig = go.Figure(data=data_by_date, layout=layout)
pyo.plot(fig, filename='DailIncidents_Line.html')

'file:///Users/bharath/Desktop/Apps/Coursera/Github/SanFrancisco_Crime/line.html'