### Import Libraries

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sb
from datetime import datetime

### Inspect Data

In [2]:
#load the tabular policing records from disk into a pandas dataframe
#and preview the first five rows
data_file = 'denver-1.nov.csv'
policing_df = pd.read_csv(data_file)
policing_df.head(5)

Unnamed: 0,raw_row_number,date,time,location,lat,lng,district,precinct,type,disposition,arrest_made,citation_issued,warning_issued,outcome
0,1,2012-11-13,00:02:22,8500 W CRESTLINE AVE,39.618329,-105.092691,4,423,vehicular,K - Street Check Completed,False,False,False,
1,16,2012-11-29,00:25:01,8500 W CRESTLINE AVE,39.618329,-105.092691,4,423,vehicular,Party Advised,False,False,False,
2,72,2012-11-23,22:58:10,4830 S HOLLAND WAY,39.630666,-105.102551,4,423,vehicular,K - Street Check Completed,False,False,False,
3,121,2012-11-27,17:48:33,4885 S QUEBEC ST,39.626174,-104.904062,3,324,pedestrian,In Service,False,False,False,
4,138,2012-11-14,23:46:58,W LAYTON AVE / S WADSWORTH BLVD,39.63015,-105.081693,4,423,vehicular,Party Advised,False,False,False,


In [3]:
#remove unwanted info and check shape of dataframe
#errors='ignore' keeps this cell re-runnable: policing_df is rebound to the
#trimmed frame, so a second run would otherwise raise KeyError on the
#already-dropped columns
unwanted_cols = ['lat', 'lng', 'raw_row_number']
policing_df = policing_df.drop(columns=unwanted_cols, errors='ignore')
policing_df.shape

(6365, 11)

### Filtering and Aggregation

In [4]:
#create new dataframe that shows policing outcomes and total stops by district
#sum only the boolean outcome flags (True counts as 1): a bare .sum() also tries
#to aggregate the text columns (date, time, location, ...), which newer pandas
#versions no longer drop silently
outcome_cols = ["arrest_made", "citation_issued", "warning_issued"]
by_district = policing_df.groupby("district")[outcome_cols].sum()
#value_counts() is indexed by district value, so this assignment aligns on the index
by_district["counts"] = policing_df["district"].value_counts()
by_district

Unnamed: 0_level_0,arrest_made,citation_issued,warning_issued,counts
district,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1.0,200,207,141,1158
2.0,254,209,155,1338
3.0,113,256,151,968
4.0,149,181,138,1158
5.0,60,111,67,571
6.0,186,84,57,1004
7.0,0,1,0,2
,23,16,23,166


In [5]:
#SANITY CHECK
#cross-check one row of by_district against a direct filter of the raw stops
check_district = '4'
dis4 = policing_df[policing_df['district'] == check_district]
#assert instead of eyeballing the two outputs, so a mismatch stops the notebook
assert len(dis4) == by_district.loc[check_district, 'counts'], \
    "total stops for district 4 disagree with by_district"
assert dis4['arrest_made'].sum() == by_district.loc[check_district, 'arrest_made'], \
    "arrests for district 4 disagree with by_district"
#when this was written: 149 arrests, 1158 total stops
dis4['arrest_made'].value_counts()

False    1009
True      149
Name: arrest_made, dtype: int64

In [6]:
#add the arrest and warning rate columns: outcome total divided by stop count
#note: despite the '%' in the names, the values are fractions (not multiplied by 100)
rate_sources = (('arrest%', 'arrest_made'), ('warning%', 'warning_issued'))
for rate_col, outcome_col in rate_sources:
    by_district[rate_col] = by_district[outcome_col] / by_district['counts']
by_district

Unnamed: 0_level_0,arrest_made,citation_issued,warning_issued,counts,arrest%,warning%
district,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
1.0,200,207,141,1158,0.172712,0.121762
2.0,254,209,155,1338,0.189836,0.115845
3.0,113,256,151,968,0.116736,0.155992
4.0,149,181,138,1158,0.12867,0.119171
5.0,60,111,67,571,0.105079,0.117338
6.0,186,84,57,1004,0.185259,0.056773
7.0,0,1,0,2,0.0,0.0
,23,16,23,166,0.138554,0.138554


### Convert to JSON

In [7]:
#serialise the per-district summary dataframe to a JSON string
#orient="columns" is the DataFrame default layout: {column -> {index -> value}}
police_json = by_district.to_json(orient="columns")
police_json

