In [12]:
import numpy as np
import pandas as pd
import scipy as sp
import matplotlib.pyplot as plt
import seaborn as sns
import folium
from folium import plugins
import geopandas as gpd
#import geopy?
%matplotlib inline

In [13]:
# Root data directory, given relative to the notebook's working directory.
datapath = "data"

In [14]:
# Location of the Dallas CSV (file name indicates 2016 use-of-force records), relative to `datapath`.
# The leading "/" matters: the read cell joins the two strings by plain concatenation.
dallas_crime_data = "/Dept_37-00049/37-00049_UOF-P_2016_prepped.csv"

In [15]:
# The CSV carries two header rows. Row 0 supplies the column names of crime_df;
# row 1 is excluded from the data (skiprows=[1]) and kept separately as sub_header.
crime_csv_path = datapath + dallas_crime_data
crime_df = pd.read_csv(crime_csv_path, header=0, skiprows=[1])

# nrows=0 parses only the header line, so the file is not read in full a second
# time just to recover the labels of the second header row.
sub_header = pd.read_csv(crime_csv_path, header=1, nrows=0).keys()

In [16]:
# Show the first five rows of the loaded frame.
crime_df.head(n=5)

Unnamed: 0,INCIDENT_DATE,INCIDENT_TIME,UOF_NUMBER,OFFICER_ID,OFFICER_GENDER,OFFICER_RACE,OFFICER_HIRE_DATE,OFFICER_YEARS_ON_FORCE,OFFICER_INJURY,OFFICER_INJURY_TYPE,...,TYPE_OF_FORCE_USED3,TYPE_OF_FORCE_USED4,TYPE_OF_FORCE_USED5,TYPE_OF_FORCE_USED6,TYPE_OF_FORCE_USED7,TYPE_OF_FORCE_USED8,TYPE_OF_FORCE_USED9,TYPE_OF_FORCE_USED10,NUMBER_EC_CYCLES,FORCE_EFFECTIVE
0,9/3/16,4:14:00 AM,37702,10810,Male,Black,5/7/14,2,No,No injuries noted or visible,...,,,,,,,,,,Yes
1,3/22/16,11:00:00 PM,33413,7706,Male,White,1/8/99,17,Yes,Sprain/Strain,...,,,,,,,,,,Yes
2,5/22/16,1:29:00 PM,34567,11014,Male,Black,5/20/15,1,No,No injuries noted or visible,...,,,,,,,,,,Yes
3,1/10/16,8:55:00 PM,31460,6692,Male,Black,7/29/91,24,No,No injuries noted or visible,...,,,,,,,,,,Yes
4,11/8/16,2:30:00 AM,"37879, 37898",9844,Male,White,10/4/09,7,No,No injuries noted or visible,...,,,,,,,,,,"No, Yes"


In [17]:
# List every column label of the loaded frame (for a DataFrame, .keys() returns .columns).
crime_df.columns

Index(['INCIDENT_DATE', 'INCIDENT_TIME', 'UOF_NUMBER', 'OFFICER_ID',
       'OFFICER_GENDER', 'OFFICER_RACE', 'OFFICER_HIRE_DATE',
       'OFFICER_YEARS_ON_FORCE', 'OFFICER_INJURY', 'OFFICER_INJURY_TYPE',
       'OFFICER_HOSPITALIZATION', 'SUBJECT_ID', 'SUBJECT_RACE',
       'SUBJECT_GENDER', 'SUBJECT_INJURY', 'SUBJECT_INJURY_TYPE',
       'SUBJECT_WAS_ARRESTED', 'SUBJECT_DESCRIPTION', 'SUBJECT_OFFENSE',
       'REPORTING_AREA', 'BEAT', 'SECTOR', 'DIVISION', 'LOCATION_DISTRICT',
       'STREET_NUMBER', 'STREET_NAME', 'STREET_DIRECTION', 'STREET_TYPE',
       'LOCATION_FULL_STREET_ADDRESS_OR_INTERSECTION', 'LOCATION_CITY',
       'LOCATION_STATE', 'LOCATION_LATITUDE', 'LOCATION_LONGITUDE',
       'INCIDENT_REASON', 'REASON_FOR_FORCE', 'TYPE_OF_FORCE_USED1',
       'TYPE_OF_FORCE_USED2', 'TYPE_OF_FORCE_USED3', 'TYPE_OF_FORCE_USED4',
       'TYPE_OF_FORCE_USED5', 'TYPE_OF_FORCE_USED6', 'TYPE_OF_FORCE_USED7',
       'TYPE_OF_FORCE_USED8', 'TYPE_OF_FORCE_USED9', 'TYPE_OF_FORCE_USED10',
    

In [10]:
# NOTE: disabled cell -- nothing below runs.
# The commented-out code built one timestamp per row by joining INCIDENT_DATE and
# INCIDENT_TIME with a space (using pd.to_datetime('nan') where either was missing),
# and then overwrote crime_df['INCIDENT_DATE'] with the combined series.
# date_with_time = np.empty(crime_df['INCIDENT_DATE'].size, dtype = 'object')
# for k in range(date_with_time.size):
#     if str(crime_df['INCIDENT_DATE'][k]) == 'nan' or str(crime_df['INCIDENT_TIME'][k]) == 'nan':
#         date_with_time[k] = pd.to_datetime('nan')
#         continue
#     datestring = str(crime_df['INCIDENT_DATE'][k]) + " " + str(crime_df['INCIDENT_TIME'][k])
#     date_with_time[k] = pd.to_datetime(datestring)

# date_with_time_series = pd.Series(data = date_with_time)
# date_with_time_series.head()

# crime_df['INCIDENT_DATE'] = date_with_time_series

In [114]:
# Keep only incidents that have a latitude, a longitude and a subject race recorded;
# a row missing any of the three cannot be placed and coloured on the map.
map_columns = ['LOCATION_LATITUDE', 'LOCATION_LONGITUDE', 'SUBJECT_RACE']
non_nan_mask = crime_df[map_columns].notnull().all(axis=1)
non_nan_locations_df = crime_df[non_nan_mask]

# Build the two parallel lists used by the map cell with column-wise operations
# instead of a per-row Python loop with chained label lookups.
# crime_coordinates is a list of [lat, lon] float pairs; crime_offender_race holds
# the SUBJECT_RACE value of the same row at the same position.
crime_coordinates = (
    non_nan_locations_df[['LOCATION_LATITUDE', 'LOCATION_LONGITUDE']]
    .astype(float)
    .values
    .tolist()
)
crime_offender_race = non_nan_locations_df['SUBJECT_RACE'].tolist()

# Number of incidents that will be drawn.
print(len(crime_coordinates))

2293


In [111]:
# Subject-race labels that actually occur among the mapped incidents.
unique_races = set(crime_offender_race)

# Fixed marker colour per known race label; the two lists are paired by position.
race_list = ['American Ind', 'White', 'Other', 'Black', 'Asian', 'Hispanic']
marker_colors = ["orange", "blue", "black", "red", "brown", "purple"]

color_mapping = dict(zip(race_list, marker_colors))

# A label present in the data but absent from race_list would raise KeyError when
# the map cell looks up its colour, so give any such label a neutral fallback.
color_mapping.update(
    {label: "gray" for label in unique_races if label not in color_mapping}
)

In [112]:
# Base map centred on the first plotted incident.
# NOTE(review): newer folium releases reportedly no longer ship the built-in
# 'Stamen Toner' tiles -- confirm the installed version still accepts this name.
m = folium.Map(location=crime_coordinates[0], tiles='Stamen Toner', zoom_start=11)

# Draw one unfilled circle (radius=100) per incident, outlined in the colour
# assigned to the subject's race. The two lists are parallel, so zip pairs each
# coordinate with the race recorded for the same incident.
for point, subject_race in zip(crime_coordinates, crime_offender_race):
    marker = folium.Circle(
        location=point,
        radius=100,
        color=color_mapping[subject_race],
        fill=False,
    )
    marker.add_to(m)

In [113]:
# Bare last expression: lets the notebook render the folium map as the cell output.
m

Submission components:

- The automation component. Automating resolution of shapefiles, census data, and environmental data. Building a flexible system/scheme for storing all of this data together (a single data frame?) and automatic visualization/uncertainty handling.
- An analysis of inequity in policing: a methodology for assessing racial biases, and indicators for those biases. For instance: the ratio of use of force to force effectiveness, subject injury, the sheer volume of use of force, incident reason + reason for force, officer injury vs. use of force (signals power imbalance), cross-racial considerations, officer history, and justification for arrest/encounter (subject offense, subject description). The idea should be to look for imbalances: there are combinations of variable values which make sense (proper, effective, and justified use of force, etc.), and significant deviations from those combinations signify inequities, either leniency or excessive cruelty.
- Automatic generation of an inequity map: for each of the metrics above (or some combination of them), identify areas + times of day + days of week where the inequity is significant. One idea would be to identify those times/areas where, say, the ratio of use of force to force effectiveness is much higher than average, even after accounting for changes in population/crime frequency. Stratify by district/police bureau and build a score for each bureau. This can be pitched as a solution: it helps predict the prevalence of biases so that care can be taken when dispatching officers to those areas at those specific times.
- For the above: provide the severity of the inequity. Also, automatically generate summary statistics for the city (some total inequity metric/baseline, crime hotspots/hot-times, statistical significance of disparity, etc.).
- An analysis of what contributes to bias. This would be some kind of joint analysis with the environmental variables. Can even lend itself to a predictive-type analysis; as the environment changes, how can we expect the policing biases there to change? Are there any case studies of areas where development caused significant changes to the environment, and consequently, also to the bias patterns there?
- Tracking officer behavior. Tracking officer behavior over time, seeing if certain behavioral patterns are indicative or reflective of bias (race targeting, use of force). See if dispatch pattern (time and place) is related to bias.

Would be great if we could do this for each city.