# Code - Section "Who"

In [None]:
from lxml import etree
import pandas as pd
import numpy as np
import matplotlib as mpl
import matplotlib.pyplot as plt
import matplotlib.cm as cm
from matplotlib.colors import Normalize
from matplotlib.collections import PatchCollection
from matplotlib.colorbar import ColorbarBase
from mpl_toolkits.basemap import Basemap
from shapely.geometry import Point, Polygon, MultiPoint, MultiPolygon
from shapely.prepared import prep
from descartes import PolygonPatch
import mapclassify as mc
import fiona
from itertools import chain

In [37]:
#Read in the traffic data
df_police = pd.read_csv("ca_san_francisco_2020_04_01.csv", low_memory=False)

In [38]:
#Create a subset df describing the location of each traffic stop
locations = pd.DataFrame({'lat': df_police['lat'], 'lng': df_police['lng']})

In [39]:
#Get rid of those pesky NaN's
locations.dropna(inplace=True)

In [40]:
#Open the shapefile taken from San Francisco's city website:
    #https://data.sfgov.org/Geographic-Locations-and-Boundaries/SF-Find-Neighborhoods/pty2-tcw4
#Find the boundaries of the map for uploading the shapefile
shp = fiona.open("shapefile/SFFind_Neighborhoods.shp")
bds = shp.bounds
shp.close()

In [None]:
m = Basemap(
    projection='tmerc',
    lon_0=-122.44229,
    lat_0=37.7756435,
    ellps = 'WGS84',
    llcrnrlon=bds[0],
    llcrnrlat=bds[1],
    urcrnrlon=bds[2],
    urcrnrlat=bds[3],
    lat_ts=0,
    resolution='i',
    suppress_ticks=True
)

m.readshapefile('shapefile/SFFind_Neighborhoods', 'SF',
               color = 'none',
               zorder = 2)

In [7]:
# create d_map dataframe
df_map = pd.DataFrame({
    #creates the polygon representing each neighborhood
    'poly': [Polygon(xy) for xy in m.SF],
    #corresponding neighborhood
    'ward_name': [ward['name'] for ward in m.SF_info]})
    #
df_map['area_m'] = df_map['poly'].map(lambda x: x.area)
df_map['area_km'] = df_map['area_m'] / 100000

In [8]:
# Create Point objects in map coordinates from dataframe lon and lat values
#takes latitude/longitude data and transforms them into map coordinates
map_points = pd.Series(
    [Point(m(mapped_x, mapped_y)) for mapped_x, mapped_y in zip(locations['lng'], locations['lat'])])
#create a single object that represents all of the points
stop_points = MultiPoint(list(map_points.values))

#takes all the pre-created territory polygons
    #Places them in a "MultiPolygon" object
        #prep creates a prepared geometric object that has all neighborhoods
neighborhoods = [prep(Polygon(i)) for i in list(df_map['poly'].values)]

#Turn the above information into a dictionary 
neighbdict = dict(zip(df_map['ward_name'], neighborhoods))

Did we make it here?


In [9]:
#Map all of the data to a neighborhood using
#WARNING DO NOT RUN TAKES FOREVER!! Updated csv was created to only have to run this once
df_police['neighborhood'] = None

cats = df_police.copy()
for i in range(len(df_map['ward_name'])):
    neighb = df_map['ward_name'][i]
    for j in range(len(stop_points)):
        if neighborhoods[i].contains(stop_points[j]):
            cats['neighborhood'][j] = neighb

df_props = cats.copy()

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  cats['neighborhood'][j] = neighb


In [26]:
#Doesn't fit on deepnote, but is available on request
df_props = df_props.groupby(['neighborhood', 'subject_race']).count()
cats.to_csv('Neighborhoods.csv')

In [None]:
#Aggregrate and organize data into proportion of searches of each neighborhood based on demographics
    #Returns dictionary of tuples:  (x, y, z) = (prop white, prop minority demographic, neighborhood)
rtpoints = {'black': [], 'asian/pacific islander':[], 'hispanic':[]}

for hood in df_map['ward_name']:
    for peeps in rtpoints.keys():
        try:
            minority = df_props.loc[peeps, hood]['district']
            white = df_props.loc['white', hood]['district']
            whiteprop = white[1]/sum(white)
            minprop = minority[1]/sum(minority)
            rtpoints[peeps].append((whiteprop, minprop, hood))
        except:
            print("Oops!")
            continue

In [None]:
#Make the pretty subplots!
fig, ax = plt.subplots(ncols = 3, figsize = (9, 3))
plt.subplots_adjust(wspace=0.2)
plt.suptitle('Bias Threshold of Proportion of Non-White Stops vs. White Stops', ha = 'center', fontsize = 14, y=1.2)

colours = {'asian/pacific islander':'#bae4bc', 'black': '#7bccc4', 'hispanic':'#2b8cbe'}
colors = ['#7bccc4', '#bae4bc', '#2b8cbe']
peoples = ['black', 'asian/pacific islander', 'hispanic']

#Generate the three plot skeletons
for i in range(0, 3):
    ax[i].spines['top'].set_visible(False) 
    ax[i].spines['right'].set_visible(False) 
    ax[i].spines['left'].set_linewidth(0.4) 
    ax[i].spines['bottom'].set_linewidth(.4)
    ax[i].set_yticks([0, 0.1, 0.2])
    ax[i].set_yticklabels(['0', '0.1', '0.2'], fontsize = 10)
    ax[i].set_xticks([0, 0.1, 0.2])
    ax[i].set_xticklabels(['0', '0.1', '0.2'], fontsize = 10)
    ax[i].set_xlim([0,0.23])
    ax[i].set_ylim([0,0.23])

#Give each a title
for i in range(len(colors)):
    ax[i].set_title(f'%s' % peoples[i].title(), loc = 'left', color = colors[i], weight='bold')


#print('data for black threshold')
#plot the different thresholds
for (i, j, k) in rtpoints['black']:
    ax[0].scatter(i, j, color = colours['black'], alpha = 0.7, edgecolors= "gray")


print('data for asian/pacific islander threshold')
for (i, j, k) in rtpoints['asian/pacific islander']:
    ax[1].scatter(i, j, color = colours['asian/pacific islander'], alpha = 0.7, edgecolors= "gray")
ax[1].set_xlabel('White Search Rate')
xval = .13
yval = .15
threshold = ax[1].annotate("Bias threshold", xy=[xval,yval], xytext=[xval,yval], color = 'gray', fontsize=8)
threshold.set_rotation(47.4)

print('data for hispanic threshold')
for (i, j, k) in rtpoints['hispanic']:
    ax[2].scatter(i, j, color = colours['hispanic'], alpha = 0.3, edgecolors= "gray")


plt.suptitle('Proportion of Non-White searches vs. White searches', ha = 'center', fontsize = 14, y=1.15, x=.448, weight = 'bold')
plt.title('Zoned by Neighborhood', x=-1.92, y=1.13)
    
[line.plot([0, 0.23], [0, 0.23], '-.', linewidth=0.5, color='gray') for line in ax]
plt.savefig('Threshold.png', bbox_inches="tight")
plt.show()

# end of the code
