In [1]:
import pandas as pd
# import geopandas as gpd
import plotly.express as px
import plotly.graph_objects as go
import json
from sklearn import neighbors
from urllib.request import urlopen
%matplotlib inline

In [2]:
# Load the raw allegations export and preview its first rows.
allegations_csv = "data/allegations_202007271729.csv"
df_full = pd.read_csv(allegations_csv)
df_full.head()

Unnamed: 0,unique_mos_id,first_name,last_name,command_now,shield_no,complaint_id,month_received,year_received,month_closed,year_closed,...,mos_age_incident,complainant_ethnicity,complainant_gender,complainant_age_incident,fado_type,allegation,precinct,contact_reason,outcome_description,board_disposition
0,10004,Jonathan,Ruiz,078 PCT,8409,42835,7,2019,5,2020,...,32,Black,Female,38.0,Abuse of Authority,Failure to provide RTKA card,78.0,Report-domestic dispute,No arrest made or summons issued,Substantiated (Command Lvl Instructions)
1,10007,John,Sears,078 PCT,5952,24601,11,2011,8,2012,...,24,Black,Male,26.0,Discourtesy,Action,67.0,Moving violation,Moving violation summons issued,Substantiated (Charges)
2,10007,John,Sears,078 PCT,5952,24601,11,2011,8,2012,...,24,Black,Male,26.0,Offensive Language,Race,67.0,Moving violation,Moving violation summons issued,Substantiated (Charges)
3,10007,John,Sears,078 PCT,5952,26146,7,2012,9,2013,...,25,Black,Male,45.0,Abuse of Authority,Question,67.0,PD suspected C/V of violation/crime - street,No arrest made or summons issued,Substantiated (Charges)
4,10009,Noemi,Sierra,078 PCT,24058,40253,8,2018,2,2019,...,39,,,16.0,Force,Physical force,67.0,Report-dispute,Arrest - other violation/crime,Substantiated (Command Discipline A)


In [3]:
# Load the precinct address workbook; later cells expect it to provide
# 'Precinct Info', 'Phone', precinct, address and borough columns.
# NOTE(review): the saved output of this cell is a FileNotFoundError, so the
# workbook must exist at this path (relative to the notebook's working
# directory) before any of the df_add cells below can run.
df_add = pd.read_excel(io="data/precinct_addresses.xlsx")

FileNotFoundError: [Errno 2] No such file or directory: 'data/precinct_addresses.xlsx'

In [None]:
df_add.head()

In [None]:
# Discard the two columns that are not used anywhere later in the notebook.
df_add = df_add.drop(columns=['Precinct Info', 'Phone'])

In [None]:
# Lower-case every column label so the address table can be joined and
# indexed with the same lowercase names used in df_full.
df_add = df_add.rename(columns=str.lower)

In [None]:
# Check datatypes 
df_add.dtypes

In [None]:
df_full.precinct.dtypes

In [None]:
# Cast the join key to float64 so it has the same dtype as df_full.precinct.
df_add = df_add.astype({'precinct': 'float64'})

In [None]:
#Sanity Check 
df_add.precinct.dtypes

In [None]:
df_add.head()

In [None]:
df_full.head()

In [None]:
# Attach each allegation's precinct address. This is an inner join, so
# allegations whose precinct has no row in df_add are not carried over.
df_tmp = pd.merge(df_full, df_add, how='inner', on='precinct')
df_tmp.head()

In [None]:
# Spot-check the merge: show the merged rows for precinct 6 so the attached
# address columns can be inspected by eye.
df_tmp[df_tmp.precinct == 6.0]

In [None]:
# Spot-check the lookup table: show the address row for precinct 78 so it can
# be compared by eye with what the merge attached.
df_add[df_add.precinct == 78.0]

In [None]:
# Build a geocodable address of the form "<address>, <borough>, NY, USA".
# Rows missing either part are kept as NaN instead of becoming the literal
# text "nan, ..." (which is what astype(str) alone produces), so missing
# addresses remain detectable with `df_tmp.address.isna()`.
# NOTE: this overwrites `address`; re-running the cell appends the suffix a
# second time, so re-run the merge cell first if it must be repeated.
address_parts_present = df_tmp['address'].notna() & df_tmp['borough'].notna()
full_address = df_tmp['address'].astype(str) + ", " + df_tmp['borough'].astype(str) + ', NY, USA'
df_tmp['address'] = full_address.where(address_parts_present)


In [None]:
df_tmp.head()

## Use the Google Maps API to get the GPS coordinates of each precinct

In [None]:
from googlemaps import Client as GoogleMaps

In [None]:
# Read the Google Maps API key from a local file (kept out of the notebook)
# and build the client. The context manager closes the file handle, and
# strip() removes the trailing newline that text editors usually leave, which
# would otherwise become part of the key.
with open("./google_api_key") as key_file:
    key = key_file.read().strip()
gmaps = GoogleMaps(key=key)

In [None]:
# Add empty-string placeholder columns for longitude and latitude; they are
# filled in later by the geocoding loop.
for coord_column in ('long', 'lat'):
    df_tmp[coord_column] = ''

In [None]:
#Sanity Check 
df_tmp.head(2)

In [None]:
df_tmp.address.isna().sum()

In [None]:
# Exploratory call: geocode the address stored under index label 0 and print
# the raw response to see its structure. This issues one request to the
# geocoding service each time the cell runs.
print(gmaps.geocode(df_tmp['address'][0]))

In [None]:
# Repeat the lookup for index label 0 and pull the latitude out of the first
# match; the loop further down relies on this same
# result[0]['geometry']['location'] path.
result = gmaps.geocode(df_tmp['address'][0])
print(result[0]['geometry']['location']['lat'])

In [None]:
df_tmp['long'][0]

In [None]:
# Count allegation rows per precinct/address/year. 'long' and 'lat' are part
# of the key only so that those columns are carried into the grouped result.
group_keys = ['precinct', 'address', 'long', 'lat', 'year_received']
precinct_grp = df_tmp.groupby(group_keys).size()

In [None]:
precinct_grp.head()

In [None]:
# Turn the grouped Series back into a flat DataFrame: the group keys become
# ordinary columns and the counts land in a column labelled 0 (renamed to
# 'num' further down).
# NOTE: not idempotent — running this cell a second time resets the index of
# the already-flat frame and adds an extra 'index' column.
precinct_grp = precinct_grp.reset_index()
precinct_grp.head()

In [None]:
precinct_grp.shape

In [None]:
df_tmp.shape

In [None]:
# Keep only the complaints received in 1999. The subset is modified by later
# cells (index reset, coordinate fill-in, column rename), so take an explicit
# copy: that makes it an independent frame and avoids pandas'
# SettingWithCopyWarning when it is written to.
precinct_grp_tmp = precinct_grp[precinct_grp['year_received'] == 1999].copy()

In [None]:
precinct_grp_tmp.shape

In [None]:
# Renumber the filtered rows 0..n-1 (the previous index is kept as an 'index'
# column) so that the rows can be addressed by position-like labels.
precinct_grp_tmp = precinct_grp_tmp.reset_index()

In [None]:
%time 
for x in range(len(precinct_grp_tmp)):
    try:
        geocode_result = gmaps.geocode(precinct_grp_tmp['address'][x])
        precinct_grp_tmp['lat'][x] = geocode_result[0]['geometry']['location'] ['lat']
        precinct_grp_tmp['long'][x] = geocode_result[0]['geometry']['location']['lng']
    except IndexError:
        print("Address was wrong...")
    except Exception as e:
        print("Unexpected error occurred.", e )
        
precinct_grp_tmp.head()

In [None]:
# Give the count column (labelled 0 after reset_index on the grouped sizes)
# a readable name for plotting.
precinct_grp_tmp = precinct_grp_tmp.rename(columns={0: 'num'})

In [None]:
precinct_grp_tmp.head()

In [None]:
# Register the Mapbox token with plotly express. The file is read inside a
# context manager (so the handle is closed) and stripped of surrounding
# whitespace, matching how the token is read for the scatter_mapbox figure.
# NOTE(review): scatter_geo itself presumably does not need a Mapbox token —
# confirm whether this call is only here for the Mapbox figure below.
with open("./mapbox_token") as token_file:
    px.set_mapbox_access_token(token_file.read().strip())

# Plot one marker per precinct on a USA-scoped geo map, centred on the given
# coordinates, showing the complaint count ('num') as the hover title.
fig = px.scatter_geo(precinct_grp_tmp,
                    lat="lat",
                    lon="long",
                    locationmode='USA-states',
                    hover_name="num",
                    scope='usa',
                    center={'lat': 40.68230, 'lon': -73.98724},)
fig.show()

In [None]:
precinct_grp_tmp.head()

In [None]:
# Read the Mapbox access token from its local file, without surrounding
# whitespace.
with open("./mapbox_token", 'r') as token_file:
    mapbox_key = token_file.read().strip()

# Bubble map of the precincts: marker size is the number of complaints.
fig = px.scatter_mapbox(
    precinct_grp_tmp,
    lat="lat",
    lon="long",
    size='num',
    zoom=12,
)
fig.update_layout(mapbox={'style': "light", 'accesstoken': mapbox_key})
fig.show()