In [1]:
import geopandas as gpd
import pandas as pd
from shapely.geometry import Point, LineString, Polygon
import matplotlib
import os
from pathlib import Path

In [None]:
# ###########################
# import data
# ###########################

In [2]:
# import crash data
# The GeoJSON is fetched from the open-data portal every time this cell runs.
CRASHES_URL = 'https://opendata.arcgis.com/datasets/70392a096a8e431381f1f692aaa06afd_24.geojson'
crashes_raw = gpd.read_file(CRASHES_URL)

In [3]:
# import crash details table
# The GeoJSON is fetched from the open-data portal every time this cell runs.
CRASH_DETAILS_URL = 'https://opendata.arcgis.com/datasets/70248b73c20f46b0a5ee895fc91d6222_25.geojson'
crash_details = gpd.read_file(CRASH_DETAILS_URL)

In [12]:
# import anc data
# The GeoJSON is fetched from the open-data portal every time this cell runs.
ANCS_URL = 'https://opendata.arcgis.com/datasets/fcfbf29074e549d8aff9b9c708179291_1.geojson'
ancs = gpd.read_file(ANCS_URL)

In [None]:
#######################################
# roll up crashes to crash id level
#######################################

In [4]:
# first create variables that will be aggregated
#
# Every column added here is a 0/1 indicator on a row of crash_details, so
# that summing it per CRIMEID yields a count for that crash. The indicators
# are built from vectorized boolean masks instead of row-wise
# DataFrame.apply(axis=1): the values are the same, but each column costs a
# single pass over the table rather than one Python call per row.
person_type = crash_details['PERSONTYPE']
age = crash_details['AGE']
vehicle_type = crash_details['INVEHICLETYPE']

# Building blocks shared by several indicators. Comparisons against missing
# values evaluate to False, so rows with a missing AGE / flag get 0.
is_driver = person_type == 'Driver'
is_pedestrian = person_type == 'Pedestrian'
is_bicyclist = person_type == 'Bicyclist'
is_vehicle_occupant = is_driver | (person_type == 'Passenger')
is_injured = (crash_details['MAJORINJURY'] == 'Y') | (crash_details['MINORINJURY'] == 'Y')
is_ticketed = crash_details['TICKETISSUED'] == 'Y'

# Insertion order below fixes the order in which the columns are appended.
indicator_masks = {
    # driver over 80/driver under 25
    # NOTE: both bounds are inclusive (>= 80, <= 25), so "UNDER_25" includes age 25.
    'DRIVERS_OVER_80': is_driver & (age >= 80),
    'DRIVERS_UNDER_25': is_driver & (age <= 25),
    # ped under 12/ped over 70 (inclusive: >= 70, <= 12)
    'PEDS_OVER_70': is_pedestrian & (age >= 70),
    'PEDS_UNDER_12': is_pedestrian & (age <= 12),
    # biker under 12/biker over 70 (inclusive: >= 70, <= 12)
    'BIKERS_OVER_70': is_bicyclist & (age >= 70),
    'BIKERS_UNDER_12': is_bicyclist & (age <= 12),
    # out of state driver
    # NOTE(review): any plate state other than 'DC' counts, including a
    # missing one -- confirm that is the intended treatment of unknown plates.
    'OOS_VEHICLES': is_driver & (crash_details['LICENSEPLATESTATE'] != 'DC'),
    # vehicle type (counted once per driver row)
    'CARS': is_driver & (vehicle_type == 'Passenger Car/automobile'),
    'SUVS_OR_TRUCKS': is_driver & vehicle_type.isin(
        ['Suv (sport Utility Vehicle)', 'Pickup Truck']
    ),
    # injuries (major or minor injury flag set to 'Y')
    'PED_INJURIES': is_pedestrian & is_injured,
    'BICYCLE_INJURIES': is_bicyclist & is_injured,
    'VEHICLE_INJURIES': is_vehicle_occupant & is_injured,
    # tickets issued?
    'DRIVER_TICKETS': is_driver & is_ticketed,
    'BICYCLE_TICKETS': is_bicyclist & is_ticketed,
    'PED_TICKETS': is_pedestrian & is_ticketed,
    # speeding?
    'DRIVERS_SPEEDING': is_driver & (crash_details['SPEEDING'] == 'Y'),
}
for column_name, mask in indicator_masks.items():
    crash_details[column_name] = mask.astype('int64')

# total injuries
crash_details['TOTAL_INJURIES'] = (
    crash_details['VEHICLE_INJURIES']
    + crash_details['BICYCLE_INJURIES']
    + crash_details['PED_INJURIES']
)

In [5]:
# Collapse the person-level rows to one row per crash (CRIMEID).
# Indicator columns are summed into per-crash counts; the descriptive columns
# are kept as a list holding one entry per person-level row of the crash.
# List order here determines the column order of crash_details_agg.
summed_columns = [
    'PED_INJURIES', 'BICYCLE_INJURIES', 'VEHICLE_INJURIES',
    'TOTAL_INJURIES', 'OOS_VEHICLES', 'DRIVERS_UNDER_25',
    'DRIVERS_OVER_80', 'PEDS_OVER_70', 'PEDS_UNDER_12',
    'BIKERS_OVER_70', 'BIKERS_UNDER_12',
    'CARS', 'SUVS_OR_TRUCKS', 'DRIVER_TICKETS',
    'BICYCLE_TICKETS', 'PED_TICKETS', 'DRIVERS_SPEEDING',
]
listed_columns = ['PERSONTYPE', 'INVEHICLETYPE', 'LICENSEPLATESTATE']

# 'OOS_VEHICLES' previously appeared twice in the dict literal; the duplicate
# key was silently collapsed by Python, so listing it once changes nothing.
aggregation_spec = {column: 'sum' for column in summed_columns}
aggregation_spec.update({column: list for column in listed_columns})

crash_details_agg = (
    crash_details
    .groupby(['CRIMEID'])
    .agg(aggregation_spec)
    .reset_index()
)

In [None]:
#######################################
# join crashes to crash detail
#######################################

In [None]:
# Preview the first five aggregated rows (one row per CRIMEID).
crash_details_agg.head(5)

In [7]:
# first add year to crashes
# YEAR is the first four characters of FROMDATE, kept as text (FROMDATE is
# sliced as a string, so it is assumed to start with the four-digit year).
# The .str accessor leaves a missing FROMDATE as a missing YEAR instead of
# raising TypeError, and avoids a Python call per row.
# NOTE(review): a later cell recomputes YEAR from REPORTDATE on the joined
# frame -- confirm which date field is meant to define the year.
crashes_raw['YEAR'] = crashes_raw['FROMDATE'].str[:4]

In [8]:
# Attach the per-crash aggregates to the crash records. The left join keeps
# every row of crashes_raw, including crashes with no matching detail rows.
crashes_w_detail = crashes_raw.merge(
    crash_details_agg,
    on='CRIMEID',
    how='left',
)

In [9]:
# Row count after the merge.
crashes_w_detail.shape[0]

242505

In [None]:
# List the columns available on the merged crash frame.
crashes_w_detail.columns

In [13]:
# join crashes to data natively at ANC level and add year
# Inner spatial join: keeps only crashes whose geometry lies within an ANC
# geometry, and attaches that ANC's attributes (e.g. NAME) to the crash row.
# `predicate=` is the current name of the keyword formerly called `op=`, which
# geopandas deprecated in 0.10 and later removed; on geopandas < 0.10 pass
# op='within' instead.
anc_crashes = gpd.sjoin(crashes_w_detail, ancs, how="inner", predicate='within')
# YEAR is taken from the first four characters of REPORTDATE (assumed to be a
# string starting with the four-digit year). This replaces the YEAR value
# carried over from the crash table, which was derived from FROMDATE.
# The .str accessor leaves a missing REPORTDATE as a missing YEAR instead of
# raising TypeError.
anc_crashes['YEAR'] = anc_crashes['REPORTDATE'].str[:4]
# Number of crashes thus far in 2020 by ANC
anc_crashes.groupby(['YEAR', 'NAME']).size().to_frame().loc['2020']

Unnamed: 0_level_0,0
NAME,Unnamed: 1_level_1
ANC 1A,484
ANC 1B,608
ANC 1C,183
ANC 1D,86
ANC 2A,394
ANC 2B,478
ANC 2C,530
ANC 2D,37
ANC 2E,249
ANC 2F,304
