In [2]:
# Import Modules
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

# Render matplotlib figures inline in the notebook output.
%matplotlib inline
# Apply matplotlib's 'dark_background' style sheet to every figure drawn below.
plt.style.use('dark_background')

# Load the autoreload extension; mode 2 reloads all imported modules before
# each cell runs, so edits to local .py files are picked up without a restart.
%load_ext autoreload
%autoreload 2

# Display options for wide/long frames in this notebook.
# Full option names are used on purpose: abbreviated keys such as
# 'display.min_row' only resolve through pattern matching and stop working
# as soon as another option with a similar name exists.
pd.set_option('display.min_rows', 10)        # rows shown when a frame is truncated
pd.set_option('display.max_columns', 100)    # show up to 100 columns before eliding
pd.set_option('display.max_colwidth', 300)   # allow long text cells (e.g. unitdesc)

In [19]:
# Raw city exports (CSV).
crosswalks = pd.read_csv('../data/Marked_Crosswalks.csv')
streets = pd.read_csv('../data/Seattle_Streets.csv')

# Collision records saved as a pickle under data/processed.
# NOTE(review): read_pickle can execute arbitrary code while loading; only
# load pickles that this project created itself.
collisions = pd.read_pickle('../data/processed/cleaned_data.pkl')

In [22]:
# Normalise the crosswalk column labels to lowercase.
crosswalks.columns = crosswalks.columns.str.lower()

In [23]:
# List the crosswalk column labels (all lowercase in the output below).
crosswalks.columns

Index(['x', 'y', 'objectid', 'compkey', 'unitid', 'unitdesc', 'condition',
       'condition_assessment_date', 'ownership', 'current_status',
       'primarydistrictcd', 'secondarydistrictcd', 'overrideyn', 'comptype',
       'segkey', 'unittype', 'old_id', 'approach', 'marking_type', 'school',
       'midblock_crosswalk', 'install_date', 'color', 'comments', 'category',
       'ownership_date', 'current_status_date', 'maintained_by',
       'maintenance_agreement', 'curbspaceid', 'maint_district',
       'overridecomment', 'shape_lng', 'shape_lat', 'crosswalk_control',
       'material', 'onstreet', 'xstrlow', 'meas_from_low', 'disttolow',
       'stpoint', 'xstrhi', 'meas_from_hi', 'disttohi', 'offset', 'side',
       'measurement_origin', 'attachment_1', 'attachment_2', 'attachment_3',
       'attachment_4', 'attachment_5', 'attachment_6', 'attachment_7',
       'attachment_8', 'attachment_9', 'maintenance_group', 'num_attachments'],
      dtype='object')

In [93]:
# Preview the first rows of the crosswalk data.
# NOTE(review): the saved output below shows only the columns kept by the
# drop cell further down (execution count 93 > 78), so it was produced after
# that cell had run; a fresh top-to-bottom run will show every column here.
crosswalks.head()

Unnamed: 0,compkey,unitdesc,condition,primarydistrictcd,approach,marking_type,school,midblock_crosswalk,install_date,color,maint_district,shape_lng,shape_lat,crosswalk_control,onstreet,xstrlow,xstrhi
0,522399,ALASKAN WAY 0060 BLOCK C SIDE ( 35) 35 FT NW/O YESLER WAY,GOOD,DISTRICT7,NW,LADER,N,N,1970/01/01 00:00:00+00,WHT,CENTRAL,-122.336656,47.6018,,ALASKAN WAY,YESLER WAY,COLUMBIA ST
1,525064,S HOLLY ST 0440 BLOCK C SIDE ( 385) 20 FT W/O 45TH AVE S,GOOD,DISTRICT2,W,LADER,Y,N,1970/01/01 00:00:00+00,WHT,SOUTH,-122.276553,47.542459,,S HOLLY ST,44TH AVE S,45TH AVE S
2,523429,STONE WAY N 0390 BLOCK C SIDE ( 115) 115 FT N/O N 39TH ST,GOOD,DISTRICT4,S,LADER,N,N,1970/01/01 00:00:00+00,WHT,NORTH,-122.342486,47.654435,SIGNAL,STONE WAY N,N 39TH ST,BRIDGE WAY N
3,521447,35TH AVE SW 0390 BLOCK C SIDE ( 18) 18 FT S/O SW ANDOVER N ST,GOOD,DISTRICT1,S,LADER,Y,N,1970/01/01 00:00:00+00,WHT,SOUTH,-122.376151,47.568308,,35TH AVE SW,SW ANDOVER N ST,SW ANDOVER S ST
4,524665,NE 145TH ST 0200 BLOCK C SIDE ( 5) 5 FT E/O 20TH AVE NE,GOOD,DISTRICT5,N,LADER,N,N,1970/01/01 00:00:00+00,WHT,NORTH,-122.307324,47.73389,,NE 145TH ST,20TH AVE NE,22ND AVE NE


2035 unique crosswalk blocks appear in the collisions data (meaning 2035 of them have had at least one incident over the years).

In [96]:
# Restrict to crosswalks whose compkey is referenced by a collision record,
# then count non-null values per compkey.
in_collisions = crosswalks['compkey'].isin(collisions['CROSSWALKKEY'])
crosswalks.loc[in_collisions].groupby('compkey').count()

Unnamed: 0_level_0,unitdesc,condition,primarydistrictcd,approach,marking_type,school,midblock_crosswalk,install_date,color,maint_district,shape_lng,shape_lat,crosswalk_control,onstreet,xstrlow,xstrhi
compkey,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
520756,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1
520757,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1
520758,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1
520762,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1
520764,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
701110,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1
701280,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1
701306,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1
701700,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1


## Clean up crosswalks dataset

In [97]:
# Number of missing values in each crosswalk column.
missing_per_column = crosswalks.isna().sum()
missing_per_column

compkey               0
unitdesc              0
condition             0
primarydistrictcd     0
approach              0
marking_type          0
school                0
midblock_crosswalk    0
install_date          0
color                 0
maint_district        0
shape_lng             0
shape_lat             0
crosswalk_control     0
onstreet              0
xstrlow               0
xstrhi                0
dtype: int64

In [76]:
# Distribution of attachment counts, keeping missing values as their own bucket.
attachment_counts = crosswalks['num_attachments'].value_counts(dropna=False)
attachment_counts

0    5663
1      18
2       2
Name: num_attachments, dtype: int64

In [78]:
# Columns removed from the crosswalk frame before the collision analysis.
to_drop = ['x', 'y', 'objectid', 'unitid', 'condition_assessment_date', 'ownership', 'current_status',
           'secondarydistrictcd', 'overrideyn', 'comptype', 'segkey', 'unittype', 'old_id', 'comments',
           'category', 'ownership_date', 'current_status_date', 'maintained_by', 'maintenance_agreement',
           'curbspaceid', 'overridecomment', 'material', 'meas_from_low', 'disttolow', 'stpoint', 'meas_from_hi',
           'disttohi', 'offset', 'side', 'measurement_origin', 'attachment_1', 'attachment_2', 'attachment_3',
           'attachment_4', 'attachment_5', 'attachment_6', 'attachment_7', 'attachment_8', 'attachment_9',
           'maintenance_group', 'num_attachments']

# Reassign rather than mutate in place, and ignore labels that are already
# gone, so the cell can be re-run without raising KeyError.
# Trade-off: a misspelled label in `to_drop` is silently skipped.
crosswalks = crosswalks.drop(columns=to_drop, errors='ignore')

In [101]:
# Spot check: collision rows that reference crosswalk key 524995.
collisions.loc[collisions['CROSSWALKKEY'].eq(524995)]

Unnamed: 0_level_0,X,Y,OBJECTID,INCKEY,ADDRTYPE,INTKEY,LOCATION,PERSONCOUNT,PEDCOUNT,PEDCYLCOUNT,VEHCOUNT,INJURIES,SERIOUSINJURIES,FATALITIES,INCDATE,JUNCTIONTYPE,SDOT_COLCODE,UNDERINFL,ST_COLCODE,CROSSWALKKEY,SPEEDING_Y,INATTENTIONIND_Y,HITPARKEDCAR_Y,PEDROWNOTGRNT_Y,WEATHER_Adverse,WEATHER_Good,WEATHER_Unknown,ROADCOND_Adverse,ROADCOND_Dry,ROADCOND_Unknown,LIGHTCOND_Dark,LIGHTCOND_Daylight,LIGHTCOND_Unknown,LIGHTCOND_VeryDark,SEVERITYCODE_Injury,SEVERITYCODE_PropertyDamage,SEVERITYCODE_Unknown
Datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1
2008-04-25 15:16:00,-122.296881,47.55089,78808,92222,Intersection,33264.0,28TH AVE S AND S ORCAS ST,2,1,0,1,1,0,0,2008/04/25 00:00:00+00,At Intersection (intersection related),24.0,0,0,524995,1,0,0,0,0,1,0,0,1,0,0,1,0,0,1,0,0


In [102]:
# Spot check: the crosswalk row whose compkey is 524995, for comparison with
# the collision record that carries the same key.
crosswalks.loc[crosswalks['compkey'].eq(524995)]

Unnamed: 0,compkey,unitdesc,condition,primarydistrictcd,approach,marking_type,school,midblock_crosswalk,install_date,color,maint_district,shape_lng,shape_lat,crosswalk_control,onstreet,xstrlow,xstrhi
176,524995,S ORCAS ST 0280 BLOCK C SIDE ( 28) 28 FT E/O 28TH AVE S,GOOD,DISTRICT2,C,LADER,Y,Y,1970/01/01 00:00:00+00,WHT,SOUTH,-122.296768,47.550887,,S ORCAS ST,28TH AVE S,30TH AVE S


In [92]:
# Count crosswalks whose compkey appears in the collisions' CROSSWALKKEY column.
# Series.sum() is vectorised; the builtin sum() walks the boolean Series one
# element at a time in Python. int() keeps the displayed value a plain integer.
int(crosswalks['compkey'].isin(collisions['CROSSWALKKEY']).sum())

2035

In [99]:
# Crosswalks flagged as mid-block ('Y' in midblock_crosswalk).
crosswalks.loc[crosswalks['midblock_crosswalk'].eq('Y')]

Unnamed: 0,compkey,unitdesc,condition,primarydistrictcd,approach,marking_type,school,midblock_crosswalk,install_date,color,maint_district,shape_lng,shape_lat,crosswalk_control,onstreet,xstrlow,xstrhi
13,525229,WEST SEATTLE BRIDGE TRL 0010 BLOCK C SIDE (1783) 350 FT E/O SW SPOKANE NR ST,GOOD,DISTRICT1,C,LADER,N,Y,1970/01/01 00:00:00+00,WHT,SOUTH,-122.346808,47.571644,,WEST SEATTLE BRIDGE TRL,EAST MARGINAL WAY S,SW SPOKANE NR ST
34,574127,WESTLAKE EAST RDWY AVE N 0201 BLOCK E SIDE ( 87) 87 FT N/O WESTLAKE SHORE RDWY 3 AVE N,GOOD,DISTRICT7,C,LADER,N,Y,1970/01/01 00:00:00+00,WHT,CENTRAL,-122.340145,47.637312,,WESTLAKE EAST RDWY AVE N,WESTLAKE SHORE RDWY 3 AVE N,CROCKETT ST
74,628860,EAST MARGINAL WAY S 0480 BLOCK C SIDE ( 275) 5 FT N/O XW ALASKA,GOOD,DISTRICT2,N,LADER,N,Y,1970/01/01 00:00:00+00,WHT,SOUTH,-122.339441,47.559565,,EAST MARGINAL WAY S,S ALASKA ST,XW ALASKA
176,524995,S ORCAS ST 0280 BLOCK C SIDE ( 28) 28 FT E/O 28TH AVE S,GOOD,DISTRICT2,C,LADER,Y,Y,1970/01/01 00:00:00+00,WHT,SOUTH,-122.296768,47.550887,,S ORCAS ST,28TH AVE S,30TH AVE S
187,524290,N 79TH ST 0010 BLOCK C SIDE ( 225) 225 FT E/O 1ST AVE NW,GOOD,DISTRICT6,C,LADER,Y,Y,1970/01/01 00:00:00+00,WHT,NORTH,-122.357063,47.686208,,N 79TH ST,1ST AVE NW,GREENWOOD AVE N
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5550,707508,8TH AVE N 0050 BLOCK C SIDE ( 234) 234 FT N/O REPUBLICAN ST,GOOD,DISTRICT7,C,LADER,N,Y,2019/08/20 00:00:00+00,WHT,CENTRAL,-122.341050,47.623860,,8TH AVE N,REPUBLICAN ST,MERCER SR ST
5564,523046,SAND POINT WAY NE 0770 BLOCK C SIDE ( 501) 5 FT S/O NOAA DR,GOOD,DISTRICT4,E,LADER,N,Y,1970/01/01 00:00:00+00,WHT,NORTH,-122.264964,47.686088,STOPSIGN,SAND POINT WAY NE,NE 77TH ST,NOAA DR
5581,522993,RAYE ST 0040 BLOCK C SIDE ( 370) 370 FT E/O QUEEN ANNE DR,GOOD,DISTRICT7,C,LADER,N,Y,1970/01/01 00:00:00+00,WHT,CENTRAL,-122.348090,47.643094,,RAYE ST,QUEEN ANNE DR,AURORA AVE N
5588,520839,15TH AVE NE 0390 BLOCK C SIDE ( 367) 367 FT N/O NE PACIFIC ST,GOOD,DISTRICT4,C,LADER,N,Y,1970/01/01 00:00:00+00,WHT,NORTH,-122.312184,47.653938,,15TH AVE NE,NE PACIFIC ST,NE 40TH ST


## Merge with collisions

In [104]:
# Normalise the collision column labels to lowercase.
# NOTE(review): cells above index collisions with the uppercase 'CROSSWALKKEY'
# label, so they raise KeyError if re-run after this cell.
collisions.columns = collisions.columns.str.lower()

In [106]:
# Preview the first rows of the collision data (column labels now lowercase).
collisions.head()

Unnamed: 0_level_0,x,y,objectid,inckey,addrtype,intkey,location,personcount,pedcount,pedcylcount,vehcount,injuries,seriousinjuries,fatalities,incdate,junctiontype,sdot_colcode,underinfl,st_colcode,crosswalkkey,speeding_y,inattentionind_y,hitparkedcar_y,pedrownotgrnt_y,weather_adverse,weather_good,weather_unknown,roadcond_adverse,roadcond_dry,roadcond_unknown,lightcond_dark,lightcond_daylight,lightcond_unknown,lightcond_verydark,severitycode_injury,severitycode_propertydamage,severitycode_unknown
Datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1
2004-01-01,,,11627,25040,Block,,BATTERY ST TUNNEL NB BETWEEN ALASKAN WY VI NB AND AURORA AVE N,2,0,0,2,1,0,0,2004/01/01 00:00:00+00,Mid-Block (not related to intersection),14.0,0,13,0,0,0,0,0,1,0,0,0,1,0,1,0,0,0,1,0,0
2004-01-01,-122.31876,47.604359,12525,24635,Block,,E ALDER ST BETWEEN 10TH AVE AND 11TH AVE,2,0,0,2,0,0,0,2004/01/01 00:00:00+00,Mid-Block (not related to intersection),16.0,0,32,0,0,0,0,0,1,0,0,1,0,0,0,0,1,0,0,1,0
2004-01-01,-122.32078,47.614076,14172,26463,Intersection,29745.0,BROADWAY AND E PIKE ST,4,0,0,2,1,0,0,2004/01/01 00:00:00+00,At Intersection (intersection related),11.0,0,28,0,0,0,0,0,0,1,0,1,0,0,1,0,0,0,1,0,0
2004-01-01,-122.383351,47.529183,6538,19530,Block,,SW ROSE ST BETWEEN 39TH AVE SW AND 41ST AVE SW,2,0,0,2,0,0,0,2004/01/01 00:00:00+00,Mid-Block (not related to intersection),14.0,0,32,0,0,0,0,0,0,1,0,1,0,0,1,0,0,0,0,1,0
2004-01-01,-122.329974,47.708637,9665,22520,Block,,N NORTHGATE WAY BETWEEN CORLISS AVE N AND 1ST AVE NE,1,0,0,1,0,0,0,2004/01/01 00:00:00+00,Mid-Block (not related to intersection),28.0,0,50,0,0,0,0,0,1,0,0,1,0,0,1,0,0,0,0,1,0


In [105]:
# Collision columns still needed (presumably for the merge with crosswalks; TODO confirm):
# 'addrtype', 'intkey', 'location', 'pedcount', 'pedcylcount', 'vehcount', 'injuries', 'seriousinjuries', 'fatalities'

# List every collision column label to check the names above against.
collisions.columns

Index(['x', 'y', 'objectid', 'inckey', 'addrtype', 'intkey', 'location',
       'personcount', 'pedcount', 'pedcylcount', 'vehcount', 'injuries',
       'seriousinjuries', 'fatalities', 'incdate', 'junctiontype',
       'sdot_colcode', 'underinfl', 'st_colcode', 'crosswalkkey', 'speeding_y',
       'inattentionind_y', 'hitparkedcar_y', 'pedrownotgrnt_y',
       'weather_adverse', 'weather_good', 'weather_unknown',
       'roadcond_adverse', 'roadcond_dry', 'roadcond_unknown',
       'lightcond_dark', 'lightcond_daylight', 'lightcond_unknown',
       'lightcond_verydark', 'severitycode_injury',
       'severitycode_propertydamage', 'severitycode_unknown'],
      dtype='object')