In [1]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline

pd.options.display.max_rows = 500
pd.options.display.max_columns = 100

import os
import warnings
warnings.filterwarnings("ignore") 

from datetime import datetime
from IPython.display import display, Markdown

from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

In [57]:
collisions_df = pd.read_csv('../data/Collisions.csv',
                            parse_dates={'Datetime': ['INCDTTM']}, 
                            infer_datetime_format=True)
streets_df = pd.read_csv('../data/Seattle_Streets.csv')

In [128]:
#These important names have changed over the years
def name_check(lst):
    if 'STNAME' in lst:
        return 'STNAME'
    elif 'FIRST_STNAME_ORD' in lst:
        return 'FIRST_STNAME_ORD'
    else:
        return 'STNAME_ORD'

def metric_check(lst):
    if 'AAWDT' in lst:
        return 'AAWDT'
    elif 'COUNTAAWDT' in lst:
        return 'COUNTAAWDT'
    else:
        return 'AWDT'

In [134]:
folder = '../data/counts/'
traffic_df = pd.read_csv('../data/counts/2007_Traffic_Flow_Counts.csv')[['STNAME', 'YEAR', 'DOWNTOWN','AAWDT']]
missing = []
for f in sorted(os.listdir(folder))[1:]:
    file = folder + f
    d = pd.read_csv(file)

    name_column, metric_column = name_check(d.columns), metric_check(d.columns)
    
    d.rename(columns={name_column:'STNAME', metric_column:'AAWDT'}, inplace=True)
    
    if 'YEAR' not in d.columns:
        s = np.full((d.shape[0], 1), int(f[:4]), dtype=int)
        d['YEAR'] = s
    
    d = d[['STNAME', 'YEAR', 'DOWNTOWN', 'AAWDT']]
    #we'll only take the columns we can consistently use for now
    traffic_df = traffic_df[traffic_df['STNAME'].isin(d['STNAME'])]
    d = d[d['STNAME'].isin(traffic_df['STNAME'])] 

    traffic_df = pd.concat([traffic_df, d], sort=False)

In [63]:
block_mask = collisions_df['ADDRTYPE'] == 'Block'

collision_streets = pd.DataFrame(collisions_df[block_mask]['LOCATION'].apply(lambda string: string.split(' BETWEEN ')[0]))

collision_streets['datetime'] = collisions_df[block_mask]['Datetime']

In [97]:
collisions_by_year = collision_streets.groupby(pd.Grouper(key='datetime', freq='Y'))['LOCATION'].value_counts().unstack(level=0).fillna(0)

for column in collisions_by_year.columns:
    collisions_by_year.rename(columns={column:column.year}, inplace=True)
    
collisions_by_year.drop(columns=[2004,2005,2006,2019,2020], inplace=True)

In [107]:
streets_df = streets_df[streets_df['STATUS'] == 'INSVC']
sg = streets_df.groupby('STNAME_ORD')
d = {}
d['arterial_class'] = sg['ARTCLASS'].median().fillna(0).astype(int)
d['speed_limit'] =  sg['SPEEDLIMIT'].median().fillna(0).astype(int)
d['slope_percentage'] = sg['SLOPE_PCT'].median()
d['transit_class'] = sg['TRANCLASS'].median().astype(int)
d['pavement_condition'] = sg['PVMTCONDINDX1'].median()
columns = ['ARTCLASS', 'SPEEDLIMIT', 'SLOPE_PCT', 'TRANCLASS', 'PVMTCONDINDX']
clean_streets = pd.DataFrame(data=d)
clean_streets = clean_streets[clean_streets.index.isin(collisions_by_year.index)]

In [123]:
model_df = pd.DataFrame(columns=['name', 'arterial_class', 'speed_limit', 'slope_percentage', 'transit_class',
                                 'pavement_condition'])
for i, st in enumerate(clean_streets.index):
    row = ['name']
    row.extend(model_df.loc[i])
    clean_streets.loc[st]

arterial_class         0.0
speed_limit           20.0
slope_percentage       1.0
transit_class          0.0
pavement_condition    47.0
Name: 10TH AVE, dtype: float64

In [140]:
traffic_df

Unnamed: 0,STNAME,YEAR,DOWNTOWN,AAWDT
0,RENTON AVE S,2007.0,N,7200.0
1,AIRPORT WAY S,2007.0,N,13100.0
2,N NORTHGATE WAY,2007.0,N,28300.0
3,N 65TH ST,2007.0,N,8200.0
4,2ND AVE,2007.0,Y,12400.0
...,...,...,...,...
1896,ELLIOTT AV ON RP,2018.0,Y,16533.0
1900,W EMERSON PL,2018.0,N,19000.0
1902,15TH AVE NW,2018.0,N,9970.0
1903,W DRAVUS ST,2018.0,N,15000.0


In [142]:
collisions_by_year

datetime,2007,2008,2009,2010,2011,2012,2013,2014,2015,2016,2017,2018
LOCATION,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
10TH AVE,4.0,13.0,7.0,7.0,6.0,9.0,10.0,14.0,14.0,8.0,11.0,18.0
10TH AVE E,30.0,23.0,23.0,24.0,12.0,22.0,21.0,23.0,21.0,32.0,15.0,14.0
10TH AVE NE,2.0,1.0,0.0,0.0,1.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0
10TH AVE NW,0.0,2.0,0.0,1.0,0.0,2.0,4.0,2.0,0.0,0.0,0.0,1.0
10TH AVE S,4.0,3.0,1.0,6.0,3.0,3.0,4.0,9.0,6.0,8.0,8.0,7.0
...,...,...,...,...,...,...,...,...,...,...,...,...
YALE PL E,0.0,1.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0
YALE TER E,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
YESLER WAY,15.0,18.0,19.0,15.0,10.0,15.0,10.0,14.0,6.0,8.0,2.0,15.0
YORK RD S,0.0,1.0,0.0,0.0,1.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0


In [143]:
clean_streets

Unnamed: 0_level_0,arterial_class,speed_limit,slope_percentage,transit_class,pavement_condition
STNAME_ORD,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
10TH AVE,0,20,1.0,0,47.0
10TH AVE E,2,25,1.0,2,17.0
10TH AVE NE,0,20,2.5,0,0.0
10TH AVE NW,0,20,2.0,0,68.0
10TH AVE S,0,20,1.0,0,81.0
...,...,...,...,...,...
YALE PL E,0,20,2.0,0,100.0
YALE TER E,0,20,2.5,0,35.0
YESLER WAY,2,25,2.5,3,50.5
YORK RD S,0,20,3.5,0,86.0


In [146]:
row=['name']
row.extend(clean_streets.loc['10TH AVE'])

In [147]:
row

['name', 0.0, 20.0, 1.0, 0.0, 47.0]