### Washington State Crash Event Analysis
#### --- by 

In [2]:
import pandas as pd
import numpy as np
import regex as re

import requests
import asyncio
import json as js

import time

import os

# Cap rendered DataFrame/Series output at 6 rows so displayed tables stay compact.
pd.options.display.max_rows = 6

#### Pre-defined functions for analysis

- functions for reverse geocoding
 The following group of functions helps us obtain the zip code for a geographic location given as longitude and latitude. They rely on the reverse-geocoding endpoint of the Google Maps Geocoding API.<br/>
 Because reverse geocoding is time-consuming, we ran it once and stored the results returned by the API in local storage.<br/>
 Note that x in the dataset stands for longitude and y for latitude.

In [72]:
# Google Maps Geocoding API key. It is read from the GOOGLE_MAPS_API_KEY
# environment variable so that the credential is never stored in the notebook.
# The empty-string fallback keeps the notebook runnable without a key (the
# geocoding helpers are only needed to regenerate the cached zipcodes).
# NOTE(review): the key that used to be committed on this line should be
# revoked/rotated, since it is part of the file history.
google_map_key = os.environ.get('GOOGLE_MAPS_API_KEY', '')
def api_get_rev_geocode(lat, long, timeout = 10) -> dict:
    """Query the Google Maps reverse-geocoding endpoint for one coordinate pair.

    The request is restricted to ``result_type=postal_code`` and authenticated
    with the module-level ``google_map_key``.

    NOTE(review): the URL is built as ``latlng={long},{lat}``, i.e. the value
    passed as ``long`` is sent first. The Geocoding API reads ``latlng`` as
    "latitude,longitude", so a correct request needs the longitude in ``lat``
    and the latitude in ``long`` (which is how the commented usage examples in
    this notebook call it, passing the dataset's ``x`` first). The order is
    kept as-is so existing calls keep working - confirm before renaming.

    Parameters
    ----------
    lat, long :
        Coordinate components (see the note above regarding their order).
    timeout : float, optional
        Seconds to wait for the HTTP response. Without a timeout the call
        could block indefinitely on a stalled connection.

    Returns
    -------
    dict
        The decoded JSON body of the response.
    """
    url = 'https://maps.googleapis.com/maps/api/geocode/json?latlng={x},{y}&result_type=postal_code&key={key}'.format(x= long, y = lat, key = google_map_key)
    res = requests.get(url, timeout = timeout)
    return js.loads(res.content)


def api_get_rev_geocode_valid(resp: dict) -> bool:
    """Tell whether a reverse-geocoding response can be used.

    A response is usable when it carries no ``error_message`` and its
    ``results`` entry holds at least one item. A response without a
    ``results`` key (or with an empty/``None`` one) is reported as invalid
    instead of raising ``KeyError``.

    Parameters
    ----------
    resp : dict
        Decoded JSON body as returned by ``api_get_rev_geocode``.

    Returns
    -------
    bool
        ``True`` if a zipcode can be read from ``resp``, ``False`` otherwise.
    """
    if 'error_message' in resp:
        return False
    return bool(resp.get('results'))


def coord_to_zipcode(lat, long) -> str:
    """Reverse-geocode a single coordinate pair and return its zipcode.

    The arguments are forwarded unchanged to ``api_get_rev_geocode``. The
    zipcode is taken from the ``short_name`` of the first address component
    of the first result. The response is not validated here, so an empty or
    erroneous response raises on the lookup (e.g. ``IndexError``/``KeyError``).
    """
    first_result = api_get_rev_geocode(lat, long)['results'][0]
    return first_result['address_components'][0]['short_name']


def coord_to_zipcode_vectorized(lat_arr: pd.Series | list, long_arr: pd.Series | list, time_step = 0.3):
    res = []
    for lat, long in zip(lat_arr, long_arr):
        resp  = api_get_rev_geocode(lat, long)
        if api_get_rev_geocode_valid(resp):
            zipcode = coord_to_zipcode(lat, long)
            res.append(zipcode)
        else:
            res.append(None)
        time.sleep(time_step)
    return res


#get_zipcode_by_coord(-121, 47)

#resp = api_get_rev_geocode(-121.0397611, 47.18249167)  #  example for invalid request
#resp
# resp2 = get_reverse_geocoding(-121, 47)
# resp2['error_message']
#t_t = coord_to_zipcode_vectorized(df_data.x[:10], df_data.y[:10])

In [3]:
# Data directory: <parent of the current working directory>/data/. The value
# keeps its trailing slash so that file names can be appended directly.
# NOTE(review): `dir` shadows the Python builtin of the same name; it is kept
# because other cells may build paths from it.
dir = os.path.abspath(os.path.dirname(os.getcwd())) + '/data/'

# low_memory=False makes pandas infer each column's dtype from the whole file
# instead of chunk by chunk, which avoids the mixed-dtype warning this wide
# table otherwise triggers on load.
df_data = pd.read_csv(dir + 'data_with_zipcode.csv', low_memory=False)
# zipCode is cast to str: missing values become the literal string 'nan',
# which is what the zipcode filter further down compares against.
# NOTE(review): if the column is read as float, the strings look like
# '98201.0' rather than '98201' - confirm this is what downstream code expects.
df_data.zipCode = df_data.zipCode.astype(str)

# Lookup table describing the crash-type codes.
df_crash_type = pd.read_csv(dir + 'crash_type.csv')

df_data.head()

  df_data = pd.read_csv(dir + 'data_with_zipcode.csv')


Unnamed: 0.1,Unnamed: 0,year,case,par,repjur,crash_dt,crash_tm,accday,accmon,holiday,...,lab,isedtcase,crf1,crf2,crf3,CoRoadName,CoMP,IntCoRoadName,IntCoMP,zipCode
0,0,2017,1,E628946,2.0,01/01/2017,2:12,1,1,1.0,...,,,,,,,,,,98201.0
1,1,2017,2,E627989,26.0,01/02/2017,17:14,2,1,1.0,...,,,,,,,,,,98359.0
2,2,2017,4,3747633,263.0,01/01/2017,18:47,1,1,1.0,...,,,,,,,,,,98103.0
3,3,2017,5,E628691,4.0,01/01/2017,3:50,1,1,1.0,...,,,,,,,,,,99224.0
4,4,2017,6,3746306,263.0,01/05/2017,9:53,5,1,0.0,...,,,,,,,,,,98104.0


In [74]:
# Size of the loaded crash table as (rows, columns), before any filtering.
df_data.shape

(4132, 307)

In [75]:
# Discard the crash records that have no zipcode. zipCode holds strings at
# this point, so a missing value appears as the literal text 'nan'.

has_no_zipcode = df_data.zipCode == 'nan'
df_data = df_data[~has_no_zipcode]
df_data.shape

(4107, 307)

#### Analysis of Crash Type and Behavioral Factors

##### Analysis of Crash Type

In [76]:
# Show the crash-type lookup table; each `info` entry reads "<code>=<description>".
df_crash_type

Unnamed: 0,info
0,0=No Impact
1,1=Drive Off Road
2,2=Control/Traction Loss
...,...
90,93=Other Vehicle or Object
91,98=Other Crash Type
92,99=Unknown Crash Type


In [77]:
# Map each crash-type code (kept as a string, e.g. '1') to its description
# (e.g. 'Drive Off Road') by splitting every "<code>=<description>" entry of
# the `info` column on '='.
crash_type_mapper = {
    parts[0].strip(): parts[1].strip()
    for parts in (
        entry.strip().split('=') for entry in df_crash_type['info']
    )
}

##### Analysis of Behavioral Factors

In [80]:
# Number of missing values in each column from race1 through race5.
df_data.loc[:, 'race1':'race5'].isnull().sum()

race1    1556
race2    4051
race3    4100
race4    4107
race5    4107
dtype: int64

- The following columns are taken to indicate whether a person involved in the crash engaged in risky behavior.
- - restraintmisuse: valued 1 when there was a restraint misuse
- - helmetmisuse: valued 1 when there was a helmet misuse
- - 