In [237]:
#Import libraries
import pandas as pd
import geopandas as gpd
import shapely
from shapely.geometry import Point
import numpy as np

In [2]:
import urllib.request as request

**Load in traffic data**

In [3]:
#List of traffic download URLs for 2007 - 2018
url_list = ['https://opendata.arcgis.com/datasets/7015d5d46a284f94ac05c2ea4358bcd7_0.geojson',
            'https://opendata.arcgis.com/datasets/5fc63b2a48474100b560a7d98b5097d7_1.geojson',
            'https://opendata.arcgis.com/datasets/27af9a2485c5442bb061fa7e881d7022_2.geojson',
            'https://opendata.arcgis.com/datasets/4f62515558174f53979b3be0335004d3_3.geojson',
            'https://opendata.arcgis.com/datasets/29f801d03c9b4b608bca6a8e497278c3_4.geojson',
            'https://opendata.arcgis.com/datasets/a0019dd0d6464747a88921f5e103d509_5.geojson',
            'https://opendata.arcgis.com/datasets/40bcfbc4054549ebba8b5777bbdd40ff_6.geojson',
            'https://opendata.arcgis.com/datasets/16cedd233d914118a275c6510115d466_7.geojson',
            'https://opendata.arcgis.com/datasets/902fd604ecf54adf8579894508cacc68_8.geojson',
            'https://opendata.arcgis.com/datasets/170b764c52f34c9497720c0463f3b58b_9.geojson',
            'https://opendata.arcgis.com/datasets/2c37babc94d64bbb938a9b520bc5538c_10.geojson',
            'https://opendata.arcgis.com/datasets/a35aa9249110472ba2c69cc574eff984_11.geojson']

In [4]:
#Long (but necessary) function written by Sarah to homogenize column titles for traffic data downloads.
def get_gdf(year):
    '''Enter the desired year to download the traffic flow count
    data for that year. Example: enter '7' for the year 2007.
    '''
    num = year-7
    gdf_year = gpd.read_file(url_list[num])
    if year == 11:
        gdf_year = gdf_year.rename(columns={"YEAR_" : 'YEAR'})
    if year == 12:
        gdf_year = gdf_year.rename(columns={'STDY_YEAR' : 'YEAR'})
    if year == 15 or year == 16:
        gdf_year = gdf_year.rename(columns={"COUNTAAWDT" : 'AAWDT', "FLOWSEGID" : "GEOBASID", 'FIRST_STNAME_ORD' : 'STNAME'})
        gdf_year = gdf_year[['AAWDT', 'GEOBASID', 'STNAME', 'SHAPE_Length', 'geometry']]
        if year == 15:
            year_list = ['2015']*len(gdf_year)
            gdf_year['YEAR'] = year_list
        elif year == 16:
            year_list = ['2016']*len(gdf_year)
            gdf_year['YEAR'] = year_list
    elif year == 17 or year == 18:
        gdf_year = gdf_year.rename(columns={"AWDT" : 'AAWDT', "FLOWSEGID" : "GEOBASID", 'STNAME_ORD' : 'STNAME'})
        gdf_year = gdf_year[['AAWDT', 'GEOBASID', 'STNAME', 'SHAPE_Length', 'geometry']]
        if year == 17:
            year_list = ['2017']*len(gdf_year)
            gdf_year['YEAR'] = year_list
        elif year == 18:
            year_list = ['2018']*len(gdf_year)
            gdf_year['YEAR'] = year_list

    gdf_year = gdf_year[[ 'YEAR', 'AAWDT', 'GEOBASID', 'STNAME', 'SHAPE_Length', 'geometry']]
    return gdf_year

Unnamed: 0,YEAR,AAWDT,GEOBASID,STNAME,SHAPE_Length,geometry
0,2010,8000,94250370,RENTON AVE S,4615.134504,"LINESTRING (-122.28064 47.53541, -122.28056 47..."
1,2010,10100,50000650,AIRPORT WAY S,18205.478931,"MULTILINESTRING ((-122.29348 47.52408, -122.29..."
2,2010,27500,127150040,N NORTHGATE WAY,4267.857028,"LINESTRING (-122.34469 47.70505, -122.34334 47..."
3,2010,7900,123450090,N 65TH ST,12341.641545,"LINESTRING (-122.34831 47.67606, -122.34990 47..."
4,2010,11600,9550200,2ND AVE,4161.475805,"LINESTRING (-122.34045 47.61096, -122.34165 47..."
...,...,...,...,...,...,...
255,2010,11800,53900160,BEACON AVE S,3460.821435,"LINESTRING (-122.31341 47.58059, -122.31238 47..."
256,2010,25400,55650170,BOREN AVE,1224.113739,"LINESTRING (-122.32253 47.60648, -122.32287 47..."
257,2010,99000,520040,1ST AV S BR NB,6772.284374,"LINESTRING (-122.33431 47.54827, -122.33417 47..."
258,2010,1000,75750010,COLUMBIA ST RAMP,327.206009,"LINESTRING (-122.32926 47.60489, -122.32939 47..."


**Load in census tracts for zip code filtering**

In [6]:
# Census tract boundaries
census_url = 'https://opendata.arcgis.com/datasets/de58dc3e1efc49b782ab357e044ea20c_9.geojson'
census_bounds = gpd.read_file(census_url)
census_columns = ['NAME10', 'SHAPE_Area', 'geometry']
census_bounds_cleaned = census_bounds.loc[:,census_columns]
census_bounds_cleaned['NAME10'] = census_bounds_cleaned['NAME10'].astype(float)

**Load in zip code areas, filter by spatial join with census tracts**

In [7]:
# Zip code boundaries
zipcodes_url = 'https://opendata.arcgis.com/datasets/83fc2e72903343aabff6de8cb445b81c_2.geojson'
zipcodes = gpd.read_file(zipcodes_url)

In [8]:
zipcodes_columns = ['ZIPCODE', 'SHAPE_Area', 'geometry']
zipcodes_cleaned = zipcodes.loc[:,zipcodes_columns]
zipcodes_cleaned['ZIPCODE'] = zipcodes_cleaned['ZIPCODE'].astype(int)
zipcodes_cleaned.head()

Unnamed: 0,ZIPCODE,SHAPE_Area,geometry
0,98031,228012900.0,"POLYGON ((-122.21842 47.43750, -122.21896 47.4..."
1,98032,482675400.0,"MULTIPOLYGON (((-122.24187 47.44122, -122.2436..."
2,98033,256674700.0,"POLYGON ((-122.20571 47.65170, -122.20571 47.6..."
3,98034,272507200.0,"POLYGON ((-122.17551 47.73706, -122.17551 47.7..."
4,98030,200095400.0,"POLYGON ((-122.16746 47.38549, -122.16746 47.3..."


In [26]:
# Zip codes joined with census tracts
zips = gpd.sjoin(zipcodes_cleaned, census_bounds_cleaned, op='intersects')
zips_columns = ['ZIPCODE', 'NAME10', 'SHAPE_Area_left', 'geometry']
zips = zips[zips_columns]

32

**Function to convert line segments to midpoint identifiers to ensure placement in correct zip code**

In [234]:
def traffic(year):
    '''Function to generate distributions of traffic flow by year in each zip
    '''
    gdf_test = get_gdf(year)

    midpoints = gdf_test.copy()
    midpoints['MIDPOINT'] = gdf_test['geometry'].interpolate(0.5, normalized = True)
    midpoint_columns = ['YEAR', 'AAWDT', 'MIDPOINT']
    midpoint_cleaned = midpoints.loc[:,midpoint_columns]
    midpoint_cleaned['geometry'] = midpoint_cleaned['MIDPOINT']
    
    zip_mids = gpd.sjoin(zips,midpoint_cleaned,op='contains')
    zip_mids_clean = zip_mids.copy()
    zip_mids_clean = zip_mids_clean.drop(columns=['SHAPE_Area_left','NAME10','index_right','MIDPOINT'])
    
    zip_mids_clean_c = zip_mids_clean.copy()
    zip_mids_clean_c.drop_duplicates(inplace=True)
    zip_mids_clean_cc = zip_mids_clean_c.copy()
    zip_mids_clean_cc.drop(columns=['geometry'])
    zip_mids_clean_cc = zip_mids_clean_cc.dissolve(by=['ZIPCODE'],aggfunc=sum)
    
    zip_traffic = zip_mids_clean_cc.copy()
    zip_traffic.drop(columns=['geometry'],inplace=True)
    zip_traffic['YEAR'] = year + 2000
    zip_traffic.reset_index(inplace=True)
    zip_traffic = zip_traffic[['ZIPCODE', 'YEAR', 'AAWDT']]
    zip_traffic.head(n=30)

    return zip_traffic

In [235]:
def total_traffic(years):
    df_total_traffic = pd.DataFrame()
    years = list(np.arange(7,19))
    for year in years:
        traffic_year = traffic(year)
        df_total_traffic = df_total_traffic.append(traffic_year)
    return df_total_traffic

In [236]:
total_df = total_traffic(years)


  midpoints['MIDPOINT'] = gdf_test['geometry'].interpolate(0.5, normalized = True)

  midpoints['MIDPOINT'] = gdf_test['geometry'].interpolate(0.5, normalized = True)

  midpoints['MIDPOINT'] = gdf_test['geometry'].interpolate(0.5, normalized = True)

  midpoints['MIDPOINT'] = gdf_test['geometry'].interpolate(0.5, normalized = True)

  midpoints['MIDPOINT'] = gdf_test['geometry'].interpolate(0.5, normalized = True)

  midpoints['MIDPOINT'] = gdf_test['geometry'].interpolate(0.5, normalized = True)

  midpoints['MIDPOINT'] = gdf_test['geometry'].interpolate(0.5, normalized = True)

  midpoints['MIDPOINT'] = gdf_test['geometry'].interpolate(0.5, normalized = True)

  midpoints['MIDPOINT'] = gdf_test['geometry'].interpolate(0.5, normalized = True)

  midpoints['MIDPOINT'] = gdf_test['geometry'].interpolate(0.5, normalized = True)

  midpoints['MIDPOINT'] = gdf_test['geometry'].interpolate(0.5, normalized = True)

  midpoints['MIDPOINT'] = gdf_test['geometry'].interpolate(0.5, normalized 

In [227]:
total_traffic_df = total_df.copy()
total_traffic_df.groupby(by='ZIPCODE')
total_traffic_df.sort_values(['ZIPCODE','YEAR'],inplace=True)
total_traffic_df.head(n=30)

Unnamed: 0,ZIPCODE,YEAR,AAWDT
0,98101,2007,264800.0
0,98101,2008,254100.0
0,98101,2009,254100.0
0,98101,2010,257900.0
0,98101,2011,353100.0
0,98101,2012,259292.0
0,98101,2013,312500.0
0,98101,2014,318800.0
0,98101,2015,992674.9
0,98101,2016,992674.9
