In [1]:
#Import libraries
import pandas as pd 
import geopandas as gpd
from geopandas.tools import geocode
import shapely
from shapely.geometry import Point
import numpy as np
import scipy.stats as stats
import scipy
import seaborn as sns
import matplotlib 
from matplotlib import pyplot as plt
matplotlib.rcParams.update({'font.size': 20})

In [2]:
import urllib.request as request

**Load in traffic data**

In [3]:
#List of traffic download URLs for 2007 - 2018
url_list = ['https://opendata.arcgis.com/datasets/7015d5d46a284f94ac05c2ea4358bcd7_0.geojson',
            'https://opendata.arcgis.com/datasets/5fc63b2a48474100b560a7d98b5097d7_1.geojson',
            'https://opendata.arcgis.com/datasets/27af9a2485c5442bb061fa7e881d7022_2.geojson',
            'https://opendata.arcgis.com/datasets/4f62515558174f53979b3be0335004d3_3.geojson',
            'https://opendata.arcgis.com/datasets/29f801d03c9b4b608bca6a8e497278c3_4.geojson',
            'https://opendata.arcgis.com/datasets/a0019dd0d6464747a88921f5e103d509_5.geojson',
            'https://opendata.arcgis.com/datasets/40bcfbc4054549ebba8b5777bbdd40ff_6.geojson',
            'https://opendata.arcgis.com/datasets/16cedd233d914118a275c6510115d466_7.geojson',
            'https://opendata.arcgis.com/datasets/902fd604ecf54adf8579894508cacc68_8.geojson',
            'https://opendata.arcgis.com/datasets/170b764c52f34c9497720c0463f3b58b_9.geojson',
            'https://opendata.arcgis.com/datasets/2c37babc94d64bbb938a9b520bc5538c_10.geojson',
            'https://opendata.arcgis.com/datasets/a35aa9249110472ba2c69cc574eff984_11.geojson']

In [4]:
#Long (but necessary) function written by Sarah to homogenize column titles for traffic data downloads.
def get_gdf(year):
    '''Enter the desired year to download the traffic flow count
    data for that year. Example: enter '7' for the year 2007.
    '''
    num = year-7
    gdf_year = gpd.read_file(url_list[num])
    if year == 11:
        gdf_year = gdf_year.rename(columns={"YEAR_" : 'YEAR'})
    if year == 12:
        gdf_year = gdf_year.rename(columns={'STDY_YEAR' : 'YEAR'})
    if year == 15 or year == 16:
        gdf_year = gdf_year.rename(columns={"COUNTAAWDT" : 'AAWDT', "FLOWSEGID" : "GEOBASID", 'FIRST_STNAME_ORD' : 'STNAME'})
        gdf_year = gdf_year[['AAWDT', 'GEOBASID', 'STNAME', 'SHAPE_Length', 'geometry']]
        if year == 15:
            year_list = ['2015']*len(gdf_year)
            gdf_year['YEAR'] = year_list
        elif year == 16:
            year_list = ['2016']*len(gdf_year)
            gdf_year['YEAR'] = year_list
    elif year == 17 or year == 18:
        gdf_year = gdf_year.rename(columns={"AWDT" : 'AAWDT', "FLOWSEGID" : "GEOBASID", 'STNAME_ORD' : 'STNAME'})
        gdf_year = gdf_year[['AAWDT', 'GEOBASID', 'STNAME', 'SHAPE_Length', 'geometry']]
        if year == 17:
            year_list = ['2017']*len(gdf_year)
            gdf_year['YEAR'] = year_list
        elif year == 18:
            year_list = ['2018']*len(gdf_year)
            gdf_year['YEAR'] = year_list

    gdf_year = gdf_year[[ 'YEAR', 'AAWDT', 'GEOBASID', 'STNAME', 'SHAPE_Length', 'geometry']]
    return gdf_year

**Load in census tracts for zip code filtering**

In [8]:
# Census tract boundaries
census_url = 'https://opendata.arcgis.com/datasets/de58dc3e1efc49b782ab357e044ea20c_9.geojson'
census_bounds = gpd.read_file(census_url)
census_columns = ['NAME10', 'SHAPE_Area', 'geometry']
census_bounds_cleaned = census_bounds.loc[:,census_columns]
census_bounds_cleaned['NAME10'] = census_bounds_cleaned['NAME10'].astype(float)

**Load in zip code areas, filter by spatial join with census tracts**

In [11]:
# Zip code boundaries
zipcodes_url = 'https://opendata.arcgis.com/datasets/83fc2e72903343aabff6de8cb445b81c_2.geojson'
zipcodes = gpd.read_file(zipcodes_url)

In [12]:
zipcodes_columns = ['ZIPCODE', 'SHAPE_Area', 'geometry']
zipcodes_cleaned = zipcodes.loc[:,zipcodes_columns]
zipcodes_cleaned['ZIPCODE'] = zipcodes_cleaned['ZIPCODE'].astype(int)
zipcodes_cleaned.head()

Unnamed: 0,ZIPCODE,SHAPE_Area,geometry
0,98031,228012900.0,"POLYGON ((-122.21842 47.43750, -122.21896 47.4..."
1,98032,482675400.0,"MULTIPOLYGON (((-122.24187 47.44122, -122.2436..."
2,98033,256674700.0,"POLYGON ((-122.20571 47.65170, -122.20571 47.6..."
3,98034,272507200.0,"POLYGON ((-122.17551 47.73706, -122.17551 47.7..."
4,98030,200095400.0,"POLYGON ((-122.16746 47.38549, -122.16746 47.3..."


In [13]:
# Zip codes joined with census tracts
zips = gpd.sjoin(zipcodes_cleaned, census_bounds_cleaned, op='intersects')
zips_columns = ['ZIPCODE', 'NAME10', 'SHAPE_Area_left', 'geometry']
zips = zips[zips_columns]

np.unique(zips['ZIPCODE'])

array([98101, 98102, 98103, 98104, 98105, 98106, 98107, 98108, 98109,
       98112, 98115, 98116, 98117, 98118, 98119, 98121, 98122, 98125,
       98126, 98133, 98134, 98136, 98144, 98146, 98154, 98155, 98164,
       98168, 98177, 98178, 98195, 98199])

**Function to convert line segments to midpoint identifiers to ensure placement in correct zip code**

In [16]:
def traffic_in_zip(year):
    '''Function to generate distributions of traffic flow by year in each zip
    '''
    gdf_test = get_gdf(7)

    midpoints = gdf_test.copy()
    midpoints['MIDPOINT'] = gdf_test['geometry'].interpolate(0.5, normalized = True)
    midpoint_columns = ['YEAR', 'AAWDT', 'MIDPOINT']
    midpoint_cleaned = midpoints.loc[:,midpoint_columns]
    midpoint_cleaned['geometry'] = midpoint_cleaned['MIDPOINT']
    
    zip_mids = gpd.sjoin(zips,midpoint_cleaned,op='contains')
    zip_mids_cleaned = zip_mids.loc[:,['AAWDT']]
    
    return zip_mids_cleaned

In [18]:
traffic_zip_10 = traffic_in_zip(10)
traffic_zip_10.head(n=25)


  midpoints['MIDPOINT'] = gdf_test['geometry'].interpolate(0.5, normalized = True)


Unnamed: 0,AAWDT
34,14300
34,14300
34,14300
34,14300
34,14300
34,14300
34,14300
34,14300
34,14300
34,10100
