In [3]:
import getpass
import os
import sys

import geopandas as gpd
import numpy as np
from arcgis import GIS
from arcgis import features

# OS account name; used to build the per-user Box paths in this notebook.
user = getpass.getuser()
# Stop Python from writing .pyc files on import (presumably to keep bytecode
# out of the synced Box folder that is added to sys.path below — TODO confirm).
sys.dont_write_bytecode = True

# sys.path.insert(0, '/Users/{}/Box/Utility Code'.format(user))

# for macOS Monterey
sys.path.insert(0, '/Users/{}/Library/CloudStorage/Box-Box/Utility Code'.format(user))

# NOTE(review): the wildcard import hides which names this notebook takes from
# utils_io. `os` and `np` are used by later cells, so they are imported
# explicitly above instead of relying on this line to provide them.
from utils_io import *

In [3]:
# Sign in to the MTC ArcGIS Online organisation. No password is supplied here,
# so it is requested interactively when the cell runs.
gis = GIS(url='https://mtc.maps.arcgis.com/home/', username='content_MTC')

Enter password: ········


In [4]:
# Project folder inside the current user's locally synced Box drive.
work_dir = os.path.join(
    '/Users', user, 'Library', 'CloudStorage', 'Box-Box',
    'DataViz Projects', 'Adhoc-Spatial-Analysis', 'Mobility Hubs Overlay Analysis',
)

## Mobility Hubs Overlay Analysis

**Analysis requirements:** 
1. Mobility hub locations that are in both an Equity Priority Community (EPC) and a Transit-Oriented Community (TOC) area, and are on the existing MTC bike network
2. Mobility hub locations that are in a transit-rich community area and are on the existing MTC bike network

**Box Directory**
[Mobility Hubs Overlay Analysis Box](https://mtcdrive.box.com/s/xbl4frhez31wkucrba9ml9x1zen5wr0c)

**Data Sources**
1. [Mobility Hubs](https://mtc.maps.arcgis.com/home/item.html?id=6f3f6556d3c14d3d94ecef85270f52d4)
2. [Equity Priority Communities](https://mtc.maps.arcgis.com/home/item.html?id=28a03a46fe9c4df0a29746d6f8c633c8)
3. [Transit-Oriented Communities](https://mtc.maps.arcgis.com/home/item.html?id=3df9742c87a849a8a09342bc58da2dd6)
4. [Existing MTC Bike Network](https://mtc.maps.arcgis.com/home/item.html?id=df9ff2da527140e1ab36dd6c7d5ea0e6)
5. [Plan Bay Area 2050 PDAs](https://services3.arcgis.com/i2dkYWmb4wHvYPda/arcgis/rest/services/priority_development_areas_pba2050/FeatureServer/0/query?outFields=*&where=1%3D1&f=geojson)
6. [San Francisco Bay Jurisdictions](https://services3.arcgis.com/i2dkYWmb4wHvYPda/arcgis/rest/services/region_jurisdiction/FeatureServer/0/query?outFields=*&where=1%3D1&f=geojson)

## Pull data sources

In [5]:
# Mobility hubs: all fields, fetched as GeoJSON from the feature service and
# reprojected to EPSG:26910.
mh_gdf = gpd.read_file(
    'https://services3.arcgis.com/i2dkYWmb4wHvYPda/arcgis/rest/services/mtc_mobility_hubs_2020/FeatureServer/0/query?outFields=*&where=1%3D1&f=geojson'
).to_crs('EPSG:26910')

In [6]:
# Equity Priority Communities: only geoid, epc_2050 and epc_class are requested
# (geometry returned in WKID 4326), then reprojected to EPSG:26910.
epc_gdf = gpd.read_file(
    'https://services3.arcgis.com/i2dkYWmb4wHvYPda/arcgis/rest/services/communities_of_concern_2020_acs2018/FeatureServer/0/query?where=1%3D1&outFields=geoid,epc_2050,epc_class&outSR=4326&f=json'
).to_crs('EPSG:26910')

In [7]:
# Existing MTC bike network, read from the local GeoJSON copy under
# work_dir/data and reprojected to EPSG:26910.
bn_gdf = gpd.read_file(
    os.path.join(work_dir, 'data', 'mtc_bike_network.geojson')
).to_crs('EPSG:26910')

In [8]:
# Fetch the Transit-Oriented Communities item from ArcGIS Online by item id
# (the same id as the "Transit-Oriented Communities" link under Data Sources).
toc = gis.content.get(itemid='3df9742c87a849a8a09342bc58da2dd6')

In [9]:
# Take the first layer of the item.
toc_layer = toc.layers[0]

In [10]:
# Query the layer without passing any filter arguments, requesting the
# geometries in spatial reference WKID 4326.
toc_qry = toc_layer.query(out_sr={"wkid" : 4326})

In [11]:
# GeoJSON export of the query result. Note that `to_geojson` is read as an
# attribute here, not called.
toc_geojson = toc_qry.to_geojson

In [12]:
# Build a GeoDataFrame from the GeoJSON export and reproject it to EPSG:26910.
# NOTE(review): the earlier comment on this cell said the CRS was being *set*
# because the AGOL geometry was incorrectly labelled EPSG:4326. The code calls
# to_crs, which reprojects rather than relabels, and the query cell requests
# out_sr 4326 — confirm which behaviour was intended.
toc_gdf = gpd.read_file(toc_geojson).to_crs('EPSG:26910')

In [13]:
# PBA2050 growth geographies: only the `designatio` field is requested
# (geometry in WKID 4326), then reprojected to EPSG:26910.
# NOTE(review): gg_gdf is not referenced by any other visible cell.
gg_gdf = gpd.read_file(
    'https://services3.arcgis.com/i2dkYWmb4wHvYPda/arcgis/rest/services/pba2050_growth_geographies_2020/FeatureServer/0/query?where=1%3D1&outFields=designatio&outSR=4326&f=json'
).to_crs('EPSG:26910')

In [14]:
# Jurisdiction polygons: all fields, fetched as GeoJSON from the feature
# service and reprojected to EPSG:26910.
jurs_gdf = gpd.read_file(
    'https://services3.arcgis.com/i2dkYWmb4wHvYPda/arcgis/rest/services/region_jurisdiction/FeatureServer/0/query?outFields=*&where=1%3D1&f=geojson'
).to_crs('EPSG:26910')

In [15]:
# Plan Bay Area 2050 priority development areas: all fields, fetched as
# GeoJSON from the feature service and reprojected to EPSG:26910.
pda_gdf = gpd.read_file(
    'https://services3.arcgis.com/i2dkYWmb4wHvYPda/arcgis/rest/services/priority_development_areas_pba2050/FeatureServer/0/query?outFields=*&where=1%3D1&f=geojson'
).to_crs('EPSG:26910')

## Perform spatial joins

In [16]:
# keep only the features whose epc_2050 value is 1
epc_gdf = epc_gdf.loc[epc_gdf['epc_2050'].eq(1)]

In [17]:
# Left spatial join of hubs to the filtered EPC polygons: every hub is kept,
# and hubs without a matching polygon get a null index_right.
mh_epc = gpd.sjoin(mh_gdf, epc_gdf, how='left')

In [18]:
# epc flag: 1 where the join found a matching polygon, 0 otherwise
mh_epc['epc_pba2050'] = np.where(mh_epc['index_right'].notnull(), 1, 0)

In [19]:
# Nearest-feature join of hubs to the bike network. max_distance is expressed
# in CRS units; both layers are in EPSG:26910 (metres), so 15.24 = 50 ft.
# Hubs with no network feature in range keep a null index_right.
mh_bn = gpd.sjoin_nearest(mh_gdf, bn_gdf, how='left', max_distance=15.24)

In [20]:
# bike network flag: 1 where a network feature was found in range, 0 otherwise
mh_bn['existing_bike_nw'] = np.where(mh_bn['index_right'].notnull(), 1, 0)

In [21]:
# Same nearest-feature join with a quarter-mile search radius:
# 402.336 m = 0.25 mi in the metric EPSG:26910 CRS.
mh_1_4_mi_bn = gpd.sjoin_nearest(mh_gdf, bn_gdf, how='left', max_distance=402.336)

In [22]:
# 1/4 mile bike network flag: 1 where a network feature was found in range, 0 otherwise
mh_1_4_mi_bn['1_4_mi_existing_bike_nw'] = np.where(
    mh_1_4_mi_bn['index_right'].notnull(), 1, 0
)

In [23]:
# Left spatial join of hubs to the transit-oriented communities (TOC) layer;
# hubs without a match get a null index_right.
mh_toc = gpd.sjoin(mh_gdf, toc_gdf, how='left')

In [24]:
# toc flag: 1 where the join found a matching polygon, 0 otherwise
mh_toc['toc'] = np.where(mh_toc['index_right'].notnull(), 1, 0)

In [25]:
# Left spatial join of hubs to the PBA2050 priority development areas;
# hubs without a match get a null index_right.
mh_pda = gpd.sjoin(mh_gdf, pda_gdf, how='left')

In [26]:
# pda flag: 1 where the join found a matching polygon, 0 otherwise
mh_pda['pba2050_pda'] = np.where(mh_pda['index_right'].notnull(), 1, 0)

## Flag overlay areas within original df

In [27]:
def create_flag_column(sj_gdf, flag_col_name, original_gdf, original_gdf_id_col):
    """Copy a flag column from a join result back onto the source frame.

    Adds (or overwrites) ``original_gdf[flag_col_name]`` in place; nothing is
    returned.

    Parameters
    ----------
    sj_gdf : DataFrame or GeoDataFrame
        Join result holding both ``original_gdf_id_col`` and ``flag_col_name``.
        An id may appear on several rows (one per matched feature).
    flag_col_name : str
        Column to read from ``sj_gdf`` and to write on ``original_gdf``.
    original_gdf : DataFrame or GeoDataFrame
        Frame that receives the flag column; it is mutated in place.
    original_gdf_id_col : str
        Identifier column present in both frames.

    Notes
    -----
    Ids of ``original_gdf`` that do not occur in ``sj_gdf`` receive NaN.
    """
    # Always collapse the join result to one value per id. Comparing row counts
    # is not a reliable test for "ids are unique": a result with repeated ids
    # can have the same number of rows as the source frame, and mapping through
    # a non-unique index raises. ``max`` marks an id as flagged when any of its
    # joined rows is flagged, independent of row order.
    flag_by_id = sj_gdf.groupby(original_gdf_id_col)[flag_col_name].max()
    original_gdf[flag_col_name] = original_gdf[original_gdf_id_col].map(flag_by_id)

In [28]:
# copy the epc flag from the join result onto mh_gdf, keyed on id_x
create_flag_column(
    original_gdf=mh_gdf, original_gdf_id_col='id_x',
    sj_gdf=mh_epc, flag_col_name='epc_pba2050',
)

In [29]:
# copy the bike network flag from the join result onto mh_gdf, keyed on id_x
create_flag_column(
    original_gdf=mh_gdf, original_gdf_id_col='id_x',
    sj_gdf=mh_bn, flag_col_name='existing_bike_nw',
)

In [30]:
# copy the 1/4 mile bike network flag from the join result onto mh_gdf, keyed on id_x
create_flag_column(
    original_gdf=mh_gdf, original_gdf_id_col='id_x',
    sj_gdf=mh_1_4_mi_bn, flag_col_name='1_4_mi_existing_bike_nw',
)

In [31]:
# copy the toc flag from the join result onto mh_gdf, keyed on id_x
create_flag_column(
    original_gdf=mh_gdf, original_gdf_id_col='id_x',
    sj_gdf=mh_toc, flag_col_name='toc',
)

In [32]:
# copy the pda flag from the join result onto mh_gdf, keyed on id_x
create_flag_column(
    original_gdf=mh_gdf, original_gdf_id_col='id_x',
    sj_gdf=mh_pda, flag_col_name='pba2050_pda',
)

## Flag counties and jurisdictions

In [34]:
# Preview the jurisdictions layer; fipco (county code) and jurname are the
# columns carried into the join in the next cell.
jurs_gdf

Unnamed: 0,objectid,fipst,fipco,jurname,Shape__Area,Shape__Length,geometry
0,110,06,097,Unincorporated Sonoma,0.450046,8.231250,"MULTIPOLYGON (((532413.783 4279946.829, 532729..."
1,111,06,041,Unincorporated Marin,0.196976,5.260678,"MULTIPOLYGON (((557233.893 4214144.962, 556868..."
2,112,06,055,Unincorporated Napa,0.201949,4.566464,"MULTIPOLYGON (((578177.125 4263156.061, 578176..."
3,113,06,095,Unincorporated Solano,0.200921,6.225325,"MULTIPOLYGON (((616638.139 4226074.193, 616415..."
4,114,06,013,Unincorporated Contra Costa,0.128281,8.168448,"MULTIPOLYGON (((627009.074 4186435.599, 626934..."
...,...,...,...,...,...,...,...
104,214,06,081,San Bruno,0.001451,0.239008,"POLYGON ((552459.710 4165530.026, 552462.282 4..."
105,215,06,081,San Carlos,0.001428,0.310386,"POLYGON ((565295.849 4152865.344, 565304.800 4..."
106,216,06,081,San Mateo,0.004186,0.465963,"POLYGON ((563905.810 4154384.459, 563901.285 4..."
107,217,06,081,South San Francisco,0.007979,0.701870,"POLYGON ((553624.238 4169571.806, 553675.906 4..."


In [41]:
# Attach county code and jurisdiction name to each hub. The join type is the
# default inner join, written out here: hubs that match no jurisdiction polygon
# are dropped, and a hub matching several polygons yields several rows.
# NOTE(review): the .info() output later in the notebook reports 1785 rows
# with index labels only up to 1690, so some hubs appear more than once —
# confirm whether the exported table should be de-duplicated per hub.
jur_cols = ['fipco', 'jurname', 'geometry']
mh_jur_gdf = gpd.sjoin(mh_gdf, jurs_gdf[jur_cols], how='inner')
del jur_cols

In [45]:
# Lookup from 3-digit county code (the `fipco` column) to a human readable
# county name, listed in code order.
county_dict = {
    '001': 'Alameda',
    '013': 'Contra Costa',
    '041': 'Marin',
    '055': 'Napa',
    '075': 'San Francisco',
    '081': 'San Mateo',
    '085': 'Santa Clara',
    '095': 'Solano',
    '097': 'Sonoma',
}

In [46]:
# Translate the county code into its name; codes that are not keys of
# county_dict come out as NaN.
mh_jur_gdf['county'] = mh_jur_gdf['fipco'].map(county_dict)

## Reverse geocode addresses

In [None]:
# Prompt for the Google API key without echoing it, so the key is not
# hard-coded in the notebook.
google_api_key = getpass.getpass()

In [66]:
from functools import partial
from tqdm import tqdm
from geopandas.tools import reverse_geocode
from geopy.geocoders import GoogleV3
from geopy.extra.rate_limiter import RateLimiter
from shapely.geometry import Point

In [67]:
# Google geocoding client, authenticated with the key entered above.
g = GoogleV3(api_key=google_api_key)

# Throttled wrapper around the client's *reverse* lookup (coordinates ->
# address), despite the variable name: calls are spaced at least 1/40 s apart
# and a failed call waits 1 s before it is retried.
geocode = RateLimiter(g.reverse, min_delay_seconds=1/40, error_wait_seconds=1)

# Registers progress_apply on pandas objects.
tqdm.pandas()

In [68]:
# Reproject to EPSG:4326 so the point coordinates read in the next cell are
# geographic degrees (y = latitude, x = longitude) for the geocoder.
mh_jur_gdf = mh_jur_gdf.to_crs('EPSG:4326')

In [109]:
# "y,x" text for each point geometry, i.e. "latitude,longitude" in EPSG:4326
mh_jur_gdf['geo_str'] = mh_jur_gdf['geometry'].apply(lambda pt: f'{pt.y},{pt.x}')

In [111]:
# Reverse-geocode every coordinate string with a progress bar. Each call goes
# through the rate limiter and passes timeout=1000 to the geocoder.
mh_jur_gdf['address'] = mh_jur_gdf['geo_str'].progress_apply(
    lambda coords: geocode(coords, timeout=1000)
)

100%|█████████████████████████████████████████████████████████████████████████| 1785/1785 [03:15<00:00,  9.11it/s]


In [118]:
# Pull the formatted address string out of each geocoder result. A lookup can
# come back empty (no match for the coordinates, or an error swallowed by the
# rate limiter), so fall back to None instead of raising AttributeError on the
# missing `.address` attribute.
mh_jur_gdf['address_short'] = mh_jur_gdf['address'].apply(
    lambda loc: getattr(loc, 'address', None)
)

## Export final data

In [120]:
# Give the abbreviated source flag columns readable names for the export.
rename_dict = dict(
    csha_flag='carshare',
    bknt_flag='former_bike_nw',
    bwsa_flag='bay_wheels',
)
mh_jur_gdf = mh_jur_gdf.rename(columns=rename_dict)

In [121]:
# List columns, non-null counts and dtypes before choosing the export columns.
mh_jur_gdf.info()

<class 'geopandas.geodataframe.GeoDataFrame'>
Int64Index: 1785 entries, 0 to 1690
Data columns (total 65 columns):
 #   Column                   Non-Null Count  Dtype   
---  ------                   --------------  -----   
 0   OBJECTID                 1785 non-null   int64   
 1   id_x                     1785 non-null   int64   
 2   hub_type                 1785 non-null   object  
 3   rank_all                 1785 non-null   int64   
 4   eq_flag                  1785 non-null   int64   
 5   clmt_score               1785 non-null   float64 
 6   system_typ               1785 non-null   object  
 7   hct_stn_nm               1785 non-null   object  
 8   raw_name                 1785 non-null   object  
 9   hct_flag                 1785 non-null   int64   
 10  hct_score                1785 non-null   int64   
 11  bhf_flag                 1785 non-null   int64   
 12  bhf_score                1785 non-null   int64   
 13  pr_flag                  1785 non-null   int64   
 14  

In [126]:
# Columns written to the deliverable, in output order: location context,
# hub attributes, renamed source flags, then the overlay flags added above.
final_cols = [
    'county', 'jurname', 'address_short',
    'id_x', 'hub_type', 'system_typ', 'hct_stn_nm', 'raw_name',
    'carshare', 'former_bike_nw', 'bay_wheels',
    'epc_pba2050', 'existing_bike_nw', '1_4_mi_existing_bike_nw', 'toc', 'pba2050_pda',
]
# Write the selected columns (geometry is not among them) to the project's
# data folder, without the index.
mh_jur_gdf[final_cols].to_csv(
    os.path.join(work_dir, 'data', 'mobility_hubs_overlay_analysis.csv'),
    index=False,
)

In [None]:
# #export bike network w/ missing geoms
# bn_gdf[bn_gdf['geometry'].isnull()].to_csv('data/existing_bike_nw_missing_geometry.csv',index=False)