In [1]:
import os
import sys
import getpass

user = getpass.getuser()
sys.dont_write_bytecode = True

# for macOS Monterey
sys.path.insert(0, '/Users/{}/Library/CloudStorage/Box-Box/Utility Code'.format(user))

import utils_io as utils
import fiona
from arcgis import GIS
import pandas as pd
import numpy as np
import geopandas as gpd
import uuid

In [2]:
work_dir = os.path.join('/Users',
                        user,
                        'Library',
                        'CloudStorage',
                        'Box-Box',
                        'DataViz Projects',
                        'Spatial Analysis and Mapping',
                        'TOD TOC Policy Update Mapping'
                       )

In [3]:
toc_fgdb = os.path.join(work_dir, 
                        'TOD Policy Analysis', 
                        'TOD Policy Analysis.gdb'
                       )

In [4]:
password = getpass.getpass()

········


In [5]:
# Connect to the MTC ArcGIS Online organization. The value captured with
# getpass in the previous cell is passed as `client_id`, i.e. it is used as the
# OAuth client (application) id for the interactive sign-in, not as an account
# password.
gis = GIS('https://mtc.maps.arcgis.com/home/', client_id=password)

Please sign in to your GIS and paste the code that is obtained below.
If a web browser does not automatically open, please navigate to the URL below yourself instead.
Opening web browser to navigate to: https://mtc.maps.arcgis.com/sharing/rest//oauth2/authorize?response_type=code&client_id=UlXxfwX7XiHV5uuy&redirect_uri=urn%3Aietf%3Awg%3Aoauth%3A2.0%3Aoob&state=OFoJj9lVQsohrQV5id0kIfF83eVovl
Enter code obtained on signing in using SAML: ········




## Preprocessing

- [Pull PBA2050 PDAs](https://mtc.maps.arcgis.com/home/item.html?id=4df9cb38d77346a289252ced4ffa0ca0#overview)
- Read Planned Transit Stops
- [Pull SF Bay Region Water Areas](https://mtc.maps.arcgis.com/home/item.html?id=cc84a5ab0d2340b6be8e214bf352140b#overview)
- [Pull SF Bay Region Jurisdictions](https://mtc.maps.arcgis.com/home/item.html?id=4b1242e5cb224a2c9043927d3344df5a#overview)

In [6]:
# pda = utils.pull_geodata_from_argis(arcgis_data_id='85043289ac774a928e4628aa904a317c', 
#                                     client=bam_portal).to_crs('EPSG:26910')
pda = (gpd.read_file(os.path.join(work_dir, 'Data Outputs', 'pda_explode.geojson'), 
                     driver='GeoJSON')
       .to_crs('EPSG:26910')
      )

In [7]:
# stops = utils.pull_geodata_from_argis(arcgis_data_id='a4e761b25425464e978829db4c3563dc',
#                                      client=bam_portal).to_crs('EPSG:26910')
stop_path = os.path.join(
    work_dir, 
    'Data Outputs', 
    'transit_stops_existing_planned_2021_update_5_2022.geojson'
)
stops = (gpd.read_file(stop_path,
                       driver='GeoJSON')
       .to_crs('EPSG:26910')
        )

In [8]:
water_url = 'https://services3.arcgis.com/i2dkYWmb4wHvYPda/arcgis/rest/services/region_water_area/FeatureServer/0/query?outFields=*&where=1%3D1&f=geojson'
water = (gpd.read_file(water_url)
         .to_crs('EPSG:26910')
        )

In [9]:
# Having issues with multi-part geometries when using utils function. After pull, explode does not work.
# However, Can explode geometries coming from other sources / created by geopandas functions. 
# Exploded geographies using ArcGIS Pro

jurisdictions = (gpd.read_file(
    'https://services3.arcgis.com/i2dkYWmb4wHvYPda/arcgis/rest/services/region_jurisdiction_clp/FeatureServer/0/query?outFields=*&where=1%3D1&f=geojson')
                 .to_crs('EPSG:26910')
                 .explode(index_parts=True)
                )

In [10]:
ppas = (gpd.read_file(
    'https://services3.arcgis.com/i2dkYWmb4wHvYPda/arcgis/rest/services/priority_production_areas_current/FeatureServer/0/query?outFields=*&where=1%3D1&f=geojson')
                 .to_crs('EPSG:26910')
                )

## Create dataframe of fixed-guideway stops

- Remove stops where TOC policy does not apply
- Drop duplicate stops

In [11]:
def create_unique_id(row):
    """Return a newly generated random (version 4) UUID as a string.

    ``row`` is accepted only so the function can be used as a row-wise
    ``DataFrame.apply`` callback; its contents are never read.
    """
    new_id = uuid.uuid4()
    return str(new_id)

In [12]:
# Stops that arrive without a stop_id are given a random UUID string so that
# every stop can be referenced by stop_id in the steps that follow.
missing_stop_id = stops['stop_id'].isnull()
stops.loc[missing_stop_id, 'stop_id'] = (
    stops.loc[missing_stop_id]
    .apply(create_unique_id, axis=1)
)

In [13]:
# copy fixed guideway types
fg_types = [
    'Rail', 
    'BRT', 
    'Tram, Streetcar, Light Rail', 
    'Cable Tram',
    'Ferry'
]
stops_fg = stops[stops['route_type'].isin(fg_types)].copy()

In [14]:
# Remove stops with the following pba50_ids 
pba50_id_list = [
    'CC-021',
    'SF-024',
    'MU-060',
    'MU-061',
    'MU-062',
    'CC-015',
    'SF-012',
    'SF-011',
    'SM-011',
    'MU-033',
    'AL-016',
    'CC-018',
    'MR-017',
    'CC-028',
    'CC-030'
]
stops_fg = stops_fg[~stops_fg['pba50_id'].isin(pba50_id_list)]

In [15]:
# Remove San Pablo BRT stop at San Pablo Ave & Cedar St
stops_fg = stops_fg[stops_fg['stop_nm'] != 'San Pablo Ave & Cedar St']

In [16]:
# Remove stops with the following stop_ids
stop_id_list = [
    'SI:GA',
    'SI:GG',
    'SI:HOT',
    'SI:ITA',
    'SI:ITG',
    'SI:RCC',
    'SI:T1',
    'SI:T2',
    'SI:T3',
    'SI:WFR',
    'OAKL',
    'SB:12048537',
    'SB:890002',
    'SB:890003',
    'SFIA',
    '70151',
    '70152'
]
stops_fg = stops_fg[~stops_fg['stop_id'].isin(stop_id_list)]

In [17]:
# Remove stops served by tideline ferry
stops_fg = stops_fg[~(stops_fg['agency_nm'] == 'Tideline Water Taxi')]

In [18]:
# Remove stops with open years 2036-2050
stops_fg = stops_fg[stops_fg['exp_open'] != '2036-2050']

In [19]:
# remove stops that are missing both route_id and pba50_id
# these stops do not have attributes to disambiguate them for the next step
stops_fg = stops_fg[~((stops_fg['route_id'].isnull()) & (stops_fg['pba50_id'].isnull()))]

In [20]:
# Update BART blue line routes
stops_fg.loc[stops_fg['route_id'].isin(['BA:Blue-Sun','BA:Blue-Wkd/Sat']),'route_id'] = 'BA:Blue'

In [21]:
# Update Van Ness BRT from under construction to existing
stops_fg.loc[stops_fg['ppa_nm'] == 'Van Ness BRT','status'] = 'Existing/Built'

In [22]:
# Remove stops within Priority Production Areas (PPAs)
# Intersect the stop points with the PPA polygons to find the stops that
# fall inside a PPA.
stops_ppas = gpd.overlay(stops_fg, ppas, how='intersection')

# Collect the stop_ids of the intersecting stops, leaving out the Altamont
# Corridor Express Vasco Road Station ('CE:3400004'): the station is on the
# edge of the PPA and should be retained.
ppa_lst = [
    stop_id
    for stop_id in stops_ppas['stop_id'].to_list()
    if stop_id != 'CE:3400004'
]

# Exclude stops within PPAs
stops_fg = stops_fg[~stops_fg['stop_id'].isin(ppa_lst)]

In [23]:
unique_cols = [
    'agency_nm',
    'pba50_id',
    'ppa_nm',
    'route_id',
    'route_type',
    'status',
    'stop_id',
    'stop_nm'
]
stops_fg[stops_fg.duplicated(subset=unique_cols)].shape

(182, 19)

In [24]:
# There are 182 duplicate records on the fields in `unique_cols` (defined in
# the previous cell). These are mostly at BART stations where the station ID
# is the same for both directions. These need to be dropped to correctly count
# routes that pass through these stations in later steps.
# `req_cols` trims the frame to the listed attribute columns plus geometry
# before the duplicates are dropped.
req_cols = [
    'agency_nm',
    'pba50_id',
    'ppa_nm',
    'route_id',
    'route_type',
    'status',
    'stop_id',
    'stop_nm',
    'geometry'
]
stops_fg = stops_fg[req_cols].drop_duplicates(subset=unique_cols)

## Create stop station areas

- Find stops that are nearby:
    - Tram, Streetcar, Light Rail, and BRT within 75 ft (22.86 meters) of other stops are considered same stop
    - Rail, Ferry within 300 ft (91.44 meters) of other stops are considered same stop
- Dissolve stop areas to create station areas; this creates a multi-part geometry. 
- Explode multi-part station areas to single-part station areas
- Create station area id
- Assign station id to stops

In [25]:
# create a stops buffer gdf
stops_buff = stops_fg.copy().reset_index(drop=True)

In [26]:
# Buffer each stop (distances in meters) by the radius within which
# neighbouring stops are treated as the same station:
#   - 22.86 m (75 ft) for Tram/Streetcar/Light Rail, Cable Tram and BRT stops
#   - 91.44 m (300 ft) for every other fixed-guideway stop (Rail, Ferry)
# A per-row distance array is passed to a single buffer call, so only the
# buffer that is actually kept is computed for each stop and the result stays
# a GeoSeries instead of passing through an object ndarray.
stops_buff['geometry'] = stops_buff.geometry.buffer(
    np.where(
        stops_buff['route_type'].isin(['Tram, Streetcar, Light Rail', 'Cable Tram', 'BRT']),
        22.86,
        91.44,
    )
)

In [27]:
# dissolve buffers into station areas
stations_diss = stops_buff[['geometry']].dissolve()

In [28]:
# create single-part areas
station_areas = stations_diss.explode(ignore_index=True, 
                                      index_parts=True)

In [29]:
station_areas['station_id'] = station_areas.apply(lambda x: create_unique_id(x), axis=1)

In [30]:
# intersect stops w/ station areas
stops_stations = gpd.sjoin(stops_fg, 
                           station_areas, 
                           how='left')

In [31]:
m = station_areas.explore(color='blue', style_kwds={'fillOpacity':1})
stops_fg.explore(m=m, color='red', style_kwds={'opacity':1})

In [32]:
# update stops with station id
stops_fg['station_id'] = stops_fg['stop_id'].map(stops_stations.groupby('stop_id')['station_id'].first())

## Drop duplicate routes from stations

In [33]:
stops_fg.shape

(1290, 10)

In [34]:
unique_cols = [
    'agency_nm',
    'ppa_nm',
    'route_id',
    'route_type',
    'status',
    'station_id'
]
stops_fg[stops_fg.duplicated(subset=unique_cols)].shape

(366, 10)

In [35]:
# drop duplicate route ids from stations
# 366 records (see the duplicate count in the previous cell) repeat a route at
# a station - stations are identified by station_id
# these need to be dropped to prevent double counting of routes in downstream steps
# NOTE(review): req_cols is defined here but is not applied in this cell, so
# the de-duplicated frame keeps every stops_fg column - confirm this is intended.
req_cols = [
    'agency_nm',
    'ppa_nm',
    'route_id',
    'route_type',
    'status',
    'station_id',
    'geometry',
]
# sorting by stop_nm first means the record kept for each duplicate group is
# the one whose stop name sorts first
stations_fg = stops_fg.sort_values(by='stop_nm').drop_duplicates(subset=unique_cols)

## Count BART and Caltrain routes by station
- Add BART and Caltrain route ids to planned routes
- Group transit routes by station and agency
- Add count columns to station dataframe

In [36]:
def update_row_with_dict(df, dictionary, column):
    """Write each value of ``dictionary`` into ``column`` at the row labelled by its key.

    Parameters
    ----------
    df : DataFrame that is updated in place through ``df.loc``.
    dictionary : mapping of row label -> new value.
    column : label of the column that receives the values.

    Returns
    -------
    None. The frame passed in is modified directly.
    """
    for row_label, new_value in dictionary.items():
        df.loc[row_label, column] = new_value

In [37]:
# copy planned bart stops to a new dataframe
# The stops are selected by their project name (ppa_nm).
# NOTE(review): the rows are taken from stops_fg, i.e. before the station-level
# de-duplication that produced stations_fg, while the next cell removes the
# same projects from stations_fg - confirm this is intended.
pln_list = [
    'BART to Silicon Valley Phase 1',
    'BART to Silicon Valley Phase 2', 
    'Irvington BART Infill Station'
]
bart = (stops_fg[stops_fg['ppa_nm']
                .isin(pln_list)]
 .copy())

In [38]:
# drop planned bart stops from original df
stations_fg = stations_fg[~stations_fg['ppa_nm']
                .isin(pln_list)]

In [39]:
# duplicate planned bart stops so there are two rows per stop
bart = bart.loc[bart.index.repeat(2)].reset_index(drop=True)

In [40]:
dict.fromkeys(bart.index.to_list())

{0: None,
 1: None,
 2: None,
 3: None,
 4: None,
 5: None,
 6: None,
 7: None,
 8: None,
 9: None,
 10: None,
 11: None,
 12: None,
 13: None}

In [41]:
# update planned bart stations w/ route ids by index and route id
# The earlier step duplicated every planned stop and reset the index, so each
# stop occupies two consecutive rows (an even label followed by an odd one).
# Even rows are assigned the Green line and odd rows the Orange line.
# The mapping is derived from bart.index so it always matches the number of
# planned stops: a hand-written list of labels would leave extra rows
# unassigned, and labels that no longer exist would be appended as new rows
# by the .loc assignment inside update_row_with_dict.
planned_bart_routes = ('BA:Green', 'BA:Orange')
ba_dict = {
    row_label: planned_bart_routes[row_label % len(planned_bart_routes)]
    for row_label in bart.index
}
update_row_with_dict(df=bart, 
                     dictionary=ba_dict, 
                     column='route_id')

In [42]:
# concat updated bart dataframe to original stops df
stations_fg = pd.concat([stations_fg, bart], axis=0)

In [43]:
# update planned Caltrain stops with route id
pln_list = [
    'Downtown Extension',
    'Diridon Station Relocation'
]
stations_fg.loc[stations_fg['ppa_nm'].isin(pln_list), 'route_id'] = 'CT:Bullet'

In [44]:
# create a dataframe with only caltrain and bart stations
operator = [
    'Bay Area Rapid Transit',
    'Caltrain'
]
tier_1_2 = stations_fg[stations_fg['agency_nm'].isin(operator)].copy()

In [45]:
# Group transit routes by station and agency
tier_1_2_gp = tier_1_2.groupby(['agency_nm','station_id']).size().unstack(level=0)

In [46]:
# add bart route count to stations dataframe
stations_fg['bart_rt_ct'] = stations_fg['station_id'].map(tier_1_2_gp['Bay Area Rapid Transit'])

In [47]:
# add caltrain route count to stations dataframe
stations_fg['caltrain_rt_ct'] = stations_fg['station_id'].map(tier_1_2_gp['Caltrain'])

## Flag levels of transit service and rank stations

**Level of Transit Service**
- Tier 1: Rail station served by 3 BART lines or a BART line and Caltrain Baby Bullet
- Tier 2: Stop/station served by 2 BART lines or Caltrain Baby Bullet
- Tier 3: Stop/station served by 1 BART line, Caltrain, light rail transit, or bus rapid transit
- Tier 4: Commuter rail (SMART, ACE, Capitol Corridor) or ferry terminal (only if PDA at ferry terminal)

**Station rank**

Rank rail routes from 1-4 with 1 being the highest rank. This will be used along with tier to sort and drop duplicate routes, preserving stations with highest Level of Service and Route Rank. 
- Rail/Ferry: 1
- Tram, Streetcar, Light Rail: 2
- BRT: 3
- Cable Tram: 4

In [48]:
def flag_levels_of_service(row):
    """Return the level-of-transit-service tier (1-4) for one station record.

    ``row`` must support key lookup for 'bart_rt_ct', 'caltrain_rt_ct',
    'route_id' and 'agency_nm'. Tiers are checked from highest to lowest and
    the first match wins:

    - 1: three or more BART routes, or at least one BART route on a record
      whose route is the Caltrain Baby Bullet ('CT:Bullet') with a positive
      Caltrain route count
    - 2: exactly two BART routes, or a Baby Bullet record with a positive
      Caltrain route count
    - 3: any BART route, or an agency in the light rail / BRT operator list
    - 4: an agency in the commuter rail / ferry operator list

    Returns ``None`` when no tier applies. A missing (NaN) count fails every
    comparison, so it never satisfies a count-based condition.
    """
    tier_3_operators = (
        'AC Transit',
        'Caltrain',
        'San Francisco Municipal Transportation Agency',
        'Santa Clara Valley Transportation Authority',
    )
    tier_4_operators = (
        'Altamont Corridor Express',
        'Sonoma Marin Area Rail Transit',
        'Capitol Corridor Joint Powers Authority',
        'Golden Gate Ferry',
        'San Francisco Bay Ferry',
        'Tri-Valley - San Joaquin Valley Regional Rail Authority',
        'Amtrak',
    )

    bart_routes = row['bart_rt_ct']

    def is_baby_bullet_record():
        # Evaluated lazily so the Caltrain fields are only read when a
        # branch actually needs them.
        return (row['caltrain_rt_ct'] > 0) and (row['route_id'] == 'CT:Bullet')

    # Tier 1
    if bart_routes >= 3:
        return 1
    if (bart_routes > 0) and is_baby_bullet_record():
        return 1

    # Tier 2
    if bart_routes == 2:
        return 2
    if is_baby_bullet_record():
        return 2

    # Tier 3
    if (bart_routes > 0) or (row['agency_nm'] in tier_3_operators):
        return 3

    # Tier 4
    if row['agency_nm'] in tier_4_operators:
        return 4

    return None

In [49]:
stations_fg['level_of_service'] = stations_fg.apply(lambda row: flag_levels_of_service(row), axis=1)

In [50]:
# Create a rank for station route type
# this will be used in next steps where station_ids have dup
# want to sort by level of service, and route_type rank
# keeping records with highest level of service and rank with 1 being the highest
route_type_rank = {
    'Rail': 1,
    'Ferry': 1, 
    'Tram, Streetcar, Light Rail': 2, 
    'BRT': 3,
    'Cable Tram': 4
}

stations_fg['route_rank'] = stations_fg['route_type'].map(route_type_rank)

In [51]:
# Write stations to file
(stations_fg
 .to_crs('EPSG:4326')
 .to_file(os.path.join(work_dir, 'Data Outputs', 'stations_los.geojson'), driver='GeoJSON', index=False)
)

In [52]:
#Count unique operators
#Implement a solution to classify stations with multiple operators as a pre-step to dropping duplicates 
#This will allow for keeping of operators if a stop/station is served by just one operator  
#and for classification if the station is served by more than one operator
unique_operators = (stations_fg.groupby('station_id')['agency_nm'].nunique()
)

In [53]:
# add unique operators column to stations df
stations_fg['unique_operators'] = stations_fg['station_id'].map(unique_operators)

In [54]:
# add new operator column
stations_fg['station_operator'] = np.where(stations_fg['unique_operators'] > 1, 
                                           'Multiple Operators', 
                                           stations_fg['agency_nm']
                                          )

In [55]:
sub_cols = ['stop_nm',
            'station_id',
            'station_operator',
            'status',
            'route_type',
            'level_of_service',
            'route_rank',
            'geometry'
           ]
stations_fg_dd = (stations_fg[sub_cols]
                  .sort_values(by=['level_of_service','route_rank'])
                  .drop_duplicates(subset=('station_id','station_operator'),
                                keep='first')
                 )

#### Save to project folder and publish to AGOL

In [56]:
stations_path = os.path.join(work_dir, 'Data Outputs', 'stations_los_dedup.geojson')
(stations_fg_dd
 .to_crs('EPSG:4326')
 .to_file(stations_path, driver='GeoJSON', index=False)
)

In [57]:
(station_areas
 .to_crs('EPSG:4326')
 .to_file(os.path.join(work_dir, 'Data Outputs', 'station_areas.geojson'), driver='GeoJSON',index=False)
)

In [58]:
# # Publish item to ArcGIS Online
# item_properties = {'type': 'GeoJson', 
#                    'description':'This dataset represents fixed guideway transit stops, with TOC policy levels of service classification ' + 
#                    'This is a draft dataset and questions about the data should be directed to Raleigh McCoy or Kara Vuicich.',
#                    'title':'DRAFT Fixed Guideway Stations', 
#                    'tags': 'bay area, transit, stops, draft, toc',
#                   'overwrite':'true'}
# item = gis.content.add(item_properties, data=stations_path)
# item.publish(file_type='geojson')

In [59]:
# Update item on ArcGIS Online
stations_itemid = 'a9ea02ddee5a4e7ea7fb9285733a667b'
#stations_itemid = '43f518322cc849169c96ebc41b2dc32b'
stations_agol = gis.content.get(stations_itemid)

In [60]:
from arcgis.features import FeatureLayerCollection
stations = FeatureLayerCollection.fromitem(stations_agol)

In [61]:
stations.properties.capabilities

'Query'

In [63]:
stations.manager.overwrite(data_file=stations_path)

AttributeError: 'FeatureLayerCollection' object has no attribute 'replicas'

## Create transit rich station areas

In [None]:
toc_areas = stations_fg_dd.copy()

In [None]:
# create 1/2 mile buffer area around fixed guideway stops - deduplicated
toc_areas['geometry'] = toc_areas.geometry.buffer(804.672)

In [None]:
toc_areas_path = os.path.join(work_dir, 'Data Outputs', 'toc_policy_area_1_2_mi_buffer.geojson')
(toc_areas
 .to_crs('EPSG:4326')
 .to_file(toc_areas_path, driver='GeoJSON', index=False)
)

In [None]:
# # Publish item to ArcGIS Online
# item_properties = {'type': 'GeoJson', 
#                    'description':'This dataset represents fixed guideway transit stops with a 1/2 mile buffer applied ' + 
#                    'This is a draft dataset and questions about the data should be directed to Raleigh McCoy or Kara Vuicich.',
#                    'title':'DRAFT Transit Rich Policy Area Half Mile Buffer', 
#                    'tags': 'bay area, transit, draft, toc',
#                    'overwrite':'true'}
# item = gis.content.add(item_properties, data=toc_areas_path)
# item.publish(file_type='geojson')

In [None]:
toc_areas_itemid = '3df9742c87a849a8a09342bc58da2dd6'
toc_areas_agol = gis.content.get(toc_areas_itemid)
#toc_areas_agol.update(data=toc_areas_path)

In [None]:
from arcgis.features import FeatureLayerCollection
toc_areas = FeatureLayerCollection.fromitem(toc_areas_agol)

In [None]:
toc_areas.properties.capabilities

In [None]:
toc_areas.manager.overwrite(data_file=toc_areas_path)

## Determine TOC policy applicability area

Applicable areas include:
- PDAs areas within 1/2 mile of fixed-guideway stations
- Areas outside of PDAs within 1/2 mile of fixed-guideway station

In [None]:
m = water.explore(color='blue')
toc_areas.explore(m=m,color='green')

In [None]:
# Erase water areas
toc_areas = gpd.overlay(toc_areas, water, how='difference')

In [None]:
toc_areas.explore()

In [None]:
# calculate toc area
toc_areas['toc_area'] = round(toc_areas.geometry.area, 2)

In [None]:
pda['pda_area'] = round(pda.geometry.area, 2)

In [None]:
toc_pda_over = gpd.overlay(toc_areas, pda, how='identity')

In [None]:
# add overlay area
toc_pda_over['overlay_area'] = round(toc_pda_over.geometry.area, 2)

In [None]:
# calculate overlay percent
toc_pda_over['overlay_pct'] = round((toc_pda_over['overlay_area'] / toc_pda_over['toc_area'] * 100), 2)

In [None]:
# calculate area of pda within toc
toc_pda_over['pda_pct'] = round((toc_pda_over['overlay_area'] / toc_pda_over['pda_area'] * 100), 2)

In [None]:
# flag pdas
toc_pda_over['pda'] = np.where(toc_pda_over['pda_name'].notnull(), 1, 0)

#### Flag areas where pda area within toc area is greater than 20% of total pda area

- Within areas where this is true **all** pda areas apply

20% was chosen to avoid issues that come up when performing spatial overlay or intersect analyses. The issue often is that the overlay or intersecting area may be very small, or the edges of the layers may touch, creating a 'false positive' relationship. To avoid that, this analysis requires that 20% of the total original pda area remain after the overlay operation. Or said another way, that 20% or more of the original PDA should be within the TRA (1/2 mile buffer) area.

In [None]:
# get series of stations with pda areas greater than 20% total pda area
station_list = toc_pda_over.loc[toc_pda_over['pda_pct'] >= 20, 'station_id'].unique()

In [None]:
station_df = pd.DataFrame({'unique_station_id':station_list})

In [None]:
station_df['pda_area_gt20'] = 1

In [None]:
# add flag column to toc pda overlay dataframe
toc_pda_over['pda_area_gt20'] = (toc_pda_over['station_id']
                                 .map(station_df.set_index('unique_station_id')['pda_area_gt20']))

#### Flag areas where sum of all pda areas within toc area is greater than 20% of total toc area
- Within areas where this is true **all** pda areas apply

20% was chosen to avoid issues that come up when performing spatial overlay or intersect analyses. The issue often is that the overlay or intersecting area may be very small, or the edges of the layers may touch, creating a 'false positive' relationship. To avoid that, this analysis requires that the **sum of all pda areas within the TRA** be 20% or more of the PDA/TRA overlay area.

In [None]:
def pda_or_tra(overlay_pct, pda_flag):
    """Classify a grouped overlay record as a PDA area (1) or a TOC area without PDA (2).

    Parameters
    ----------
    overlay_pct : overlay area expressed as a percentage.
    pda_flag : 1 when the record is a PDA overlay, 0 when it is not.

    Returns
    -------
    1 when ``pda_flag`` is 1 and ``overlay_pct`` is at least 20,
    2 when ``pda_flag`` is 0 and ``overlay_pct`` is at least 50,
    otherwise ``None``.
    """
    covers_20_pct = overlay_pct >= 20
    covers_50_pct = overlay_pct >= 50

    if pda_flag == 1:
        # 1 for PDA
        return 1 if covers_20_pct else None
    if pda_flag == 0:
        # 2 for TOC no pda
        return 2 if covers_50_pct else None
    return None

In [None]:
out_cols = [
 'station_id',
 'toc_area',
 'overlay_area',
 'pda']
toc_pda_gp = (toc_pda_over[out_cols]
              .groupby(['station_id','pda','toc_area'],
                       dropna=False)
              .agg(overlay_area_sum=('overlay_area','sum'), count=('overlay_area','count'))
              .reset_index()
             )

In [None]:
# Calculate overlay percent
toc_pda_gp['overlay_pct'] = round((toc_pda_gp['overlay_area_sum'] / toc_pda_gp['toc_area'] * 100), 2)

In [None]:
# Flag whether the area is a pda area or a tra area
toc_pda_gp['pda_or_tra'] = toc_pda_gp.apply(lambda x: pda_or_tra(x['overlay_pct'], x['pda']), axis=1)

In [None]:
# Drop duplicate records from dataset
toc_pda_gp_dd = (toc_pda_gp
                 .sort_values(by='pda_or_tra')
                 .drop_duplicates(subset='station_id'))

In [None]:
# Add flag to original dataset
pda_areas = toc_pda_gp_dd[toc_pda_gp_dd['pda_or_tra'] == 1]
toc_pda_over['pda_sumarea_gt20'] = toc_pda_over['station_id'].map(pda_areas.set_index('station_id')['pda_or_tra'])

#### Flag areas where toc area greater than 50% 
- Within areas where this is true **all** toc area applies 

In [None]:
# Add toc flag to original dataset
# The selection of grouped records classified as 2 (TOC without PDA) is kept
# under its own name: `toc_areas` holds the station buffer geometries used by
# the overlay cells above, and rebinding it to this summary table would make
# those cells fail if they are re-run.
toc_gt50_areas = toc_pda_gp_dd[toc_pda_gp_dd['pda_or_tra'] == 2]
toc_pda_over['toc_area_gt50'] = toc_pda_over['station_id'].map(toc_gt50_areas.set_index('station_id')['pda_or_tra'])

#### flag final pda or tpa area

In [None]:
def final_pda_or_tra(pda_flag, pda_area_flag, pda_sum_area_flag, toc_area_flag):
    """Combine the individual flags into the final category of an overlay record.

    Parameters
    ----------
    pda_flag : 1 when the record is a PDA overlay.
    pda_area_flag : 1 when the record's station carries the ``pda_area_gt20`` flag.
    pda_sum_area_flag : 1 when the station carries the ``pda_sumarea_gt20`` flag.
    toc_area_flag : 2 when the station carries the ``toc_area_gt50`` flag.

    Returns
    -------
    1 for a PDA record whose station has either PDA flag set,
    2 when the TOC flag is set and neither PDA flag is set,
    otherwise ``None``. Missing (NaN) flags count as not set.
    """
    is_pda = pda_flag == 1
    either_pda_flag_set = (pda_area_flag == 1) or (pda_sum_area_flag == 1)

    if is_pda and either_pda_flag_set:
        return 1
    if (toc_area_flag == 2) and not either_pda_flag_set:
        return 2
    return None

In [None]:
toc_pda_over['pda_tra_final_cat'] = (toc_pda_over
                                     .apply(lambda x: final_pda_or_tra(x['pda'],
                                                                       x['pda_area_gt20'],
                                                                       x['pda_sumarea_gt20'],
                                                                       x['toc_area_gt50']), 
                                                       axis=1))

## Remove designated pda areas from toc designated areas

In many cases, an area designated as a toc may overlap with an area designated as a pda area. This results because, in many cases, it can be true that a pda may be 20% within a transit-rich area, and a neighboring transit-rich area may overlay the same pda by more than 50%. In these cases, the intersecting area should be removed with priority given to pda areas.

In [None]:
designated_pda = toc_pda_over[toc_pda_over['pda_tra_final_cat'] == 1]

In [None]:
designated_toc = toc_pda_over[toc_pda_over['pda_tra_final_cat'] == 2]

In [None]:
toc_intersect_pda = designated_toc.overlay(designated_pda[['geometry']], 
                                                how='symmetric_difference', 
                                                keep_geom_type=True)

In [None]:
# create toc remaining area gdf
toc_remaining_area = toc_intersect_pda[toc_intersect_pda['station_id'].notnull()].copy()

In [None]:
# explode multi-part geometries into single part
toc_remaining_area = toc_remaining_area.explode(ignore_index=True, index_parts=False)

In [None]:
# add an area to the remaining gdf
toc_remaining_area['remaining_area'] = round(toc_remaining_area.geometry.area, 2)

In [None]:
# find percent of remaining area
toc_remaining_area['remain_area_pct'] = round((toc_remaining_area['remaining_area'] / 
                                               toc_remaining_area['toc_area'] * 100), 2)

In [None]:
# remove remaining areas that are less than 1 percent of original toc area
toc_remaining_area = toc_remaining_area[toc_remaining_area['remain_area_pct'] > 1]

In [None]:
m = designated_pda.explore(color='red')
designated_toc.explore(m=m)

In [None]:
# create a copy of the original toc_pda_overlay gdf
toc_pda_over_final = toc_pda_over.copy()

In [None]:
intersect_ids = toc_remaining_area['station_id'].unique().tolist()

In [None]:
# drop remaining station ids from toc pda overlay dataset
toc_pda_over_final = toc_pda_over_final[~toc_pda_over_final['station_id'].isin(intersect_ids)]

In [None]:
# concatenate toc_pda_final with toc remaining gdf
toc_pda_over_final = pd.concat([toc_pda_over_final, toc_remaining_area])

In [None]:
(toc_pda_over_final[toc_pda_over_final['pda_tra_final_cat'].notnull()]
 .explore(column='pda_tra_final_cat', 
          categorical= True,
          cmap= ['red','dodgerblue'],
          style_kwds = {'opacity':1, 
                        'fillOpacity':.75
                       }
         )
)

## Add county and city to TRAs

In [None]:
tra = toc_pda_over_final[toc_pda_over_final['pda_tra_final_cat'] == 2]

In [None]:
pda_final = toc_pda_over_final[toc_pda_over_final['pda_tra_final_cat'] == 1]

In [None]:
# map human-readable names to fipco
cnty = {
    '001':'Alameda',
    '013':'Contra Costa',
    '041':'Marin',
    '055':'Napa',
    '075':'San Francisco',
    '081':'San Mateo',
    '085':'Santa Clara',
    '095':'Solano',
    '097':'Sonoma'
}
jurisdictions['county'] = jurisdictions['fipco'].map(cnty)

In [None]:
cols = [
    'stop_nm',
    'station_id',
    'station_operator',
    'status',
    'route_type',
    'level_of_service',
    'route_rank',
    'join_key',
    'pda_name',
    'pba50_chng',
    'pda',
    'pda_area_gt20',
    'pda_sumarea_gt20',
    'toc_area_gt50',
    'pda_tra_final_cat',
    'geometry'
             ]
tra_jurs = gpd.overlay(tra[cols], 
                       jurisdictions[['jurname','county','geometry']], 
                       keep_geom_type=True,
                       how='identity')

In [None]:
tra_jurs.rename(columns={'jurname':'jurisdicti'},inplace=True)

In [None]:
m = jurisdictions.explore(color='tan')
tra_jurs[['county','jurisdicti','geometry']].explore(m=m)

In [None]:
tra_jurs[tra_jurs['jurisdicti'].isnull()].explore()

In [None]:
# remove nulls. These are slivers that resulted from the identity operation
tra_final = tra_jurs[~tra_jurs['jurisdicti'].isnull()]

In [None]:
final_cols = [
    'county',
    'jurisdicti',
    'stop_nm',
    'station_id',
    'station_operator',
    'status',
    'route_type',
    'level_of_service',
    'route_rank',
    'join_key',
    'pda_name',
    'pba50_chng',
    'pda',
    'pda_area_gt20',
    'pda_sumarea_gt20',
    'toc_area_gt50',
    'pda_tra_final_cat',
    'geometry'
             ]
toc_final = pd.concat([pda_final[final_cols], tra_final[final_cols]])

## Save locally to project directory and publish to AGOL

In [None]:
toc_path = os.path.join(work_dir, 'Data Outputs', 'toc_policy_area_v2.geojson')
(toc_final
 .to_crs('EPSG:4326')
 .to_file(toc_path, driver='GeoJSON', index=False)
)

In [None]:
# # Publish item to ArcGIS Online
# item_properties = {'type': 'GeoJson', 
#                    'snippet':'This is a draft of the TOC policy areas. For more information on these data, contact Raleigh McCoy or Kara Vuicich.',
#                    'title':'DRAFT Transit Oriented Communities Policy Area', 
#                    'tags': 'bay area, land use, transit, draft, toc'}
# item = gis.content.add(item_properties, data=toc_path, folder='Transit Oriented Communities')

In [None]:
# # Analyze item and publish
# analyze = gis.content.analyze(item=item, file_type='geojson')
# pp = analyze['publishParameters']
# pp['name'] = 'toc_policy_area_v2'
# pitem = item.publish(publish_parameters=pp, file_type='geojson')

In [None]:
# Update item
toc_itemid = '182e3ee3f7b54bc1b84c26271353f5e7'
toc_agol = gis.content.get(toc_itemid)

In [None]:
toc_agol.update(data=toc_path)