In [1]:
import os
import pandas as pd
import fiona
import geopandas as gpd

In [2]:
match_dir = os.path.join('/Users',
                        'jcroff',
                        'Box',
                        'DataViz Projects',
                        'Spatial Analysis and Mapping',
                        'Active Transportation Plan',
                        'Data',
                        'shst_match_results',
                        'by_data_source_matched')

In [3]:
toole_dir = os.path.join('/Users',
                        'jcroff',
                        'Box',
                        'MTC Data for Toole Design')

In [4]:
os.listdir(match_dir)

['actc_bike_network_epsg4326_matched.geojson',
 'batc_bike_network_epsg4326_matched.geojson',
 'caltrans_d4_bike_network_epsg4326_matched.geojson',
 'ccta_bike_network_epsg4326_matched.geojson',
 'nvta_bike_network_epsg4326_matched.geojson',
 'oakland_bike_network_epsg4326_matched.geojson',
 'scta_bike_network_epsg4326_matched.geojson',
 'sfcta_bike_network_epsg4326_matched.geojson',
 'sta_bike_network_epsg4326_matched.geojson',
 'tam_bike_network_epsg4326_matched.geojson',
 'vta_bike_network_epsg4326_matched.geojson']

In [5]:
os.listdir(toole_dir)

['Bay Area Trails Collaborative Network',
 'County, City and Caltrans Bike Ped Network and Counts Data',
 'data_for_Toole.gdb',
 'GIS Data Request.xlsx',
 'Local and Regional Transit',
 'MTC Mobility Hubs top25_final_20210308.html',
 'MTC_mobility_hubs_R_final.zip',
 'PBA50_FinalBlueprintLandUse_TAZdata_dataDictionary.xlsx',
 'travelModel2_v11_link_csv.csv',
 'travelModel2_v11_link_json.zip']

In [6]:
fiona.listlayers(os.path.join(toole_dir,'data_for_Toole.gdb'))

['census_2002_2018_lehd_by_block',
 'regional_bikeway_network_2019',
 'tomtom_cityLimits_region',
 'tomtom_cityLimits_region_clipped',
 'tomtom_countyBoundaries_region',
 'tomtom_countyBoundaries_region_clipped',
 'travelModel2_v11_node',
 'travelModel2_v11_shape',
 'BayAreaTrailsCollaborative_TrailNetwork',
 'equity_priority_communities_2020_acs2018',
 'priority_development_areas_pba2050',
 'travelModel2_landUseByTAZ_2015',
 'travelModel2_landUseByTAZ_2035',
 'travelModel2_landUseByTAZ_2050',
 'travelModel2_v11_link']

## Read geojson into dataframe

In [7]:
actc = gpd.read_file(os.path.join(match_dir,'actc_bike_network_epsg4326_matched.geojson'),
                     driver='GeoJSON')

In [8]:
actc.info()

<class 'geopandas.geodataframe.GeoDataFrame'>
RangeIndex: 87856 entries, 0 to 87855
Data columns (total 17 columns):
 #   Column              Non-Null Count  Dtype   
---  ------              --------------  -----   
 0   shstReferenceId     87856 non-null  object  
 1   shstGeometryId      87856 non-null  object  
 2   fromIntersectionId  87856 non-null  object  
 3   toIntersectionId    87856 non-null  object  
 4   pp_id               87856 non-null  int64   
 5   pp_street_nam       87856 non-null  object  
 6   pp_trail_name       87856 non-null  object  
 7   pp_jurisdicti       87856 non-null  object  
 8   pp_existing_f       87856 non-null  object  
 9   pp_planned_fa       87856 non-null  object  
 10  pp_source           87856 non-null  object  
 11  pp_ex_class         54594 non-null  float64 
 12  pp_pln_class        50008 non-null  float64 
 13  pp_shape_length     87856 non-null  float64 
 14  pp_mtc_facility_id  87856 non-null  object  
 15  source              87856 no

In [9]:
actc.rename(columns={'pp_ex_class':'cma_ex_class',
                     'pp_pln_class':'cma_pln_class'},inplace=True)

In [10]:
actc.info()

<class 'geopandas.geodataframe.GeoDataFrame'>
RangeIndex: 87856 entries, 0 to 87855
Data columns (total 17 columns):
 #   Column              Non-Null Count  Dtype   
---  ------              --------------  -----   
 0   shstReferenceId     87856 non-null  object  
 1   shstGeometryId      87856 non-null  object  
 2   fromIntersectionId  87856 non-null  object  
 3   toIntersectionId    87856 non-null  object  
 4   pp_id               87856 non-null  int64   
 5   pp_street_nam       87856 non-null  object  
 6   pp_trail_name       87856 non-null  object  
 7   pp_jurisdicti       87856 non-null  object  
 8   pp_existing_f       87856 non-null  object  
 9   pp_planned_fa       87856 non-null  object  
 10  pp_source           87856 non-null  object  
 11  cma_ex_class        54594 non-null  float64 
 12  cma_pln_class       50008 non-null  float64 
 13  pp_shape_length     87856 non-null  float64 
 14  pp_mtc_facility_id  87856 non-null  object  
 15  source              87856 no

In [11]:
oak = gpd.read_file(os.path.join(match_dir,'oakland_bike_network_epsg4326_matched.geojson'),
                    driver='GeoJSON')

In [12]:
oak.info()

<class 'geopandas.geodataframe.GeoDataFrame'>
RangeIndex: 11886 entries, 0 to 11885
Data columns (total 28 columns):
 #   Column              Non-Null Count  Dtype   
---  ------              --------------  -----   
 0   shstReferenceId     11886 non-null  object  
 1   shstGeometryId      11886 non-null  object  
 2   fromIntersectionId  11886 non-null  object  
 3   toIntersectionId    11886 non-null  object  
 4   pp_id               11886 non-null  int64   
 5   pp_shape_leng       11886 non-null  float64 
 6   pp_roadway          11886 non-null  object  
 7   pp_beginning        11886 non-null  object  
 8   pp_ending           11886 non-null  object  
 9   pp_proposedcl       11886 non-null  object  
 10  pp_existingcl       11886 non-null  object  
 11  pp_prioritypr       11886 non-null  object  
 12  pp_vision           11886 non-null  int64   
 13  pp_oneway_div       11886 non-null  object  
 14  pp_longdist_c       11886 non-null  int64   
 15  pp_otheragenc       11886 no

In [13]:
oak.rename(columns={'pp_ex_class':'oak_ex_class','pp_pln_class':'oak_pln_class'},inplace=True)

In [14]:
oak.info()

<class 'geopandas.geodataframe.GeoDataFrame'>
RangeIndex: 11886 entries, 0 to 11885
Data columns (total 28 columns):
 #   Column              Non-Null Count  Dtype   
---  ------              --------------  -----   
 0   shstReferenceId     11886 non-null  object  
 1   shstGeometryId      11886 non-null  object  
 2   fromIntersectionId  11886 non-null  object  
 3   toIntersectionId    11886 non-null  object  
 4   pp_id               11886 non-null  int64   
 5   pp_shape_leng       11886 non-null  float64 
 6   pp_roadway          11886 non-null  object  
 7   pp_beginning        11886 non-null  object  
 8   pp_ending           11886 non-null  object  
 9   pp_proposedcl       11886 non-null  object  
 10  pp_existingcl       11886 non-null  object  
 11  pp_prioritypr       11886 non-null  object  
 12  pp_vision           11886 non-null  int64   
 13  pp_oneway_div       11886 non-null  object  
 14  pp_longdist_c       11886 non-null  int64   
 15  pp_otheragenc       11886 no

## Test group by of duplicated records

Group by shst id columns and bike class columns; retain mtc_facility id as list

In [15]:
#Check for duplicates
oak[oak[['shstReferenceId','shstGeometryId']].duplicated()]

Unnamed: 0,shstReferenceId,shstGeometryId,fromIntersectionId,toIntersectionId,pp_id,pp_shape_leng,pp_roadway,pp_beginning,pp_ending,pp_proposedcl,...,pp_lanemile_2,pp_centerline,pp_roadwayord,pp_shape_length,pp_source,oak_ex_class,oak_pln_class,pp_mtc_facility_id,source,geometry
126,4986e051ce7ba7f469ec9305038f13a8,0ed42c1e507abb18e7b0fe523c61a713,3224ab7c86cb877d8de4c036c9e13d3e,af95bc2b171316ddb936a91c65d6bc5b,824,88.083853,Waterfront Trail #24,,,1,...,0.03,0.02,28.0,0.000282,City of Oakland,1.0,1.0,c719c482-2e22-4123-a3ed-f86744b24800,1_bike_rules\oakland_bike_network_epsg4326.out...,"LINESTRING (-122.23269 37.77072, -122.23268 37..."
129,ef34ebbec77f568bf60889a7ca424bcb,0ed42c1e507abb18e7b0fe523c61a713,af95bc2b171316ddb936a91c65d6bc5b,3224ab7c86cb877d8de4c036c9e13d3e,824,88.083853,Waterfront Trail #24,,,1,...,0.03,0.02,28.0,0.000282,City of Oakland,1.0,1.0,c719c482-2e22-4123-a3ed-f86744b24800,1_bike_rules\oakland_bike_network_epsg4326.out...,"LINESTRING (-122.23255 37.77069, -122.23266 37..."
275,85e34f2ce1303a96f91187a012222e3e,aa1c19c0bf002c10a8c5d17e55fb73db,9cd633e44806ad4fc4ddd008714cb2d6,bb371c559837e9050de08b6a34ada733,241,1219.421711,Foothill Blvd,41st Ave,45th Ave,2B,...,0.46,0.23,13.0,0.003826,City of Oakland,3.0,2.0,310fb8a8-7836-4824-8e4d-0fc3da5a181a,1_bike_rules\oakland_bike_network_epsg4326.out...,"LINESTRING (-122.21387 37.77650, -122.21366 37..."
290,467d0e2a11344d1e497e68cbcc04b8cc,aa1c19c0bf002c10a8c5d17e55fb73db,bb371c559837e9050de08b6a34ada733,9cd633e44806ad4fc4ddd008714cb2d6,241,1219.421711,Foothill Blvd,41st Ave,45th Ave,2B,...,0.46,0.23,13.0,0.003826,City of Oakland,3.0,2.0,310fb8a8-7836-4824-8e4d-0fc3da5a181a,1_bike_rules\oakland_bike_network_epsg4326.out...,"LINESTRING (-122.21366 37.77632, -122.21387 37..."
341,f6afcec45b5df6170b4243aa1d806123,ac384d4407cfcc9f5d3bb71efb2489d7,3798afb343576b1a721acf7bb9ed6338,80e56a451d1a1afc93e0018a32983571,316,2937.129209,Lake Temescal Path,Broadway,Broadway Ter,1,...,1.11,0.56,0.0,0.008739,City of Oakland,1.0,1.0,b03204cc-89ab-4d5e-8eae-0a2da4fd9a82,1_bike_rules\oakland_bike_network_epsg4326.out...,"LINESTRING (-122.23086 37.84867, -122.23086 37..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
11822,535ecf0a7e93948e3c5a81cfb1977b6f,5f0cb94eb5b86e876eb495c2d5654323,912c009b55ced41c077c88479fa2bd47,002a47375248af272aeb670cb741d674,987,1495.301643,Hegenberger Rd,"I-880 Ramps, south","I-880 Ramps, north",4,...,0.28,0.28,4.4,0.004113,City of Oakland,,4.0,8ff3177d-47e2-4307-9e2f-ceb7a756397c,2_car_rules\oakland_bike_network_epsg4326.out....,"LINESTRING (-122.19665 37.73944, -122.19666 37..."
11828,1cf91f6443f7fa6f2e31ca91bda5f1b6,fe4cf6ef220e4e1138a45a75db51df61,bfcaa6aad25b455ad425d1de9b4aeb88,05497cf4f5abe2881b6287c503f57ee5,403,2262.190839,7th St,Fallon St,5th Ave,4,...,0.86,0.43,12.0,0.007432,City of Oakland,2.0,4.0,93be08aa-d774-4485-885a-e94770f95906,2_car_rules\oakland_bike_network_epsg4326.out....,"LINESTRING (-122.26104 37.79351, -122.26072 37..."
11833,8645791fb2ad9e49452579d99f69a6a2,675519902c7809e1ec260a21808f0cc7,d04be73e50e562a5edcd39d85b533586,dce13e7961304828d1ce92035f639d62,403,2262.190839,7th St,Fallon St,5th Ave,4,...,0.86,0.43,12.0,0.007432,City of Oakland,2.0,4.0,93be08aa-d774-4485-885a-e94770f95906,2_car_rules\oakland_bike_network_epsg4326.out....,"LINESTRING (-122.25815 37.79269, -122.25804 37..."
11835,ffdca4695812e7f9e62df0db30c07ae6,31a17f753fbc387eef874a5713cc7238,f7c84d3a0b4fd60e4d5008732ca7b83d,dcb4046d8effa067ed6c4f35e3509a1f,2166,4376.371522,Stanford Ave,Vallejo St,Berkeley border,1,...,1.66,0.83,0.0,0.014359,City of Oakland,,1.0,2952f0c6-55ae-4068-93cc-de8cba507393,2_car_rules\oakland_bike_network_epsg4326.out....,"LINESTRING (-122.27308 37.84612, -122.27311 37..."


In [16]:
#Dissolve by shared streets id columns and bike class columns
sub_cols = ['shstReferenceId','shstGeometryId','oak_ex_class','oak_pln_class','pp_mtc_facility_id']
diss_cols = ['shstReferenceId','shstGeometryId','oak_ex_class','oak_pln_class']
oak_group_test = (oak[sub_cols].
                  groupby(diss_cols)['pp_mtc_facility_id']
                  .apply(lambda x: list(x)).reset_index())

In [17]:
oak_group_test['fac_diss_count'] = oak_group_test['pp_mtc_facility_id'].apply(lambda x: len(x))

In [18]:
oak_group_test[['pp_mtc_facility_id','fac_diss_count']][oak_group_test['fac_diss_count'] > 1]

Unnamed: 0,pp_mtc_facility_id,fac_diss_count
26,"[79ebf29a-0a9e-47c4-ac4f-efc2ffc8e09a, 09e7b81...",2
40,"[be266bc9-9aa1-4a14-9938-1c0131378447, 4a359de...",2
42,"[293e96ec-1bd6-4d1d-995b-09be9642a2fe, 6f77dad...",2
61,"[93ea24c5-79c8-401c-b5a4-0f824bb0c3f5, 75f01fe...",2
98,"[f02c63cb-ccc8-4c85-a7a6-ac129e52b2ca, 976a5d1...",2
...,...,...
6920,"[abeff34b-b6ff-4602-a853-bacd900a6986, 8d7ba71...",3
6927,"[aa43a38a-4df6-443f-accf-abc3e2cfe4b9, 9003fcb...",2
6967,"[291a43c0-928c-4e12-8e1f-8e6e2539c296, 63a86e6...",2
6981,"[207b5095-1cbc-46cf-90fe-da6bd6db5d51, 6a09c0a...",2


In [19]:
#Duplicates still exist. Why? 
oak_group_test[oak_group_test[['shstReferenceId','shstGeometryId']].duplicated()]

Unnamed: 0,shstReferenceId,shstGeometryId,oak_ex_class,oak_pln_class,pp_mtc_facility_id,fac_diss_count
17,010ffd02f550aae59c98b93fe7854f73,7a053e6a1568a6a6b737c39be4beaf95,3.0,2.0,[491eec43-b389-49fd-9e05-66e53504ba8d],1
21,014104033f46bb3af63ef4557164f2f5,daba5f1ad45122a1a9a43d7ef93280ad,2.0,4.0,[4a9e55a2-3319-4d7a-9bcd-70d76c7640ea],1
44,01b88bef700aa0469ae55eb26c763520,644eee67d7b3991f2266d2eddb01a362,2.0,1.0,[5c690a89-7322-4743-a34e-827dfb799d50],1
52,024eab1874990bad0895731749837573,77d057d0c3edef3a1699a1ff66b29d0b,3.0,3.0,[2f638f3e-3775-40ec-bb72-3118307719ab],1
57,025983e01cc43440216f362e8217a814,b3d699e92d43d9ac1fb4834a8dc6f724,3.0,2.0,[e5f8876d-e0dc-47cc-bd90-883b6649fa19],1
...,...,...,...,...,...,...
6893,fc71bf81f1a57c87a7c6da4a83fc3429,d7fed0d482024d6b56c3ca7de0d52bee,3.0,2.0,[4c3a0957-d5fa-4261-a115-de709252e8cf],1
6924,fdb9d0bb592cb893419b532e6f5c1339,282444aeb5a9a39e82eb78131ca1d5f1,3.0,2.0,[013997bc-bd34-40ac-b17f-06d59b6749d7],1
6933,fe21159ca7ec5a3425b708394005c8ac,7a053e6a1568a6a6b737c39be4beaf95,3.0,2.0,[491eec43-b389-49fd-9e05-66e53504ba8d],1
6949,fed2fe70c18e1072b4bf07541791f2ad,4d44342c592ef89fa089a9d661217466,3.0,2.0,[7f5f4a9b-9386-4ac6-ac9b-009bb4f7b69d],1


In [20]:
#Different Classes 
oak_group_test[(oak_group_test['shstReferenceId'] == '010ffd02f550aae59c98b93fe7854f73') & 
               (oak_group_test['shstGeometryId'] == '7a053e6a1568a6a6b737c39be4beaf95')]

Unnamed: 0,shstReferenceId,shstGeometryId,oak_ex_class,oak_pln_class,pp_mtc_facility_id,fac_diss_count
16,010ffd02f550aae59c98b93fe7854f73,7a053e6a1568a6a6b737c39be4beaf95,2.0,2.0,[435b4202-c56a-4697-914a-4c367c1aec28],1
17,010ffd02f550aae59c98b93fe7854f73,7a053e6a1568a6a6b737c39be4beaf95,3.0,2.0,[491eec43-b389-49fd-9e05-66e53504ba8d],1
