# This notebook goes through building drive, walk, and bike centroids and centorid connectors

1. centroid nodes come from existing network
2. drive centroid connector built by finding the new non-freeway drive node closest to the existing network's drive loading point
3. bike and walk centroid connector built by finding the closest new walk and bike node to the centroid

In [1]:
import pandas as pd
import numpy as np
import geopandas as gpd
from simpledbf import Dbf5
from shapely.geometry import Point, shape, LineString
from scipy.spatial import cKDTree
import json
import math
import fiona
import os

%matplotlib inline

import matplotlib.pyplot as plt

pd.set_option('display.float_format', lambda x: '%.3f' % x)

In [2]:
%load_ext autoreload
%autoreload 2

In [3]:
from methods import link_df_to_geojson
from methods import point_df_to_geojson
from methods import reproject
from methods import num_of_drive_loadpoint_per_centroid
from methods import num_of_walk_bike_loadpoint_per_centroid
from methods import find_new_load_point
from methods import generate_centroid_connectors
from methods import consolidate_cc

from methods import project_geometry
from methods import project_gdf
from methods import buffer1
from methods import buffer2
from methods import get_non_near_connectors

In [4]:
step6_output_folder = "../../data/interim/step6_gtfs/version_12/"
county_shape_folder = "../data/external/county/"

# Inputs

In [5]:
%%time
# Network Standard files from Step 6

print('read link')
link_file = step6_output_folder + "link.feather"
link_df = pd.read_feather(link_file)

print('read node')
node_file = step6_output_folder + "node.geojson"
node_gdf = gpd.read_file(node_file)

print('read shape')
shape_file = step6_output_folder + "shape.geojson"
shape_gdf = gpd.read_file(shape_file)

read link
read node
read shape
Wall time: 2min 33s


In [6]:
# TAZ and MAZ polygons

taz_poly_gdf = gpd.read_file('../../data/external/maz_taz/tazs_TM2_v2_2.shp')
taz_poly_gdf = taz_poly_gdf.to_crs(epsg = 4326)

maz_poly_gdf = gpd.read_file('../../data/external/maz_taz/mazs_TM2_v2_2.shp')
maz_poly_gdf = maz_poly_gdf.to_crs(epsg = 4326)

In [7]:
# legacy lookup for assignable
# initially this file is used to build centroid connectors; as we have newer versions of the network,
# updated assignable lookup is used instead of this one.
tm2_lookup_df = pd.read_csv(r'C:\Users\{}\Documents\GitHub\Lasso\mtc_data\lookups\legacy_tm2_attributes.csv'.format(os.getenv('USERNAME')))
tm2_lookup_df.head()

Unnamed: 0,shstReferenceId,A_node,B_node,lanes,assignable,ft
0,00000b467d2e08f9abf13eeafee3ed46,3027596,3028181,1,1,Collector
1,000018062272093dbaa5d49303062454,5005807,5010629,1,1,Collector
2,000018a23d1330dcfbae79f44e9fca1a,2002804,2113098,1,1,Major Arterial
3,00004e60296f9a9faa45248283397c28,2042055,2049818,2,1,Major Arterial
4,00008f3db470b7993ed7efe3fdbbe371,1005731,1004692,1,1,Collector


In [8]:
# assignable analysis result from RSG
dbf = Dbf5("../../data/processed/version_05/assignable_analysis_links.dbf")
assignable_lookup_df = dbf.to_dataframe()

assignable_lookup_df = assignable_lookup_df[['A', 'B', 'ASSIGNABLE', 'CNTYPE']]
# export assignable lookup table
#assignable_lookup_df[["A", "B", "ASSIGNABLE", "CNTYPE"]].to_csv("../../data/processed/version_05/assignable_analysis_links.csv",
#                                                     index = False)

# assignable_lookup_df = pd.read_csv("../../data/processed/version_05/assignable_analysis_links.csv")

# some stats
print(assignable_lookup_df.ASSIGNABLE.value_counts())
print('\nlink connector types of assignable links: \n{}'.format(assignable_lookup_df[assignable_lookup_df.ASSIGNABLE==1].CNTYPE.value_counts()))

0    1807923
1     422124
Name: ASSIGNABLE, dtype: int64

link connector types of assignable links: 
TANA    391050
TAZ      31069
PED          5
Name: CNTYPE, dtype: int64


In [9]:
# merge assignable lookup with link gdf

link_gdf = pd.merge(link_df,
                    shape_gdf[["id", "geometry"]],
                    how = "left",
                    on = "id")

link_gdf = gpd.GeoDataFrame(link_gdf, 
                            geometry = link_gdf["geometry"],
                            crs={"init" : "epsg:4326"})

# # As noted above, since we have updated assignable lookup, we no longer use the legacy assignable lookup
# link_gdf = pd.merge(
#     link_gdf,
#     tm2_lookup_df[["shstReferenceId", "assignable"]],
#     how = "left",
#     on = "shstReferenceId"
# )

link_gdf = pd.merge(
    link_gdf,
    assignable_lookup_df[["A", "B", "ASSIGNABLE"]],
    how = "left",
    on = ["A", "B"]
)

link_gdf.rename(columns = {"ASSIGNABLE" : "assignable"}, inplace = True)

  return _prepare_from_string(" ".join(pjargs))


In [10]:
# stats
link_gdf.assignable.value_counts()

0.000    1245221
1.000     388313
Name: assignable, dtype: int64

# existing centroid connector

### existing TAZ centorids and centroid connectors

In [11]:
%%time
# read existing network

existing_network_link_gdf = gpd.read_file("../../data/external/TM2_nonMarin/tm2_links.shp")
existing_network_node_gdf = gpd.read_file("../../data/external/TM2_nonMarin/tm2_nodes.shp")

# set initial CRS
# one of the following two syntaxes would work, depending on package versions 
# existing_network_link_gdf.crs = {"init" : "esri:102646"}
# existing_network_node_gdf.crs = {"init" : "esri:102646"}
existing_network_link_gdf.crs = "esri:102646"
existing_network_node_gdf.crs = "esri:102646"

# convert to ESPG lat-lon

existing_network_link_gdf = existing_network_link_gdf.to_crs({'init': 'epsg:4326'})
print(existing_network_link_gdf.crs)

existing_network_node_gdf = existing_network_node_gdf.to_crs({'init': 'epsg:4326'})
print(existing_network_node_gdf.crs)

+init=epsg:4326 +type=crs
+init=epsg:4326 +type=crs
Wall time: 3min 50s


existing_network_link_gdf[["CNTYPE","geometry"]].to_file("../../data/interim/step7_centroid_connector/tm2_connectors.geojson",
                                                        driver = "GeoJSON")

In [12]:
#covert networks to espg 26915 for nearest node operation
existing_network_link_gdf, existing_network_node_gdf = reproject(existing_network_link_gdf,
                                                                 existing_network_node_gdf,
                                                                 26915)

In [13]:
# TAZ and MAZ centroids node id ranges in the legacy network

taz_N_list = list(range(1, 10000)) + list(range(100001, 110000)) + list(range(200001, 210000)) + list(range(300001, 310000))\
+ list(range(400001, 410000)) + list(range(500001, 510000)) + list(range(600001, 610000)) + list(range(700001, 710000))\
+ list(range(800001, 810000)) + list(range(900001, 1000000))

maz_N_list = list(range(10001, 90000)) + list(range(110001, 190000)) + list(range(210001, 290000)) + list(range(310001, 390000))\
+ list(range(410001, 490000)) + list(range(510001, 590000)) + list(range(610001, 690000)) + list(range(710001, 790000))\
+ list(range(810001, 890000))

In [14]:
# existing taz level centroids
taz_node_gdf = existing_network_node_gdf[existing_network_node_gdf.N.isin(taz_N_list)].copy()

# existing taz level centroid connectors
taz_connectors_gdf = existing_network_link_gdf[existing_network_link_gdf.CNTYPE.isin(["TAZ", "EXT"])].copy()

taz_connectors_gdf = taz_connectors_gdf[(taz_connectors_gdf.A.isin(taz_N_list)) | 
                                       (taz_connectors_gdf.B.isin(taz_N_list))].copy()

# identify centroid (c) and non-centroid (non-c) for each centroid connector
taz_connectors_gdf["c"] = np.where(taz_connectors_gdf["A"].isin(taz_N_list),
                                  taz_connectors_gdf["A"],
                                  taz_connectors_gdf["B"])
taz_connectors_gdf["non_c"] = np.where(taz_connectors_gdf["A"].isin(taz_N_list),
                                  taz_connectors_gdf["B"],
                                  taz_connectors_gdf["A"])

taz_connectors_gdf.drop_duplicates(subset = ["c", "non_c"], inplace = True)

print('number of unique TAZ centroids: {}'.format(taz_connectors_gdf.c.nunique()))
print('\nfacility types of TAZ connectors: \n{}'.format(taz_connectors_gdf.FT.value_counts()))
print('\nCNTYPE of taz connectors: \n{}\n'.format(taz_connectors_gdf.CNTYPE.value_counts()))
display(taz_connectors_gdf.head())

number of unique TAZ centroids: 4756

facility types of TAZ connectors: 
6    31624
Name: FT, dtype: int64

CNTYPE of taz connectors: 
TAZ    31591
EXT       33
Name: CNTYPE, dtype: int64



Unnamed: 0,A,B,NUMLANES,F_JNCTID,T_JNCTID,FRC,NAME,FREEWAY,TOLLRD,ONEWAY,...,FFS,USECLASS,TOLLBOOTH,DANGLING,HASTRANSIT,DELETE,TOLLSEG,geometry,c,non_c
0,1,1002463,0,0,0,0,,0,,,...,25.0,0,0,0,0,0,0,"LINESTRING (-2116430.484 4613514.384, -2116715...",1,1002463
1,1,1002702,0,0,0,0,,0,,,...,25.0,0,0,0,0,0,0,"LINESTRING (-2116430.484 4613514.384, -2116320...",1,1002702
2,1,1003078,0,0,0,0,,0,,,...,25.0,0,0,0,0,0,0,"LINESTRING (-2116430.484 4613514.384, -2116775...",1,1003078
3,1,1004400,0,0,0,0,,0,,,...,25.0,0,0,0,0,0,0,"LINESTRING (-2116430.484 4613514.384, -2116567...",1,1004400
4,1,1009833,0,0,0,0,,0,,,...,25.0,0,0,0,0,0,0,"LINESTRING (-2116430.484 4613514.384, -2116740...",1,1009833


In [15]:
# label connectors by length and drop the longer ones

taz_connectors_gdf["length"] = taz_connectors_gdf.geometry.length

taz_connectors_gdf["length_mean"] = taz_connectors_gdf.groupby(["c"]).length.transform("mean") 

taz_connectors_gdf["length_std"] = taz_connectors_gdf.groupby(["c"]).length.transform("std") 

taz_connectors_gdf["length_norm"] = (taz_connectors_gdf["length"] - taz_connectors_gdf["length_mean"]) / taz_connectors_gdf["length_std"]

taz_connectors_gdf["keep"] = np.where((taz_connectors_gdf.length <= taz_connectors_gdf.length_mean + taz_connectors_gdf.length_std) |
                                      (taz_connectors_gdf.length_std < taz_connectors_gdf.length_mean * 0.6) | 
                                      (taz_connectors_gdf.length_std.isnull()),
                                     1,
                                     0)

taz_drop_long_connectors_gdf = taz_connectors_gdf[taz_connectors_gdf.keep == 1].copy()

print('after dropping long connectors, TAZ centroid connectors went from {} to {}, with {} unique TAZ centroids'.format(
    taz_connectors_gdf.shape[0],
    taz_drop_long_connectors_gdf.shape[0],
    taz_drop_long_connectors_gdf.c.nunique()))

print(taz_drop_long_connectors_gdf.crs)

after dropping long connectors, TAZ centroid connectors went from 31624 to 31243, with 4756 unique TAZ centroids
epsg:26915


### existing MAZ centroid and centroid connectors

In [16]:
# existing maz level centroids
maz_node_gdf = existing_network_node_gdf[existing_network_node_gdf.N.isin(maz_N_list)].copy()

# existing taz level centroid connectors
maz_connectors_gdf = existing_network_link_gdf[existing_network_link_gdf.CNTYPE == "MAZ"].copy()

maz_connectors_gdf = maz_connectors_gdf[(maz_connectors_gdf.A.isin(maz_N_list)) | 
                                       (maz_connectors_gdf.B.isin(maz_N_list))].copy()

# identify centroid (c) and non-centroid (non-c) for each centroid connector
maz_connectors_gdf["c"] = np.where(maz_connectors_gdf["A"].isin(maz_N_list),
                                  maz_connectors_gdf["A"],
                                  maz_connectors_gdf["B"])
maz_connectors_gdf["non_c"] = np.where(maz_connectors_gdf["A"].isin(maz_N_list),
                                  maz_connectors_gdf["B"],
                                  maz_connectors_gdf["A"])

maz_connectors_gdf.drop_duplicates(subset = ["c", "non_c"], inplace = True)

print('\nfacility types of MAZ connectors: \n{}'.format(maz_connectors_gdf.FT.value_counts()))
print('\nCNTYPE of MAZ connectors: \n{}\n'.format(maz_connectors_gdf.CNTYPE.value_counts()))


facility types of MAZ connectors: 
6    125745
Name: FT, dtype: int64

CNTYPE of MAZ connectors: 
MAZ    125745
Name: CNTYPE, dtype: int64



In [17]:
print('number of unique N in maz_node_gdf: {}\n'.format(maz_node_gdf.N.nunique()))
print('number of unique maz centroids: {}\n'.format(maz_connectors_gdf.c.nunique()))

print('nodes that are maz nodes but not maz centroids: \n{}'.format(
    np.setdiff1d(maz_node_gdf.N.tolist(), maz_connectors_gdf.c.tolist())))

number of unique N in maz_node_gdf: 39726

number of unique maz centroids: 39721

nodes that are maz nodes but not maz centroids: 
[ 10186  16084 111432 111433 411178]


In [18]:
# examine - appear to be nodes that don't belong to any link
display(existing_network_link_gdf[(existing_network_link_gdf.B.isin([10186,16084,111432,111433,411178])) | (existing_network_link_gdf.A.isin([10186,16084,111432,111433,411178]))])
display(existing_network_node_gdf[existing_network_node_gdf.N.isin([10186,16084,111432,111433,411178])])

Unnamed: 0,A,B,NUMLANES,F_JNCTID,T_JNCTID,FRC,NAME,FREEWAY,TOLLRD,ONEWAY,...,TAP_DRIVE,FT,FFS,USECLASS,TOLLBOOTH,DANGLING,HASTRANSIT,DELETE,TOLLSEG,geometry


Unnamed: 0,N,FAREZONE,X,Y,COUNTY,MODE,TYPE,ID,PNR_CAP,PNR1,...,RTDMODE,TAZSEQ,MAZSEQ,TAPSEQ,EXTSEQ,NEW_NODE,OLD_NODE,TEMP,TEMP2,geometry
735,10186,0,-2107516.995,4612058.273,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,POINT (-2107516.995 4612058.273)
4059,16084,0,-2171711.462,4623635.108,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,POINT (-2171711.462 4623635.108)
6701,111432,0,-2107631.545,4579615.002,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,POINT (-2107631.545 4579615.002)
6702,111433,0,-2104739.322,4576745.752,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,POINT (-2104739.322 4576745.752)
34383,411178,0,-2105415.904,4625783.336,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,POINT (-2105415.904 4625783.336)


In [71]:
# maz_node_gdf = maz_node_gdf[maz_node_gdf.N.isin(maz_connectors_gdf.c.tolist())]

In [19]:
# label connectors by length and drop the longer ones

maz_connectors_gdf["length"] = maz_connectors_gdf.geometry.length

maz_connectors_gdf["length_mean"] = maz_connectors_gdf.groupby(["c"]).length.transform("mean") 

maz_connectors_gdf["length_std"] = maz_connectors_gdf.groupby(["c"]).length.transform("std") 

maz_connectors_gdf["length_norm"] = (maz_connectors_gdf["length"] - maz_connectors_gdf["length_mean"]) / maz_connectors_gdf["length_std"]

maz_connectors_gdf["keep"] = np.where((maz_connectors_gdf.length <= maz_connectors_gdf.length_mean + maz_connectors_gdf.length_std) |
                                      (maz_connectors_gdf.length_std < maz_connectors_gdf.length_mean * 0.6) | 
                                      (maz_connectors_gdf.length_std.isnull()),
                                     1,
                                     0)

maz_drop_long_connectors_gdf = maz_connectors_gdf[maz_connectors_gdf.keep == 1].copy()

print('after dropping long connectors, TAZ centroid connectors went from {} to {}, with {} unique TAZ centroids'.format(
    maz_connectors_gdf.shape[0],
    maz_drop_long_connectors_gdf.shape[0],
    maz_drop_long_connectors_gdf.c.nunique()))

after dropping long connectors, TAZ centroid connectors went from 125745 to 122987, with 39721 unique TAZ centroids


# build new TAZ centroid connectors

### For each zone, find how many nodes that have only two assignable geometries (not reference)

In [20]:
link_gdf[link_gdf.drive_access==1].roadway.value_counts()

residential       407426
service           389515
tertiary           93218
secondary          68529
primary            26492
motorway_link       4983
trunk               4160
motorway            2774
secondary_link      1566
primary_link        1506
trunk_link          1382
tertiary_link        611
Name: roadway, dtype: int64

In [21]:
assignable_link_gdf = link_gdf[(link_gdf.assignable == 1) & 
                               ~(link_gdf.roadway.isin(
                                   ["motorway_link", "motorway", "trunk", "trunk_link", "service"])) &
                              (link_gdf.drive_access == 1)].copy()

# count geometry, regardless of if the geometry represents one link (one-way) or two links (two-way)

a_geometry_count_df = assignable_link_gdf.groupby(
    ["A", "shstGeometryId"])["model_link_id"].count().reset_index().rename(columns = {"A" : "model_node_id"})
b_geometry_count_df = assignable_link_gdf.groupby(
    ["B", "shstGeometryId"])["model_link_id"].count().reset_index().rename(columns = {"B" : "model_node_id"})

node_geometry_count_df = pd.concat([a_geometry_count_df, b_geometry_count_df], ignore_index = True, sort = False)

node_geometry_count_df = node_geometry_count_df.groupby(
    ["model_node_id", "shstGeometryId"]).count().reset_index().groupby(["model_node_id"])["shstGeometryId"].count().reset_index()

node_two_geometry_df = node_geometry_count_df[node_geometry_count_df.shstGeometryId == 2].copy()
node_two_geometry_id_list = node_two_geometry_df.model_node_id.tolist()


print('out of {} nodes with assignable links, {} have only two assignable geometries'.format(
    node_geometry_count_df.shape[0], node_two_geometry_df.shape[0]))

out of 178678 nodes with assignable links, 127885 have only two assignable geometries


In [22]:
# add other node attributes
node_two_geometry_df = pd.merge(
    node_two_geometry_df,
    node_gdf[["model_node_id", "osm_node_id", "shst_node_id", "geometry"]],
    how = "left",
    on = "model_node_id"
)

# convert to geodataframe
node_two_geometry_df = gpd.GeoDataFrame(node_two_geometry_df, 
                                        geometry = node_two_geometry_df["geometry"],
                                        crs = {"init" : "epsg:4326"})

  return _prepare_from_string(" ".join(pjargs))


In [23]:
node_two_geometry_df.info()

<class 'geopandas.geodataframe.GeoDataFrame'>
Int64Index: 127885 entries, 0 to 127884
Data columns (total 5 columns):
 #   Column          Non-Null Count   Dtype   
---  ------          --------------   -----   
 0   model_node_id   127885 non-null  int64   
 1   shstGeometryId  127885 non-null  int64   
 2   osm_node_id     127885 non-null  float64 
 3   shst_node_id    127885 non-null  object  
 4   geometry        127885 non-null  geometry
dtypes: float64(1), geometry(1), int64(2), object(1)
memory usage: 5.9+ MB


### create two buffers around each TAZ

In [24]:
%%time
# creat two buffers for each TAZ, buffer1 is a 10-meter buffer, buffer2 is a minimum_rotated_rectangle buffer.

taz_poly_buffer1_gdf = taz_poly_gdf.copy()
taz_poly_buffer2_gdf = taz_poly_gdf.copy()

taz_poly_buffer1_gdf["geometry_buffer"] = taz_poly_buffer1_gdf["geometry"].apply(lambda x: buffer1(x))
taz_poly_buffer1_gdf.rename(columns = {"geometry" : "geometry_orig", "geometry_buffer" : "geometry"}, inplace = True)

taz_poly_buffer2_gdf["geometry_buffer"] = taz_poly_buffer2_gdf["geometry"].apply(lambda x: buffer2(x))
taz_poly_buffer2_gdf.rename(columns = {"geometry" : "geometry_orig", "geometry_buffer" : "geometry"}, inplace = True)

  return _prepare_from_string(" ".join(pjargs))


Wall time: 5min 8s


In [25]:
%%time
# spatially join ('intersects') node_two_geometry with three types of geographies: taz, taz_buffer1, taz_buffer2,
# so each node is tagged with the taz, taz_buffer1, taz_buffer2 it belongs to

taz_node_two_geometry_df = gpd.sjoin(node_two_geometry_df, 
                                     taz_poly_gdf[["geometry", "taz"]], 
                                     how = "left", 
                                     op = "intersects")

taz_buffer1_node_two_geometry_df = gpd.sjoin(node_two_geometry_df, 
                                             taz_poly_buffer1_gdf[["geometry", "taz"]], 
                                             how = "left", 
                                             op = "intersects")

taz_buffer2_node_two_geometry_df = gpd.sjoin(node_two_geometry_df, 
                                             taz_poly_buffer2_gdf[["geometry", "taz"]], 
                                             how = "left", 
                                             op = "intersects")

# check three spatial joins have the same node_id count
print(taz_node_two_geometry_df.model_node_id.nunique())
print(taz_buffer1_node_two_geometry_df.model_node_id.nunique())
print(taz_buffer2_node_two_geometry_df.model_node_id.nunique())
print(node_two_geometry_df.model_node_id.nunique())

Use `to_crs()` to reproject one of the input geometries to match the CRS of the other.

Left CRS: +init=epsg:4326 +type=crs
Right CRS: EPSG:4326

Use `to_crs()` to reproject one of the input geometries to match the CRS of the other.

Left CRS: +init=epsg:4326 +type=crs
Right CRS: EPSG:4326

Use `to_crs()` to reproject one of the input geometries to match the CRS of the other.

Left CRS: +init=epsg:4326 +type=crs
Right CRS: EPSG:4326



127885
127885
127885
127885
Wall time: 2min 40s


In [26]:
print('unique TAZs in spatial join with TAZ: {}'.format(taz_node_two_geometry_df.taz.nunique()))
print('unique TAZs in spatial join with TAZ 10-meter buffer: {}'.format(
    taz_buffer1_node_two_geometry_df.taz.nunique()))
print('unique TAZs in spatial join with TAZ minimum_rotated_rectangle: {}'.format(
    taz_buffer2_node_two_geometry_df.taz.nunique()))

unique TAZs in spatial join with TAZ: 4682
unique TAZs in spatial join with TAZ 10-meter buffer: 4716
unique TAZs in spatial join with TAZ minimum_rotated_rectangle: 4712


In [27]:
# count # of nodes in type of geographies
taz_node_two_geometry_count_df = taz_node_two_geometry_df.groupby(["taz"])["model_node_id"].count().reset_index()
taz_buffer1_node_two_geometry_count_df = taz_buffer1_node_two_geometry_df.groupby(["taz"])["model_node_id"].count().reset_index()
taz_buffer2_node_two_geometry_count_df = taz_buffer2_node_two_geometry_df.groupby(["taz"])["model_node_id"].count().reset_index()

In [28]:
# examples of TAZs not consistent in the three types of geographies
display(taz_node_two_geometry_count_df[taz_node_two_geometry_count_df.taz.isin([433, 435, 439, 430])])
display(taz_buffer1_node_two_geometry_count_df[taz_buffer1_node_two_geometry_count_df.taz.isin([433, 435, 439, 430])])
display(taz_buffer2_node_two_geometry_count_df[taz_buffer2_node_two_geometry_count_df.taz.isin([433, 435, 439, 430])])

Unnamed: 0,taz,model_node_id
398,430.0,15


Unnamed: 0,taz,model_node_id
414,430.0,17
420,439.0,3


Unnamed: 0,taz,model_node_id
409,430,15
415,439,3


### create taz drive centroid connectors

In [29]:
%%time
# create taz drive centroids and centroid connectors using taz_buffer1

taz_buffer1_node_two_geometry_df.rename(columns = {"taz" : "c"}, inplace = True)

taz_drive_cc_group1_gdf, taz_drive_centroid_group1_gdf = generate_centroid_connectors('drive',
                                                                        taz_drop_long_connectors_gdf,
                                                                        taz_buffer1_node_two_geometry_df,
                                                                        existing_network_node_gdf)

  return _prepare_from_string(" ".join(pjargs))


Wall time: 7min 4s


In [30]:
display(taz_drive_cc_group1_gdf.head())

Unnamed: 0,u,shst_node_id,A,geometry_ld,B,geometry_c,geometry,fromIntersectionId,shstGeometryId,id
0,1723563906.0,c4f793a9cc09e7ea16381ca307a17bab,1001671,POINT (-2116707.795 4613515.834),1,POINT (-2116430.484461717 4613514.38417453),"LINESTRING (-122.42630 37.76972, -122.42356 37...",c4f793a9cc09e7ea16381ca307a17bab,cc1,cc1
1,5437876960.0,0166709338db50c787ff1f0cbde03108,1024255,POINT (-2116335.318 4613451.920),1,POINT (-2116430.484461717 4613514.38417453),"LINESTRING (-122.42240 37.77023, -122.42356 37...",0166709338db50c787ff1f0cbde03108,cc2,cc2
2,65292019.0,47607b5b93cfe3112c30ca77905ed17c,1007266,POINT (-2116776.152 4613447.413),1,POINT (-2116430.484461717 4613514.38417453),"LINESTRING (-122.42674 37.76900, -122.42356 37...",47607b5b93cfe3112c30ca77905ed17c,cc3,cc3
3,295083423.0,0ed7b96215de420ad456a1355e3350d8,1024558,POINT (-2116574.606 4613393.596),1,POINT (-2116430.484461717 4613514.38417453),"LINESTRING (-122.42457 37.76912, -122.42356 37...",0ed7b96215de420ad456a1355e3350d8,cc4,cc4
4,4087590310.0,f59e05dcd867cb660a9dfd24bedaf2ad,1022692,POINT (-2116735.490 4613483.039),1,POINT (-2116430.484461717 4613514.38417453),"LINESTRING (-122.42646 37.76939, -122.42356 37...",f59e05dcd867cb660a9dfd24bedaf2ad,cc5,cc5


In [31]:
display(taz_drive_centroid_group1_gdf.head())

Unnamed: 0,model_node_id,geometry
0,1,POINT (-122.42356 37.77046)
7,2,POINT (-122.41851 37.76431)
15,3,POINT (-122.42438 37.76777)
23,4,POINT (-122.37238 37.82546)
26,5,POINT (-122.42760 37.76654)


In [32]:
# count how many centroid connectors were created for each taz centroid
taz_drive_cc_group1_count_df = taz_drive_cc_group1_gdf.groupby(["B"])["u"].count().reset_index()

taz_group2_list = taz_drive_cc_group1_count_df[
    taz_drive_cc_group1_count_df.u < 3].B.tolist() + \
[c for c in taz_node_gdf.N.tolist() if c not in taz_drive_cc_group1_count_df.B.tolist()]

print('{} TAZ centroids got 3 or more centroid connectors from 1st round of cc creation using taz_buffer1'.format(
    taz_drive_cc_group1_count_df[taz_drive_cc_group1_count_df.u >= 3].B.nunique()))

print('{} TAZ centroids did not get 3 or more centroid connectors from 1st round of cc creation using taz_buffer1'.format(
    len(taz_group2_list)))

4551 TAZ centroids got 3 or more centroid connectors from 1st round of cc creation using taz_buffer1
205 TAZ centroids did not get 3 or more centroid connectors from 1st round of cc creation using taz_buffer1


In [33]:
%%time
# for the TAZ centroids that didn't get 3 or more centroid connectors in 1st round,
# create centroid connectors using taz_buffer2, considering all nodes within the TAZ boundary that have assignable links,
# not limited to nodes with two geometries as in 1st round. 

exclude_links_df = link_gdf[link_gdf.roadway.isin(["motorway_link", "motorway", "trunk", "trunk_link"])]

drive_node_gdf = node_gdf[(node_gdf.drive_access == 1) & 
                          ~(node_gdf.osm_node_id.isin(exclude_links_df.u.tolist() + 
                              exclude_links_df.v.tolist())) &
                         (node_gdf.model_node_id.isin(assignable_link_gdf.A.tolist() + 
                                                     assignable_link_gdf.A.tolist()))].copy()

taz_drive_cc_group2_gdf, taz_drive_centroid_group2_gdf = generate_centroid_connectors('drive',
                                                                        taz_drop_long_connectors_gdf[taz_drop_long_connectors_gdf.c.isin(taz_group2_list)],
                                                                        drive_node_gdf,
                                                                        existing_network_node_gdf)

Wall time: 51.9 s


  return _prepare_from_string(" ".join(pjargs))


In [34]:
# combine connectors and centroids created in two rounds, drop duplicates

taz_drive_cc_gdf = pd.concat(
    [taz_drive_cc_group1_gdf, taz_drive_cc_group2_gdf],
    sort = False,
    ignore_index = True
)

taz_drive_cc_gdf.drop_duplicates(subset = ["A", "B"], inplace = True)

taz_drive_cc_gdf["shstGeometryId"] = range(1, 1+len(taz_drive_cc_gdf))
taz_drive_cc_gdf["shstGeometryId"] = taz_drive_cc_gdf["shstGeometryId"].apply(lambda x: "drive_cc" + str(x))
taz_drive_cc_gdf["id"] = taz_drive_cc_gdf["shstGeometryId"]

taz_drive_centroid_gdf = pd.concat(
    [taz_drive_centroid_group1_gdf, taz_drive_centroid_group2_gdf],
    sort = False,
    ignore_index = True
)

taz_drive_centroid_gdf.drop_duplicates(subset = ["model_node_id"], inplace = True)

print('total {} drive centroid connectors for {} taz centroids; the centroid file has {} nodes'.format(
    taz_drive_cc_gdf.shape[0],
    taz_drive_cc_gdf.B.nunique(),
    taz_drive_centroid_gdf.model_node_id.nunique()))

total 27727 drive centroid connectors for 4756 taz centroids; the centroid file has 4756 nodes


In [35]:
%%time
# tag if a connector will be kept: if there are more than four centroid connectors, 
# keep four that have the greatest divergence. 
# If less than four centroid connectors exist, keep all the connections.

keep_taz_drive_cc_gdf = get_non_near_connectors(taz_drive_cc_gdf, taz_N_list, maz_N_list, node_two_geometry_id_list)

keep_taz_drive_cc_gdf = taz_drive_cc_gdf[taz_drive_cc_gdf.id.isin(keep_taz_drive_cc_gdf.id)].copy()

print('{} taz drive connectors kept, from {} TAZ centroids'.format(keep_taz_drive_cc_gdf.shape[0],
                                                                   keep_taz_drive_cc_gdf.B.nunique()))

16480 taz drive connectors kept, from 4756 TAZ centroids
Wall time: 47.7 s


In [36]:
print('save TAZ drive connectors')
keep_taz_drive_cc_gdf.to_pickle("../../data/interim/step7_centroid_connector/taz_drive_cc.pickle")

print('save {} TAZ centroids'.format(taz_drive_centroid_gdf.shape[0]))
taz_drive_centroid_gdf.to_pickle("../../data/interim/step7_centroid_connector/taz_drive_centroid.pickle")

save TAZ drive connectors
save 4756 TAZ centroids


In [95]:
print('save TAZ drive connector links')
gpd.GeoDataFrame(keep_taz_drive_cc_gdf[["geometry", "id"]],
                                    geometry = keep_taz_drive_cc_gdf['geometry'], crs = {"init" : "epsg:4326"}).to_file(
    "../../data/interim/step7_centroid_connector/taz_drive.geojson",
                        driver = "GeoJSON")

save TAZ drive connector links


  return _prepare_from_string(" ".join(pjargs))
  gpd.GeoDataFrame(keep_taz_drive_cc_gdf[["geometry", "id"]],


In [37]:
taz_cc_link_df, taz_cc_shape_gdf = consolidate_cc(link_gdf,
#                                              taz_drive_centroid_gdf,
                                             node_gdf,
                                             keep_taz_drive_cc_gdf)

print('After adding the other direction, taz drive centroid connectors has {} links, {} shape'.format(
    taz_cc_link_df.shape[0],
    taz_cc_shape_gdf.shape[0]))

After adding the other direction, taz drive centroid connectors has 32960 links, 16480 shape


In [38]:
# save taz drive centroid connector shapes
taz_cc_shape_gdf = gpd.GeoDataFrame(taz_cc_shape_gdf, geometry = taz_cc_shape_gdf['geometry'], crs = {"init" : "epsg:4326"})

taz_cc_shape_gdf.to_file("../../data/interim/step7_centroid_connector/taz.geojson",
                         driver = "GeoJSON")

  return _prepare_from_string(" ".join(pjargs))


# build new MAZ centroid connectors

### create MAZ drive centroid connectors

In [39]:
%%time
# creat two buffers for each MAZ, buffer1 is a 10-meter buffer, buffer2 is a minimum_rotated_rectangle buffer.

maz_poly_buffer1_gdf = maz_poly_gdf.copy()
maz_poly_buffer2_gdf = maz_poly_gdf.copy()

maz_poly_buffer1_gdf["geometry_buffer"] = maz_poly_buffer1_gdf["geometry"].apply(lambda x: buffer1(x))
maz_poly_buffer2_gdf["geometry_buffer"] = maz_poly_buffer2_gdf["geometry"].apply(lambda x: buffer2(x))

  return _prepare_from_string(" ".join(pjargs))


Wall time: 44min 31s


In [44]:
%%time
# spatially join ('intersects') node_two_geometry with maz_buffer1, 
# so each node is tagged with the maz_buffer1 it belongs to

maz_poly_buffer1_gdf.rename(columns = {"geometry" : "geometry_orig", "geometry_buffer" : "geometry"}, inplace = True)

maz_buffer1_node_two_geometry_df = gpd.sjoin(node_two_geometry_df, maz_poly_buffer1_gdf[["geometry", "maz"]], 
                                             how = "left", op = "intersects")

Use `to_crs()` to reproject one of the input geometries to match the CRS of the other.

Left CRS: +init=epsg:4326 +type=crs
Right CRS: EPSG:4326



Wall time: 30.2 s


In [45]:
%%time
# create maz drive centroids and centroid connectors using maz_buffer1

maz_buffer1_node_two_geometry_df.rename(columns = {"maz" : "c"}, inplace = True)

maz_drive_cc_group1_gdf, maz_drive_centroid_group1_gdf = generate_centroid_connectors('drive',
                                                                        maz_drop_long_connectors_gdf,
                                                                        maz_buffer1_node_two_geometry_df,
                                                                        existing_network_node_gdf)

  return _prepare_from_string(" ".join(pjargs))


Wall time: 34min 46s


In [46]:
maz_drive_cc_group1_gdf.info()

<class 'geopandas.geodataframe.GeoDataFrame'>
Int64Index: 71467 entries, 0 to 108276
Data columns (total 10 columns):
 #   Column              Non-Null Count  Dtype   
---  ------              --------------  -----   
 0   u                   71467 non-null  object  
 1   shst_node_id        71467 non-null  object  
 2   A                   71467 non-null  object  
 3   geometry_ld         71467 non-null  geometry
 4   B                   71467 non-null  int64   
 5   geometry_c          71467 non-null  object  
 6   geometry            71467 non-null  geometry
 7   fromIntersectionId  71467 non-null  object  
 8   shstGeometryId      71467 non-null  object  
 9   id                  71467 non-null  object  
dtypes: geometry(2), int64(1), object(7)
memory usage: 6.0+ MB


In [47]:
# count how many drive centroid connectors were created for each MAZ centroid
maz_drive_cc_count_df = maz_drive_cc_group1_gdf.groupby(["B"])["u"].count().reset_index()

maz_group2_list = maz_drive_cc_count_df[maz_drive_cc_count_df.u < 2].B.tolist() + \
[c for c in maz_node_gdf.N.tolist() if c not in maz_drive_cc_count_df.B.tolist()]

print('{} MAZ centroids got 2 or more drive centroid connectors from 1st round of cc creation using maz_buffer1'.format(
    maz_drive_cc_count_df[maz_drive_cc_count_df.u >= 2].B.nunique()))

print('{} TAZ centroids did not get 2 or more centroid connectors from 1st round of cc creation using taz_buffer1'.format(
    len(maz_group2_list)))

22596 MAZ centroids got 2 or more drive centroid connectors from 1st round of cc creation using maz_buffer1
17130 TAZ centroids did not get 3 or more centroid connectors from 1st round of cc creation using taz_buffer1


In [50]:
%%time
# for the MAZ centroids that didn't get 2 or more centroid connectors in 1st round,
# create centroid connectors using maz_buffer2, considering all nodes within the MAZ boundary that have assignable links,
# not limited to nodes with two geometries as in 1st round. 

exclude_links_df = link_gdf[link_gdf.roadway.isin(["motorway_link", "motorway", "trunk", "trunk_link"])]

drive_node_gdf = node_gdf[(node_gdf.drive_access == 1) & 
                          ~(node_gdf.osm_node_id.isin(exclude_links_df.u.tolist() + 
                              exclude_links_df.v.tolist())) &
                         (node_gdf.model_node_id.isin(assignable_link_gdf.A.tolist() + 
                                                     assignable_link_gdf.A.tolist()))].copy()

maz_drive_cc_group2_gdf, maz_drive_centroid_group2_gdf = generate_centroid_connectors('drive',
                                                                        maz_drop_long_connectors_gdf[maz_drop_long_connectors_gdf.c.isin(maz_group2_list)],
                                                                        drive_node_gdf,
                                                                        existing_network_node_gdf)

  return _prepare_from_string(" ".join(pjargs))


Wall time: 37min 54s


In [51]:
maz_drive_cc_group2_gdf.info()

<class 'geopandas.geodataframe.GeoDataFrame'>
Int64Index: 28592 entries, 0 to 41434
Data columns (total 10 columns):
 #   Column              Non-Null Count  Dtype   
---  ------              --------------  -----   
 0   u                   28592 non-null  object  
 1   shst_node_id        28592 non-null  object  
 2   A                   28592 non-null  object  
 3   geometry_ld         28592 non-null  geometry
 4   B                   28592 non-null  int64   
 5   geometry_c          28592 non-null  object  
 6   geometry            28592 non-null  geometry
 7   fromIntersectionId  28592 non-null  object  
 8   shstGeometryId      28592 non-null  object  
 9   id                  28592 non-null  object  
dtypes: geometry(2), int64(1), object(7)
memory usage: 2.4+ MB


In [52]:
# combine connectors and centroids created in two rounds, drop duplicates

maz_drive_cc_gdf = pd.concat(
    [maz_drive_cc_group1_gdf, maz_drive_cc_group2_gdf],
    sort = False,
    ignore_index = True
)

maz_drive_cc_gdf.drop_duplicates(subset = ["A", "B"], inplace = True)

maz_drive_centroid_gdf = pd.concat(
    [maz_drive_centroid_group1_gdf, maz_drive_centroid_group2_gdf],
    sort = False,
    ignore_index = True
)

maz_drive_centroid_gdf.drop_duplicates(subset = ["model_node_id"], inplace = True)

In [53]:
# tag if a connector will be kept: if there are more than four centroid connectors, 
# keep four that have the greatest divergence. 
# If less than four centroid connectors exist, keep all the connections.

keep_maz_drive_cc_gdf = get_non_near_connectors(maz_drive_cc_gdf, taz_N_list, maz_N_list, node_two_geometry_id_list)

keep_maz_drive_cc_gdf = maz_drive_cc_gdf[maz_drive_cc_gdf.id.isin(keep_maz_drive_cc_gdf.id)].copy()

print('{} maz drive connectors kept, from {} maz centroids'.format(keep_maz_drive_cc_gdf.shape[0],
                                                                   keep_maz_drive_cc_gdf.B.nunique()))

print(keep_maz_drive_cc_gdf.crs)

87968 maz drive connectors kept, from 39721 maz centroids
epsg:4326


In [55]:
print('save MAZ drive connectors')
keep_maz_drive_cc_gdf.to_pickle("../../data/interim/step7_centroid_connector/maz_drive_cc.pickle")

print('save {} MAZ centroids'.format(maz_drive_centroid_gdf.shape[0]))
maz_drive_centroid_gdf.to_pickle("../../data/interim/step7_centroid_connector/maz_drive_centroid.pickle")

print('save MAZ drive connectors geometries')
keep_maz_drive_cc_gdf[["geometry"]].to_file("../../data/interim/step7_centroid_connector/maz_drive.geojson",
                                            driver = "GeoJSON")

save MAZ drive connectors
save 39721 MAZ centroids
save MAZ drive connectors geometries


### create MAZ walk centroid connectors

In [56]:
%%time

walk_node_gdf = node_gdf[(node_gdf.walk_access == 1) & (node_gdf.rail_only != 1)]

maz_walk_cc_gdf, maz_walk_centroid_gdf = generate_centroid_connectors('walk',
                                                                      maz_drop_long_connectors_gdf,
                                                                      walk_node_gdf, 
                                                                      maz_node_gdf)

  return _prepare_from_string(" ".join(pjargs))


Wall time: 1h 44min 14s


In [57]:
print('{} maz walk connectors kept, from {} maz centroids'.format(maz_walk_cc_gdf.shape[0],
                                                                  maz_walk_cc_gdf.B.nunique()))

198630 maz walk connectors kept, from 39726 maz centroids


In [58]:
# print('save MAZ walk connectors')
maz_walk_cc_gdf.to_pickle("../../data/interim/step7_centroid_connector/maz_walk_cc.pickle")

# print('save {} MAZ walk centroids'.format(maz_walk_cc_gdf.shape[0]))
maz_walk_centroid_gdf.to_pickle("../../data/interim/step7_centroid_connector/maz_walk_centroid.pickle")

# print('save MAZ walk centroid connector shapes')
maz_walk_cc_gdf[["geometry"]].to_file("../../data/interim/step7_centroid_connector/maz_walk.geojson",
                                      driver = "GeoJSON")

### create MAZ bike centroid connectors

In [59]:
%%time

bike_node_gdf = node_gdf[(node_gdf.bike_access == 1)]

maz_bike_cc_gdf, maz_bike_centroid_gdf = generate_centroid_connectors('bike',
                                                                        maz_drop_long_connectors_gdf,
                                                                        bike_node_gdf, 
                                                                        maz_node_gdf)

  return _prepare_from_string(" ".join(pjargs))


Wall time: 1h 36min 47s


In [60]:
print('{} maz bike connectors kept, from {} maz centroids'.format(maz_bike_cc_gdf.shape[0],
                                                                  maz_bike_cc_gdf.B.nunique()))

198630 maz bike connectors kept, from 39726 maz centroids


In [61]:
# print('save MAZ bike connectors')
maz_bike_cc_gdf.to_pickle("../../data/interim/step7_centroid_connector/maz_bike_cc.pickle")

# print('save {} MAZ bike centroids'.format(maz_walk_cc_gdf.shape[0]))
maz_bike_centroid_gdf.to_pickle("../../data/interim/step7_centroid_connector/maz_bike_centroid.pickle")

# print('save MAZ bike centroid connector shapes')
maz_bike_cc_gdf[["geometry"]].to_file("../../data/interim/step7_centroid_connector/maz_bike.geojson",
                                      driver = "GeoJSON")

In [62]:
print(maz_walk_centroid_gdf.shape)
maz_walk_cc_gdf.info()

(39726, 2)
<class 'geopandas.geodataframe.GeoDataFrame'>
Int64Index: 198630 entries, 0 to 198629
Data columns (total 10 columns):
 #   Column              Non-Null Count   Dtype   
---  ------              --------------   -----   
 0   u                   198630 non-null  float64 
 1   shst_node_id        198630 non-null  object  
 2   A                   198630 non-null  int64   
 3   geometry_ld         198630 non-null  geometry
 4   B                   198630 non-null  int64   
 5   geometry_c          198630 non-null  object  
 6   geometry            198630 non-null  geometry
 7   fromIntersectionId  198630 non-null  object  
 8   shstGeometryId      198630 non-null  object  
 9   id                  198630 non-null  object  
dtypes: float64(1), geometry(2), int64(2), object(5)
memory usage: 16.7+ MB


In [63]:
print(maz_bike_centroid_gdf.shape)
maz_bike_cc_gdf.info()

(39726, 2)
<class 'geopandas.geodataframe.GeoDataFrame'>
Int64Index: 198630 entries, 0 to 198629
Data columns (total 10 columns):
 #   Column              Non-Null Count   Dtype   
---  ------              --------------   -----   
 0   u                   198630 non-null  float64 
 1   shst_node_id        198630 non-null  object  
 2   A                   198630 non-null  int64   
 3   geometry_ld         198630 non-null  geometry
 4   B                   198630 non-null  int64   
 5   geometry_c          198630 non-null  object  
 6   geometry            198630 non-null  geometry
 7   fromIntersectionId  198630 non-null  object  
 8   shstGeometryId      198630 non-null  object  
 9   id                  198630 non-null  object  
dtypes: float64(1), geometry(2), int64(2), object(5)
memory usage: 16.7+ MB


In [64]:
# leave previously disconnected 5 MAZs disconnected

maz_bike_cc_gdf = maz_bike_cc_gdf[~maz_bike_cc_gdf.B.isin([10186, 16084, 111432, 111433, 411178])]
maz_walk_cc_gdf = maz_walk_cc_gdf[~maz_walk_cc_gdf.B.isin([10186, 16084, 111432, 111433, 411178])]

In [65]:
keep_maz_drive_cc_gdf[keep_maz_drive_cc_gdf.A.isin([10186, 16084, 111432, 111433, 411178])]

Unnamed: 0,u,shst_node_id,A,geometry_ld,B,geometry_c,geometry,fromIntersectionId,shstGeometryId,id


In [66]:
maz_drive_centroid_gdf[maz_drive_centroid_gdf.model_node_id.isin([10186, 16084, 111432, 111433, 411178])]

Unnamed: 0,model_node_id,geometry


### consolidate MAZ drive, walk, bike centroid connectors

In [67]:
maz_walk_cc_gdf = pd.read_pickle("../../data/interim/step7_centroid_connector/maz_walk_cc.pickle")
maz_walk_centroid_gdf = pd.read_pickle("../../data/interim/step7_centroid_connector/maz_walk_centroid.pickle")

maz_bike_cc_gdf = pd.read_pickle("../../data/interim/step7_centroid_connector/maz_bike_cc.pickle")

In [68]:
maz_cc_link_df, maz_cc_shape_gdf = consolidate_cc(link_gdf,
#                                              maz_drive_centroid_gdf,
                                                  node_gdf,
                                                  keep_maz_drive_cc_gdf, 
                                                  maz_walk_cc_gdf, 
                                                  maz_bike_cc_gdf, 
                                                 )

In [69]:
maz_cc_link_df.info()
maz_cc_shape_gdf.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 531584 entries, 0 to 531583
Data columns (total 11 columns):
 #   Column              Non-Null Count   Dtype  
---  ------              --------------   -----  
 0   A                   531584 non-null  int64  
 1   B                   531584 non-null  int64  
 2   drive_access        175936 non-null  float64
 3   walk_access         510418 non-null  float64
 4   bike_access         507038 non-null  float64
 5   shstGeometryId      531584 non-null  object 
 6   id                  531584 non-null  object 
 7   u                   265792 non-null  float64
 8   v                   265792 non-null  float64
 9   fromIntersectionId  265792 non-null  object 
 10  toIntersectionId    265792 non-null  object 
dtypes: float64(5), int64(2), object(4)
memory usage: 44.6+ MB
<class 'pandas.core.frame.DataFrame'>
Int64Index: 265792 entries, 0 to 265791
Data columns (total 4 columns):
 #   Column              Non-Null Count   Dtype   
---  ------    

In [70]:
# add additional fields

maz_cc_shape_gdf["id"] = "maz_" + maz_cc_shape_gdf["id"]
maz_cc_shape_gdf["shstGeometryId"] = maz_cc_shape_gdf["id"]
maz_cc_link_df["id"] = "maz_" + maz_cc_link_df["id"]
maz_cc_link_df["shstGeometryId"] = maz_cc_link_df["id"]
maz_cc_link_df["roadway"] = "maz"

taz_cc_shape_gdf["id"] = "taz_" + taz_cc_shape_gdf["id"]
taz_cc_shape_gdf["shstGeometryId"] = taz_cc_shape_gdf["id"]
taz_cc_link_df["id"] = "taz_" + taz_cc_link_df["id"]
taz_cc_link_df["shstGeometryId"] = taz_cc_link_df["id"]
taz_cc_link_df["roadway"] = "taz"

In [71]:
taz_cc_shape_gdf

Unnamed: 0,id,geometry,fromIntersectionId,toIntersectionId,shstGeometryId
0,taz_cc661,"LINESTRING (-122.48890 37.77798, -122.48574 37...",55d370526a55a5d348e23751aad86ac0,,taz_cc661
1,taz_cc830,"LINESTRING (-122.48890 37.77798, -122.48560 37...",55d370526a55a5d348e23751aad86ac0,,taz_cc830
2,taz_cc1710,"LINESTRING (-122.48890 37.77798, -122.49096 37...",55d370526a55a5d348e23751aad86ac0,,taz_cc1710
3,taz_cc323,"LINESTRING (-122.40358 37.73441, -122.40141 37...",5c7c469988248e3572b82b39c894dfd6,,taz_cc323
4,taz_cc494,"LINESTRING (-122.42312 37.73736, -122.42210 37...",7eaf8516bbeb338c7fc1a89ec32accd2,,taz_cc494
...,...,...,...,...,...
16475,taz_cc15624,"LINESTRING (-122.51882 37.96222, -122.51903 37...",ff2becc676a9c4cedab5f099f10a4eee,,taz_cc15624
16476,taz_cc15572,"LINESTRING (-122.53791 38.00294, -122.53528 38...",14965915f21991aa34f40aabcff54712,,taz_cc15572
16477,taz_cc15641,"LINESTRING (-122.61559 38.10042, -122.61509 38...",bdc4168ac410b3d1c594d35e28b8750b,,taz_cc15641
16478,taz_cc15594,"LINESTRING (-122.55709 37.98342, -122.54763 37...",b689e4c0b34ac9137082d027c9fe452a,,taz_cc15594


In [72]:
maz_cc_shape_gdf

Unnamed: 0,id,geometry,fromIntersectionId,toIntersectionId,shstGeometryId
0,maz_cc1700,"LINESTRING (-122.48890 37.77798, -122.48934 37...",55d370526a55a5d348e23751aad86ac0,,maz_cc1700
1,maz_cc93330,"LINESTRING (-122.48890 37.77798, -122.48841 37...",55d370526a55a5d348e23751aad86ac0,,maz_cc93330
2,maz_cc4885,"LINESTRING (-122.48890 37.77798, -122.48827 37...",55d370526a55a5d348e23751aad86ac0,,maz_cc4885
3,maz_cc1147,"LINESTRING (-122.40358 37.73441, -122.40322 37...",5c7c469988248e3572b82b39c894dfd6,,maz_cc1147
4,maz_cc94197,"LINESTRING (-122.40358 37.73441, -122.40411 37...",5c7c469988248e3572b82b39c894dfd6,,maz_cc94197
...,...,...,...,...,...
265787,maz_cc254752,"LINESTRING (-122.53399 37.97255, -122.53402 37...",76a5f9c2d6475f92407e74851390afcd,,maz_cc254752
265788,maz_cc252175,"LINESTRING (-122.50579 38.01442, -122.50792 38...",fe0248568538ee4788947378fe2103ee,,maz_cc252175
265789,maz_cc251317,"LINESTRING (-122.52150 37.94815, -122.52165 37...",a53800167102307969b5b6b68cfcf79a,,maz_cc251317
265790,maz_cc251603,"LINESTRING (-122.52150 37.94815, -122.52209 37...",a53800167102307969b5b6b68cfcf79a,,maz_cc251603


# consolidate centroid connectors and add attributes

### consolidate and renumber

In [73]:
# merge TAZ cc shape and MAZ cc shape
all_cc_shape_gdf = pd.concat(
    [taz_cc_shape_gdf, maz_cc_shape_gdf],
    sort = False,
    ignore_index = True
)

In [74]:
# add 'county'
all_cc_shape_gdf = pd.merge(
    all_cc_shape_gdf,
    node_gdf[["shst_node_id", "county"]].rename(columns = {"shst_node_id" : "fromIntersectionId"}),
    how = "left",
    on = "fromIntersectionId"
)
print('TAZ/MAZ centroid connections total {} shapes'.format(all_cc_shape_gdf.id.nunique()))

TAZ/MAZ centroid connections total 282272 shapes


In [75]:
all_cc_shape_gdf.info()

<class 'geopandas.geodataframe.GeoDataFrame'>
Int64Index: 282272 entries, 0 to 282271
Data columns (total 6 columns):
 #   Column              Non-Null Count   Dtype   
---  ------              --------------   -----   
 0   id                  282272 non-null  object  
 1   geometry            282272 non-null  geometry
 2   fromIntersectionId  282272 non-null  object  
 3   toIntersectionId    0 non-null       object  
 4   shstGeometryId      282272 non-null  object  
 5   county              282272 non-null  object  
dtypes: geometry(1), object(5)
memory usage: 15.1+ MB


In [None]:
# maz_bike_cc_gdf = maz_bike_cc_gdf[~maz_bike_cc_gdf.B.isin([10186, 16084, 111432, 111433, 411178])]
# maz_walk_cc_gdf = maz_walk_cc_gdf[~maz_walk_cc_gdf.B.isin([10186, 16084, 111432, 111433, 411178])]

In [76]:
all_cc_shape_gdf.county.value_counts()

Santa Clara      67529
Alameda          59584
Contra Costa     42274
San Mateo        29748
San Francisco    26606
Sonoma           20509
Solano           19449
Marin            10111
Napa              6462
Name: county, dtype: int64

In [77]:
# convert to geodataframe
all_cc_shape_gdf = gpd.GeoDataFrame(all_cc_shape_gdf, geometry = all_cc_shape_gdf.geometry, crs = shape_gdf.crs)

  all_cc_shape_gdf = gpd.GeoDataFrame(all_cc_shape_gdf, geometry = all_cc_shape_gdf.geometry, crs = shape_gdf.crs)


In [78]:
# merge TAZ cc shape and MAZ cc shape, add additional fields

all_cc_link_df = pd.concat(
    [taz_cc_link_df, maz_cc_link_df],
    sort = False,
    ignore_index = True
)

all_cc_link_df = pd.merge(
    all_cc_link_df,
    all_cc_shape_gdf[["id", "county"]],
    how = "left",
    on = ["id"]
)

all_cc_link_df["drive_access"].fillna(0, inplace = True)
all_cc_link_df["bike_access"].fillna(0, inplace = True)
all_cc_link_df["walk_access"].fillna(0, inplace = True)

all_cc_link_df["drive_access"] = all_cc_link_df["drive_access"].astype(int)
all_cc_link_df["bike_access"] = all_cc_link_df["bike_access"].astype(int)
all_cc_link_df["walk_access"] = all_cc_link_df["walk_access"].astype(int)

all_cc_link_df["rail_only"] = 0

In [79]:
# get the last node and link number of counties

county_last_link_id_df = link_df.groupby("county")["model_link_id"].max().reset_index().rename(
    columns = {"model_link_id" : "county_last_id"})

all_cc_link_df = pd.merge(
    all_cc_link_df,
    county_last_link_id_df,
    how = "left",
    on = "county"
)

all_cc_link_df["model_link_id"] = all_cc_link_df.groupby(["county"]).cumcount() + 1

all_cc_link_df["model_link_id"] = all_cc_link_df["model_link_id"] + all_cc_link_df["county_last_id"]

In [80]:
print('centroid connectors has {} unique model_link_id, {} unique id'.format(
    all_cc_link_df.model_link_id.nunique(),
    all_cc_link_df.id.nunique()))

centroid connectors has 564544 unique model_link_id, 282272 unique id


In [81]:
# merge TAZ drive centroids and MAZ walk centroids
all_centroid_node_gdf = pd.concat(
    [taz_drive_centroid_gdf, maz_walk_centroid_gdf],
    sort = False,
    ignore_index = True
)

all_centroid_node_gdf["drive_access"] = 1
all_centroid_node_gdf['walk_access'] = 1
all_centroid_node_gdf['bike_access'] = 1
all_centroid_node_gdf['rail_only'] = 0

print('total {} centroids'.format(all_centroid_node_gdf.model_node_id.nunique()))

total 44482 centroids


In [82]:
all_centroid_node_gdf

Unnamed: 0,model_node_id,geometry,drive_access,walk_access,bike_access,rail_only
0,1,POINT (-122.42356 37.77046),1,1,1,0
1,2,POINT (-122.41851 37.76431),1,1,1,0
2,3,POINT (-122.42438 37.76777),1,1,1,0
3,4,POINT (-122.37238 37.82546),1,1,1,0
4,5,POINT (-122.42760 37.76654),1,1,1,0
...,...,...,...,...,...,...
44477,814481,POINT (-122.54322 38.06314),1,1,1,0
44478,814495,POINT (-122.55315 38.07583),1,1,1,0
44479,814497,POINT (-122.60985 38.10304),1,1,1,0
44480,814500,POINT (-122.50930 37.89107),1,1,1,0


In [83]:
#concat centroid and centroid connectors to network

all_link_df = pd.concat([link_df,
                        all_cc_link_df.drop(['county_last_id'], axis = 1)],
                       sort = False,
                       ignore_index = True)

all_shape_gdf = pd.concat([shape_gdf,
                          all_cc_shape_gdf],
                         sort = False,
                         ignore_index = True)

node_gdf = node_gdf.to_crs(epsg = 4326)

all_node_gdf = pd.concat([node_gdf,
                         all_centroid_node_gdf],
                        sort = False,
                        ignore_index = True)

### add length

In [84]:
all_cc_link_gdf = pd.merge(all_cc_link_df, all_cc_shape_gdf[["id", "geometry"]], how = "left", on = "id")

In [85]:
all_cc_link_gdf[all_cc_link_gdf.geometry.isnull()]

Unnamed: 0,A,B,drive_access,walk_access,bike_access,shstGeometryId,id,u,v,fromIntersectionId,toIntersectionId,roadway,county,rail_only,county_last_id,model_link_id,geometry


In [86]:
all_cc_link_gdf = gpd.GeoDataFrame(all_cc_link_gdf, 
                                   geometry = all_cc_link_gdf["geometry"], 
                                   crs = shape_gdf.crs)

In [87]:
geom_length = all_cc_link_gdf[['geometry']].copy()
geom_length = geom_length.to_crs(epsg = 26915)
geom_length["length"] = geom_length.length

all_cc_link_gdf["length"] = geom_length["length"]

### add locationreference

In [88]:
all_node_gdf['X'] = all_node_gdf['geometry'].apply(lambda p: p.x)
all_node_gdf['Y'] = all_node_gdf['geometry'].apply(lambda p: p.y)
all_node_gdf['point'] = [list(xy) for xy in zip(all_node_gdf.X, all_node_gdf.Y)]
node_dict = dict(zip(all_node_gdf.model_node_id, all_node_gdf.point))
    
all_cc_link_gdf['A_point'] = all_cc_link_gdf['A'].map(node_dict)
all_cc_link_gdf['B_point'] = all_cc_link_gdf['B'].map(node_dict)
all_cc_link_gdf['locationReferences'] = all_cc_link_gdf.apply(lambda x: [{'sequence':1, 
                                                             'point': x['A_point'],
                                                             'distanceToNextRef':x['length'],
                                                            'bearing' : 0,
                                                             'intersectionId':x['fromIntersectionId']},
                                                                         {'sequence':2, 
                                                             'point': x['B_point'],
                                                             'intersectionId':x['toIntersectionId']}],
                                                   axis = 1)

# write out

In [89]:
int_col = ["bike_access", "walk_access", "drive_access", "rail_only", "A", "B", "u", "v"]
for c in int_col:
    all_cc_link_gdf[c] = all_cc_link_gdf[c].fillna(0).astype(np.int64)

int_col = ["bike_access", "walk_access", "drive_access", "rail_only"]
for c in int_col:
    all_centroid_node_gdf[c] = all_centroid_node_gdf[c].fillna(0).astype(np.int64)

In [90]:
%%time

print("-------write out link shape geojson---------")

shape_prop = ['id', 'fromIntersectionId', 'toIntersectionId']
shape_geojson = link_df_to_geojson(all_cc_shape_gdf, shape_prop)

with open("../../data/interim/step7_centroid_connector/cc_shape.geojson", "w") as f:
    json.dump(shape_geojson, f)

-------write out link shape geojson---------
Wall time: 35.7 s


In [91]:
%%time

# write out link variable json
# link unique handle "shstReferenceId" + "shstGeometryId"

print("-------write out link json---------")

link_prop = all_cc_link_gdf.drop(["geometry"], axis = 1).columns.tolist()

out = all_cc_link_gdf[link_prop].to_json(orient = "records")

with open("../../data/interim/step7_centroid_connector/cc_link.json", 'w') as f:
    f.write(out)

-------write out link json---------
Wall time: 11.6 s


In [92]:
%%time

print("-------write out node geojson---------")

node_prop = all_centroid_node_gdf.drop(["geometry"], axis = 1).columns.tolist()
node_geojson = point_df_to_geojson(all_centroid_node_gdf, node_prop)

with open("../../data/interim/step7_centroid_connector/centroid_node.geojson", "w") as f:
    json.dump(node_geojson, f)

-------write out node geojson---------
Wall time: 6.02 s


In [93]:
print("-------write out pickle---------")

all_cc_link_gdf.drop(['county_last_id','A_point', 'B_point'], axis = 1).to_pickle(
    "../../data/interim/step7_centroid_connector/cc_link.pickle")
all_cc_shape_gdf.to_pickle("../../data/interim/step7_centroid_connector/cc_shape.pickle")
all_centroid_node_gdf.to_pickle("../../data/interim/step7_centroid_connector/centroid_node.pickle")

-------write out pickle---------


In [94]:
print(all_cc_link_gdf.shape)
print(all_cc_shape_gdf.shape)
print(all_centroid_node_gdf.shape)

(564544, 21)
(282272, 6)
(44482, 6)
