# This notebook goes through building drive, walk, and bike centroids and centorid connectors

1. centroid nodes come from existing network
2. drive centroid connector built by finding the new non-freeway drive node closest to the existing network's drive loading point
3. bike and walk centroid connector built by finding the closest new walk and bike node to the centroid

In [1]:
import pandas as pd
import numpy as np
import geopandas as gpd
from shapely.geometry import Point, shape, LineString
from scipy.spatial import cKDTree
import json
import math


%matplotlib inline

import matplotlib.pyplot as plt

pd.set_option('display.float_format', lambda x: '%.3f' % x)

In [2]:
%load_ext autoreload
%autoreload 2

In [3]:
from methods import link_df_to_geojson
from methods import point_df_to_geojson
from methods import reproject
from methods import num_of_drive_loadpoint_per_centroid
from methods import num_of_walk_bike_loadpoint_per_centroid
from methods import find_new_load_point
from methods import generate_centroid_connectors
from methods import consolidate_cc

from methods import project_geometry
from methods import project_gdf
from methods import buffer1
from methods import buffer2
from methods import get_non_near_connectors

In [4]:
step6_output_folder = "../../data/interim/step6_gtfs/"
county_shape_folder = "../data/external/county/"

In [5]:
link_file = step6_output_folder + "link.feather"
link_df = pd.read_feather(link_file)

node_file = step6_output_folder + "node.geojson"
node_gdf = gpd.read_file(node_file)

shape_file = step6_output_folder + "shape.geojson"
shape_gdf = gpd.read_file(shape_file)

In [6]:
link_df[link_df.shstReferenceId == "02b0be7186de823a19b1bbd0ae45ddec"][["A", "B"]]

Unnamed: 0,A,B
9112,4500775,4500852


In [7]:
#taz_poly_gdf = gpd.read_file("../../../travel-model-two/maz_taz/shapefiles/tazs_TM2_v2_2.shp")
taz_poly_gdf = gpd.read_file("Z:/Data/Users/Sijia/MTC/github/travel-model-two/maz_taz/shapefiles/tazs_TM2_v2_2.shp")

taz_poly_gdf = taz_poly_gdf.to_crs(epsg = 4326)

In [8]:
#maz_poly_gdf = gpd.read_file("../../../travel-model-two/maz_taz/shapefiles/mazs_TM2_v2_2.shp")
maz_poly_gdf = gpd.read_file("Z:/Data/Users/Sijia/MTC/github/travel-model-two/maz_taz/shapefiles/mazs_TM2_v2_2.shp")

maz_poly_gdf = maz_poly_gdf.to_crs(epsg = 4326)

In [11]:
# legacy lookup for assignable
#tm2_lookup_df = pd.read_csv("../../../Lasso/mtc_data/lookups/legacy_tm2_attributes.csv")
tm2_lookup_df = pd.read_csv("Z:/Data/Users/Sijia/MTC/github/Lasso/mtc_data/lookups/legacy_tm2_attributes.csv")

In [34]:
# assignable analysis result from RSG

#from simpledbf import Dbf5

#dbf = Dbf5("D:/github/lfs-cleaning/travel-model-two-networks/data/processed/version_05/assignable_analysis_links.dbf")
#assignable_lookup_df = dbf.to_dataframe()

# export assignable lookup table

#assignable_lookup_df[["A", "B", "ASSIGNABLE"]].to_csv("D:/github/lfs-cleaning/travel-model-two-networks/data/processed/version_05/assignable_analysis_links.csv",
#                                                     index = False)

In [29]:
assignable_lookup_df = pd.read_csv("D:/github/lfs-cleaning/travel-model-two-networks/data/processed/version_05/assignable_analysis_links.csv")

In [13]:
tm2_lookup_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 757723 entries, 0 to 757722
Data columns (total 6 columns):
shstReferenceId    757723 non-null object
A_node             757723 non-null int64
B_node             757723 non-null int64
lanes              757723 non-null int64
assignable         757723 non-null int64
ft                 757723 non-null object
dtypes: int64(4), object(2)
memory usage: 34.7+ MB


In [30]:
assignable_lookup_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2230047 entries, 0 to 2230046
Data columns (total 3 columns):
A             int64
B             int64
ASSIGNABLE    int64
dtypes: int64(3)
memory usage: 51.0 MB


In [33]:
assignable_lookup_df.ASSIGNABLE.value_counts()

0    1807923
1     422124
Name: ASSIGNABLE, dtype: int64

In [36]:
assignable_lookup_df[assignable_lookup_df.ASSIGNABLE==1].CNTYPE.value_counts()

TANA    391050
TAZ      31069
PED          5
Name: CNTYPE, dtype: int64

In [37]:
link_df.shape

(1634769, 37)

In [39]:
link_gdf = pd.merge(link_df,
                    shape_gdf[["id", "geometry"]],
                    how = "left",
                    on = "id")

link_gdf = gpd.GeoDataFrame(link_gdf, 
                            geometry = link_gdf["geometry"],
                            crs={"init" : "epsg:4326"})

#link_gdf = pd.merge(
#    link_gdf,
#    tm2_lookup_df[["shstReferenceId", "assignable"]],
#    how = "left",
#    on = "shstReferenceId"
#)

link_gdf = pd.merge(
    link_gdf,
    assignable_lookup_df[["A", "B", "ASSIGNABLE"]],
    how = "left",
    on = ["A", "B"]
)

link_gdf.rename(columns = {"ASSIGNABLE" : "assignable"}, inplace = True)

In [12]:
link_gdf.assignable.value_counts()

1.000    695049
0.000     57163
Name: assignable, dtype: int64

In [40]:
link_gdf.assignable.value_counts()

0    1246456
1     388313
Name: assignable, dtype: int64

In [41]:
node_gdf.info()

<class 'geopandas.geodataframe.GeoDataFrame'>
RangeIndex: 644475 entries, 0 to 644474
Data columns (total 9 columns):
osm_node_id      643811 non-null float64
shst_node_id     644475 non-null object
county           644475 non-null object
drive_access     644475 non-null int64
walk_access      644475 non-null int64
bike_access      644475 non-null int64
model_node_id    644475 non-null int64
rail_only        644475 non-null int64
geometry         644475 non-null object
dtypes: float64(1), int64(5), object(3)
memory usage: 44.3+ MB


In [42]:
link_gdf.info()

<class 'geopandas.geodataframe.GeoDataFrame'>
Int64Index: 1634769 entries, 0 to 1634768
Data columns (total 39 columns):
access                1632702 non-null object
area                  1632702 non-null object
bike_access           1634769 non-null int64
bridge                1632702 non-null object
drive_access          1634769 non-null int64
est_width             1632702 non-null object
fromIntersectionId    1632702 non-null object
highway               1632702 non-null object
id                    1634769 non-null object
junction              1632702 non-null object
key                   1632702 non-null object
landuse               1632702 non-null object
lanes                 1632702 non-null object
link                  1632702 non-null object
maxspeed              1632702 non-null object
name                  1632702 non-null object
oneWay                1632702 non-null object
ref                   1632702 non-null object
roadway               1632702 non-null object
roundab

# centroid connector

In [43]:
# read existing network

#existing_network_link_gdf = gpd.read_file("../../data/external/TM2_nonMarin/tm2_links.shp")
#existing_network_node_gdf = gpd.read_file("../../data/external/TM2_nonMarin/tm2_nodes.shp")

existing_network_link_gdf = gpd.read_file("Z:/Data/Users/Sijia/MTC/github/travel-model-two-networks/data/external/TM2_nonMarin/tm2_links.shp")
existing_network_node_gdf = gpd.read_file("Z:/Data/Users/Sijia/MTC/github/travel-model-two-networks/data/external/TM2_nonMarin/tm2_nodes.shp")

existing_network_link_gdf.crs = {"init" : "esri:102646"}
existing_network_node_gdf.crs = {"init" : "esri:102646"}

In [44]:
# convert to ESPG lat-lon

existing_network_link_gdf = existing_network_link_gdf.to_crs({'init': 'epsg:4326'})
existing_network_link_gdf.crs

existing_network_node_gdf = existing_network_node_gdf.to_crs({'init': 'epsg:4326'})
existing_network_node_gdf.crs

{'init': 'epsg:4326'}

existing_network_link_gdf[["CNTYPE","geometry"]].to_file("../../data/interim/step7_centroid_connector/tm2_connectors.geojson",
                                                        driver = "GeoJSON")

In [45]:
existing_network_link_gdf.geometry

0          LINESTRING (-122.4235552308525 37.770461316487...
1          LINESTRING (-122.4235552308525 37.770461316487...
2          LINESTRING (-122.4235552308525 37.770461316487...
3          LINESTRING (-122.4235552308525 37.770461316487...
4          LINESTRING (-122.4235552308525 37.770461316487...
5          LINESTRING (-122.4235552308525 37.770461316487...
6          LINESTRING (-122.4235552308525 37.770461316487...
7          LINESTRING (-122.4185110411505 37.764307980971...
8          LINESTRING (-122.4185110411505 37.764307980971...
9          LINESTRING (-122.4185110411505 37.764307980971...
10         LINESTRING (-122.4185110411505 37.764307980971...
11         LINESTRING (-122.4185110411505 37.764307980971...
12         LINESTRING (-122.4185110411505 37.764307980971...
13         LINESTRING (-122.4185110411505 37.764307980971...
14         LINESTRING (-122.4185110411505 37.764307980971...
15         LINESTRING (-122.424376790239 37.7677676751334...
16         LINESTRING (-

In [46]:
print(existing_network_node_gdf.columns)
print(existing_network_node_gdf.crs)

Index(['N', 'FAREZONE', 'X', 'Y', 'COUNTY', 'MODE', 'TYPE', 'ID', 'PNR_CAP',
       'PNR1', 'PNR_FEE1', 'PNR2', 'PNR_FEE2', 'PNR3', 'PNR_FEE3', 'PNR4',
       'PNR_FEE4', 'PNR5', 'PNR_FEE5', 'STOP', 'RTDMODE', 'TAZSEQ', 'MAZSEQ',
       'TAPSEQ', 'EXTSEQ', 'NEW_NODE', 'OLD_NODE', 'TEMP', 'TEMP2',
       'geometry'],
      dtype='object')
{'init': 'epsg:4326'}


In [47]:
existing_network_node_gdf.TAZSEQ.nunique()

4686

In [48]:
existing_network_node_gdf.EXTSEQ.nunique()

22

In [49]:
existing_network_node_gdf.TAZSEQ.min()

0

In [50]:
existing_network_node_gdf.MAZSEQ.nunique()

39676

In [51]:
existing_network_node_gdf.MAZSEQ.min()

0

In [52]:
existing_network_link_gdf.CNTYPE.value_counts()

TANA     807224
MAZ      252437
PED      221905
TAZ       63182
TAP       43512
USE        8981
BIKE       6678
LRAIL       864
CRAIL       144
HRAIL        90
FERRY        52
EXT          44
Name: CNTYPE, dtype: int64

In [53]:
existing_network_link_gdf[existing_network_link_gdf.CNTYPE == "EXT"]

Unnamed: 0,A,B,NUMLANES,F_JNCTID,T_JNCTID,FRC,NAME,FREEWAY,TOLLRD,ONEWAY,...,TAP_DRIVE,FT,FFS,USECLASS,TOLLBOOTH,DANGLING,HASTRANSIT,DELETE,TOLLSEG,geometry
178864,900001,4536205,9,0,0,2,CAL 1 - Sonoma/Mendicino,0,,,...,0,6,25.0,0,0,0,0,0,0,LINESTRING (-123.5190457272354 38.760762464191...
178865,900002,4536973,9,0,0,2,CAL 128 - Sonoma/Mendicino,0,,,...,0,6,25.0,0,0,0,0,0,0,LINESTRING (-123.0726942858131 38.855409819300...
178866,900003,4535977,9,0,0,2,US 101 - Sonoma/Mendicino,0,,FT,...,0,6,25.0,0,0,0,0,0,0,LINESTRING (-123.0336024327497 38.855188781886...
178867,900004,4013616,9,0,0,2,CAL 29 - Napa/Lake,0,,,...,0,6,25.0,0,0,0,0,0,0,LINESTRING (-122.5942129152742 38.680411102858...
178868,900005,4008102,9,0,0,2,CAL 128 - Napa/Yolo,0,,,...,0,6,25.0,0,0,0,0,0,0,LINESTRING (-122.103303766366 38.5132350248262...
178869,900006,3504977,9,0,0,2,I 505 - Solano/Yolo,0,,FT,...,0,6,25.0,0,0,0,0,0,0,"LINESTRING (-121.9529173734 38.52623326864123,..."
178870,900007,3501135,9,0,0,2,CAL 113 - Solano/Yolo,0,,FT,...,0,6,25.0,0,0,0,0,0,0,LINESTRING (-121.768232430548 38.5313499680038...
178871,900008,3502245,9,0,0,2,I 80 - Solano/Yolo,0,,FT,...,0,6,25.0,0,0,0,0,0,0,LINESTRING (-121.7388852272142 38.541664183304...
178872,900009,3521560,9,0,0,2,CAL 12 - Solano/Sacramento,0,,,...,0,6,25.0,0,0,0,0,0,0,LINESTRING (-121.6850500443406 38.158456006531...
178873,900010,3016324,9,0,0,2,CAL 160 - Contra Costa/Sacramento,0,,FT,...,0,6,25.0,0,0,0,0,0,0,LINESTRING (-121.7514279132374 38.020180672387...


#covert networks to espg 26915 for nearest node operation

link_gdf, node_gdf = reproject(link_gdf, 
                                node_gdf, 
                                26915)

existing_network_link_gdf, existing_network_node_gdf = reproject(existing_network_link_gdf,
                                                                existing_network_node_gdf,
                                                                26915)

In [54]:
existing_network_link_gdf, existing_network_node_gdf = reproject(existing_network_link_gdf,
                                                                existing_network_node_gdf,
                                                                26915)

In [55]:
taz_N_list = list(range(1, 10000)) + list(range(100001, 110000)) + list(range(200001, 210000)) + list(range(300001, 310000))\
+ list(range(400001, 410000)) + list(range(500001, 510000)) + list(range(600001, 610000)) + list(range(700001, 710000))\
+ list(range(800001, 810000)) + list(range(900001, 1000000))

maz_N_list = list(range(10001, 90000)) + list(range(110001, 190000)) + list(range(210001, 290000)) + list(range(310001, 390000))\
+ list(range(410001, 490000)) + list(range(510001, 590000)) + list(range(610001, 690000)) + list(range(710001, 790000))\
+ list(range(810001, 890000))

# existing taz centorid and centroid connectors

In [56]:
# existing taz level centroids
taz_node_gdf = existing_network_node_gdf[existing_network_node_gdf.N.isin(taz_N_list)].copy()

# existing taz level centroid connectors
taz_connectors_gdf = existing_network_link_gdf[existing_network_link_gdf.CNTYPE.isin(["TAZ", "EXT"])].copy()

taz_connectors_gdf = taz_connectors_gdf[(taz_connectors_gdf.A.isin(taz_N_list)) | 
                                       (taz_connectors_gdf.B.isin(taz_N_list))].copy()

# identify centroid (c) and non-centroid (non-c) for each centroid connector
taz_connectors_gdf["c"] = np.where(taz_connectors_gdf["A"].isin(taz_N_list),
                                  taz_connectors_gdf["A"],
                                  taz_connectors_gdf["B"])
taz_connectors_gdf["non_c"] = np.where(taz_connectors_gdf["A"].isin(taz_N_list),
                                  taz_connectors_gdf["B"],
                                  taz_connectors_gdf["A"])

taz_connectors_gdf.drop_duplicates(subset = ["c", "non_c"], inplace = True)

In [57]:
taz_connectors_gdf.c.nunique()

4756

In [58]:
taz_connectors_gdf.FT.value_counts()

6    31624
Name: FT, dtype: int64

In [59]:
def q15(x):
    m = x.quantile(q=0.15)
    return m

def q85(x):
    m = x.quantile(q=0.85)
    return m

In [60]:
taz_connectors_gdf["length"] = taz_connectors_gdf.geometry.length

taz_connectors_gdf["length_mean"] = taz_connectors_gdf.groupby(["c"]).length.transform("mean") 

taz_connectors_gdf["length_std"] = taz_connectors_gdf.groupby(["c"]).length.transform("std") 

taz_connectors_gdf["length_norm"] = (taz_connectors_gdf["length"] - taz_connectors_gdf["length_mean"]) / taz_connectors_gdf["length_std"]

taz_connectors_gdf["length_15"] = taz_connectors_gdf.groupby(["c"])["length"].transform(q15)
taz_connectors_gdf["length_85"] = taz_connectors_gdf.groupby(["c"])["length"].transform(q85)

taz_connectors_gdf["keep"] = np.where((taz_connectors_gdf.length <= taz_connectors_gdf.length_mean + taz_connectors_gdf.length_std) |
                                      (taz_connectors_gdf.length_std < taz_connectors_gdf.length_mean * 0.6) | 
                                      (taz_connectors_gdf.length_std.isnull()),
                                     1,
                                     0)

taz_drop_long_connectors_gdf = taz_connectors_gdf[taz_connectors_gdf.keep == 1].copy()

In [61]:
taz_drop_long_connectors_gdf.c.nunique()

4756

In [62]:
taz_drop_long_connectors_gdf.crs

{'init': 'epsg:26915', 'no_defs': True}

In [63]:
taz_node_gdf.info()

<class 'geopandas.geodataframe.GeoDataFrame'>
Int64Index: 4756 entries, 0 to 50697
Data columns (total 30 columns):
N           4756 non-null int64
FAREZONE    4756 non-null int64
X           4756 non-null float64
Y           4756 non-null float64
COUNTY      4756 non-null int64
MODE        4756 non-null int64
TYPE        4756 non-null int64
ID          4756 non-null int64
PNR_CAP     4756 non-null int64
PNR1        4756 non-null int64
PNR_FEE1    4756 non-null float64
PNR2        4756 non-null int64
PNR_FEE2    4756 non-null float64
PNR3        4756 non-null int64
PNR_FEE3    4756 non-null float64
PNR4        4756 non-null int64
PNR_FEE4    4756 non-null int64
PNR5        4756 non-null int64
PNR_FEE5    4756 non-null int64
STOP        0 non-null object
RTDMODE     4756 non-null int64
TAZSEQ      4756 non-null int64
MAZSEQ      4756 non-null int64
TAPSEQ      4756 non-null int64
EXTSEQ      4756 non-null int64
NEW_NODE    4756 non-null int64
OLD_NODE    4756 non-null int64
TEMP        

In [64]:
taz_connectors_gdf.CNTYPE.value_counts()

TAZ    31591
EXT       33
Name: CNTYPE, dtype: int64

# existing maz centroid and centroid connectors

In [65]:
# existing maz level centroids
maz_node_gdf = existing_network_node_gdf[existing_network_node_gdf.N.isin(maz_N_list)].copy()

# existing taz level centroid connectors
maz_connectors_gdf = existing_network_link_gdf[existing_network_link_gdf.CNTYPE == "MAZ"].copy()

maz_connectors_gdf = maz_connectors_gdf[(maz_connectors_gdf.A.isin(maz_N_list)) | 
                                       (maz_connectors_gdf.B.isin(maz_N_list))].copy()

# identify centroid (c) and non-centroid (non-c) for each centroid connector

maz_connectors_gdf["c"] = np.where(maz_connectors_gdf["A"].isin(maz_N_list),
                                  maz_connectors_gdf["A"],
                                  maz_connectors_gdf["B"])
maz_connectors_gdf["non_c"] = np.where(maz_connectors_gdf["A"].isin(maz_N_list),
                                  maz_connectors_gdf["B"],
                                  maz_connectors_gdf["A"])

maz_connectors_gdf.drop_duplicates(subset = ["c", "non_c"], inplace = True)

In [66]:
maz_node_gdf.N.nunique()

39726

In [67]:
maz_connectors_gdf.c.nunique()

39721

In [68]:
np.setdiff1d(maz_node_gdf.N.tolist(), maz_connectors_gdf.c.tolist())

array([ 10186,  16084, 111432, 111433, 411178])

In [69]:
existing_network_link_gdf[existing_network_link_gdf.B == 411178]

Unnamed: 0,A,B,NUMLANES,F_JNCTID,T_JNCTID,FRC,NAME,FREEWAY,TOLLRD,ONEWAY,...,TAP_DRIVE,FT,FFS,USECLASS,TOLLBOOTH,DANGLING,HASTRANSIT,DELETE,TOLLSEG,geometry


In [70]:
existing_network_node_gdf[existing_network_node_gdf.N == 411178]

Unnamed: 0,N,FAREZONE,X,Y,COUNTY,MODE,TYPE,ID,PNR_CAP,PNR1,...,RTDMODE,TAZSEQ,MAZSEQ,TAPSEQ,EXTSEQ,NEW_NODE,OLD_NODE,TEMP,TEMP2,geometry
34383,411178,0,-2105415.904,4625783.336,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,POINT (-2105415.903747404 4625783.33594758)


In [71]:
#maz_node_gdf = maz_node_gdf[maz_node_gdf.N.isin(maz_connectors_gdf.c.tolist())]

In [72]:
maz_connectors_gdf["length"] = maz_connectors_gdf.geometry.length

maz_connectors_gdf["length_mean"] = maz_connectors_gdf.groupby(["c"]).length.transform("mean") 

maz_connectors_gdf["length_std"] = maz_connectors_gdf.groupby(["c"]).length.transform("std") 

maz_connectors_gdf["length_norm"] = (maz_connectors_gdf["length"] - maz_connectors_gdf["length_mean"]) / maz_connectors_gdf["length_std"]

maz_connectors_gdf["keep"] = np.where((maz_connectors_gdf.length <= maz_connectors_gdf.length_mean + maz_connectors_gdf.length_std) |
                                      (maz_connectors_gdf.length_std < maz_connectors_gdf.length_mean * 0.6) | 
                                      (maz_connectors_gdf.length_std.isnull()),
                                     1,
                                     0)

maz_drop_long_connectors_gdf = maz_connectors_gdf[maz_connectors_gdf.keep == 1].copy()

In [73]:
maz_connectors_gdf.shape

(125745, 48)

In [74]:
maz_drop_long_connectors_gdf.shape

(122987, 48)

In [75]:
maz_drop_long_connectors_gdf.c.nunique()

39721

In [76]:
maz_drop_long_connectors_gdf.CNTYPE.value_counts()

MAZ    122987
Name: CNTYPE, dtype: int64

In [77]:
link_gdf[link_gdf.drive_access==1].roadway.value_counts()

residential       407426
service           389515
tertiary           93218
secondary          68529
primary            26492
motorway_link       4983
trunk               4160
motorway            2774
secondary_link      1566
primary_link        1506
trunk_link          1382
tertiary_link        611
Name: roadway, dtype: int64

# For each zone, find how many nodes that have only two assignable geometries (not reference)

In [78]:
assignable_link_gdf = link_gdf[(link_gdf.assignable == 1) & 
                               ~(link_gdf.roadway.isin(
                                   ["motorway_link", "motorway", "trunk", "trunk_link", "service"])) &
                              (link_gdf.drive_access == 1)].copy()

In [79]:
assignable_link_gdf.info()

<class 'geopandas.geodataframe.GeoDataFrame'>
Int64Index: 345830 entries, 2 to 1632695
Data columns (total 39 columns):
access                345830 non-null object
area                  345830 non-null object
bike_access           345830 non-null int64
bridge                345830 non-null object
drive_access          345830 non-null int64
est_width             345830 non-null object
fromIntersectionId    345830 non-null object
highway               345830 non-null object
id                    345830 non-null object
junction              345830 non-null object
key                   345830 non-null object
landuse               345830 non-null object
lanes                 345830 non-null object
link                  345830 non-null object
maxspeed              345830 non-null object
name                  345830 non-null object
oneWay                345830 non-null object
ref                   345830 non-null object
roadway               345830 non-null object
roundabout            34583

In [80]:
a_geometry_count_df = assignable_link_gdf.groupby(
    ["A", "shstGeometryId"])["model_link_id"].count().reset_index().rename(columns = {"A" : "model_node_id"})
b_geometry_count_df = assignable_link_gdf.groupby(
    ["B", "shstGeometryId"])["model_link_id"].count().reset_index().rename(columns = {"B" : "model_node_id"})

node_geometry_count_df = pd.concat([a_geometry_count_df, b_geometry_count_df], ignore_index = True, sort = False)

node_geometry_count_df = node_geometry_count_df.groupby(
    ["model_node_id", "shstGeometryId"]).count().reset_index().groupby(["model_node_id"])["shstGeometryId"].count().reset_index()

node_two_geometry_df = node_geometry_count_df[node_geometry_count_df.shstGeometryId == 2].copy()
node_two_geometry_id_list = node_two_geometry_df.model_node_id.tolist()

In [81]:
link_gdf[link_gdf.A == 1005079]

Unnamed: 0,access,area,bike_access,bridge,drive_access,est_width,fromIntersectionId,highway,id,junction,...,width,county,length,model_link_id,A,B,rail_traveltime,rail_only,geometry,assignable
112250,,,1,,1,,e15f4fe887edc6f09cbacdeef040904f,service,f6712e6b4f50eb057fd82edfa801f80c,,...,,San Francisco,7.539,5290,1005079,1010066,,0,"LINESTRING (-122.4166814 37.7507448, -122.4166...",1
290953,,,1,,1,,e15f4fe887edc6f09cbacdeef040904f,residential,d96d7c7a7ac3e8c019f5e73b866b8314,,...,,San Francisco,42.887,13813,1005079,1011449,,0,"LINESTRING (-122.4166814 37.7507448, -122.4171...",1
882146,,,1,,1,,e15f4fe887edc6f09cbacdeef040904f,service,cbf4479e1a363fb56c84bb8a4eb000ba,,...,,San Francisco,192.85,41416,1005079,1015386,,0,"LINESTRING (-122.4168425 37.752341, -122.41668...",1
1615608,,,1,,1,,e15f4fe887edc6f09cbacdeef040904f,residential,26854d11c05f932a98572ea50e87a52b,,...,,San Francisco,40.922,72768,1005079,1012427,,0,"LINESTRING (-122.4162547 37.7507707, -122.4166...",1


In [82]:
assignable_link_gdf[assignable_link_gdf.A == 1005079]

Unnamed: 0,access,area,bike_access,bridge,drive_access,est_width,fromIntersectionId,highway,id,junction,...,width,county,length,model_link_id,A,B,rail_traveltime,rail_only,geometry,assignable
290953,,,1,,1,,e15f4fe887edc6f09cbacdeef040904f,residential,d96d7c7a7ac3e8c019f5e73b866b8314,,...,,San Francisco,42.887,13813,1005079,1011449,,0,"LINESTRING (-122.4166814 37.7507448, -122.4171...",1
1615608,,,1,,1,,e15f4fe887edc6f09cbacdeef040904f,residential,26854d11c05f932a98572ea50e87a52b,,...,,San Francisco,40.922,72768,1005079,1012427,,0,"LINESTRING (-122.4162547 37.7507707, -122.4166...",1


In [83]:
node_geometry_count_df.shape

(178678, 2)

In [84]:
node_geometry_count_df[node_geometry_count_df.model_node_id == 1005079]

Unnamed: 0,model_node_id,shstGeometryId
2813,1005079,2


In [85]:
node_two_geometry_df.shape

(127885, 2)

In [86]:
node_two_geometry_df = pd.merge(
    node_two_geometry_df,
    node_gdf[["model_node_id", "osm_node_id", "shst_node_id", "geometry"]],
    how = "left",
    on = "model_node_id"
)

node_two_geometry_df = gpd.GeoDataFrame(node_two_geometry_df, 
                                        geometry = node_two_geometry_df["geometry"],
                                       crs = {"init" : "epsg:4326"})

In [87]:
node_two_geometry_df.info()

<class 'geopandas.geodataframe.GeoDataFrame'>
Int64Index: 127885 entries, 0 to 127884
Data columns (total 5 columns):
model_node_id     127885 non-null int64
shstGeometryId    127885 non-null int64
osm_node_id       127885 non-null float64
shst_node_id      127885 non-null object
geometry          127885 non-null object
dtypes: float64(1), int64(2), object(2)
memory usage: 5.9+ MB


import math

def buffer1(polygon):
    buffer_dist = 10
    poly_proj, crs_utm = project_geometry(polygon)
    poly_proj_buff = poly_proj.buffer(buffer_dist)
    poly_buff, _ = project_geometry(poly_proj_buff, crs=crs_utm, to_latlong=True)
    
    return poly_buff

def buffer2(polygon):
    return polygon.minimum_rotated_rectangle

In [88]:
taz_poly_buffer1_gdf = taz_poly_gdf.copy()
taz_poly_buffer2_gdf = taz_poly_gdf.copy()

def project_geometry(geometry, crs=None, to_crs=None, to_latlong=False):
    """
    Project a shapely geometry from its current CRS to another.
    If to_crs is None, project to the UTM CRS for the UTM zone in which the
    geometry's centroid lies. Otherwise project to the CRS defined by to_crs.
    Parameters
    ----------
    geometry : shapely.geometry.Polygon or shapely.geometry.MultiPolygon
        the geometry to project
    crs : dict or string or pyproj.CRS
        the starting CRS of the passed-in geometry. if None, it will be set to
        settings.default_crs
    to_crs : dict or string or pyproj.CRS
        if None, project to UTM zone in which geometry's centroid lies,
        otherwise project to this CRS
    to_latlong : bool
        if True, project to settings.default_crs and ignore to_crs
    Returns
    -------
    geometry_proj, crs : tuple
        the projected geometry and its new CRS
    """
    if crs is None:
        crs = {"init" : "epsg:4326"}

    gdf = gpd.GeoDataFrame(geometry=[geometry], crs=crs)
    gdf_proj = project_gdf(gdf, to_crs=to_crs, to_latlong=to_latlong)
    geometry_proj = gdf_proj["geometry"].iloc[0]
    return geometry_proj, gdf_proj.crs


def project_gdf(gdf, to_crs=None, to_latlong=False):
    """
    Project a GeoDataFrame from its current CRS to another.
    If to_crs is None, project to the UTM CRS for the UTM zone in which the
    GeoDataFrame's centroid lies. Otherwise project to the CRS defined by
    to_crs. The simple UTM zone calculation in this function works well for
    most latitudes, but may not work for some extreme northern locations like
    Svalbard or far northern Norway.
    Parameters
    ----------
    gdf : geopandas.GeoDataFrame
        the GeoDataFrame to be projected
    to_crs : dict or string or pyproj.CRS
        if None, project to UTM zone in which gdf's centroid lies, otherwise
        project to this CRS
    to_latlong : bool
        if True, project to settings.default_crs and ignore to_crs
    Returns
    -------
    gdf_proj : geopandas.GeoDataFrame
        the projected GeoDataFrame
    """
    if gdf.crs is None or len(gdf) < 1:
        raise ValueError("GeoDataFrame must have a valid CRS and cannot be empty")

    # if to_latlong is True, project the gdf to latlong
    if to_latlong:
        gdf_proj = gdf.to_crs({"init" : "epsg:4326"})
        #utils.log(f"Projected GeoDataFrame to {settings.default_crs}")

    # else if to_crs was passed-in, project gdf to this CRS
    elif to_crs is not None:
        gdf_proj = gdf.to_crs(to_crs)
        #utils.log(f"Projected GeoDataFrame to {to_crs}")

    # otherwise, automatically project the gdf to UTM
    else:
        #if CRS.from_user_input(gdf.crs).is_projected:
         #   raise ValueError("Geometry must be unprojected to calculate UTM zone")

        # calculate longitude of centroid of union of all geometries in gdf
        avg_lng = gdf["geometry"].unary_union.centroid.x

        # calculate UTM zone from avg longitude to define CRS to project to
        utm_zone = int(math.floor((avg_lng + 180) / 6.0) + 1)
        utm_crs = f"+proj=utm +zone={utm_zone} +ellps=WGS84 +datum=WGS84 +units=m +no_defs"

        # project the GeoDataFrame to the UTM CRS
        gdf_proj = gdf.to_crs(utm_crs)
        #utils.log(f"Projected GeoDataFrame to {gdf_proj.crs}")

    return gdf_proj

In [89]:
taz_poly_buffer1_gdf["geometry_buffer"] = taz_poly_buffer1_gdf["geometry"].apply(lambda x: buffer1(x))

In [90]:
taz_poly_buffer2_gdf["geometry_buffer"] = taz_poly_buffer2_gdf["geometry"].apply(lambda x: buffer2(x))

In [91]:
taz_poly_buffer1_gdf.rename(columns = {"geometry" : "geometry_orig", "geometry_buffer" : "geometry"}, inplace = True)

In [92]:
taz_poly_buffer2_gdf.rename(columns = {"geometry" : "geometry_orig", "geometry_buffer" : "geometry"}, inplace = True)

In [93]:
taz_node_two_geometry_df = gpd.sjoin(node_two_geometry_df, 
                                     taz_poly_gdf[["geometry", "taz"]], 
                                     how = "left", 
                                     op = "intersects")

  '(%s != %s)' % (left_df.crs, right_df.crs))


In [94]:
taz_buffer1_node_two_geometry_df = gpd.sjoin(node_two_geometry_df, 
                                             taz_poly_buffer1_gdf[["geometry", "taz"]], 
                                             how = "left", 
                                             op = "intersects")

In [95]:
taz_buffer2_node_two_geometry_df = gpd.sjoin(node_two_geometry_df, 
                                             taz_poly_buffer2_gdf[["geometry", "taz"]], 
                                             how = "left", 
                                             op = "intersects")

In [96]:
taz_node_two_geometry_df.model_node_id.nunique()

127885

In [97]:
taz_buffer1_node_two_geometry_df.model_node_id.nunique()

127885

In [98]:
taz_buffer2_node_two_geometry_df.model_node_id.nunique()

127885

In [99]:
node_two_geometry_df.model_node_id.nunique()

127885

In [100]:
taz_node_two_geometry_count_df = taz_node_two_geometry_df.groupby(["taz"])["model_node_id"].count().reset_index()
taz_buffer1_node_two_geometry_count_df = taz_buffer1_node_two_geometry_df.groupby(["taz"])["model_node_id"].count().reset_index()
taz_buffer2_node_two_geometry_count_df = taz_buffer2_node_two_geometry_df.groupby(["taz"])["model_node_id"].count().reset_index()

In [101]:
taz_node_two_geometry_count_df.taz.nunique()

4682

In [102]:
taz_buffer1_node_two_geometry_count_df.taz.nunique()

4716

In [103]:
taz_buffer2_node_two_geometry_count_df.taz.nunique()

4712

In [104]:
taz_node_two_geometry_count_df[taz_node_two_geometry_count_df.taz == 433]

Unnamed: 0,taz,model_node_id


In [105]:
taz_buffer1_node_two_geometry_count_df[taz_buffer1_node_two_geometry_count_df.taz == 435]

Unnamed: 0,taz,model_node_id


In [106]:
taz_buffer2_node_two_geometry_count_df[taz_buffer2_node_two_geometry_count_df.taz == 435]

Unnamed: 0,taz,model_node_id


In [107]:
taz_node_two_geometry_count_df[taz_node_two_geometry_count_df.taz == 435]

Unnamed: 0,taz,model_node_id


In [108]:
taz_buffer1_node_two_geometry_count_df[taz_buffer1_node_two_geometry_count_df.taz == 439]

Unnamed: 0,taz,model_node_id
420,439.0,3


In [109]:
taz_buffer2_node_two_geometry_count_df[taz_buffer2_node_two_geometry_count_df.taz == 439]

Unnamed: 0,taz,model_node_id
415,439,3


In [110]:
taz_node_two_geometry_count_df[taz_node_two_geometry_count_df.taz == 430]

Unnamed: 0,taz,model_node_id
398,430.0,15


In [111]:
taz_buffer1_node_two_geometry_count_df[taz_buffer1_node_two_geometry_count_df.taz == 430]

Unnamed: 0,taz,model_node_id
414,430.0,17


In [112]:
taz_buffer2_node_two_geometry_count_df[taz_buffer2_node_two_geometry_count_df.taz == 430]

Unnamed: 0,taz,model_node_id
409,430,15


In [113]:
taz_buffer1_node_two_geometry_count_df[taz_buffer1_node_two_geometry_count_df.model_node_id >= 3]

Unnamed: 0,taz,model_node_id
0,1.000,34
1,2.000,19
2,3.000,12
3,4.000,40
4,5.000,23
5,6.000,15
6,7.000,31
7,8.000,8
8,9.000,3
9,10.000,5


# taz drive centroid connectors

# nodes that are connected to assignable

assignable_link_gdf = link_gdf[(link_gdf.assignable == 1) & 
                               ~(link_gdf.roadway.isin(["motorway_link", "motorway", "trunk", "truck_link", "service"]))].copy()

a_node_count_df = assignable_link_gdf.groupby(["A"])["model_link_id"].count().reset_index()
b_node_count_df = assignable_link_gdf.groupby(["B"])["model_link_id"].count().reset_index()

two_assignable_way_node_list = list(set(a_node_count_df[a_node_count_df.model_link_id == 1].A) & 
                                    set(b_node_count_df[b_node_count_df.model_link_id == 1].B))

len(two_assignable_way_node_list)

exclude_links_df = link_gdf[link_gdf.roadway.isin(["motorway_link", "motorway", "trunk", "trunk_link"])]

drive_node_gdf = node_gdf[(node_gdf.drive_access == 1) & 
                          ~(node_gdf.osm_node_id.isin(exclude_links_df.u.tolist() + 
                              exclude_links_df.v.tolist())) &
                         (node_gdf.model_node_id.isin(two_assignable_way_node_list))].copy()

taz_drive_cc_gdf, taz_drive_centroid_gdf = generate_centroid_connectors('drive',
                                                                        taz_drop_long_connectors_gdf,
                                                                        drive_node_gdf,
                                                                        existing_network_node_gdf)

def num_of_drive_loadpoint_per_centroid(existing_drive_cc_df, existing_node_gdf):
    """
    decide number of loading point for drive access per centroid
    
    logic: for drive, find the closest points to the existing loading point
    
    return: 
    dataframe
    for each existing drive loading point, number of new loading point needs to be generated. currently set to 1.
    
    """
    existing_pairs_of_centroid_loadpoint_df = existing_drive_cc_df.groupby(['c', 'non_c']).count().reset_index().drop(['A','B'], axis = 1)
    
    existing_num_of_loadpoint_per_c_df = existing_drive_cc_df.groupby(['c', 'non_c']).count().groupby('c').count()[['A']].rename(columns = {'A':'abm_num_load'}).reset_index()
    
    num_drive_loadpoint_new_near_old = pd.merge(existing_pairs_of_centroid_loadpoint_df,
                                                        existing_num_of_loadpoint_per_c_df,
                                                        how = 'left',
                                                        on = 'c')
    
    num_drive_loadpoint_new_near_old['osm_num_load'] = 1
    
    num_drive_loadpoint_new_near_old = pd.merge(num_drive_loadpoint_new_near_old,
                                                        existing_node_gdf[['N', 'X', 'Y']],
                                                        how = 'left',
                                                        left_on = 'non_c',
                                                        right_on = 'N')
    return num_drive_loadpoint_new_near_old


def num_of_walk_bike_loadpoint_per_centroid(existing_centroid_df):
    """
    decide number of loading point for walk and bike access per centroid
    
    logic: find 5 closest points to centroid
    
    return: 
    dataframe
    for each centroid, number of loading point needs to be generated.
    
    """
    
    num_loadpoint = existing_centroid_df[['N', 'X', 'Y']].copy()
    num_loadpoint['osm_num_load'] = np.int(5)
    num_loadpoint.rename(columns = {'N':'c'}, inplace = True)
    
    return num_loadpoint


def generate_centroid_connectors(run_type, existing_drive_cc_df, node_gdf, existing_node_df):
    """
    calls function to generate loading point reference table, 
    and calls function to find loading points
    
    build linestring based on pairs of centroid and loading point
    
    return centroid connectors and centroids
    """
    
    if run_type == 'drive':
        abm_load_ref_df = num_of_drive_loadpoint_per_centroid(existing_drive_cc_df, existing_node_df)
    if (run_type == 'walk')|(run_type == 'bike'):
        abm_load_ref_df = num_of_walk_bike_loadpoint_per_centroid(existing_node_df)

    new_load_point_gdf = find_new_load_point(abm_load_ref_df, node_gdf)
    
    new_load_point_gdf = pd.merge(new_load_point_gdf,
                                 existing_node_df[['N', 'X', 'Y']],
                                 how = 'left', 
                                 left_on = 'c',
                                 right_on = 'N')
    
    new_load_point_gdf['geometry_c'] = [Point(xy) for xy in zip(new_load_point_gdf['X'], new_load_point_gdf['Y'])]
    new_load_point_gdf.drop(['N', 'X', 'Y'], axis = 1, inplace = True)
    
    #centroid coordinates
    new_centroid_gdf = new_load_point_gdf.copy()[['c', 'geometry_c']]
    new_centroid_gdf.rename(columns = {'c' : 'model_node_id', 'geometry_c' : 'geometry'}, inplace = True)
    new_centroid_gdf.drop_duplicates(['model_node_id'], inplace = True)

    new_centroid_gdf = gpd.GeoDataFrame(new_centroid_gdf)
    
    #inbound cc
    new_cc_gdf = new_load_point_gdf.copy()
    new_cc_gdf['geometry'] = [LineString(xy) for xy in zip(new_cc_gdf['geometry_ld'], new_cc_gdf['geometry_c'])]

    new_cc_gdf["fromIntersectionId"] = new_cc_gdf['shst_node_id']
    new_cc_gdf["shstGeometryId"] = range(1, 1+len(new_cc_gdf))
    new_cc_gdf["shstGeometryId"] = new_cc_gdf["shstGeometryId"].apply(lambda x: "cc" + str(x))
    new_cc_gdf["id"] = new_cc_gdf["shstGeometryId"]
    
    new_cc_gdf = new_cc_gdf.rename(columns = {'model_node_id' : 'A', 
                                              'c' : 'B',
                                             "osm_node_id" : "u"})
    
    #remove duplicates
    new_cc_gdf.drop_duplicates(['A', 'B'], inplace = True)
    
    new_cc_gdf.crs = {'init' : 'epsg:26915'}
    new_cc_gdf = new_cc_gdf.to_crs(epsg = 4326)
    new_centroid_gdf.crs = {'init' : 'epsg:26915'}
    new_centroid_gdf = new_centroid_gdf.to_crs(epsg = 4326)
    
    return new_cc_gdf, new_centroid_gdf


def find_new_load_point(abm_load_ref_df, all_node):
    """
    find the loading points in osm nodes
    
    input: osm node, loading point reference input
    
    output:  dataframe of pairs of centroid and loading point, with point geometry of loading point
    
    works in epsg = 26915
    
    """
    
    all_node_gdf = all_node.copy()
    
    all_node_gdf = all_node_gdf.to_crs(epsg = 26915)
    all_node_gdf["X"] = all_node_gdf["geometry"].apply(lambda g: g.x)
    all_node_gdf["Y"] = all_node_gdf["geometry"].apply(lambda g: g.y)
    
    inventory_node_df = all_node_gdf.copy()
    inventory_node_ref = inventory_node_df[["X", "Y"]].values
    tree_default = cKDTree(inventory_node_ref)
    
    new_load_point_gdf = gpd.GeoDataFrame()
    
    for i in range(len(abm_load_ref_df)):
  
        point = abm_load_ref_df.iloc[i][['X', 'Y']].values
        c_id = abm_load_ref_df.iloc[i]['c']
        n_neigh = abm_load_ref_df.iloc[i]['osm_num_load']
        
        if "c" in all_node_gdf.columns:
            inventory_node_df = all_node_gdf[all_node_gdf.c == c_id].copy().reset_index()
            if len(inventory_node_df) == 0:
                continue
            else:
                inventory_node_ref = inventory_node_df[["X", "Y"]].values
                tree = cKDTree(inventory_node_ref)
           
        else:
            inventory_node_df = all_node_gdf.copy()
            tree = tree_default
         
        
        dd, ii = tree.query(point, k = n_neigh)
        if n_neigh == 1:
            add_gdf = gpd.GeoDataFrame(inventory_node_df[['osm_node_id', "shst_node_id", "model_node_id", 'geometry']].iloc[ii])\
                            .transpose().reset_index(drop = True)
        else:
            add_gdf = gpd.GeoDataFrame(inventory_node_df[['osm_node_id', "shst_node_id", "model_node_id", 'geometry']].iloc[ii])\
                            .reset_index(drop = True)
        add_gdf['c'] = int(abm_load_ref_df.iloc[i]['c'])
        if i == 0:
            new_load_point_gdf = add_gdf.copy()
             
        else:
            new_load_point_gdf = new_load_point_gdf.append(add_gdf, ignore_index=True, sort=False)
    
    return new_load_point_gdf.rename(columns = {'geometry' : 'geometry_ld'})

In [114]:
%%time

taz_buffer1_node_two_geometry_df.rename(columns = {"taz" : "c"}, inplace = True)

taz_drive_cc_group1_gdf, taz_drive_centroid_group1_gdf = generate_centroid_connectors('drive',
                                                                        taz_drop_long_connectors_gdf,
                                                                        taz_buffer1_node_two_geometry_df,
                                                                        existing_network_node_gdf)

Wall time: 4min 2s


In [221]:
taz_drive_cc_group1_gdf[taz_drive_cc_group1_gdf.A == 300963]

Unnamed: 0,u,shst_node_id,A,geometry_ld,B,geometry_c,geometry,fromIntersectionId,shstGeometryId,id


In [115]:
taz_drive_cc_group1_count_df = taz_drive_cc_group1_gdf.groupby(["B"])["u"].count().reset_index()

taz_group2_list = taz_drive_cc_group1_count_df[
    taz_drive_cc_group1_count_df.u < 3].B.tolist() + \
[c for c in taz_node_gdf.N.tolist() if c not in taz_drive_cc_group1_count_df.B.tolist()]

In [116]:
taz_drive_cc_group1_count_df[taz_drive_cc_group1_count_df.u >= 3].B.nunique()

4551

In [232]:
len(taz_group2_list)

205

In [233]:
%%time

exclude_links_df = link_gdf[link_gdf.roadway.isin(["motorway_link", "motorway", "trunk", "trunk_link"])]

drive_node_gdf = node_gdf[(node_gdf.drive_access == 1) & 
                          ~(node_gdf.osm_node_id.isin(exclude_links_df.u.tolist() + 
                              exclude_links_df.v.tolist())) &
                         (node_gdf.model_node_id.isin(assignable_link_gdf.A.tolist() + 
                                                     assignable_link_gdf.A.tolist()))].copy()

taz_drive_cc_group2_gdf, taz_drive_centroid_group2_gdf = generate_centroid_connectors('drive',
                                                                        taz_drop_long_connectors_gdf[taz_drop_long_connectors_gdf.c.isin(taz_group2_list)],
                                                                        drive_node_gdf,
                                                                        existing_network_node_gdf)

Wall time: 1min 21s


In [119]:
taz_drop_long_connectors_gdf

Unnamed: 0,A,B,NUMLANES,F_JNCTID,T_JNCTID,FRC,NAME,FREEWAY,TOLLRD,ONEWAY,...,geometry,c,non_c,length,length_mean,length_std,length_norm,length_15,length_85,keep
0,1,1002463,0,0,0,0,,0,,,...,LINESTRING (-2116430.484461717 4613514.3841745...,1,1002463,285.197,223.048,93.423,0.665,127.617,314.907,1
1,1,1002702,0,0,0,0,,0,,,...,LINESTRING (-2116430.484461717 4613514.3841745...,1,1002702,109.697,223.048,93.423,-1.213,127.617,314.907,1
2,1,1003078,0,0,0,0,,0,,,...,LINESTRING (-2116430.484461717 4613514.3841745...,1,1003078,351.949,223.048,93.423,1.380,127.617,314.907,1
3,1,1004400,0,0,0,0,,0,,,...,LINESTRING (-2116430.484461717 4613514.3841745...,1,1004400,183.988,223.048,93.423,-0.418,127.617,314.907,1
4,1,1009833,0,0,0,0,,0,,,...,LINESTRING (-2116430.484461717 4613514.3841745...,1,1009833,310.792,223.048,93.423,0.939,127.617,314.907,1
5,1,1011503,0,0,0,0,,0,,,...,LINESTRING (-2116430.484461717 4613514.3841745...,1,1011503,129.608,223.048,93.423,-1.000,127.617,314.907,1
6,1,1012865,0,0,0,0,,0,,,...,LINESTRING (-2116430.484461717 4613514.3841745...,1,1012865,190.106,223.048,93.423,-0.353,127.617,314.907,1
7,2,1007742,0,0,0,0,,0,,,...,LINESTRING (-2116216.80471594 4612656.45176858...,2,1007742,143.774,228.945,120.212,-0.709,142.061,330.650,1
8,2,1008103,0,0,0,0,,0,,,...,LINESTRING (-2116216.80471594 4612656.45176858...,2,1008103,330.655,228.945,120.212,0.846,142.061,330.650,1
9,2,1011332,0,0,0,0,,0,,,...,LINESTRING (-2116216.80471594 4612656.45176858...,2,1011332,433.102,228.945,120.212,1.698,142.061,330.650,1


In [236]:
taz_drive_cc_group2_gdf[taz_drive_cc_group2_gdf.B == 300963].A.tolist()

[2579949, 2563374, 2604856, 2607569, 2504237]

In [237]:
taz_drive_cc_group2_gdf.info()

<class 'geopandas.geodataframe.GeoDataFrame'>
Int64Index: 790 entries, 0 to 844
Data columns (total 10 columns):
u                     790 non-null object
shst_node_id          790 non-null object
A                     790 non-null object
geometry_ld           790 non-null object
B                     790 non-null int64
geometry_c            790 non-null object
geometry              790 non-null object
fromIntersectionId    790 non-null object
shstGeometryId        790 non-null object
id                    790 non-null object
dtypes: int64(1), object(9)
memory usage: 67.9+ KB


In [238]:
taz_drive_cc_group1_gdf.info()

<class 'geopandas.geodataframe.GeoDataFrame'>
Int64Index: 27013 entries, 0 to 31099
Data columns (total 10 columns):
u                     27013 non-null object
shst_node_id          27013 non-null object
A                     27013 non-null object
geometry_ld           27013 non-null object
B                     27013 non-null int64
geometry_c            27013 non-null object
geometry              27013 non-null object
fromIntersectionId    27013 non-null object
shstGeometryId        27013 non-null object
id                    27013 non-null object
dtypes: int64(1), object(9)
memory usage: 2.3+ MB


In [239]:
taz_drive_cc_gdf = pd.concat(
    [taz_drive_cc_group1_gdf, taz_drive_cc_group2_gdf],
    sort = False,
    ignore_index = True
)

taz_drive_cc_gdf.drop_duplicates(subset = ["A", "B"], inplace = True)

taz_drive_cc_gdf["shstGeometryId"] = range(1, 1+len(taz_drive_cc_gdf))
taz_drive_cc_gdf["shstGeometryId"] = taz_drive_cc_gdf["shstGeometryId"].apply(lambda x: "drive_cc" + str(x))
taz_drive_cc_gdf["id"] = taz_drive_cc_gdf["shstGeometryId"]

taz_drive_centroid_gdf = pd.concat(
    [taz_drive_centroid_group1_gdf, taz_drive_centroid_group2_gdf],
    sort = False,
    ignore_index = True
)

taz_drive_centroid_gdf.drop_duplicates(subset = ["model_node_id"], inplace = True)

In [240]:
taz_drive_cc_gdf.head(3)

Unnamed: 0,u,shst_node_id,A,geometry_ld,B,geometry_c,geometry,fromIntersectionId,shstGeometryId,id
0,1723563906.0,c4f793a9cc09e7ea16381ca307a17bab,1001671,POINT (-2116707.794831044 4613515.833833667),1,POINT (-2116430.484461717 4613514.38417453),"LINESTRING (-122.4263017 37.7697203, -122.4235...",c4f793a9cc09e7ea16381ca307a17bab,drive_cc1,drive_cc1
1,5437876960.0,0166709338db50c787ff1f0cbde03108,1024255,POINT (-2116335.318325921 4613451.919851268),1,POINT (-2116430.484461717 4613514.38417453),"LINESTRING (-122.4224009 37.7702293, -122.4235...",0166709338db50c787ff1f0cbde03108,drive_cc2,drive_cc2
2,65292019.0,47607b5b93cfe3112c30ca77905ed17c,1007266,POINT (-2116776.151573973 4613447.413254976),1,POINT (-2116430.484461717 4613514.38417453),"LINESTRING (-122.4267436 37.76899790000001, -1...",47607b5b93cfe3112c30ca77905ed17c,drive_cc3,drive_cc3


In [241]:
taz_drive_cc_gdf.info()

<class 'geopandas.geodataframe.GeoDataFrame'>
Int64Index: 27727 entries, 0 to 27802
Data columns (total 10 columns):
u                     27727 non-null object
shst_node_id          27727 non-null object
A                     27727 non-null object
geometry_ld           27727 non-null object
B                     27727 non-null int64
geometry_c            27727 non-null object
geometry              27727 non-null object
fromIntersectionId    27727 non-null object
shstGeometryId        27727 non-null object
id                    27727 non-null object
dtypes: int64(1), object(9)
memory usage: 2.3+ MB


In [242]:
keep_taz_drive_cc_gdf = get_non_near_connectors(taz_drive_cc_gdf)

keep_taz_drive_cc_gdf = taz_drive_cc_gdf[taz_drive_cc_gdf.id.isin(keep_taz_drive_cc_gdf.id)].copy()

In [243]:
keep_taz_drive_cc_gdf.shape

(16480, 10)

In [244]:
keep_taz_drive_cc_gdf.shape

(16480, 10)

In [245]:
keep_taz_drive_cc_gdf.head(3)

Unnamed: 0,u,shst_node_id,A,geometry_ld,B,geometry_c,geometry,fromIntersectionId,shstGeometryId,id
0,1723563906.0,c4f793a9cc09e7ea16381ca307a17bab,1001671,POINT (-2116707.794831044 4613515.833833667),1,POINT (-2116430.484461717 4613514.38417453),"LINESTRING (-122.4263017 37.7697203, -122.4235...",c4f793a9cc09e7ea16381ca307a17bab,drive_cc1,drive_cc1
1,5437876960.0,0166709338db50c787ff1f0cbde03108,1024255,POINT (-2116335.318325921 4613451.919851268),1,POINT (-2116430.484461717 4613514.38417453),"LINESTRING (-122.4224009 37.7702293, -122.4235...",0166709338db50c787ff1f0cbde03108,drive_cc2,drive_cc2
5,378431868.0,e7e0b5fba6a4d11dd2407eb28bf4e90f,1021297,POINT (-2116405.388054292 4613642.497189956),1,POINT (-2116430.484461717 4613514.38417453),"LINESTRING (-122.423745 37.7715348, -122.42355...",e7e0b5fba6a4d11dd2407eb28bf4e90f,drive_cc6,drive_cc6


In [246]:
keep_taz_drive_cc_gdf.to_pickle("../../data/interim/step7_centroid_connector/taz_drive_cc.pickle")
taz_drive_centroid_gdf.to_pickle("../../data/interim/step7_centroid_connector/taz_drive_centroid.pickle")

In [247]:
keep_taz_drive_cc_gdf.id.nunique()

16480

In [248]:
keep_taz_drive_cc_gdf.B.nunique()

4756

In [249]:
taz_drive_centroid_gdf.info()

<class 'geopandas.geodataframe.GeoDataFrame'>
Int64Index: 4756 entries, 0 to 4920
Data columns (total 2 columns):
model_node_id    4756 non-null int64
geometry         4756 non-null object
dtypes: int64(1), object(1)
memory usage: 111.5+ KB


In [250]:
gpd.GeoDataFrame(keep_taz_drive_cc_gdf[["geometry", "id"]],
                                    geometry = keep_taz_drive_cc_gdf['geometry'], crs = {"init" : "epsg:4326"}).to_file(
    "../../data/interim/step7_centroid_connector/taz_drive.geojson",
                        driver = "GeoJSON")

PermissionError: [WinError 32] The process cannot access the file because it is being used by another process: 'd:\\github\\lfs-cleaning\\travel-model-two-networks\\data\\interim\\step7_centroid_connector\\taz_drive.geojson'

walk_node_gdf = node_gdf[(node_gdf.walk_access == 1) & (node_gdf.rail_only != 1)]

new_walk_cc_gdf, new_walk_centroid_gdf = generate_centroid_connectors('walk',
                                                                        taz_connectors_gdf,
                                                                        walk_node_gdf, 
                                                                        taz_node_gdf)

new_walk_centroid_gdf
new_walk_cc_gdf.info()

bike_node_gdf = node_gdf[(node_gdf.bike_access == 1)]

new_bike_cc_gdf, new_bike_centroid_gdf = generate_centroid_connectors('bike',
                                                                        taz_connectors_gdf,
                                                                        bike_node_gdf, 
                                                                        taz_node_gdf)

new_bike_centroid_gdf.info()

new_bike_cc_gdf.info()

In [251]:
taz_cc_link_df, taz_cc_shape_gdf = consolidate_cc(link_gdf,
                                             taz_drive_centroid_gdf,
                                             node_gdf,
                                             keep_taz_drive_cc_gdf)

In [252]:
keep_taz_drive_cc_gdf.head(3)

Unnamed: 0,u,shst_node_id,A,geometry_ld,B,geometry_c,geometry,fromIntersectionId,shstGeometryId,id
0,1723563906.0,c4f793a9cc09e7ea16381ca307a17bab,1001671,POINT (-2116707.794831044 4613515.833833667),1,POINT (-2116430.484461717 4613514.38417453),"LINESTRING (-122.4263017 37.7697203, -122.4235...",c4f793a9cc09e7ea16381ca307a17bab,drive_cc1,drive_cc1
1,5437876960.0,0166709338db50c787ff1f0cbde03108,1024255,POINT (-2116335.318325921 4613451.919851268),1,POINT (-2116430.484461717 4613514.38417453),"LINESTRING (-122.4224009 37.7702293, -122.4235...",0166709338db50c787ff1f0cbde03108,drive_cc2,drive_cc2
5,378431868.0,e7e0b5fba6a4d11dd2407eb28bf4e90f,1021297,POINT (-2116405.388054292 4613642.497189956),1,POINT (-2116430.484461717 4613514.38417453),"LINESTRING (-122.423745 37.7715348, -122.42355...",e7e0b5fba6a4d11dd2407eb28bf4e90f,drive_cc6,drive_cc6


In [253]:
taz_cc_shape_gdf.id.nunique()

16480

In [254]:
taz_cc_shape_gdf

Unnamed: 0,id,geometry,fromIntersectionId,toIntersectionId
0,cc661,"LINESTRING (-122.4888978 37.77797810000001, -1...",55d370526a55a5d348e23751aad86ac0,
1,cc830,"LINESTRING (-122.4888978 37.77797810000001, -1...",55d370526a55a5d348e23751aad86ac0,
2,cc1710,"LINESTRING (-122.4888978 37.77797810000001, -1...",55d370526a55a5d348e23751aad86ac0,
3,cc323,"LINESTRING (-122.4035822 37.73440549999999, -1...",5c7c469988248e3572b82b39c894dfd6,
4,cc494,"LINESTRING (-122.4231171 37.73735660000001, -1...",7eaf8516bbeb338c7fc1a89ec32accd2,
5,cc501,"LINESTRING (-122.4231171 37.73735660000001, -1...",7eaf8516bbeb338c7fc1a89ec32accd2,
6,cc1787,"LINESTRING (-122.4072972 37.7820986, -122.4055...",372e280074d00e16ebdcfa6cc52f8845,
7,cc983,"LINESTRING (-122.4178959 37.7384283, -122.4183...",bf84502e995a4ef9d27b4495c35a4584,
8,cc1078,"LINESTRING (-122.4238616 37.80623219999999, -1...",10b851e25ff33b8c166778b14d5d6fed,
9,cc1199,"LINESTRING (-122.427484 37.7705796, -122.42694...",353d40605cbeca10c486013810a08663,


In [255]:
taz_cc_link_df.shape

(32960, 11)

In [256]:
taz_cc_shape_gdf = gpd.GeoDataFrame(taz_cc_shape_gdf, geometry = taz_cc_shape_gdf['geometry'], crs = {"init" : "epsg:4326"})

taz_cc_shape_gdf.to_file("../../data/interim/step7_centroid_connector/taz.geojson",
                        driver = "GeoJSON")

# get minimum bounding box around polygon
box = polygon.minimum_rotated_rectangle

# get coordinates of polygon vertices
x, y = box.exterior.coords.xy

# get length of bounding box edges
edge_length = (Point(x[0], y[0]).distance(Point(x[1], y[1])), Point(x[1], y[1]).distance(Point(x[2], y[2])))

# get length of polygon as the longest edge of the bounding box
length = max(edge_length)

# get width of polygon as the shortest edge of the bounding box
width = min(edge_length)

# MAZ connectors

In [145]:
maz_poly_buffer1_gdf = maz_poly_gdf.copy()
maz_poly_buffer2_gdf = maz_poly_gdf.copy()

In [146]:
maz_poly_buffer1_gdf["geometry_buffer"] = maz_poly_buffer1_gdf["geometry"].apply(lambda x: buffer1(x))
maz_poly_buffer2_gdf["geometry_buffer"] = maz_poly_buffer2_gdf["geometry"].apply(lambda x: buffer2(x))

In [147]:
maz_poly_buffer1_gdf.rename(columns = {"geometry" : "geometry_orig", "geometry_buffer" : "geometry"}, inplace = True)

maz_buffer1_node_two_geometry_df = gpd.sjoin(node_two_geometry_df, maz_poly_buffer1_gdf[["geometry", "maz"]], 
                                             how = "left", op = "intersects")

In [148]:
maz_buffer1_node_two_geometry_df.sort_values(by = "maz")

Unnamed: 0,model_node_id,shstGeometryId,osm_node_id,shst_node_id,geometry,index_right,maz
1285,1004522,2,65288883.000,1bb27779a751a0f42dd9b7c3650c31b0,POINT (-122.4404764 37.7493034),0.000,10001.000
2449,1008745,2,65303941.000,95fc2fbc1a4d2c1c4ad5540bd867d923,POINT (-122.4406333 37.7509038),0.000,10001.000
1440,1005165,2,65288881.000,565417a8d1516ac88d1d9c0b33724ffc,POINT (-122.4393904 37.7493692),1.000,10002.000
7056,1025630,2,599157363.000,c5c4c83556a3c226a57f083ff579ae05,POINT (-122.4356195 37.7560731),2.000,10003.000
530,1001867,2,601241643.000,2e1cc2f9a9546a92c603bd8c44432e64,POINT (-122.4345605 37.7560113),4.000,10005.000
2545,1009069,2,65356357.000,15ecec445ac198aa4921fe7c5715a3ee,POINT (-122.434723 37.745612),6.000,10007.000
4881,1017037,2,65327268.000,25ca021446359e3b52e35210fa3f162d,POINT (-122.4336547 37.7464603),6.000,10007.000
6957,1024381,2,2371453146.000,b7f5d312c2cb22678eaac84a2dddb888,POINT (-122.432612 37.746604),6.000,10007.000
282,1000975,2,2371453143.000,20e8eb7d367d016c19a8edbb79024cca,POINT (-122.4326044 37.7465216),6.000,10007.000
1776,1006402,2,65306688.000,9c09c43259ea73cd03d8eefdfb1684cf,POINT (-122.4346224 37.7432226),7.000,10009.000


In [149]:
%%time

maz_buffer1_node_two_geometry_df.rename(columns = {"maz" : "c"}, inplace = True)

maz_drive_cc_group1_gdf, maz_drive_centroid_group1_gdf = generate_centroid_connectors('drive',
                                                                        maz_drop_long_connectors_gdf,
                                                                        maz_buffer1_node_two_geometry_df,
                                                                        existing_network_node_gdf)

Wall time: 22min 34s


In [150]:
maz_buffer1_node_two_geometry_df[maz_buffer1_node_two_geometry_df.model_node_id == 1005079]

Unnamed: 0,model_node_id,shstGeometryId,osm_node_id,shst_node_id,geometry,index_right,c
1418,1005079,2,4000209750.0,e15f4fe887edc6f09cbacdeef040904f,POINT (-122.4166814 37.7507448),2415.0,14214.0
1418,1005079,2,4000209750.0,e15f4fe887edc6f09cbacdeef040904f,POINT (-122.4166814 37.7507448),2413.0,14211.0


In [151]:
maz_drive_cc_group1_gdf.info()

<class 'geopandas.geodataframe.GeoDataFrame'>
Int64Index: 71467 entries, 0 to 108276
Data columns (total 10 columns):
u                     71467 non-null object
shst_node_id          71467 non-null object
A                     71467 non-null object
geometry_ld           71467 non-null object
B                     71467 non-null int64
geometry_c            71467 non-null object
geometry              71467 non-null object
fromIntersectionId    71467 non-null object
shstGeometryId        71467 non-null object
id                    71467 non-null object
dtypes: int64(1), object(9)
memory usage: 6.0+ MB


In [152]:
maz_buffer1_node_two_geometry_df[maz_buffer1_node_two_geometry_df.c == 10001]

Unnamed: 0,model_node_id,shstGeometryId,osm_node_id,shst_node_id,geometry,index_right,c
1285,1004522,2,65288883.0,1bb27779a751a0f42dd9b7c3650c31b0,POINT (-122.4404764 37.7493034),0.0,10001.0
2449,1008745,2,65303941.0,95fc2fbc1a4d2c1c4ad5540bd867d923,POINT (-122.4406333 37.7509038),0.0,10001.0


In [153]:
maz_drive_cc_count_df = maz_drive_cc_group1_gdf.groupby(["B"])["u"].count().reset_index()

maz_group2_list = maz_drive_cc_count_df[maz_drive_cc_count_df.u < 2].B.tolist() + \
[c for c in maz_node_gdf.N.tolist() if c not in maz_drive_cc_count_df.B.tolist()]

In [154]:
maz_drive_cc_count_df[maz_drive_cc_count_df.u >= 2].B.nunique()

22596

In [257]:
len(maz_group2_list)

17130

In [258]:
%%time

exclude_links_df = link_gdf[link_gdf.roadway.isin(["motorway_link", "motorway", "trunk", "trunk_link"])]

drive_node_gdf = node_gdf[(node_gdf.drive_access == 1) & 
                          ~(node_gdf.osm_node_id.isin(exclude_links_df.u.tolist() + 
                              exclude_links_df.v.tolist())) &
                         (node_gdf.model_node_id.isin(assignable_link_gdf.A.tolist() + 
                                                     assignable_link_gdf.A.tolist()))].copy()

maz_drive_cc_group2_gdf, maz_drive_centroid_group2_gdf = generate_centroid_connectors('drive',
                                                                        maz_drop_long_connectors_gdf[maz_drop_long_connectors_gdf.c.isin(maz_group2_list)],
                                                                        drive_node_gdf,
                                                                        existing_network_node_gdf)

Wall time: 54min 4s


In [259]:
maz_drive_cc_group2_gdf.info()

<class 'geopandas.geodataframe.GeoDataFrame'>
Int64Index: 28592 entries, 0 to 41434
Data columns (total 10 columns):
u                     28592 non-null object
shst_node_id          28592 non-null object
A                     28592 non-null object
geometry_ld           28592 non-null object
B                     28592 non-null int64
geometry_c            28592 non-null object
geometry              28592 non-null object
fromIntersectionId    28592 non-null object
shstGeometryId        28592 non-null object
id                    28592 non-null object
dtypes: int64(1), object(9)
memory usage: 2.4+ MB


In [260]:
maz_drive_cc_gdf = pd.concat(
    [maz_drive_cc_group1_gdf, maz_drive_cc_group2_gdf],
    sort = False,
    ignore_index = True
)

maz_drive_cc_gdf.drop_duplicates(subset = ["A", "B"], inplace = True)

maz_drive_centroid_gdf = pd.concat(
    [maz_drive_centroid_group1_gdf, maz_drive_centroid_group2_gdf],
    sort = False,
    ignore_index = True
)

maz_drive_centroid_gdf.drop_duplicates(subset = ["model_node_id"], inplace = True)

In [261]:
maz_drive_cc_gdf.head(3)

Unnamed: 0,u,shst_node_id,A,geometry_ld,B,geometry_c,geometry,fromIntersectionId,shstGeometryId,id
0,65288883.0,1bb27779a751a0f42dd9b7c3650c31b0,1004522,POINT (-2118792.35268053 4611634.728435045),10001,POINT (-2118816.58475166 4611740.259631149),"LINESTRING (-122.4404764 37.7493034, -122.4410...",1bb27779a751a0f42dd9b7c3650c31b0,cc1,cc1
1,65288881.0,565417a8d1516ac88d1d9c0b33724ffc,1005165,POINT (-2118691.601076762 4611608.276112062),10002,POINT (-2118617.785452967 4611688.247021644),"LINESTRING (-122.4393904 37.7493692, -122.4389...",565417a8d1516ac88d1d9c0b33724ffc,cc3,cc3
2,599157363.0,c5c4c83556a3c226a57f083ff579ae05,1025630,POINT (-2118086.908593064 4612253.551938111),10003,POINT (-2118117.395978091 4612355.835871712),"LINESTRING (-122.4356195 37.75607309999999, -1...",c5c4c83556a3c226a57f083ff579ae05,cc5,cc5


In [262]:
maz_drive_cc_gdf.info()

<class 'geopandas.geodataframe.GeoDataFrame'>
Int64Index: 92228 entries, 0 to 100055
Data columns (total 10 columns):
u                     92228 non-null object
shst_node_id          92228 non-null object
A                     92228 non-null object
geometry_ld           92228 non-null object
B                     92228 non-null int64
geometry_c            92228 non-null object
geometry              92228 non-null object
fromIntersectionId    92228 non-null object
shstGeometryId        92228 non-null object
id                    92228 non-null object
dtypes: int64(1), object(9)
memory usage: 7.7+ MB


In [124]:
maz_drive_cc_gdf.info()

<class 'geopandas.geodataframe.GeoDataFrame'>
Int64Index: 98881 entries, 0 to 105259
Data columns (total 10 columns):
u                     98881 non-null object
shst_node_id          98881 non-null object
A                     98881 non-null object
geometry_ld           98881 non-null object
B                     98881 non-null int64
geometry_c            98881 non-null object
geometry              98881 non-null object
fromIntersectionId    98881 non-null object
shstGeometryId        98881 non-null object
id                    98881 non-null object
dtypes: int64(1), object(9)
memory usage: 8.3+ MB


In [263]:
keep_maz_drive_cc_gdf = get_non_near_connectors(maz_drive_cc_gdf)

keep_maz_drive_cc_gdf = maz_drive_cc_gdf[maz_drive_cc_gdf.id.isin(keep_maz_drive_cc_gdf.id)].copy()

In [267]:
keep_maz_drive_cc_gdf.to_pickle("../../data/interim/step7_centroid_connector/maz_drive_cc.pickle")
maz_drive_centroid_gdf.to_pickle("../../data/interim/step7_centroid_connector/maz_drive_centroid.pickle")

%%time

exclude_links_df = link_gdf[link_gdf.roadway.isin(["motorway_link", "motorway", "trunk", "truck_link"])]

drive_node_gdf = node_gdf[(node_gdf.drive_access == 1) & 
                          ~(node_gdf.osm_node_id.isin(exclude_links_df.u.tolist() + 
                              exclude_links_df.v.tolist())) &
                         (node_gdf.model_node_id.isin(two_assignable_way_node_list))].copy()

maz_drive_cc_gdf, maz_drive_centroid_gdf = generate_centroid_connectors('drive',
                                                                        maz_drop_long_connectors_gdf,
                                                                        drive_node_gdf, 
                                                                        existing_network_node_gdf)

In [264]:
keep_maz_drive_cc_gdf.crs

{'init': 'epsg:4326', 'no_defs': True}

In [265]:
keep_maz_drive_cc_gdf.B.nunique()

39721

In [266]:
maz_drive_centroid_gdf.info()

<class 'geopandas.geodataframe.GeoDataFrame'>
Int64Index: 39721 entries, 0 to 51455
Data columns (total 2 columns):
model_node_id    39721 non-null int64
geometry         39721 non-null object
dtypes: int64(1), object(1)
memory usage: 931.0+ KB


In [268]:
keep_maz_drive_cc_gdf[["geometry"]].to_file("../../data/interim/step7_centroid_connector/maz_drive.geojson",
                        driver = "GeoJSON")

In [269]:
maz_drop_long_connectors_gdf.c.min()

10001

In [130]:
%%time

walk_node_gdf = node_gdf[(node_gdf.walk_access == 1) & (node_gdf.rail_only != 1)]

maz_walk_cc_gdf, maz_walk_centroid_gdf = generate_centroid_connectors('walk',
                                                                        maz_drop_long_connectors_gdf,
                                                                        walk_node_gdf, 
                                                                        maz_node_gdf)

Wall time: 2h 11min 45s


In [131]:
maz_walk_cc_gdf.to_pickle("../../data/interim/step7_centroid_connector/maz_walk_cc.pickle")
maz_walk_centroid_gdf.to_pickle("../../data/interim/step7_centroid_connector/maz_walk_centroid.pickle")

In [132]:
%%time

bike_node_gdf = node_gdf[(node_gdf.bike_access == 1)]

maz_bike_cc_gdf, maz_bike_centroid_gdf = generate_centroid_connectors('bike',
                                                                        maz_drop_long_connectors_gdf,
                                                                        bike_node_gdf, 
                                                                        maz_node_gdf)

Wall time: 2h 2min 42s


In [133]:
maz_bike_cc_gdf.to_pickle("../../data/interim/step7_centroid_connector/maz_bike_cc.pickle")
maz_bike_centroid_gdf.to_pickle("../../data/interim/step7_centroid_connector/maz_bike_centroid.pickle")

In [134]:
maz_bike_cc_gdf[["geometry"]].to_file("../../data/interim/step7_centroid_connector/maz_bike.geojson",
                        driver = "GeoJSON")
maz_walk_cc_gdf[["geometry"]].to_file("../../data/interim/step7_centroid_connector/maz_walk.geojson",
                        driver = "GeoJSON")

In [135]:
print(maz_walk_centroid_gdf.shape)
maz_walk_cc_gdf.info()

(39726, 2)
<class 'geopandas.geodataframe.GeoDataFrame'>
Int64Index: 198630 entries, 0 to 198629
Data columns (total 10 columns):
u                     198630 non-null float64
shst_node_id          198630 non-null object
A                     198630 non-null int64
geometry_ld           198630 non-null object
B                     198630 non-null int64
geometry_c            198630 non-null object
geometry              198630 non-null object
fromIntersectionId    198630 non-null object
shstGeometryId        198630 non-null object
id                    198630 non-null object
dtypes: float64(1), int64(2), object(7)
memory usage: 16.7+ MB


In [136]:
maz_walk_cc_gdf.B.nunique()

39726

In [137]:
print(maz_bike_centroid_gdf.shape)
maz_bike_cc_gdf.info()

(39726, 2)
<class 'geopandas.geodataframe.GeoDataFrame'>
Int64Index: 198630 entries, 0 to 198629
Data columns (total 10 columns):
u                     198630 non-null float64
shst_node_id          198630 non-null object
A                     198630 non-null int64
geometry_ld           198630 non-null object
B                     198630 non-null int64
geometry_c            198630 non-null object
geometry              198630 non-null object
fromIntersectionId    198630 non-null object
shstGeometryId        198630 non-null object
id                    198630 non-null object
dtypes: float64(1), int64(2), object(7)
memory usage: 16.7+ MB


In [143]:
# leave previously disconnected 5 MAZs disconnected

maz_bike_cc_gdf = maz_bike_cc_gdf[~maz_bike_cc_gdf.B.isin([10186, 16084, 111432, 111433, 411178])]
maz_walk_cc_gdf = maz_walk_cc_gdf[~maz_walk_cc_gdf.B.isin([10186, 16084, 111432, 111433, 411178])]

In [257]:
keep_maz_drive_cc_gdf[keep_maz_drive_cc_gdf.A.isin([10186, 16084, 111432, 111433, 411178])]

Unnamed: 0,u,shst_node_id,A,geometry_ld,B,geometry_c,geometry,fromIntersectionId,shstGeometryId,id


In [150]:
maz_drive_centroid_gdf[maz_drive_centroid_gdf.model_node_id.isin([10186, 16084, 111432, 111433, 411178])]

Unnamed: 0,model_node_id,geometry


In [187]:
maz_walk_cc_gdf = pd.read_pickle("../../data/interim/step7_centroid_connector/maz_walk_cc.pickle")
maz_walk_centroid_gdf = pd.read_pickle("../../data/interim/step7_centroid_connector/maz_walk_centroid.pickle")

maz_bike_cc_gdf = pd.read_pickle("../../data/interim/step7_centroid_connector/maz_bike_cc.pickle")

In [270]:
maz_bike_cc_gdf.info()

<class 'geopandas.geodataframe.GeoDataFrame'>
Int64Index: 198630 entries, 0 to 198629
Data columns (total 10 columns):
u                     198630 non-null float64
shst_node_id          198630 non-null object
A                     198630 non-null int64
geometry_ld           198630 non-null object
B                     198630 non-null int64
geometry_c            198630 non-null object
geometry              198630 non-null object
fromIntersectionId    198630 non-null object
shstGeometryId        198630 non-null object
id                    198630 non-null object
dtypes: float64(1), int64(2), object(7)
memory usage: 16.7+ MB


In [271]:
maz_cc_link_df, maz_cc_shape_gdf = consolidate_cc(link_gdf,
                                             maz_drive_centroid_gdf,
                                             node_gdf,
                                             keep_maz_drive_cc_gdf, 
                                             maz_walk_cc_gdf, 
                                             maz_bike_cc_gdf, 
                                             )

In [272]:
maz_cc_link_df.info()
maz_cc_shape_gdf.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 531534 entries, 0 to 531533
Data columns (total 11 columns):
A                     531534 non-null int64
B                     531534 non-null int64
drive_access          175936 non-null float64
walk_access           510204 non-null float64
bike_access           506798 non-null float64
shstGeometryId        531534 non-null object
id                    531534 non-null object
u                     265767 non-null float64
v                     265767 non-null float64
fromIntersectionId    265767 non-null object
toIntersectionId      265767 non-null object
dtypes: float64(5), int64(2), object(4)
memory usage: 44.6+ MB
<class 'pandas.core.frame.DataFrame'>
Int64Index: 265767 entries, 0 to 265766
Data columns (total 4 columns):
id                    265767 non-null object
geometry              265767 non-null object
fromIntersectionId    265767 non-null object
toIntersectionId      0 non-null object
dtypes: object(4)
memory usage: 10.1+ MB


In [273]:
maz_cc_shape_gdf["id"] = "maz_" + maz_cc_shape_gdf["id"]
maz_cc_shape_gdf["shstGeometryId"] = maz_cc_shape_gdf["id"]
maz_cc_link_df["id"] = "maz_" + maz_cc_link_df["id"]
maz_cc_link_df["shstGeometryId"] = maz_cc_link_df["id"]
maz_cc_link_df["roadway"] = "maz"

In [274]:
taz_cc_shape_gdf["id"] = "taz_" + taz_cc_shape_gdf["id"]
taz_cc_shape_gdf["shstGeometryId"] = taz_cc_shape_gdf["id"]
taz_cc_link_df["id"] = "taz_" + taz_cc_link_df["id"]
taz_cc_link_df["shstGeometryId"] = taz_cc_link_df["id"]
taz_cc_link_df["roadway"] = "taz"

In [275]:
taz_cc_shape_gdf

Unnamed: 0,id,geometry,fromIntersectionId,toIntersectionId,shstGeometryId
0,taz_cc661,"LINESTRING (-122.4888978 37.77797810000001, -1...",55d370526a55a5d348e23751aad86ac0,,taz_cc661
1,taz_cc830,"LINESTRING (-122.4888978 37.77797810000001, -1...",55d370526a55a5d348e23751aad86ac0,,taz_cc830
2,taz_cc1710,"LINESTRING (-122.4888978 37.77797810000001, -1...",55d370526a55a5d348e23751aad86ac0,,taz_cc1710
3,taz_cc323,"LINESTRING (-122.4035822 37.73440549999999, -1...",5c7c469988248e3572b82b39c894dfd6,,taz_cc323
4,taz_cc494,"LINESTRING (-122.4231171 37.73735660000001, -1...",7eaf8516bbeb338c7fc1a89ec32accd2,,taz_cc494
5,taz_cc501,"LINESTRING (-122.4231171 37.73735660000001, -1...",7eaf8516bbeb338c7fc1a89ec32accd2,,taz_cc501
6,taz_cc1787,"LINESTRING (-122.4072972 37.7820986, -122.4055...",372e280074d00e16ebdcfa6cc52f8845,,taz_cc1787
7,taz_cc983,"LINESTRING (-122.4178959 37.7384283, -122.4183...",bf84502e995a4ef9d27b4495c35a4584,,taz_cc983
8,taz_cc1078,"LINESTRING (-122.4238616 37.80623219999999, -1...",10b851e25ff33b8c166778b14d5d6fed,,taz_cc1078
9,taz_cc1199,"LINESTRING (-122.427484 37.7705796, -122.42694...",353d40605cbeca10c486013810a08663,,taz_cc1199


In [276]:
maz_cc_shape_gdf

Unnamed: 0,id,geometry,fromIntersectionId,toIntersectionId,shstGeometryId
0,maz_cc1700,"LINESTRING (-122.4888978 37.77797810000001, -1...",55d370526a55a5d348e23751aad86ac0,,maz_cc1700
1,maz_cc93326,"LINESTRING (-122.4888978 37.77797810000001, -1...",55d370526a55a5d348e23751aad86ac0,,maz_cc93326
2,maz_cc4885,"LINESTRING (-122.4888978 37.77797810000001, -1...",55d370526a55a5d348e23751aad86ac0,,maz_cc4885
3,maz_cc1147,"LINESTRING (-122.4035822 37.73440549999999, -1...",5c7c469988248e3572b82b39c894dfd6,,maz_cc1147
4,maz_cc94193,"LINESTRING (-122.4035822 37.73440549999999, -1...",5c7c469988248e3572b82b39c894dfd6,,maz_cc94193
5,maz_cc634,"LINESTRING (-122.4231171 37.73735660000001, -1...",7eaf8516bbeb338c7fc1a89ec32accd2,,maz_cc634
6,maz_cc91938,"LINESTRING (-122.4231171 37.73735660000001, -1...",7eaf8516bbeb338c7fc1a89ec32accd2,,maz_cc91938
7,maz_cc2294,"LINESTRING (-122.4231171 37.73735660000001, -1...",7eaf8516bbeb338c7fc1a89ec32accd2,,maz_cc2294
8,maz_cc89374,"LINESTRING (-122.3999294 37.72571270000001, -1...",798cae7a1d2ac00f3460b6f1452ee01d,,maz_cc89374
9,maz_cc88470,"LINESTRING (-122.4718894 37.75600090000002, -1...",2ceb892d1ee28b7a727941abe880a46f,,maz_cc88470


# number centroid connectors

In [277]:
all_cc_shape_gdf = pd.concat(
    [taz_cc_shape_gdf, maz_cc_shape_gdf],
    sort = False,
    ignore_index = True
)

In [278]:
all_cc_shape_gdf.info()

<class 'geopandas.geodataframe.GeoDataFrame'>
RangeIndex: 282247 entries, 0 to 282246
Data columns (total 5 columns):
id                    282247 non-null object
geometry              282247 non-null object
fromIntersectionId    282247 non-null object
toIntersectionId      0 non-null object
shstGeometryId        282247 non-null object
dtypes: object(5)
memory usage: 10.8+ MB


In [279]:
node_gdf.crs

{'init': 'epsg:4326', 'no_defs': True}

In [280]:
all_cc_shape_gdf = pd.merge(
    all_cc_shape_gdf,
    node_gdf[["shst_node_id", "county"]].rename(columns = {"shst_node_id" : "fromIntersectionId"}),
    how = "left",
    on = "fromIntersectionId"
)

In [281]:
all_cc_shape_gdf.info()

<class 'geopandas.geodataframe.GeoDataFrame'>
Int64Index: 282247 entries, 0 to 282246
Data columns (total 6 columns):
id                    282247 non-null object
geometry              282247 non-null object
fromIntersectionId    282247 non-null object
toIntersectionId      0 non-null object
shstGeometryId        282247 non-null object
county                282247 non-null object
dtypes: object(6)
memory usage: 15.1+ MB


In [282]:
all_cc_shape_gdf.county.value_counts()

Santa Clara      67566
Alameda          59561
Contra Costa     42265
San Mateo        29733
San Francisco    26608
Sonoma           20503
Solano           19440
Marin            10108
Napa              6463
Name: county, dtype: int64

In [283]:
all_cc_shape_gdf = gpd.GeoDataFrame(all_cc_shape_gdf, geometry = all_cc_shape_gdf.geometry, crs = shape_gdf.crs)

all_cc_shape_gdf = gpd.GeoDataFrame(all_cc_shape_gdf, 
                                             geometry = all_cc_shape_gdf.geometry, 
                                             crs = shape_gdf.crs)

all_cc_shape_gdf = all_cc_shape_gdf.to_crs({'init' : 'epsg:26915'})

all_cc_shape_gdf['X'] = all_cc_shape_gdf['geometry'].apply(lambda p: p.centroid.x)
all_cc_shape_gdf['Y'] = all_cc_shape_gdf['geometry'].apply(lambda p: p.centroid.y)

all_cc_shape_county_gdf = pd.DataFrame()

node_county_matched_gdf = node_gdf.copy()

node_county_matched_gdf = node_county_matched_gdf.to_crs({'init' : 'epsg:26915'})

node_county_matched_gdf['X'] = node_county_matched_gdf.geometry.map(lambda g:g.x)
node_county_matched_gdf['Y'] = node_county_matched_gdf.geometry.map(lambda g:g.y)

node_matched_inventory_ref = node_county_matched_gdf[['X', 'Y']].values
node_matched_tree = cKDTree(node_matched_inventory_ref)

for i in range(len(all_cc_shape_gdf)):
    point = all_cc_shape_gdf.iloc[i][['X', 'Y']].values
    dd, ii = node_matched_tree.query(point, k = 1)
    add_snap_gdf = gpd.GeoDataFrame(node_county_matched_gdf.iloc[ii][["county"]]).transpose().reset_index(drop = True)
    
    add_snap_gdf = pd.concat([add_snap_gdf,
                              all_cc_shape_gdf.iloc[[i]].reset_index(drop = True)], 
                               axis = 1) 
    
    if i == 0:
        all_cc_shape_county_gdf = add_snap_gdf.copy()
    else:
        all_cc_shape_county_gdf = all_cc_shape_county_gdf.append(add_snap_gdf, ignore_index=True, sort=False)

In [284]:
all_cc_link_df = pd.concat(
    [taz_cc_link_df, maz_cc_link_df],
    sort = False,
    ignore_index = True
)

all_cc_link_df = pd.merge(
    all_cc_link_df,
    all_cc_shape_gdf[["id", "county"]],
    how = "left",
    on = ["id"]
)

all_cc_link_df["drive_access"].fillna(0, inplace = True)
all_cc_link_df["bike_access"].fillna(0, inplace = True)
all_cc_link_df["walk_access"].fillna(0, inplace = True)

all_cc_link_df["drive_access"] = all_cc_link_df["drive_access"].astype(int)
all_cc_link_df["bike_access"] = all_cc_link_df["bike_access"].astype(int)
all_cc_link_df["walk_access"] = all_cc_link_df["walk_access"].astype(int)

all_cc_link_df["rail_only"] = 0

In [285]:
#get the last node and link number of counties

county_last_link_id_df = link_df.groupby("county")["model_link_id"].max().reset_index().rename(
    columns = {"model_link_id" : "county_last_id"})

all_cc_link_df = pd.merge(
    all_cc_link_df,
    county_last_link_id_df,
    how = "left",
    on = "county"
)

all_cc_link_df["model_link_id"] = all_cc_link_df.groupby(["county"]).cumcount() + 1

all_cc_link_df["model_link_id"] = all_cc_link_df["model_link_id"] + all_cc_link_df["county_last_id"]

In [286]:
all_centroid_node_gdf = pd.concat(
    [taz_drive_centroid_gdf, maz_walk_centroid_gdf],
    sort = False,
    ignore_index = True
)

all_centroid_node_gdf["drive_access"] = 1
all_centroid_node_gdf['walk_access'] = 1
all_centroid_node_gdf['bike_access'] = 1
all_centroid_node_gdf['rail_only'] = 0

In [287]:
all_centroid_node_gdf.model_node_id.nunique()

44482

In [288]:
all_centroid_node_gdf

Unnamed: 0,model_node_id,geometry,drive_access,walk_access,bike_access,rail_only
0,1,POINT (-122.4235552308525 37.7704613164878),1,1,1,0
1,2,POINT (-122.4185110411505 37.76430798097193),1,1,1,0
2,3,POINT (-122.424376790239 37.76776767513344),1,1,1,0
3,4,POINT (-122.3723843332867 37.82546378616041),1,1,1,0
4,5,POINT (-122.4275951118974 37.7665410065728),1,1,1,0
5,6,POINT (-122.4277080093626 37.76375177913884),1,1,1,0
6,7,POINT (-122.4303984149702 37.76540237440019),1,1,1,0
7,8,POINT (-122.4361556003588 37.76163121820408),1,1,1,0
8,9,POINT (-122.4359169177398 37.75922606474905),1,1,1,0
9,10,POINT (-122.4379402255995 37.75731763424209),1,1,1,0


In [289]:
all_centroid_node_gdf.model_node_id.nunique()

44482

In [290]:
node_gdf.columns

Index(['osm_node_id', 'shst_node_id', 'county', 'drive_access', 'walk_access',
       'bike_access', 'model_node_id', 'rail_only', 'geometry'],
      dtype='object')

In [291]:
all_cc_link_df

Unnamed: 0,A,B,drive_access,walk_access,bike_access,shstGeometryId,id,u,v,fromIntersectionId,toIntersectionId,roadway,county,rail_only,county_last_id,model_link_id
0,1000000,205,1,0,0,taz_cc661,taz_cc661,65308539.000,,55d370526a55a5d348e23751aad86ac0,,taz,San Francisco,0,74758,74759
1,1000000,263,1,0,0,taz_cc830,taz_cc830,65308539.000,,55d370526a55a5d348e23751aad86ac0,,taz,San Francisco,0,74758,74760
2,1000000,544,1,0,0,taz_cc1710,taz_cc1710,65308539.000,,55d370526a55a5d348e23751aad86ac0,,taz,San Francisco,0,74758,74761
3,1000001,98,1,0,0,taz_cc323,taz_cc323,65344363.000,,5c7c469988248e3572b82b39c894dfd6,,taz,San Francisco,0,74758,74762
4,1000003,153,1,0,0,taz_cc494,taz_cc494,65288594.000,,7eaf8516bbeb338c7fc1a89ec32accd2,,taz,San Francisco,0,74758,74763
5,1000003,155,1,0,0,taz_cc501,taz_cc501,65288594.000,,7eaf8516bbeb338c7fc1a89ec32accd2,,taz,San Francisco,0,74758,74764
6,1000010,568,1,0,0,taz_cc1787,taz_cc1787,5429618398.000,,372e280074d00e16ebdcfa6cc52f8845,,taz,San Francisco,0,74758,74765
7,1000021,313,1,0,0,taz_cc983,taz_cc983,65293731.000,,bf84502e995a4ef9d27b4495c35a4584,,taz,San Francisco,0,74758,74766
8,1000025,345,1,0,0,taz_cc1078,taz_cc1078,65297262.000,,10b851e25ff33b8c166778b14d5d6fed,,taz,San Francisco,0,74758,74767
9,1000030,383,1,0,0,taz_cc1199,taz_cc1199,5437461596.000,,353d40605cbeca10c486013810a08663,,taz,San Francisco,0,74758,74768


In [292]:
all_cc_link_df.columns

Index(['A', 'B', 'drive_access', 'walk_access', 'bike_access',
       'shstGeometryId', 'id', 'u', 'v', 'fromIntersectionId',
       'toIntersectionId', 'roadway', 'county', 'rail_only', 'county_last_id',
       'model_link_id'],
      dtype='object')

In [293]:
all_cc_link_df.id.nunique()

282247

In [294]:
all_cc_link_df.model_link_id.nunique()

564494

In [295]:
all_cc_shape_gdf

Unnamed: 0,id,geometry,fromIntersectionId,toIntersectionId,shstGeometryId,county
0,taz_cc661,"LINESTRING (-122.4888978 37.77797810000001, -1...",55d370526a55a5d348e23751aad86ac0,,taz_cc661,San Francisco
1,taz_cc830,"LINESTRING (-122.4888978 37.77797810000001, -1...",55d370526a55a5d348e23751aad86ac0,,taz_cc830,San Francisco
2,taz_cc1710,"LINESTRING (-122.4888978 37.77797810000001, -1...",55d370526a55a5d348e23751aad86ac0,,taz_cc1710,San Francisco
3,taz_cc323,"LINESTRING (-122.4035822 37.73440549999999, -1...",5c7c469988248e3572b82b39c894dfd6,,taz_cc323,San Francisco
4,taz_cc494,"LINESTRING (-122.4231171 37.73735660000001, -1...",7eaf8516bbeb338c7fc1a89ec32accd2,,taz_cc494,San Francisco
5,taz_cc501,"LINESTRING (-122.4231171 37.73735660000001, -1...",7eaf8516bbeb338c7fc1a89ec32accd2,,taz_cc501,San Francisco
6,taz_cc1787,"LINESTRING (-122.4072972 37.7820986, -122.4055...",372e280074d00e16ebdcfa6cc52f8845,,taz_cc1787,San Francisco
7,taz_cc983,"LINESTRING (-122.4178959 37.7384283, -122.4183...",bf84502e995a4ef9d27b4495c35a4584,,taz_cc983,San Francisco
8,taz_cc1078,"LINESTRING (-122.4238616 37.80623219999999, -1...",10b851e25ff33b8c166778b14d5d6fed,,taz_cc1078,San Francisco
9,taz_cc1199,"LINESTRING (-122.427484 37.7705796, -122.42694...",353d40605cbeca10c486013810a08663,,taz_cc1199,San Francisco


In [296]:
all_cc_shape_gdf.id.nunique()

282247

all_link_df = pd.concat([link_df,
                        all_cc_link_df],
                       sort = False,
                       ignore_index = True)

all_shape_gdf = pd.concat([shape_gdf,
                          all_cc_shape_gdf],
                         sort = False,
                         ignore_index = True)

In [297]:
#concat centroid and centroid connectors to network

all_link_df = pd.concat([link_df,
                        all_cc_link_df.drop(['county_last_id'], axis = 1)],
                       sort = False,
                       ignore_index = True)

all_shape_gdf = pd.concat([shape_gdf,
                          all_cc_shape_gdf],
                         sort = False,
                         ignore_index = True)

node_gdf = node_gdf.to_crs(epsg = 4326)

all_node_gdf = pd.concat([node_gdf,
                         all_centroid_node_gdf],
                        sort = False,
                        ignore_index = True)

# add length

In [298]:
all_cc_link_gdf = pd.merge(all_cc_link_df, all_cc_shape_gdf[["id", "geometry"]], how = "left", on = "id")

In [299]:
all_cc_link_gdf[all_cc_link_gdf.geometry.isnull()]

Unnamed: 0,A,B,drive_access,walk_access,bike_access,shstGeometryId,id,u,v,fromIntersectionId,toIntersectionId,roadway,county,rail_only,county_last_id,model_link_id,geometry


In [300]:
all_cc_link_gdf = gpd.GeoDataFrame(all_cc_link_gdf, 
                                   geometry = all_cc_link_gdf["geometry"], 
                                   crs = shape_gdf.crs)

In [301]:
geom_length = all_cc_link_gdf[['geometry']].copy()
geom_length = geom_length.to_crs(epsg = 26915)
geom_length["length"] = geom_length.length

all_cc_link_gdf["length"] = geom_length["length"]

# add locationreference

In [302]:
all_node_gdf['X'] = all_node_gdf['geometry'].apply(lambda p: p.x)
all_node_gdf['Y'] = all_node_gdf['geometry'].apply(lambda p: p.y)
all_node_gdf['point'] = [list(xy) for xy in zip(all_node_gdf.X, all_node_gdf.Y)]
node_dict = dict(zip(all_node_gdf.model_node_id, all_node_gdf.point))
    
all_cc_link_gdf['A_point'] = all_cc_link_gdf['A'].map(node_dict)
all_cc_link_gdf['B_point'] = all_cc_link_gdf['B'].map(node_dict)
all_cc_link_gdf['locationReferences'] = all_cc_link_gdf.apply(lambda x: [{'sequence':1, 
                                                             'point': x['A_point'],
                                                             'distanceToNextRef':x['length'],
                                                            'bearing' : 0,
                                                             'intersectionId':x['fromIntersectionId']},
                                                                         {'sequence':2, 
                                                             'point': x['B_point'],
                                                             'intersectionId':x['toIntersectionId']}],
                                                   axis = 1)

In [303]:
all_cc_link_gdf.locationReferences.iloc[-1]

[{'sequence': 1,
  'point': [-122.50580293859502, 37.94830473568058],
  'distanceToNextRef': 51.382886808196375,
  'bearing': 0,
  'intersectionId': nan},
 {'sequence': 2,
  'point': [-122.50576480000001, 37.9487303],
  'intersectionId': '44c53879b5a9797cbb2eff72b5f64699'}]

# add shape_id (not necessary for MTC)

all_shape_gdf["shape_id"] = range(1, 1+ len(all_shape_gdf))

shape_id_dict = dict(zip(all_shape_gdf.id, all_shape_gdf.shape_id))

all_link_gdf["shape_id"] = all_link_gdf["id"].map(shape_id_dict)

# write out

In [304]:
all_centroid_node_gdf[all_centroid_node_gdf.model_node_id == 10186]

Unnamed: 0,model_node_id,geometry,drive_access,walk_access,bike_access,rail_only
4855,10186,POINT (-122.3304153039983 37.78318054990585),1,1,1,0


In [305]:
all_centroid_node_gdf.head(3)

Unnamed: 0,model_node_id,geometry,drive_access,walk_access,bike_access,rail_only
0,1,POINT (-122.4235552308525 37.7704613164878),1,1,1,0
1,2,POINT (-122.4185110411505 37.76430798097193),1,1,1,0
2,3,POINT (-122.424376790239 37.76776767513344),1,1,1,0


In [306]:
all_cc_shape_gdf.head(3)

Unnamed: 0,id,geometry,fromIntersectionId,toIntersectionId,shstGeometryId,county
0,taz_cc661,"LINESTRING (-122.4888978 37.77797810000001, -1...",55d370526a55a5d348e23751aad86ac0,,taz_cc661,San Francisco
1,taz_cc830,"LINESTRING (-122.4888978 37.77797810000001, -1...",55d370526a55a5d348e23751aad86ac0,,taz_cc830,San Francisco
2,taz_cc1710,"LINESTRING (-122.4888978 37.77797810000001, -1...",55d370526a55a5d348e23751aad86ac0,,taz_cc1710,San Francisco


In [307]:
int_col = ["bike_access", "walk_access", "drive_access", "rail_only", "A", "B", "u", "v"]
for c in int_col:
    all_cc_link_gdf[c] = all_cc_link_gdf[c].fillna(0).astype(np.int64)

int_col = ["bike_access", "walk_access", "drive_access", "rail_only"]
for c in int_col:
    all_centroid_node_gdf[c] = all_centroid_node_gdf[c].fillna(0).astype(np.int64)

# write out centroid and centroid connectors seperately

In [296]:
%%time

print("-------write out link shape geojson---------")

shape_prop = ['id', 'fromIntersectionId', 'toIntersectionId']
shape_geojson = link_df_to_geojson(all_cc_shape_gdf, shape_prop)

with open("../../data/interim/step7_centroid_connector/cc_shape.geojson", "w") as f:
    json.dump(shape_geojson, f)

-------write out link shape geojson---------
Wall time: 51 s


In [297]:
%%time

# write out link variable json
# link unique handle "shstReferenceId" + "shstGeometryId"

print("-------write out link json---------")

link_prop = all_cc_link_gdf.drop(["geometry"], axis = 1).columns.tolist()

out = all_cc_link_gdf[link_prop].to_json(orient = "records")

with open("../../data/interim/step7_centroid_connector/cc_link.json", 'w') as f:
    f.write(out)

-------write out link json---------
Wall time: 11.1 s


In [298]:
%%time

print("-------write out node geojson---------")

node_prop = all_centroid_node_gdf.drop(["geometry"], axis = 1).columns.tolist()
node_geojson = point_df_to_geojson(all_centroid_node_gdf, node_prop)

with open("../../data/interim/step7_centroid_connector/centroid_node.geojson", "w") as f:
    json.dump(node_geojson, f)

-------write out node geojson---------
Wall time: 9.23 s


In [308]:
print("-------write out pickle---------")

all_cc_link_gdf.drop(['county_last_id','A_point', 'B_point'], axis = 1).to_pickle(
    "../../data/interim/step7_centroid_connector/cc_link.pickle")
all_cc_shape_gdf.to_pickle("../../data/interim/step7_centroid_connector/cc_shape.pickle")
all_centroid_node_gdf.to_pickle("../../data/interim/step7_centroid_connector/centroid_node.pickle")

-------write out pickle---------


In [309]:
print(all_cc_link_gdf.shape)
print(all_cc_shape_gdf.shape)
print(all_centroid_node_gdf.shape)

(564494, 21)
(282247, 6)
(44482, 6)


In [310]:
maz_cc_link_df[maz_cc_link_df.A == 811960]

Unnamed: 0,A,B,drive_access,walk_access,bike_access,shstGeometryId,id,u,v,fromIntersectionId,toIntersectionId,roadway
522430,811960,5000992,1.0,0.0,0.0,maz_cc87612,maz_cc87612,,1331496807.0,,8e0433654142f54bc4f33dd4c4582aa4,maz
523076,811960,5002710,,1.0,1.0,maz_cc251561,maz_cc251561,,1818437417.0,,04b5de8ff00f922a8da9322520234ac5,maz
523558,811960,5003988,1.0,0.0,0.0,maz_cc87611,maz_cc87611,,110446729.0,,540c8a5c9a5a7ca5e9c9214a558224a0,maz
523992,811960,5005125,,1.0,1.0,maz_cc251562,maz_cc251562,,3400303578.0,,cb8023db19f6149505629e577fa2b833,maz
524319,811960,5005974,,1.0,1.0,maz_cc251564,maz_cc251564,,110367504.0,,c949eaecf7386c443aaa7012b7aaacbf,maz
524364,811960,5006150,,1.0,1.0,maz_cc251565,maz_cc251565,,110367528.0,,c67db9c9f0c1d1fabccb861ba3632cf0,maz
525600,811960,5009415,,1.0,1.0,maz_cc251563,maz_cc251563,,2837192655.0,,b204a4bc90efafeef1877e923bfda759,maz


In [311]:
taz_cc_link_df[taz_cc_link_df.A == 300963]

Unnamed: 0,A,B,drive_access,walk_access,bike_access,shstGeometryId,id,u,v,fromIntersectionId,toIntersectionId,roadway
26691,300963,2579949,1,0,0,taz_cc16369,taz_cc16369,,2286923039.0,,42fb9b206860ef90946ee5631f76a081,taz
27609,300963,2607569,1,0,0,taz_cc16370,taz_cc16370,,53017923.0,,a5c4242ffd0dee287f0ad0007bff5d9e,taz
