In [1]:
import geopandas as gpd
from shapely.geometry import Point, shape, LineString, Polygon
import pandas as pd
import numpy as np
import os

In [2]:
mile_to_meter = 1609.344

In [3]:
network_dir = r'L:\Application\Model_One\NextGenFwys\Scenarios\2035_TM152_NGF_NP02\OUTPUT\shapefile'

In [4]:
roadway_gdf = gpd.read_file(os.path.join(network_dir, 'network_links.shp'))
print('roadway has {:,} links'.format(roadway_gdf.shape[0]))
display(roadway_gdf.head())

roadway has 34,396 links


Unnamed: 0,A,B,DISTANCE,SPDCLASS,CAPCLASS,LANES,TSIN,GL,OT,CAP,...,VOL24HR_SMT,VOL24HR_HVT,VOL24HR_DAA,VOL24HR_S2A,VOL24HR_S3A,VOL24HR_TOT,DELAY24HR,VMT24HR,VHT24HR,geometry
0,1,7487,0.05201,6,6,7,1,1,0.17337,0,...,115.80737,0.0,304.58939,44.19926,21.98397,4847.74951,0.0,0.0,0.0,"LINESTRING (552969.250 4183031.292, 553038.812..."
1,1,7489,0.04758,6,6,7,1,1,0.1586,0,...,66.39857,0.0,433.10568,77.72451,50.3154,6065.3335,0.0,0.0,0.0,"LINESTRING (552969.250 4183031.292, 552960.375..."
2,2,7496,0.06356,6,6,7,1,1,0.21187,0,...,80.82608,0.0,233.9623,53.69812,31.22016,4540.75537,0.0,0.0,0.0,"LINESTRING (552652.000 4182953.000, 552749.062..."
3,2,7500,0.05204,6,6,7,1,1,0.17347,0,...,102.95451,0.0,811.05377,127.16299,73.54209,10484.73633,0.0,0.0,0.0,"LINESTRING (552652.000 4182953.000, 552581.062..."
4,3,7483,0.12645,6,6,7,1,1,0.4215,0,...,15.49508,0.0,247.40112,43.01975,25.89654,2410.89063,0.0,0.0,0.0,"LINESTRING (552346.750 4182827.020, 552176.875..."


In [5]:
major_art_gdf = roadway_gdf.loc[roadway_gdf.FT == 7]
print('{:,} arterial links'.format(major_art_gdf.shape[0]))

10,938 arterial links


In [6]:
freeway_gdf = roadway_gdf.loc[roadway_gdf.FT.isin([1, 2, 8])]
print('{:,} freeway links'.format(freeway_gdf.shape[0]))
print(freeway_gdf.crs)

3,575 freeway links
epsg:26910


In [7]:
# get A, B nodes of arterial links

# extract A, B points
major_art_gdf['A_point'] = major_art_gdf.apply(lambda x: Point(np.array(x.geometry.coords)[0]), axis=1)
major_art_gdf['B_point'] = major_art_gdf.apply(lambda x: Point(np.array(x.geometry.coords)[-1]), axis=1)

# get a node dataframe from A, B
major_art_node_df = pd.concat([
    major_art_gdf[['A', 'A_point']].rename(columns = {'A': 'node',
                                                      'A_point': 'geometry'}),
    major_art_gdf[['B', 'B_point']].rename(columns = {'B': 'node',
                                                      'B_point': 'geometry'})])
# print(major_art_node_df.shape[0])

major_art_node_df.drop_duplicates(inplace=True)
print('arterials have {:,} A B nodes'.format(major_art_node_df.shape[0]))

# convert to node geodataframe
# major_art_node_gdf = gpd.GeoDataFrame(major_art_node_df,
#                                       geometry=major_art_node_df['geometry'],
#                                       crs={'init': 'epsg:26910'})

major_art_node_gdf = gpd.GeoDataFrame(major_art_node_df,
                                      geometry=major_art_node_df['geometry'],
                                      crs=major_art_gdf.crs)
print(major_art_node_gdf.crs)

display(major_art_node_gdf.head())

  arr = construct_1d_object_array_from_listlike(values)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  super().__setitem__(key, value)
  arr = construct_1d_object_array_from_listlike(values)


arterials have 4,797 A B nodes
epsg:26910


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  super().__setitem__(key, value)


Unnamed: 0,node,geometry
2702,1601,POINT (588038.000 4245402.000)
2710,1604,POINT (590059.000 4246594.000)
2714,1605,POINT (588430.000 4245875.000)
2719,1606,POINT (613001.000 4215923.000)
2721,1607,POINT (611067.000 4215458.000)


In [8]:
# get the link controids of arterial links

major_art_controid_gdf = major_art_gdf[['A', 'B', 'geometry']].copy()
major_art_controid_gdf['centroids'] = major_art_controid_gdf['geometry'].centroid
major_art_controid_gdf['node'] = major_art_controid_gdf['A'].astype(str) + '-' + major_art_controid_gdf['B'].astype(str)
display(major_art_controid_gdf.head(5))
major_art_controid_gdf.drop(columns=['geometry', 'A', 'B'], inplace=True)
major_art_controid_gdf.rename(columns={'centroids': 'geometry'}, inplace=True)
display(major_art_controid_gdf.head(5))
print(type(major_art_controid_gdf))
print('arterial centroids has {:,} rows'.format(major_art_controid_gdf.shape[0]))

Unnamed: 0,A,B,geometry,centroids,node
2702,1601,8838,"LINESTRING (588038.000 4245402.000, 588245.000...",POINT (588141.500 4245543.000),1601-8838
2703,1601,8844,"LINESTRING (588038.000 4245402.000, 587756.000...",POINT (587897.000 4245215.000),1601-8844
2710,1604,8849,"LINESTRING (590059.000 4246594.000, 590716.000...",POINT (590387.500 4247064.000),1604-8849
2713,1604,11966,"LINESTRING (590059.000 4246594.000, 589379.000...",POINT (589719.000 4246421.000),1604-11966
2714,1605,8830,"LINESTRING (588430.000 4245875.000, 587120.000...",POINT (587775.000 4245851.000),1605-8830


Unnamed: 0,geometry,node
2702,POINT (588141.500 4245543.000),1601-8838
2703,POINT (587897.000 4245215.000),1601-8844
2710,POINT (590387.500 4247064.000),1604-8849
2713,POINT (589719.000 4246421.000),1604-11966
2714,POINT (587775.000 4245851.000),1605-8830


<class 'geopandas.geodataframe.GeoDataFrame'>
arterial centroids has 10,938 rows


In [9]:
def get_nearest_link_by_perpendicular_distance(points_gdf, links_gdf, offset):

    """
    Given a set of points and a set of links, find the nearest link to each point based on perpendicular distance.
    - points_gdf, links_gdf should be crs:26915
    - offset should be set to capture at least one link for each point

    """

    node_match_gdf = gpd.GeoDataFrame()

    # create a bounding box around each station with the set offset;
    # bbox is a dataframe with columns 'minx', 'miny', 'maxx', 'maxy'
    bbox = points_gdf.bounds + [-offset, -offset, offset, offset]

    # use GeoPandas R-tree spatial indexing (sindex) to intersect each PEMS point's bbox with all candidate
    # links; the result "hit" has the same length as bbox, the values are the index of candidate link(s)
    # that are intersect with each bbox row (each PEMS point's bbox)

    hits = bbox.apply(lambda row: list(links_gdf.sindex.intersection(row)),
                                    axis=1)

    # convert hits into a dataframe with two columns: 'pt_idx' and 'link_i'
    tmp = pd.DataFrame({
        # index of points table 'pems_route_dir_ptype_gdf' (also bbox)
        "pt_idx": np.repeat(hits.index, hits.apply(len)),
        # ordinal position of link - access via iloc later
        "link_i": np.concatenate(hits.values)
    })

    # set pt_idx as index and join with pems_route_dir_ptype_gdf
    tmp.set_index(["pt_idx"], inplace=True)
    tmp = tmp.join(
        points_gdf.rename(
            columns={"geometry": "point"}),
        how='left')

    # rest link_condidates index and join tmp to it
    tmp.set_index(['link_i'], inplace=True)
    tmp = tmp.join(
        links_gdf[['A', 'B', 'geometry']].reset_index(drop=True),
        how='left')

    # find closest link to point
    # 1st, convert it to geodataframe with link's geometry as geometry
    tmp_gdf = gpd.GeoDataFrame(tmp, geometry=tmp['geometry'], crs=points_gdf.crs)
    # 2nd, calculate the snap distance between each point (PEMS station) and the hitted link
    tmp_gdf['snap_distance'] = tmp_gdf.geometry.distance(gpd.GeoSeries(tmp_gdf.point))
    # 3rd, sort by snap_distance
    tmp_gdf.sort_values(by=['snap_distance'], inplace=True)
    # 4th, for each station, keep the link with the shortest snap distance
    closest_gdf = tmp_gdf.groupby(['node']).first().reset_index()

    return closest_gdf


In [10]:
# get nearest freeway link for each arterial A, B nodes, as well as arterial link centroid

# convert to crs 26915
major_art_node_gdf = major_art_node_gdf.to_crs(26915)
major_art_controid_gdf = major_art_controid_gdf.to_crs(26915)
freeway_gdf = freeway_gdf.to_crs(26915)

# set offset
offset = mile_to_meter * 20

In [11]:
# nearest fwy link for A, B nodes
closest_fwy_gdf = get_nearest_link_by_perpendicular_distance(major_art_node_gdf, freeway_gdf, offset)
print('A B nodes closest_fwy_gdf has {:,} rows'.format(closest_fwy_gdf.shape[0]))
display(closest_fwy_gdf.head())

# join it back to arterial links
# add a field to represent freeway links with node pairs
closest_fwy_gdf['matchFwy'] = closest_fwy_gdf['A'].astype(str) + '-' + closest_fwy_gdf['B'].astype(str)

# matched freeway and the distance of node A
arterial_parallel = pd.merge(
    roadway_gdf.loc[roadway_gdf['FT'] == 7],
    closest_fwy_gdf[['node', 'matchFwy', 'snap_distance']].rename(
        columns={'node': 'A',
                 'matchFwy': 'matchFwy_A',
                 'snap_distance': 'distFwy_A'}),
    on='A',
    how='left')
# matched freeway and the distance of node B
arterial_parallel = pd.merge(
    arterial_parallel,
    closest_fwy_gdf[['node', 'matchFwy', 'snap_distance']].rename(
        columns={'node': 'B',
                 'matchFwy': 'matchFwy_B',
                 'snap_distance': 'distFwy_B'}),
    on='B',
    how='left')
# calculate angle: 
arterial_parallel.loc[arterial_parallel['FT'] == 7, 'dist_ratio'] = \
    abs(arterial_parallel['distFwy_A'] - arterial_parallel['distFwy_B']) / (arterial_parallel['DISTANCE'] * mile_to_meter)

display(arterial_parallel.head())
print(list(arterial_parallel))

print('median dist_ratio: {}'.format(arterial_parallel['dist_ratio'].median()))


A B nodes closest_fwy_gdf has 4,797 rows


Unnamed: 0,node,point,A,B,geometry,snap_distance
0,1601,POINT (-2054881.686 4666292.704),9313,10046,"LINESTRING (-2054320.903 4666007.667, -2054876...",348.257845
1,1604,POINT (-2052390.619 4666773.260),11943,1602,"LINESTRING (-2051607.225 4667219.453, -2052437...",42.32584
2,1605,POINT (-2054310.718 4666632.211),10048,9313,"LINESTRING (-2054012.623 4666057.004, -2054320...",615.086819
3,1606,POINT (-2040113.959 4627240.595),1622,1668,"LINESTRING (-2046184.115 4620862.849, -2046032...",8138.006203
4,1607,POINT (-2042251.400 4627463.591),1622,1668,"LINESTRING (-2046184.115 4620862.849, -2046032...",6930.727858


Unnamed: 0,A,B,DISTANCE,SPDCLASS,CAPCLASS,LANES,TSIN,GL,OT,CAP,...,VOL24HR_TOT,DELAY24HR,VMT24HR,VHT24HR,geometry,matchFwy_A,distFwy_A,matchFwy_B,distFwy_B,dist_ratio
0,1601,8838,0.29,47,47,2,0,6,0.0,1050,...,10075.41211,1.1484,2921.86938,98.54405,"LINESTRING (588038.000 4245402.000, 588245.000...",9313-10046,348.257845,9313-10046,522.542979,0.373434
1,1601,8844,0.29,47,47,2,0,6,0.0,1050,...,15.13191,0.0,4.38825,0.14628,"LINESTRING (588038.000 4245402.000, 587756.000...",9313-10046,348.257845,10046-9314,134.962443,0.457019
2,1604,8849,0.64,47,47,1,0,6,0.0,1050,...,3490.64478,0.92109,2234.0127,75.38818,"LINESTRING (590059.000 4246594.000, 590716.000...",11943-1602,42.32584,9317-10051,113.289886,0.068898
3,1604,11966,0.6,47,47,1,0,6,0.0,1050,...,1234.88428,0.10366,740.93054,24.80134,"LINESTRING (590059.000 4246594.000, 589379.000...",11943-1602,42.32584,10050-9312,209.945534,0.17359
4,1605,8830,0.76,47,47,2,0,6,0.0,1050,...,1437.70776,0.05878,1092.65784,36.48071,"LINESTRING (588430.000 4245875.000, 587120.000...",10048-9313,615.086819,9314-9308,1220.457247,0.494947


['A', 'B', 'DISTANCE', 'SPDCLASS', 'CAPCLASS', 'LANES', 'TSIN', 'GL', 'OT', 'CAP', 'AT', 'FT', 'SIGCOR', 'TOS', 'AUX', 'YEAR', 'FFS', 'FFT', 'FT2000', 'ROUTENUM', 'HOT', 'TOLLCLASS', 'STATE', 'CITYID', 'CITYNAME', 'REGFREIGHT', 'BRT', 'TOLLEA_DA', 'TOLLEA_S2', 'TOLLEA_S3', 'TOLLEA_VSM', 'TOLLEA_SML', 'TOLLEA_MED', 'TOLLEA_LRG', 'TOLLAM_DA', 'TOLLAM_S2', 'TOLLAM_S3', 'TOLLAM_VSM', 'TOLLAM_SML', 'TOLLAM_MED', 'TOLLAM_LRG', 'TOLLMD_DA', 'TOLLMD_S2', 'TOLLMD_S3', 'TOLLMD_VSM', 'TOLLMD_SML', 'TOLLMD_MED', 'TOLLMD_LRG', 'TOLLPM_DA', 'TOLLPM_S2', 'TOLLPM_S3', 'TOLLPM_VSM', 'TOLLPM_SML', 'TOLLPM_MED', 'TOLLPM_LRG', 'TOLLEV_DA', 'TOLLEV_S2', 'TOLLEV_S3', 'TOLLEV_VSM', 'TOLLEV_SML', 'TOLLEV_MED', 'TOLLEV_LRG', 'ROUTEDIR', 'PROJ', 'AUTOOPC', 'SMTROPC', 'LRTROPC', 'BUSOPC', 'VOT', 'TRUCKVOT', 'SR2COSTSHAR', 'SR3COSTSHAR', 'TRKPCE', 'FIRSTVALUE', 'OWNEDAV_ZPV', 'TNC_ZPV_FAC', 'MEANS_BASED', 'TOLL_FLAT', 'AV_PCE', 'HOVXPEN', '_SEGMENT', '_DIR', 'ACTION', 'PROJECT', '_PROJECT', 'PNROK', 'VOL_S2AV', '

In [12]:
# nearest fwy link for each arterial link centroid
# major_art_controid_gdf = major_art_controid_gdf.to_crs(26915)
centroid_closest_fwy_gdf = get_nearest_link_by_perpendicular_distance(major_art_controid_gdf, freeway_gdf, offset)
print('centroid_closest_fwy_gdf has {:,} rows'.format(centroid_closest_fwy_gdf.shape[0]))
display(centroid_closest_fwy_gdf.head())

# join it back to arterial links

# add a field to represent freeway links with node pairs
centroid_closest_fwy_gdf['matchFwy'] = centroid_closest_fwy_gdf['A'].astype(str) + '-' + centroid_closest_fwy_gdf['B'].astype(str)

# add a field to represent arterial links with node pairs - to be joined with 'node' in the centroid data
arterial_parallel['A-B'] = arterial_parallel['A'].astype(str) + '-' + arterial_parallel['B'].astype(str)

arterial_parallel = pd.merge(
    arterial_parallel,
    centroid_closest_fwy_gdf[['node', 'matchFwy', 'snap_distance']].rename(
        columns={'node': 'A-B',
                 'matchFwy': 'matchFwy_C',
                 'snap_distance': 'distFwy_C'}),
    on='A-B',
    how='left')

display(arterial_parallel.head())
print(list(arterial_parallel))

centroid_closest_fwy_gdf has 10,938 rows


Unnamed: 0,node,point,A,B,geometry,snap_distance
0,10053-10534,POINT (-2070113.368 4515576.872),4233,4229,"LINESTRING (-2069161.346 4516192.366, -2069166...",948.913204
1,10053-12661,POINT (-2070472.952 4516078.553),9576,9577,"LINESTRING (-2069449.482 4516655.899, -2069291...",1147.306874
2,10241-6951,POINT (-2113817.422 4612121.418),15104,15105,"LINESTRING (-2113802.525 4611917.288, -2113983...",48.510354
3,10241-6953,POINT (-2113541.994 4612057.896),15104,15105,"LINESTRING (-2113802.525 4611917.288, -2113983...",291.121478
4,10241-7068,POINT (-2113791.623 4612431.856),15104,15105,"LINESTRING (-2113802.525 4611917.288, -2113983...",168.39737


Unnamed: 0,A,B,DISTANCE,SPDCLASS,CAPCLASS,LANES,TSIN,GL,OT,CAP,...,VHT24HR,geometry,matchFwy_A,distFwy_A,matchFwy_B,distFwy_B,dist_ratio,A-B,matchFwy_C,distFwy_C
0,1601,8838,0.29,47,47,2,0,6,0.0,1050,...,98.54405,"LINESTRING (588038.000 4245402.000, 588245.000...",9313-10046,348.257845,9313-10046,522.542979,0.373434,1601-8838,9313-10046,435.401386
1,1601,8844,0.29,47,47,2,0,6,0.0,1050,...,0.14628,"LINESTRING (588038.000 4245402.000, 587756.000...",9313-10046,348.257845,10046-9314,134.962443,0.457019,1601-8844,10046-9314,240.889048
2,1604,8849,0.64,47,47,1,0,6,0.0,1050,...,75.38818,"LINESTRING (590059.000 4246594.000, 590716.000...",11943-1602,42.32584,9317-10051,113.289886,0.068898,1604-8849,10051-10050,45.290049
3,1604,11966,0.6,47,47,1,0,6,0.0,1050,...,24.80134,"LINESTRING (590059.000 4246594.000, 589379.000...",11943-1602,42.32584,10050-9312,209.945534,0.17359,1604-11966,10050-9312,63.319987
4,1605,8830,0.76,47,47,2,0,6,0.0,1050,...,36.48071,"LINESTRING (588430.000 4245875.000, 587120.000...",10048-9313,615.086819,9314-9308,1220.457247,0.494947,1605-8830,10046-9314,909.528153


['A', 'B', 'DISTANCE', 'SPDCLASS', 'CAPCLASS', 'LANES', 'TSIN', 'GL', 'OT', 'CAP', 'AT', 'FT', 'SIGCOR', 'TOS', 'AUX', 'YEAR', 'FFS', 'FFT', 'FT2000', 'ROUTENUM', 'HOT', 'TOLLCLASS', 'STATE', 'CITYID', 'CITYNAME', 'REGFREIGHT', 'BRT', 'TOLLEA_DA', 'TOLLEA_S2', 'TOLLEA_S3', 'TOLLEA_VSM', 'TOLLEA_SML', 'TOLLEA_MED', 'TOLLEA_LRG', 'TOLLAM_DA', 'TOLLAM_S2', 'TOLLAM_S3', 'TOLLAM_VSM', 'TOLLAM_SML', 'TOLLAM_MED', 'TOLLAM_LRG', 'TOLLMD_DA', 'TOLLMD_S2', 'TOLLMD_S3', 'TOLLMD_VSM', 'TOLLMD_SML', 'TOLLMD_MED', 'TOLLMD_LRG', 'TOLLPM_DA', 'TOLLPM_S2', 'TOLLPM_S3', 'TOLLPM_VSM', 'TOLLPM_SML', 'TOLLPM_MED', 'TOLLPM_LRG', 'TOLLEV_DA', 'TOLLEV_S2', 'TOLLEV_S3', 'TOLLEV_VSM', 'TOLLEV_SML', 'TOLLEV_MED', 'TOLLEV_LRG', 'ROUTEDIR', 'PROJ', 'AUTOOPC', 'SMTROPC', 'LRTROPC', 'BUSOPC', 'VOT', 'TRUCKVOT', 'SR2COSTSHAR', 'SR3COSTSHAR', 'TRKPCE', 'FIRSTVALUE', 'OWNEDAV_ZPV', 'TNC_ZPV_FAC', 'MEANS_BASED', 'TOLL_FLAT', 'AV_PCE', 'HOVXPEN', '_SEGMENT', '_DIR', 'ACTION', 'PROJECT', '_PROJECT', 'PNROK', 'VOL_S2AV', '

In [13]:
print(arterial_parallel.crs)

epsg:26910


In [14]:
# write out
arterial_parallel.to_file(r'L:\Application\Model_One\NextGenFwys\INPUT_DEVELOPMENT\pathway2\arterial_parallelness.shp')
print('wrote {:,} rows'.format(arterial_parallel.shape[0]))

  arterial_parallel.to_file(r'L:\Application\Model_One\NextGenFwys\INPUT_DEVELOPMENT\pathway2\arterial_parallelness.shp')


wrote 10,938 rows
