# 01_PL_09_Project_network_partitioning [Work in progress]

## Import libraries

In [1]:
# Relative path from this notebook to the project's top folder.
# It is used below both to make the `src` package importable and as the prefix of every data path.
first_folder_path = "../"

In [2]:
import os
import sys

import pandas as pd
import geopandas as gpd
import osmnx as ox
import numpy as np

import matplotlib.pyplot as plt
import seaborn as sns

# To create points (missing nodes) inside function network_partition()
from shapely.geometry import Point
# Time processes
import time

import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)

# Make the project's top folder importable (once), then load the local `src` package
module_path = os.path.abspath(os.path.join(first_folder_path))
if module_path not in sys.path:
    sys.path.append(module_path)
import src

## Notebook config

In [22]:
# ----- ----- ----- City of analysis ----- ----- -----
city = 'guadalajara'

# ----- ----- ----- Input data directories ----- ----- -----
# Nodes and edges from notebook 01_PL_04_Combine_networks
nodes_dir = f"{first_folder_path}data/output/shape/network_project/{city}/part02_step07_ntwsrebuild/{city}_nodes_proj_net_rebuilt.gpkg"
edges_dir = f"{first_folder_path}data/output/shape/network_project/{city}/part02_step07_ntwsrebuild/{city}_edges_proj_net_rebuilt.gpkg"

# Polygons to create network partitions
# NOTE(review): this path is fixed to the Guadalajara polygons and does not follow `city` — confirm when changing city.
polys_dir = f"{first_folder_path}data/input/shape/Poligonos_Guadalajara/Poligonos.gpkg"

# ----- ----- ----- Projection to be used when needed ----- ----- -----
projected_crs = "EPSG:32613"  # Guadalajara = 'EPSG:32613'

# ----- ----- ----- Output ----- ----- -----
# Folder that receives one file per partition; nothing is written unless local_save is True
output_dir = f"{first_folder_path}data/output/shape/network_project_partitions/"
local_save = False

## Load data

In [4]:
# Load areas of interest (polygons used to partition the network)
polys_gdf = gpd.read_file(polys_dir)
# Reproject only when the file is not already in the working CRS
if polys_gdf.crs != projected_crs:
    polys_gdf = polys_gdf.to_crs(projected_crs)
    print(f"Changed crs to {projected_crs}.")

# Show
print(polys_gdf.crs)
# .info() prints its own summary and returns None, so it must not be wrapped
# in print() (that would append a stray "None" line to the output)
polys_gdf.info()
polys_gdf

EPSG:32613
<class 'geopandas.geodataframe.GeoDataFrame'>
RangeIndex: 3 entries, 0 to 2
Data columns (total 2 columns):
 #   Column    Non-Null Count  Dtype   
---  ------    --------------  -----   
 0   name      3 non-null      object  
 1   geometry  3 non-null      geometry
dtypes: geometry(1), object(1)
memory usage: 176.0+ bytes
None


Unnamed: 0,name,geometry
0,miramar,"POLYGON ((659404.786 2283009.827, 660250.98 22..."
1,providencia,"POLYGON ((668619.092 2288755.457, 668816.754 2..."
2,colinas,"POLYGON ((665060.447 2289080.643, 665057.51 22..."


In [6]:
# Load nodes
cons_nodes = gpd.read_file(nodes_dir)
# Reproject only when the file is not already in the working CRS
# NOTE(review): reprojecting changes 'geometry' only; the 'x'/'y' columns are kept as read
# from file — confirm they are already expressed in projected_crs.
if cons_nodes.crs != projected_crs:
    cons_nodes = cons_nodes.to_crs(projected_crs)
    print(f"Changed crs to {projected_crs}.")
# Filter for data of interest
cons_nodes = cons_nodes[['osmid','x','y','geometry']]

# Show
print(cons_nodes.crs)
# .info() prints its own summary and returns None, so it must not be wrapped
# in print() (that would append a stray "None" line to the output)
cons_nodes.info()
cons_nodes.head(2)

EPSG:32613
<class 'geopandas.geodataframe.GeoDataFrame'>
RangeIndex: 124621 entries, 0 to 124620
Data columns (total 4 columns):
 #   Column    Non-Null Count   Dtype   
---  ------    --------------   -----   
 0   osmid     124621 non-null  int64   
 1   x         124621 non-null  float64 
 2   y         124621 non-null  float64 
 3   geometry  124621 non-null  geometry
dtypes: float64(2), geometry(1), int64(1)
memory usage: 3.8 MB
None


Unnamed: 0,osmid,x,y,geometry
0,67637870229114485,676378.709485,2291145.0,POINT (676378.709 2291144.858)
1,67640019229114922,676400.196372,2291149.0,POINT (676400.196 2291149.223)


In [8]:
# Legacy check (kept for reference): before the network was rebuilt, some 'osmid' values were lists stored as strings (e.g. "[1, 2]")
#mask = cons_nodes['osmid'].astype(str).str.startswith('[') & cons_nodes['osmid'].astype(str).str.endswith(']')
#rows_with_lists_as_strings = cons_nodes[mask]
#rows_with_lists_as_strings

In [9]:
# Load edges
cons_edges = gpd.read_file(edges_dir)
# Reproject only when the file is not already in the working CRS
if cons_edges.crs != projected_crs:
    cons_edges = cons_edges.to_crs(projected_crs)
    print(f"Changed crs to {projected_crs}.")
# Filter for data of interest
cons_edges = cons_edges[['u','v','key','geometry']]

# Show
print(cons_edges.crs)
# .info() prints its own summary and returns None, so it must not be wrapped
# in print() (that would append a stray "None" line to the output)
cons_edges.info()
cons_edges.head(2)

EPSG:32613
<class 'geopandas.geodataframe.GeoDataFrame'>
RangeIndex: 197439 entries, 0 to 197438
Data columns (total 4 columns):
 #   Column    Non-Null Count   Dtype   
---  ------    --------------   -----   
 0   u         197439 non-null  int64   
 1   v         197439 non-null  int64   
 2   key       197439 non-null  int64   
 3   geometry  197439 non-null  geometry
dtypes: geometry(1), int64(3)
memory usage: 6.0 MB
None


Unnamed: 0,u,v,key,geometry
0,67637870229114485,67640019229114922,0,"MULTILINESTRING ((676378.709 2291144.858, 6763..."
1,67640019229114922,67640526229117504,0,"MULTILINESTRING ((676400.196 2291149.223, 6764..."


## Create partitions

In [20]:
def _edge_endpoints(geom):
    """Return the (first, last) coordinate tuples of a LineString or MultiLineString."""
    # Multi-part geometries expose their parts through `.geoms` and have no `.coords`
    # of their own, so the first coordinate of the first part and the last coordinate
    # of the last part are used.
    # NOTE(review): assumes the parts of a multi-part edge are ordered from 'u' to 'v' — confirm.
    parts = list(geom.geoms) if hasattr(geom, "geoms") else [geom]
    return parts[0].coords[0], parts[-1].coords[-1]


def network_partition(aoi, nodes_gdf, edges_gdf, projected_crs):
    """
    Clip an already loaded network (nodes and edges) to an area of interest and build its graph.

    Edges that intersect the area of interest (buffered by 1 CRS unit) are kept, together with
    the nodes referenced by their 'u' and 'v' values. Any endpoint id without a matching node is
    created from the corresponding end of the first edge that references it. A summary of the
    created nodes is printed.

    Arguments:
        aoi (geopandas.GeoDataFrame): GeoDataFrame polygon boundary.
        nodes_gdf (geopandas.GeoDataFrame): GeoDataFrame containing network nodes with 'osmid', 'x', 'y' and 'geometry' as columns.
        edges_gdf (geopandas.GeoDataFrame): GeoDataFrame containing network edges with 'u', 'v', 'key' and 'geometry' as columns.
        projected_crs (str): Projected crs that the area of interest, nodes and edges are converted to.

    Returns:
        G (networkx.MultiDiGraph): Graph with edges and nodes within boundaries.
        nodes (geopandas.GeoDataFrame): GeoDataFrame for nodes within boundaries, indexed by 'osmid'.
        edges (geopandas.GeoDataFrame): GeoDataFrame for edges within boundaries, indexed by ('u', 'v', 'key').
    """

    # --- Prepare area of interest
    # Keep only the (slightly buffered) geometry, on a fresh 0..n-1 index
    aoi = aoi.to_crs(projected_crs)
    aoi = gpd.GeoDataFrame(geometry=aoi.buffer(1).reset_index(drop=True))

    # --- Prepare nodes (ids as strings so they compare equal to the edges' 'u'/'v')
    # NOTE(review): to_crs() reprojects 'geometry' only; 'x'/'y' are passed through unchanged.
    nodes_gdf = nodes_gdf[['osmid','x','y','geometry']].copy()
    nodes_gdf['osmid'] = nodes_gdf['osmid'].astype('str')
    nodes_gdf = nodes_gdf.to_crs(projected_crs)

    # --- Prepare edges
    edges_gdf = edges_gdf[['u','v','key','geometry']].copy()
    for id_col in ('u', 'v', 'key'):
        edges_gdf[id_col] = edges_gdf[id_col].astype('str')
    edges_gdf = edges_gdf.to_crs(projected_crs)

    # --- Identify edges within area of interest (Spatial join)
    edges_in_aoi = gpd.sjoin(edges_gdf, aoi).drop(columns=['index_right'])
    # An edge matching more than one polygon is returned once per match
    edges_in_aoi = edges_in_aoi.drop_duplicates()

    # --- Identify the nodes that are used by those edges (Using edge's 'u' and 'v' values)
    endpoint_ids = set(edges_in_aoi['u']).union(edges_in_aoi['v'])
    nodes_from_edges = nodes_gdf.loc[nodes_gdf['osmid'].isin(list(endpoint_ids))].copy()
    nodes_from_edges = nodes_from_edges.drop_duplicates().reset_index(drop=True)

    # --- OUTPUT: Final format for edges
    # Solve 'key' values
    edges = src.resolve_duplicates_indexes(edges_in_aoi, projected_crs)
    # Set indexes
    edges = edges.set_index(["u", "v", "key"])

    # --- Create any missing node that's not available to avoid problems when creating G
    # Reset index to be able to access 'u' and 'v' as columns
    edges_flat = edges.reset_index()
    known_ids = set(nodes_from_edges['osmid'])
    missing_records = []
    # 'u' is located at the start of its edge (position 0), 'v' at the end (position -1)
    for endpoint_col, coord_pos in (('u', 0), ('v', -1)):
        missing_ids = set(edges_flat[endpoint_col]) - known_ids
        # Geometry of the first edge that references each endpoint id
        first_geom = edges_flat.drop_duplicates(subset=endpoint_col).set_index(endpoint_col)['geometry']
        for osmid in missing_ids:
            coord = _edge_endpoints(first_geom.loc[osmid])[coord_pos]
            missing_records.append({'osmid': osmid,
                                    'x': coord[0],
                                    'y': coord[1],
                                    'geometry': Point(coord)})
        # Ids created from 'u' must not be created again when processing 'v'
        known_ids |= missing_ids

    nodes_needing_creation = len(missing_records)
    print(f"Needed creation of {nodes_needing_creation} missing nodes.")
    # List the created nodes whenever there is at least one
    if nodes_needing_creation > 0:
        print("Missing nodes:")
        print([record['osmid'] for record in missing_records])

    # --- OUTPUT: Final format for nodes (existing nodes + created nodes)
    if missing_records:
        missing_nodes = gpd.GeoDataFrame(missing_records, geometry='geometry', crs=projected_crs)
        nodes = pd.concat([nodes_from_edges, missing_nodes], ignore_index=True)
    else:
        nodes = nodes_from_edges
    nodes = nodes.set_index('osmid')

    # --- OUTPUT: Create G from final nodes and edges
    G = ox.graph_from_gdfs(nodes, edges)

    return G, nodes, edges

In [21]:
# Build one network partition per named polygon
for area_of_interest in polys_gdf['name'].unique():
    print(f"Clipping network for {area_of_interest}.")
    # Select current area of interest
    aoi = polys_gdf.loc[polys_gdf['name'] == area_of_interest].copy()
    # Clip network to current area of interest
    # (G, nodes and edges are overwritten on every iteration; only the last partition stays in memory)
    G, nodes, edges = network_partition(aoi, cons_nodes, cons_edges, projected_crs)
    # Save result locally
    if local_save:
        # Writing fails if the target folder does not exist yet
        os.makedirs(output_dir, exist_ok=True)
        nodes.to_file(output_dir + f"{area_of_interest}_nodes_proj_net_rebuilt.gpkg")
        # NOTE(review): saving edges is currently disabled — confirm whether it should be re-enabled.
        #edges.to_file(output_dir + f"{area_of_interest}_edges_proj_net_rebuilt.gpkg")

Clipping network for miramar.
Needed creation of 0 missing nodes.
Clipping network for providencia.
Needed creation of 0 missing nodes.
Clipping network for colinas.
Needed creation of 0 missing nodes.
