# 01_PL_04a_test_create_network()

__Notebook originally in vref repository.__

The notebook was used to check whether there are any changes to the input network when using function network_entities() [From VREF repository] vs using create_network() [From accesibilidad_urbana()]

* __RESULT:__ No differences as long as "remove_redundant_nodes()" is not used in network_entities()
* __NOTE: Result no longer applies as remove_redundant_nodes() and network_entities() functions were updated__

## Import libraries

In [1]:
# Relative path from this notebook's folder to the project root.
# It is the prefix of every data path defined below and the folder added to sys.path in order to import `src`.
first_folder_path = "../"

In [2]:
import os
import sys

import pandas as pd
import geopandas as gpd
import osmnx as ox
import numpy as np
import zipfile
import matplotlib.pyplot as plt
import seaborn as sns

# To create Point from coordinates
from shapely import Point
# For calculate distance between points
import math
# To know if it is a LineString or a MultiLineString, and create them
from shapely.geometry import LineString, MultiLineString
# To split a line using a point in that line
from shapely.ops import split
# To reverse a line's geometry (Check for duplicated edges)
import shapely
# Time processes
import time

import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)

# Register the project root on sys.path (only once), then load the project package from it
module_path = os.path.abspath(os.path.join(first_folder_path))
if module_path not in sys.path:
    sys.path.append(module_path)
import src

## Notebook config [Will use Tessellations only for this test]

In [3]:
# ----- ----- ----- Input (Accepts .zip files, must unzip in following cell) ----- ----- -----
# NOTE: despite the "_dir" suffix, each of the four variables below holds the path to a single FILE.
# Boeing input network - Specify nodes and edges files.
boeing_nodes_dir = first_folder_path + "data/output/shape/network_boeing/guadalajara/guadalajara_nodes.shp"
boeing_edges_dir = first_folder_path + "data/output/shape/network_boeing/guadalajara/guadalajara_edges.shp"
# Tessellations input network - Specify nodes and edges files.
tess_nodes_dir = first_folder_path+ "data/output/shape/network_tessellations/guadalajarapolys/nodes_guadalajarapolys_tessellation.gpkg"
tess_edges_dir = first_folder_path+ "data/output/shape/network_tessellations/guadalajarapolys/edges_guadalajarapolys_tessellation.gpkg"
# Alternative tessellation inputs (shapefile nodes / zipped edges), kept for reference:
#tess_nodes_dir = first_folder_path+ "data/output/shape/network_tessellations/guadalajara/guadalajara_tessellations_nodes.shp"
#tess_edges_dir = first_folder_path+ "data/output/shape/network_tessellations/guadalajara/guadalajara_tessellations_edges.zip"


# ----- ----- ----- Projection to be used when needed ----- ----- -----
# CRS that every loaded layer is set or reprojected to in the loading cells
projected_crs = "EPSG:32613" #[ "EPSG:32618" Medellín, "EPSG:32613" Guadalajara]

# ----- ----- ----- Output ----- ----- -----
# Output dir - Specify where outputs will be saved (folder path, must end with "/")
output_dir = first_folder_path + "data/output/shape/network_project_tests/guadalajarapolys/network_entities()/"

# Outputs to be saved - Specify which steps will be saved
# PART 01 STEP 01 - Saves pre-formatted Boeing nodes and edges [Base network], and tessellation nodes and edges [Complementary network]
# (checked by the "Input data - Save" cell; the tessellation writes are currently commented out there)
localsave_01_01 = True

# Show dirs
print(boeing_nodes_dir)
print(boeing_edges_dir)
print(tess_nodes_dir)
print(tess_edges_dir)

../data/output/shape/network_boeing/guadalajara/guadalajara_nodes.shp
../data/output/shape/network_boeing/guadalajara/guadalajara_edges.shp
../data/output/shape/network_tessellations/guadalajarapolys/nodes_guadalajarapolys_tessellation.gpkg
../data/output/shape/network_tessellations/guadalajarapolys/edges_guadalajarapolys_tessellation.gpkg


#### __Unzip files if necessary.__ (Was necessary for Guadalajara's tessellation edges due to size.)

In [4]:
# NOTE: this cell is disabled. The unzip code below is wrapped in a string literal assigned to `a`,
# so none of it runs. To extract the archive, remove the `a="""` line so the statements execute
# (the closing `#"""` line then becomes a plain comment).
a="""
# Input dir to be extracted
input_dir = tess_edges_dir
# Output location of extracted data (unzipped files)
save_path = "../data/output/shape/network_tessellations/guadalajara/"
# Extract .zip
with zipfile.ZipFile(input_dir, "r") as zip_ref:
    zip_ref.extractall(save_path)
#"""

In [5]:
# Name of file to be looked for (input dir replacement)
#tess_edges_dir = "../data/output/shape/network_tessellations/guadalajara/guadalajara_tessellations_edges.shp"

## Functions

### __Functions -__ Common functions

In [6]:
def produce_osmid(nodes_network_1, nodes_network_2, previously_produced):
    """Return the first osmid, counting up from `previously_produced`, that neither network uses.

    Parameters
    ----------
    nodes_network_1, nodes_network_2 : DataFrame-like
        Node tables exposing an `osmid` column.
    previously_produced : int
        First candidate to try. It is returned unchanged when it is not
        present in either network.

    Returns
    -------
    int
        The smallest value >= `previously_produced` absent from both `osmid` columns.
    """
    # Collect the taken osmids once: the node tables do not change while searching,
    # so rebuilding both lists on every candidate would only repeat the same work.
    used_osmids = set(nodes_network_1.osmid.unique()) | set(nodes_network_2.osmid.unique())

    produced_osmid = previously_produced
    # Try consecutive candidates until a free one is reached
    while produced_osmid in used_osmids:
        produced_osmid += 1

    return produced_osmid

In [7]:
# Distance between two points
def distance_between_points(point1, point2):
    """Return the straight-line distance between two points, rounded to 2 decimals.

    Both arguments are indexable coordinate pairs: position 0 is read as the
    first coordinate and position 1 as the second.
    """
    delta_x = point1[0] - point2[0]
    delta_y = point1[1] - point2[1]
    squared_distance = delta_x**2 + delta_y**2
    return round(math.sqrt(squared_distance), 2)

## __Part 01 - Step 00__ - Load and preprocess networks

This step __loads input networks__ (Boeing nodes and edges, Tessellations nodes and edges) __and transforms their ID data (nodes 'osmid', edges 'u' and 'v') into coordinates__ using function src.create_network(). Inside that function a modification was made so that a unique key is assigned to each edge, starting from 0 and increasing by one each time an edge with the same 'u' and 'v' is found.

The resulting networks id data (nodes 'osmid', edges 'u' and 'v') are then modified again to __ensure that there are no duplicates in those columns__, even after concatenating both nodes gdfs and both edges gdfs.

### __Input data__ - Boeing network

In [22]:
# Load nodes data
boeing_nodes = gpd.read_file(boeing_nodes_dir)

# Set CRS
if boeing_nodes.crs != projected_crs:
    try:
        boeing_nodes = boeing_nodes.set_crs(projected_crs)
    except:
        boeing_nodes = boeing_nodes.to_crs(projected_crs)

# Filter and rename data
print(list(boeing_nodes.columns))
boeing_nodes.reset_index(inplace=True)
boeing_nodes = boeing_nodes[['ID','geometry']]
boeing_nodes.rename(columns={'ID':'osmid'},inplace=True)

# Show
print(boeing_nodes.crs)
print(boeing_nodes.shape)
boeing_nodes.head(2)

['ID', 'coord_X', 'coord_Y', 'coord_Z', 'Type', 'geometry']
EPSG:32613
(82691, 2)


Unnamed: 0,osmid,geometry
0,28751344,POINT (676378.709 2291144.858)
1,28753224,POINT (675314.534 2289928.819)


In [23]:
# Load edges data
boeing_edges = gpd.read_file(boeing_edges_dir)

# Set CRS
if boeing_edges.crs != projected_crs:
    try:
        boeing_edges = boeing_edges.set_crs(projected_crs)
    except:
        boeing_edges = boeing_edges.to_crs(projected_crs)

# Filter and rename data
print(list(boeing_edges.columns))
boeing_edges.reset_index(inplace=True)
boeing_edges['key'] = 0
boeing_edges = boeing_edges[['from','to','key','geometry']]
boeing_edges.rename(columns={'from':'u','to':'v'},inplace=True)

# Show
print(boeing_edges.crs)
print(boeing_edges.shape)
boeing_edges.head(2)

['ID', 'from', 'to', 'length', 'grade_abs', 'speed', 'time', 'Type', 'geometry']
EPSG:32613
(122381, 4)


Unnamed: 0,u,v,key,geometry
0,28751344,1610958191,0,"LINESTRING (676378.709 2291144.858, 676386.243..."
1,28757777,28751344,0,"LINESTRING (676328.534 2290984.966, 676375.494..."


In [24]:
# Transform Boeing nodes and edges ID data to coordinates
boeing_coord_nodes, boeing_coord_edges = src.network_entities(boeing_nodes,boeing_edges,crs=projected_crs,expand_coords=(True,10))
#boeing_coord_nodes, boeing_coord_edges = src.create_network(boeing_nodes,boeing_edges,projected_crs,expand_coords=True)
boeing_coord_nodes.reset_index(inplace=True) #Added on 20250204 since function src.create_network was updated.
boeing_coord_edges.reset_index(inplace=True) #Added on 20250204 since function src.create_network was updated.
# Set projected crs
boeing_coord_nodes = boeing_coord_nodes.to_crs(projected_crs)
boeing_coord_edges = boeing_coord_edges.to_crs(projected_crs)

Creating unique ids for nodes based on coordinates...
Creating unique ids for edges based on coordinates...
Removing redundant nodes...
Resolving indexes u, v, key...


In [25]:
# Count every node whose osmid is shared with another node
# (keep=False flags all members of a duplicated group, as the tessellation check does)
duplicated_nodes_ids = int(boeing_coord_nodes.duplicated(subset=['osmid'],keep=False).sum())

if duplicated_nodes_ids>0:
    # Stop the notebook with an explicit exception that carries the diagnosis,
    # instead of relying on an undefined name to interrupt the run
    raise ValueError("""Function src.create_network() created two nodes with the same osmid.
    If src.create_network's argument "expand_coords" is set to False, try setting to True.
    The problem is probably due to two nodes being one meter or less apart from each other.
    These nodes are duplicated, and the edges comming out of each node are now assigned to an osmid with two nodes.
    [expand_coords=True should solve the issue]
    """)

# Show
print(boeing_coord_nodes.shape)
print(boeing_coord_nodes.crs)
print(boeing_coord_nodes.dtypes)
print(f"Duplicated osmids on nodes: {duplicated_nodes_ids}.")
boeing_coord_nodes.head(2)

(82230, 5)
EPSG:32613
osmid              object
geometry         geometry
streets_count     float64
x                 float64
y                 float64
dtype: object
Duplicated osmids on nodes: 0.


Unnamed: 0,osmid,geometry,streets_count,x,y
0,676378722911448,POINT (676378.709 2291144.858),3.0,676378.709485,2291145.0
1,675314522899288,POINT (675314.534 2289928.819),3.0,675314.533972,2289929.0


In [26]:
# Number of edges whose (u, v, key) triple appears more than once; every member of a repeated group is counted
duplicated_edges_ids = int(boeing_coord_edges.duplicated(subset=['u','v','key'],keep=False).sum())

# Show shape, CRS and dtypes (one per line), then the duplicate count and a preview
print(boeing_coord_edges.shape, boeing_coord_edges.crs, boeing_coord_edges.dtypes, sep="\n")
print(f"Duplicated ids on edges: {duplicated_edges_ids}.")
boeing_coord_edges.head(2)

(121920, 5)
EPSG:32613
u             object
v             object
key            int64
geometry    geometry
length       float64
dtype: object
Duplicated ids on edges: 0.


Unnamed: 0,u,v,key,geometry,length
0,676378722911448,676400122911492,0,"LINESTRING (676378.709 2291144.858, 676386.243...",22.84475
1,670238022722194,670224522721779,0,"LINESTRING (670238.073 2272219.403, 670237.000...",43.566106


### __Input data__ - Tessellations network

In [8]:
# Load nodes data
tess_nodes = gpd.read_file(tess_nodes_dir)

# Set CRS
if tess_nodes.crs != projected_crs:
    try:
        tess_nodes = tess_nodes.set_crs(projected_crs)
    except:
        tess_nodes = tess_nodes.to_crs(projected_crs)

# Filter and rename data
print(list(tess_nodes.columns))
tess_nodes.reset_index(inplace=True)
tess_nodes = tess_nodes[['osmid','geometry']]

# Show
print(tess_nodes.crs)
print(tess_nodes.shape)
tess_nodes.head(2)

['osmid', 'streets_count', 'x', 'y', 'street_count', 'cluster', 'geometry']
EPSG:32613
(3436, 2)


Unnamed: 0,osmid,geometry
0,65818714228266312,POINT (658187.146 2282663.122)
1,65849674228275352,POINT (658496.741 2282753.520)


In [9]:
# Load edges data
tess_edges = gpd.read_file(tess_edges_dir)

# Set CRS
if tess_edges.crs != projected_crs:
    try:
        tess_edges = tess_edges.set_crs(projected_crs)
    except:
        tess_edges = tess_edges.to_crs(projected_crs)

# Filter and rename data
print(list(tess_edges.columns))
tess_edges.reset_index(inplace=True)
tess_edges = tess_edges[['u','v','key','geometry']]

# Show
print(tess_edges.crs)
print(tess_edges.shape)
tess_edges.head(2)

['u', 'v', 'key', 'length', 'geometry']
EPSG:32613
(5790, 4)


Unnamed: 0,u,v,key,geometry
0,65818714228266312,65849674228275352,0,"LINESTRING (658187.146 2282663.122, 658187.446..."
1,65818714228266312,65825936228287343,0,"LINESTRING (658187.146 2282663.122, 658158.022..."


In [10]:
# Transform Tessellations nodes and edges ID data to coordinates
tess_coord_nodes, tess_coord_edges = src.network_entities(tess_nodes,tess_edges,crs=projected_crs,expand_coords=(True,10))
#tess_coord_nodes, tess_coord_edges = src.create_network(tess_nodes,tess_edges,projected_crs,expand_coords=True)
tess_coord_nodes.reset_index(inplace=True) #Added on 20250204 since function src.create_network was updated.
tess_coord_edges.reset_index(inplace=True) #Added on 20250204 since function src.create_network was updated.
# Set projected crs
tess_coord_nodes = tess_coord_nodes.to_crs(projected_crs)
tess_coord_edges = tess_coord_edges.to_crs(projected_crs)

Creating unique ids for nodes based on coordinates...
Creating unique ids for edges based on coordinates...
Removing redundant nodes...
Resolving indexes u, v, key...


In [11]:
# Count every node whose osmid is shared with another node
# (keep=False flags all members of a duplicated group)
duplicated_nodes_ids = int(tess_coord_nodes.duplicated(subset=['osmid'],keep=False).sum())

if duplicated_nodes_ids>0:
    # Stop the notebook with an explicit exception that carries the diagnosis,
    # instead of relying on an undefined name to interrupt the run
    raise ValueError("""Function src.create_network() created two nodes with the same osmid.
    If src.create_network's argument "expand_coords" is set to False, try setting to True.
    The problem is probably due to two nodes being one meter or less apart from each other.
    These nodes are duplicated, and the edges comming out of each node are now assigned to an osmid with two nodes.
    [expand_coords=True should solve the issue]
    """)

# Show
print(tess_coord_nodes.shape)
print(tess_coord_nodes.crs)
print(tess_coord_nodes.dtypes)
print(f"Duplicated osmids on nodes: {duplicated_nodes_ids}.")
tess_coord_nodes.head(2)

(3432, 5)
EPSG:32613
osmid              object
geometry         geometry
streets_count     float64
x                 float64
y                 float64
dtype: object
Duplicated osmids on nodes: 0.


Unnamed: 0,osmid,geometry,streets_count,x,y
0,658187122826631,POINT (658187.146 2282663.122),3.0,658187.14612,2282663.0
1,658496722827535,POINT (658496.741 2282753.520),3.0,658496.741134,2282754.0


In [12]:
# Number of edges whose (u, v, key) triple appears more than once; every member of a repeated group is counted
duplicated_edges_ids = int(tess_coord_edges.duplicated(subset=['u','v','key'],keep=False).sum())

# Show shape, CRS and dtypes (one per line), then the duplicate count and a preview
print(tess_coord_edges.shape, tess_coord_edges.crs, tess_coord_edges.dtypes, sep="\n")
print(f"Duplicated ids on edges: {duplicated_edges_ids}.")
tess_coord_edges.head(2)

(5786, 5)
EPSG:32613
u             object
v             object
key            int64
geometry    geometry
length       float64
dtype: object
Duplicated ids on edges: 0.


Unnamed: 0,u,v,key,geometry,length
0,658187122826631,658496722827535,0,"LINESTRING (658187.146 2282663.122, 658187.446...",337.494653
1,661818822868621,661818822868621,0,"LINESTRING (661818.822 2286862.149, 661825.049...",108.405303


### __Input data__ - Save

In [45]:
# Save the coordinate-based Boeing network when the config flag asks for it
if localsave_01_01:
    # Create the destination folder first so the writes below do not fail on a fresh checkout
    # (NOTE(review): the GeoPackage writer is not expected to create missing folders - confirm)
    os.makedirs(output_dir, exist_ok=True)
    boeing_coord_nodes.to_file(output_dir + "boeing_nodes_entities_2.gpkg")
    boeing_coord_edges.to_file(output_dir + "boeing_edges_entities_2.gpkg")
    # Tessellation outputs are not written for this test:
    #tess_coord_nodes.to_file(output_dir + "tess_nodes_entities_2.gpkg")
    #tess_coord_edges.to_file(output_dir + "tess_edges_entities_2.gpkg")

## Analysis

In [43]:
# Prefix of the saved result files compared in this Analysis section;
# it is interpolated into the .gpkg file names read in the "Read data" cell.
network_being_analysed = 'boeing'

### Analysis - Read data

In [46]:
# Folder holding one sub-folder per network-building function under comparison.
# A dedicated variable is used so the notebook-wide `output_dir` setting is never overwritten here:
# if one of the reads below fails, the save cell still points at its configured folder.
tests_dir = first_folder_path + "data/output/shape/network_project_tests/guadalajarapolys/"

# Results of the old create_network()
nodes_old = gpd.read_file(tests_dir + f"old_create_network()/{network_being_analysed}_nodes_old.gpkg")
edges_old = gpd.read_file(tests_dir + f"old_create_network()/{network_being_analysed}_edges_old.gpkg")

# Results of the new create_network()
nodes_new = gpd.read_file(tests_dir + f"new_create_network()/{network_being_analysed}_nodes_new.gpkg")
edges_new = gpd.read_file(tests_dir + f"new_create_network()/{network_being_analysed}_edges_new.gpkg")

# Results of network_entities() (files written by the "Input data - Save" cell)
nodes_entities = gpd.read_file(tests_dir + f"network_entities()/{network_being_analysed}_nodes_entities_2.gpkg")
edges_entities = gpd.read_file(tests_dir + f"network_entities()/{network_being_analysed}_edges_entities_2.gpkg")

In [47]:
# Cast the network_entities() IDs to integers so they can be compared with the old/new results.
# The width is fixed to 64 bits: these coordinate-based IDs have 15 digits, which overflow the 32-bit
# integer that plain `int` maps to on some platforms (e.g. Windows with NumPy < 2).
nodes_entities['osmid'] = nodes_entities['osmid'].astype('int64')
edges_entities['u'] = edges_entities['u'].astype('int64')
edges_entities['v'] = edges_entities['v'].astype('int64')

### Analysis - Compare size and dtypes of data

In [48]:
# Show
print(nodes_old.shape)
print(nodes_old.crs)
print(nodes_old.dtypes)

print(nodes_new.shape)
print(nodes_new.crs)
print(nodes_new.dtypes)

print(nodes_entities.shape)
print(nodes_entities.crs)
print(nodes_entities.dtypes)

(82691, 2)
EPSG:32613
osmid          int64
geometry    geometry
dtype: object
(82691, 4)
EPSG:32613
osmid          int64
x            float64
y            float64
geometry    geometry
dtype: object
(82230, 5)
EPSG:32613
osmid               int64
streets_count     float64
x                 float64
y                 float64
geometry         geometry
dtype: object


In [49]:
# Show
print(edges_old.shape)
print(edges_old.crs)
print(edges_old.dtypes)

print(edges_new.shape)
print(edges_new.crs)
print(edges_new.dtypes)

print(edges_entities.shape)
print(edges_entities.crs)
print(edges_entities.dtypes)

(122381, 5)
EPSG:32613
u              int64
v              int64
key            int64
length       float64
geometry    geometry
dtype: object
(122380, 5)
EPSG:32613
u              int64
v              int64
key            int64
length       float64
geometry    geometry
dtype: object
(121920, 5)
EPSG:32613
u              int64
v              int64
key            int64
length       float64
geometry    geometry
dtype: object


### Analysis - Compare presence of unique IDs on data

#### Nodes

In [50]:
# Unique ID lists (osmid)
old_osmids = list(nodes_old.osmid.unique())
new_osmids = list(nodes_new.osmid.unique())
entities_osmids = list(nodes_entities.osmid.unique())

# Find if an osmid is present in one list but not in the other
missing_osmids_1 = set(old_osmids) ^ set(new_osmids)
missing_osmids_2 = set(old_osmids) ^ set(entities_osmids)
missing_osmids_3 = set(new_osmids) ^ set(entities_osmids)

# Show result
print(len(missing_osmids_1))
print(len(missing_osmids_2))
print(len(missing_osmids_3))

0
461
461


In [51]:
# Create unique edge_ids to compare
edges_old = src.create_unique_edge_id(edges_old)
edges_new = src.create_unique_edge_id(edges_new)
edges_entities = src.create_unique_edge_id(edges_entities)

# Unique ID lists
old_edgeids = list(edges_old.edge_id.unique())
new_edgeids = list(edges_new.edge_id.unique())
entities_edgeids = list(edges_entities.edge_id.unique())

# Find if an edge_id is present in one list but not in the other
missing_edge_ids_1 = set(old_edgeids) ^ set(new_edgeids)
missing_edge_ids_2 = set(old_edgeids) ^ set(entities_edgeids)
missing_edge_ids_3 = set(new_edgeids) ^ set(entities_edgeids)

print(len(missing_edge_ids_1))
print(len(missing_edge_ids_2))
print(len(missing_edge_ids_3))

1
1245
1246
