# 01_PL_06_Project_network_voronois

This notebook assigns population data (total population and population density) to the nodes of the project network, using a Voronoi polygon built around each node.

__Inputs:__
* Area of interest (City, Guadalajara)
* Blocks with population data
* Nodes from the project network, which was generated using the networks join procedure (Notebook 01_PL_04_Combine_networks)
  
__Outputs:__
* Voronoi polygons for the project_network
* Nodes with pop data (Population and density)

## Import libraries

In [1]:
# Relative path from this notebook to the project's base folder; it is used below
# both to make `src` importable and as the prefix of every data path.
first_folder_path = "../"

In [2]:
import os
import sys

import pandas as pd
import geopandas as gpd
import osmnx as ox
import numpy as np

import matplotlib.pyplot as plt
import seaborn as sns

# Time processes
import time

import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)

# Register the project's base folder on sys.path (only once) so that the
# local `src` package can be imported from this notebook.
module_path = os.path.abspath(os.path.join(first_folder_path))
if module_path not in sys.path:
    sys.path.append(module_path)
import src

In [3]:
# Record the OSMnx version used for this run
print(ox.__version__)

2.0.1


## Notebook config

In [4]:
# ===== City of analysis =====
city = 'guadalajara'

# ===== Input data directories =====
# Project network (nodes and edges) generated by notebook 01_PL_04_Combine_networks
nodes_dir = first_folder_path + f"data/output/shape/network_project/{city}/part02_step05_ntwsclean/{city}_nodes_proj_net_final.gpkg"
edges_dir = first_folder_path + f"data/output/shape/network_project/{city}/part02_step05_ntwsclean/{city}_edges_proj_net_final.gpkg"

# Blocks layer that carries the population data
blocks_dir = first_folder_path + "data/input/shape/Manzanas_Pop_Guadalajara/pobcenso_inegi_20_mzaageb_mza_gdl.gpkg"
# Name of the blocks' unique ID column (set as the index when the blocks are loaded)
blocks_unique_id = 'cvegeo_mza'
# Name of the blocks column that holds the population count
pop_col = 'pobtot'

# ===== Projected CRS used whenever distances or areas are needed =====
projected_crs = "EPSG:32613" # Guadalajara = 'EPSG:32613'

# ===== Output =====
# Folder where the resulting voronoi layer is written
output_dir = first_folder_path + "data/output/shape/network_project_voronoi/"
# Set to False to skip writing the result to disk
local_save = True

## Load data

### __Load data__ - Pop data in blocks

In [4]:
# ----- ----- ----- Time start
time_1 = time.time()
# ----- ----- ----- Process
# Load blocks
blocks_gdf = gpd.read_file(blocks_dir)
# Reproject only when the layer is not already in the working projection
if blocks_gdf.crs != projected_crs:
    blocks_gdf = blocks_gdf.to_crs(projected_crs)
# Use the blocks' unique ID as index when that column is available
if blocks_unique_id in blocks_gdf.columns:
    blocks_gdf = blocks_gdf.set_index(blocks_unique_id)
# Keep only the population column and the geometry
blocks_gdf = blocks_gdf[[pop_col,'geometry']]
# ----- ----- ----- Time end
time_2 = time.time()
print(f"TIME: {time_2-time_1} seconds.")


# Show
print(blocks_gdf.crs)
# .info() prints its own report and returns None, so wrapping it in print() adds a stray "None"
blocks_gdf.info()
blocks_gdf.head(2)

TIME: 535.2859220504761 seconds.
EPSG:32613
<class 'geopandas.geodataframe.GeoDataFrame'>
Index: 54804 entries, 1412000620299011 to 1412000017065020
Columns: 237 entries, nom_ent to geometry
dtypes: float64(220), geometry(1), int64(2), object(14)
memory usage: 101.5+ MB
None


Unnamed: 0_level_0,nom_ent,nom_mun,nom_loc,pobtot,pobfem,pobmas,p_0a2,p_0a2_f,p_0a2_m,p_3ymas,...,cve_loc,cve_ageb,cve_mza,cvegeo_mun,cvegeo_loc,cvegeo_ageb,ambito,tipomza,city,geometry
cvegeo_mza,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1412000620299011,Jalisco,Zapopan,La Cuchilla (Extramuros de Tesistán),16,8.0,8.0,0.0,0.0,0.0,16.0,...,62,299,11,14120,141200062,1412000620299,Urbana,Típica,Guadalajara,"POLYGON ((659974.886 2297605.164, 659923.165 2..."
1412000620299032,Jalisco,Zapopan,La Cuchilla (Extramuros de Tesistán),18,8.0,10.0,0.0,0.0,0.0,8.0,...,62,299,32,14120,141200062,1412000620299,Urbana,Típica,Guadalajara,"POLYGON ((659879.976 2297823.117, 659923.355 2..."


### __Load data__ - Network

In [5]:
# ----- ----- ----- Time start
time_1 = time.time()
# ----- ----- ----- Process
# Load nodes
nodes_gdf = gpd.read_file(nodes_dir)
# Reproject only when the layer is not already in the working projection
if nodes_gdf.crs != projected_crs:
    nodes_gdf = nodes_gdf.to_crs(projected_crs)
    print(f"Changed crs to {projected_crs}.")
# Use 'osmid' as index when available; duplicated osmids are dropped first so the index is unique
if 'osmid' in nodes_gdf.columns:
    original_len = len(nodes_gdf)
    nodes_gdf = nodes_gdf.drop_duplicates(subset=['osmid'])
    new_len = len(nodes_gdf)
    nodes_gdf = nodes_gdf.set_index('osmid')
    # original minus new: the number of removed rows, reported as a non-negative count
    print(f"Dropped {original_len-new_len} nodes to set osmid as nodes index.")
# Keep only the coordinates and the geometry
nodes_gdf = nodes_gdf[['x','y','geometry']]
# ----- ----- ----- Time end
time_2 = time.time()
print(f"TIME: {time_2-time_1} seconds.")

# Show
print(nodes_gdf.crs)
# .info() prints its own report and returns None, so wrapping it in print() adds a stray "None"
nodes_gdf.info()
nodes_gdf.head(2)

Dropped 0 nodes to set osmid as nodes index.
TIME: 0.8252885341644287 seconds.
EPSG:32613
<class 'geopandas.geodataframe.GeoDataFrame'>
Index: 138541 entries, 67637870229114485 to 67447978227424881
Data columns (total 3 columns):
 #   Column    Non-Null Count   Dtype   
---  ------    --------------   -----   
 0   x         138541 non-null  float64 
 1   y         138541 non-null  float64 
 2   geometry  138541 non-null  geometry
dtypes: float64(2), geometry(1)
memory usage: 4.2 MB
None


Unnamed: 0_level_0,x,y,geometry
osmid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
67637870229114485,676378.709485,2291145.0,POINT (676378.709 2291144.858)
67531453228992881,675314.533972,2289929.0,POINT (675314.534 2289928.819)


In [6]:
# ----- ----- ----- Time start
time_1 = time.time()
# ----- ----- ----- Process
# Load edges
edges_gdf = gpd.read_file(edges_dir)
# Reproject only when the layer is not already in the working projection
if edges_gdf.crs != projected_crs:
    edges_gdf = edges_gdf.to_crs(projected_crs)
    print(f"Changed crs to {projected_crs}.")
# Use ('u','v','key') as index when available; duplicated triples are dropped first so the index is unique
if 'u' in edges_gdf.columns:
    original_len = len(edges_gdf)
    edges_gdf = edges_gdf.drop_duplicates(subset=['u','v','key'])
    new_len = len(edges_gdf)
    edges_gdf = edges_gdf.set_index(['u','v','key'])
    # original minus new: the number of removed rows, reported as a non-negative count
    print(f"Dropped {original_len-new_len} edges to set 'u','v' and 'key' as edges index.")
# Keep only the length and the geometry
edges_gdf = edges_gdf[['length','geometry']]
# ----- ----- ----- Time end
time_2 = time.time()
print(f"TIME: {time_2-time_1} seconds.")

# Show
print(edges_gdf.crs)
# .info() prints its own report and returns None, so wrapping it in print() adds a stray "None"
edges_gdf.info()
edges_gdf.head(2)

Dropped 0 edges to set 'u','v' and 'key' as nodes index.
TIME: 4.342734336853027 seconds.
EPSG:32613
<class 'geopandas.geodataframe.GeoDataFrame'>
MultiIndex: 211959 entries, (67637870229114485, 67640019229114922, 0) to (71055139227969248, 71063149227964435, 1)
Data columns (total 2 columns):
 #   Column    Non-Null Count   Dtype   
---  ------    --------------   -----   
 0   length    211959 non-null  float64 
 1   geometry  211959 non-null  geometry
dtypes: float64(1), geometry(1)
memory usage: 15.1 MB
None


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,length,geometry
u,v,key,Unnamed: 3_level_1,Unnamed: 4_level_1
67637870229114485,67640019229114922,0,22.84475,"MULTILINESTRING ((676378.709 2291144.858, 6763..."
67632853229098496,67637870229114485,0,167.579445,"MULTILINESTRING ((676328.534 2290984.966, 6763..."


## Consolidate network to reduce the number of intersections/nodes from where voronoi polygons will be created

In [7]:
# Check the index names
print(nodes_gdf.index.names)
# Check for duplicated index values
print(nodes_gdf.index.duplicated().sum())
# Check the geometry types (should all be points)
print(nodes_gdf.geom_type.value_counts())

nodes_gdf.head(2)

['osmid']
0
Point    138541
Name: count, dtype: int64


Unnamed: 0_level_0,x,y,geometry
osmid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
67637870229114485,676378.709485,2291145.0,POINT (676378.709 2291144.858)
67531453228992881,675314.533972,2289929.0,POINT (675314.534 2289928.819)


In [15]:
# Check the index names
print(edges_gdf.index.names)
# Check for duplicated index values
print(edges_gdf.index.duplicated().sum())
# Check the geometry types (should all be lines)
print(edges_gdf.geom_type.value_counts())

# Show
edges_gdf.head(2)

['u', 'v', 'key']
0
MultiLineString    211959
Name: count, dtype: int64


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,length,geometry
u,v,key,Unnamed: 3_level_1,Unnamed: 4_level_1
67637870229114485,67640019229114922,0,22.84475,"MULTILINESTRING ((676378.709 2291144.858, 6763..."
67632853229098496,67637870229114485,0,167.579445,"MULTILINESTRING ((676328.534 2290984.966, 6763..."


In [20]:
print('Creating graph...')
# ox.graph_from_gdfs raises "`gdf_edges` must be multi-indexed by `(u, v, key)`" not only when
# the edges index is malformed but also when some 'u' or 'v' is absent from the nodes index.
# Edges that point to nodes missing from nodes_gdf are therefore reported and left out.
# NOTE(review): confirm that leaving those edges out (instead of fixing the network upstream) is acceptable.
edge_u = edges_gdf.index.get_level_values('u')
edge_v = edges_gdf.index.get_level_values('v')
has_both_ends = edge_u.isin(nodes_gdf.index) & edge_v.isin(nodes_gdf.index)
if not has_both_ends.all():
    print(f"Leaving out {(~has_both_ends).sum()} edges whose 'u' or 'v' is not in the nodes index.")
G = ox.graph_from_gdfs(nodes_gdf, edges_gdf.loc[has_both_ends])
print(f'Consolidating graph using tolerance of 5 meters...')
# Consolidate graph: merge intersections within the tolerance and rebuild the graph topology
G2 = ox.consolidate_intersections(G, rebuild_graph=True, tolerance=5, dead_ends=True)
del G #Save space
# Extract nodes and edges from consolidated graph
cons_nodes, cons_edges = ox.graph_to_gdfs(G2)
del G2 #Save space
# Format nodes: replace the consolidated osmid with the original one
# NOTE(review): 'osmid_original' presumably holds a list of osmids for nodes that merged several
# intersections - confirm that such values are acceptable as index downstream.
print('Formating nodes...')
cons_nodes = cons_nodes.reset_index()
cons_nodes = cons_nodes.drop(columns=['osmid'])
cons_nodes = cons_nodes.rename(columns={'osmid_original':'osmid'})
cons_nodes = cons_nodes.set_index('osmid')
# Format edges: replace the consolidated u/v with the original ones
print('Formating edges...')
cons_edges = cons_edges.reset_index()
cons_edges = cons_edges.drop(columns=['u','v'])
cons_edges = cons_edges.rename(columns={'u_original':'u',
                                        'v_original':'v'})
cons_edges = cons_edges.set_index(['u','v','key'])
# Drop column 'index' if present (errors='ignore' avoids a KeyError when it is absent)
cons_nodes = cons_nodes.drop(columns=['index'], errors='ignore')
cons_edges = cons_edges.drop(columns=['index'], errors='ignore')

# Show
print(cons_nodes.crs)
# .info() prints its own report and returns None, so wrapping it in print() adds a stray "None"
cons_nodes.info()
cons_nodes.head(2)

Creating graph...


ValueError: `gdf_edges` must be multi-indexed by `(u, v, key)`.

In [12]:
# Count the edges by geometry type
print(edges_gdf.geom_type.value_counts())

MultiLineString    211959
Name: count, dtype: int64


## Create voronoi polygons using nodes

In [None]:
print(f"--- Creating voronois with nodes osmid data.")

#Time start
time_1 = time.time()
#Process: build the voronoi polygons from the consolidated nodes
voronois_gdf = src.voronoi_polygons(cons_nodes)
#Time end
time_2 = time.time()
print(f"TIME: {time_2-time_1} seconds.")

# Show
print(voronois_gdf.crs)
# .info() prints its own report and returns None, so wrapping it in print() adds a stray "None"
voronois_gdf.info()
voronois_gdf.head(2)

## Assign blocks population data to voronois

In [None]:
print(f"--- Assigning pop col {pop_col} from blocks to voronois .")

#Time start
time_1 = time.time()
#Process
# The notebook only runs `import src`, so the helper must be reached through the `src`
# namespace (as done for src.voronoi_polygons); the bare name raises NameError.
# NOTE(review): confirm the helper is exposed at the top level of `src` under this name.
voronois_pop_gdf = src.assing_blocks_attribute_to_voronoi(blocks_gdf, voronois_gdf, attribute_column=pop_col)
#Time end
time_2 = time.time()
print(f"TIME: {time_2-time_1} seconds.")

# Show
print(voronois_pop_gdf.shape)
voronois_pop_gdf.head(2)

## Calculate pop density in nodes (Using its voronoi polygon's area)

In [None]:
print("--- Adding density using each voronoi polygon's area.")
# Areas are measured in the projected CRS, so reproject first when needed
if voronois_pop_gdf.crs != projected_crs:
    voronois_pop_gdf = voronois_pop_gdf.to_crs(projected_crs)
# Area of each voronoi polygon divided by 10,000 (m2 -> hectares, assuming the projected CRS is in metres)
voronois_pop_gdf['area_has'] = voronois_pop_gdf.geometry.area.div(10000)
# Density: population of each voronoi polygon per hectare of its area
voronois_pop_gdf['dens_pob_ha'] = voronois_pop_gdf[pop_col].div(voronois_pop_gdf['area_has'])

# Show
print(voronois_pop_gdf.shape)
voronois_pop_gdf.head(2)

## Save voronoi polygons with population and density data

In [None]:
# Write the voronoi polygons (with population and density) to a GeoPackage when local saving is enabled
if local_save:
    print("--- Saving result locally.")
    # Create the output folder when it is missing so the write does not fail on a fresh checkout
    os.makedirs(output_dir, exist_ok=True)
    voronois_pop_gdf.to_file(output_dir+f"{city}_voronois_pop_gdf.gpkg")
    print("--- Result saved.")