In [1]:
import os
import traceback
from pathlib import Path

import geopandas as geopd
import networkx as nx
import osmnx as ox
import pandas as pd
import pyproj
import ray
from osmnx import settings
from shapely import ops

import modules.network_extractor as net_extractor

In [2]:
# Root folder that holds the input datasets and receives the generated outputs.
# Point the STREET_NETWORK_DATA_PATH environment variable at your own copy of the
# data; the literal below is only the fallback used when the variable is unset.
data_base_path = os.environ.get(
    "STREET_NETWORK_DATA_PATH",
    "/home/user/Desktop/JP/street-network-indices/data",
)

# The extractor instance, configured to work under the same data folder.
extractor = net_extractor.NetworkExtractor()
extractor.DATA_BASE_PATH = data_base_path

# Custom OSMnx settings
settings.default_crs = "epsg:4326"

# Shut down any Ray instance left over from a previous run of this cell before
# starting a new local one, so the cell can be re-executed safely.
ray.shutdown()
ray.init()

2025-05-09 16:14:01,289	INFO worker.py:1832 -- Started a local Ray instance. View the dashboard at [1m[32m127.0.0.1:8265 [39m[22m


0,1
Python version:,3.9.12
Ray version:,2.42.1
Dashboard:,http://127.0.0.1:8265


In [3]:
# Load the GHS urban-centres layer and give the columns used later short names.
ghs_dataset_path = f"{data_base_path}/GHS_URBAN_CENTERS/GHS_URBAN_SIMPLIFIED_fixed.gpkg"

ghs_column_names = {
    "GC_POP_TOT_2025": "population",
    "GC_UCA_KM2_2025": "area",
    "GC_DEV_USR_2025": "continent",
    "GC_UCN_MAI_2025": "name",
    "GC_CNT_GAD_2025": "country",
}

urban_centers: geopd.GeoDataFrame = geopd.read_file(
    ghs_dataset_path, layer='GHS_URBAN_SIMPLIFIED_fixed'
).rename(columns=ghs_column_names)

# Coordinate converter from ESRI:54009 to EPSG:4326, always in (x, y) order.
# NOTE(review): assumes the GHS layer is stored in ESRI:54009 -- confirm.
ghs_to_wgs84 = pyproj.Transformer.from_crs("ESRI:54009", "EPSG:4326", always_xy=True)
transform = ghs_to_wgs84.transform

In [4]:
# Read the header-less CSV of capitals: column 0 is used as the country and
# column 1 as the city name.
capital_cities = f"{data_base_path}/capital_cities.csv"
capital_df = pd.read_csv(capital_cities, sep=",", header=None)
countries, city_names = (list(capital_df[column]) for column in (0, 1))


In [5]:
# Look up every (country, capital) pair from the CSV in the GHS urban centres.
# The per-city matches are collected first and concatenated once; growing the
# frame with pd.concat inside the loop copies all previous rows on every pass.
capital_matches = [
    urban_centers.loc[
        (urban_centers["name"] == capital_name) &
        (urban_centers["country"] == country_name)
    ]
    for country_name, capital_name in zip(countries, city_names)
]

# pd.concat raises on an empty list, so fall back to an empty frame in that case.
capitals = (
    pd.concat(capital_matches).reset_index(drop=True)
    if capital_matches
    else pd.DataFrame()
)

In [6]:
# Build, for every capital, the text entry holding its bounding box for GEE.
# Each entry is kept in `gee_entries`; `ee_str` holds the most recent one.

# Characters that are replaced (specials -> replaces, position by position) or
# removed ("[" and "]") to obtain the display name.
specials = "áéíóú'șăŏã"
replaces = "aeiou_saoa"
name_table = str.maketrans(specials, replaces, "[]")

gee_entries = []
for _, cap in capitals.iterrows():
    # The name and country used to search the city in the GHS dataset. Capital case.
    # Columns are read by label: integer keys on a labelled Series are deprecated.
    search_name = cap["name"]
    country = cap["country"]

    # The city name in lowercase and slug_case.
    city_name = search_name.replace(" ", "_").lower()

    # extract info from GHS with search_name and country
    city_info = urban_centers.loc[
        (urban_centers["name"] == search_name) &
        (urban_centers["country"] == country)
    ]

    if len(city_info) == 0:
        # Skip the city: without a match there is no geometry to take bounds
        # from, and reusing the previous iteration's geometry would be wrong.
        print(f"City {search_name}, {country} not found")
        continue

    # First matching geometry, converted with the ESRI:54009 -> EPSG:4326 transform.
    geom = ops.transform(transform, city_info["geometry"].values[0])

    display_name = city_name.translate(name_table)

    # `geom.bounds` is a tuple, so it renders as the argument list of BBox(...).
    ee_str = f"{{'geometry': ee.Geometry.BBox{geom.bounds}, 'name': '{display_name}' }},"
    gee_entries.append(ee_str)
    

  city_name = cap[1][1].replace(" ", "_").lower()
  search_name = cap[1][1]
  country = cap[1][3]


In [7]:
# Restrict the run to one capital (Kampala); the bare expression at the end
# displays the selected row(s).
is_kampala = capitals["name"] == "Kampala"
capitals = capitals[is_kampala]
capitals

Unnamed: 0,ID_UC_G0,name,GC_UCN_LIS_2025,country,GC_CNT_UNN_2025,area,population,GC_DEV_WIG_2025,continent,GC_PLS_SCR_2025,GC_UCB_YOB_2025,GC_UCB_YOD_2025,GC_UCM_CAP,geometry
165,1711,Kampala,Kampala; Kira; Nansana; Entebbe; Wakiso,Uganda,Uganda,810,5177877.659,Low income,Sub-Saharan Africa,High,1975,2030,1,"MULTIPOLYGON (((3259001 59001, 3260000 59001, ..."


In [8]:
# Assess the pedestrian ("walk") network of every selected capital and export it
# as shapefiles. Cities that cannot be processed are recorded in `errors` and the
# loop moves on to the next city.

# Characters that are replaced (specials -> replaces, position by position) or
# removed ("[" and "]") to obtain a folder-friendly city name.
specials = "áéíóú'șăŏã"
replaces = "aeiou_saoa"
name_table = str.maketrans(specials, replaces, "[]")

errors = []
for _, cap in capitals.iterrows():
    # The name and country used to search the city in the GHS dataset. Capital case.
    # Columns are read by label: integer keys on a labelled Series are deprecated.
    search_name = cap["name"]
    country = cap["country"]

    # The city name in lowercase and slug_case, used for the output folder and
    # file names.
    city_name = search_name.replace(" ", "_").lower().translate(name_table)

    try:
        # extract info from GHS with search_name and country
        city_info = urban_centers.loc[
            (urban_centers["name"] == search_name) &
            (urban_centers["country"] == country)
        ]

        if len(city_info) == 0:
            # Without a match there is no geometry; record the failure and skip
            # the city instead of continuing with a stale or undefined geometry.
            print(f"City {search_name}, {country} not found")
            errors.append(city_name)
            continue

        # The geometry from which to extract the network. It is the first match
        # in the GHS dataset, converted with the ESRI:54009 -> EPSG:4326 transform.
        geometry = ops.transform(transform, city_info["geometry"].values[0])
        print(geometry.bounds)

        # Create the output folder if it does not exist
        Path(f"{data_base_path}/{city_name}/assessment").mkdir(parents=True, exist_ok=True)

        # The pedestrian network. The cycling network can be assessed the same
        # way by passing "bike" as the first argument.
        g_walk = extractor.assess_network("walk", geometry, options={})

        # Save shapefiles
        extractor.save_as_shp(g_walk, f'{city_name}/assessment/walk_{city_name}')
    except Exception:
        # Best effort: remember the failing city and show the full traceback.
        # (`ex.with_traceback()` needs a traceback argument and would itself
        # raise here, aborting the whole loop on the first error.)
        errors.append(city_name)
        traceback.print_exc()

  city_name = cap[1][1].replace(" ", "_").lower()
  search_name = cap[1][1]
  country = cap[1][3]


(32.42794743175364, 0.032342629272880216, 32.777481610553444, 0.48527105501818074)
finish download
Raw number of edges: 861865
Raw number of nodes: 421700
start rebuilding graph
finish rebuilding graph
finish graph simplify
edges: 103197
Number of partitions: 4
starting with 30000 edges
assess street
assess street
assess street
assess street
assess street
assess street
assess street
assess street
assess street
assess street
assess street
assess street
assess street
assess street
assess street
assess street
assess street
assess street
assess street
assess street
assess street
assess street
assess street
assess street
assess street
assess street
assess street
assess street
assess street
assess street
assess street
assess street
assess street
assess street
assess street
assess street
assess street
assess street
assess street
assess street
assess street
assess street
assess street
assess street
assess street
assess street
assess street
assess street
assess street
assess street
assess stree

  nodes.to_file(f"{self.DATA_BASE_PATH}/{path}_nodes.shp", encoding='utf-8')
  ogr_write(


In [9]:
# Names of the cities collected in `errors` by the extraction loop (those whose
# processing raised an exception); an empty list means no city failed.
errors

[]