# Notebook for the calculation of street network indices.

This notebook contains all the code that was used for calculating the street network indices of all 176 capital cities.

In [None]:
import os
import traceback
import warnings
from pathlib import Path

warnings.simplefilter(action='ignore', category=FutureWarning)
warnings.simplefilter(action='ignore', category=UserWarning)
warnings.simplefilter(action='ignore', category=RuntimeWarning)

import ray
import geopandas as geopd
import pandas as pd
from shapely import ops
from osmnx import settings
import pyproj

import modules.network_extractor as net_extractor

2025-08-02 22:46:53,177	INFO util.py:154 -- Missing packages: ['ipywidgets']. Run `pip install -U ipywidgets`, then restart the notebook server for rich notebook output.


In [None]:
# Add here the absolute path to the data folder.
# Every dataset path and per-city output path in this notebook is built as
# f"{data_base_path}/...", so this must be filled in before running the cells below.
data_base_path = ""

# The extractor instance, used further down to load the per-city graphs and to
# save the graphs with the computed indices as shapefiles.
extractor = net_extractor.NetworkExtractor(base_path=data_base_path)
 
# Custom OSMnx settings
settings.default_crs = "epsg:4326"

# Initialize Ray
# Ray is required as the indices of proximity to POIs and to public transport are parallelized. 
# First shut down Ray if it was already initialized.
ray.shutdown()
# This will initialize Ray with the default settings, which uses all available CPUs.
ray.init()

# Instead, use the following line for specific resource allocation
# ray.init(num_cpus=2)

In [None]:
# Load the three GHS urban-centre layers used by this notebook and give their
# columns short, readable names.
urban_centers: geopd.GeoDataFrame

ghs_geom_path = f"{data_base_path}/GHS_URBAN_CENTERS/GHS_URBAN_SIMPLIFIED_fixed.gpkg"
ghs_dataset_path = f"{data_base_path}/GHS_URBAN_CENTERS/GHS_UCDB_GLOBE_R2024A.gpkg"

# Coordinate transformer from ESRI:54009 to EPSG:4326; always_xy=True keeps the
# (x, y) axis order. It is applied to the urban-centre geometries later on.
transform = pyproj.Transformer.from_crs("ESRI:54009", "EPSG:4326", always_xy=True).transform

# Columns shared by all three layers: they identify an urban centre.
geoms_columns = {
    "GC_UCN_MAI_2025": "name",
    "GC_CNT_GAD_2025": "country",
}
info_columns = {
    "GC_POP_TOT_2025": "population",
    "GC_UCA_KM2_2025": "area",
    "GC_DEV_USR_2025": "continent",
    "GC_UCN_MAI_2025": "name",
    "GC_CNT_GAD_2025": "country",
    "GC_DEV_WIG_2025": "income_group",
    "GH_BUS_TOT_2025": "builtup_area"
}
climate_columns = {
    "GC_UCN_MAI_2025": "name",
    "GC_CNT_GAD_2025": "country",
    "CL_KOP_CUR_2025": "climate_class",
}

# Simplified urban-centre geometries.
urban_centers_geoms = geopd.read_file(
    ghs_geom_path, layer='GHS_URBAN_SIMPLIFIED_fixed'
).rename(columns=geoms_columns)

# General attributes (population, area, region, income group, built-up surface).
urban_centers_info = geopd.read_file(
    ghs_dataset_path, layer='GHS_UCDB_THEME_GHSL_GLOBE_R2024A'
).rename(columns=info_columns)

# Climate attributes (climate class).
urban_centers_climate = geopd.read_file(
    ghs_dataset_path, layer='GHS_UCDB_THEME_CLIMATE_GLOBE_R2024A'
).rename(columns=climate_columns)

# Select the GHS urban centres that correspond to the capital cities.
# capital_cities.csv has no header row: column 0 is the country, column 1 the city name.
capital_cities = f"{data_base_path}/capital_cities.csv"
capital_df = pd.read_csv(capital_cities, delimiter=",", header=None)
countries = list(capital_df[0])
city_names = list(capital_df[1])

# Collect one (possibly empty) selection per capital and concatenate once,
# instead of growing a DataFrame inside the loop.
capital_matches = []
missing_capitals = []
for capital_country, capital_name in zip(countries, city_names):
    capital_city = urban_centers_geoms.loc[
        (urban_centers_geoms["name"] == capital_name) &
        (urban_centers_geoms["country"] == capital_country)
    ]
    if capital_city.empty:
        # Keep track of capitals without a GHS match so they are not dropped silently.
        missing_capitals.append((capital_country, capital_name))
    capital_matches.append(capital_city)

# pd.concat raises on an empty list, so fall back to an empty frame in that case.
capitals = pd.concat(capital_matches) if capital_matches else pd.DataFrame()
capitals = capitals.reset_index(drop=True)

if missing_capitals:
    print(f"capitals without a match in the GHS dataset: {missing_capitals}")


def _to_display_name(name):
    """Return the lowercase, slug_case, ASCII-only form of a city name.

    The result is used as the folder / file name under which the graphs and
    shapefiles of the city are stored.

    Spaces become underscores, the name is lowercased, each character of
    ``specials`` is replaced by the character at the same position in
    ``replaces`` and square brackets are removed.
    """
    specials = "áéíóú'șăŏã"
    replaces = "aeiou_saoa"

    display_name = name.replace(" ", "_").lower()
    for special, plain in zip(specials, replaces):
        display_name = display_name.replace(special, plain)
    return display_name.replace("[", "").replace("]", "")


# Derive the display name from the GHS city name (the "name" column) for every
# capital in one step instead of writing it row by row.
capitals["display_name"] = capitals["name"].map(_to_display_name)


In [None]:
# Two-letter region codes, keyed by the region names stored in the GHS
# "continent" column. "Oceania" and "Australia and New Zealand" share the code "OC".
_region_codes = [
    ("Sub-Saharan Africa", "SA"),
    ("Oceania", "OC"),
    ("Northern America", "AM"),
    ("Northern Africa and Western Asia", "NW"),
    ("Latin America and the Caribbean", "LA"),
    ("Europe", "EU"),
    ("Eastern and South-Eastern Asia", "EA"),
    ("Central and Southern Asia", "CA"),
    ("Australia and New Zealand", "OC"),
]
continent_mapping = dict(_region_codes)

In [None]:
from modules.indices import graph_based, proximity
# Cities whose walk / public / bike / drive graph could not be loaded.
load_errors = []
# Cities for which the calculation of the indices raised an error.
errors = []
# Folder in which results_temp.csv is written. Set the METRICS_RESULT_PATH
# environment variable to override the default location without editing the notebook.
metrics_result_path = os.environ.get(
    "METRICS_RESULT_PATH",
    "/home/user/Desktop/JP/street-network-indices/notebooks",
)


def add_result(results_dict, column, value):
    """Append ``value`` to the list stored under ``column`` in ``results_dict``.

    Args:
        results_dict: dict mapping column names to lists of values.
        column: name of the column to extend; it must already be a key of
            ``results_dict``.
        value: the value to append.

    Raises:
        KeyError: if ``column`` is not a key of ``results_dict``.
    """
    print(f"adding {column}")
    # Fail with a clear message on an unknown column. The previous bare
    # ``except`` retried the same lookup and therefore raised KeyError anyway.
    if column not in results_dict:
        raise KeyError(f"unknown result column: {column!r}")
    results_dict[column].append(value)


# Columns of the per-city results table, in the order in which they are
# written to results_temp.csv the first time the file is created.
RESULT_COLUMNS = (
    # general
    "city",
    "area",
    "geoarea",
    "population",
    "climate_class",
    "builtup_area",
    "income_group",

    "orientation_entropy",

    # walking
    "walk_prox_poi_mean",
    "walk_prox_poi_median",
    "walk_prox_poi_range",
    "walk_prox_poi_std",
    "walk_prox_poi_iqr",

    "walk_prox_pub_transport_mean",
    "walk_prox_pub_transport_median",
    "walk_prox_pub_transport_range",
    "walk_prox_pub_transport_std",
    "walk_prox_pub_transport_iqr",

    "walk_circuity",

    "walk_street_len_mean",
    "walk_street_len_median",
    "walk_street_len_range",
    "walk_street_len_std",
    "walk_street_len_iqr",
    "walk_street_density",
    "walk_street_len_total",

    "walk_link_node_ratio",

    "walk_intersection_density",
    "walk_intersection_count",

    # cycling
    "bike_prox_poi_mean",
    "bike_prox_poi_median",
    "bike_prox_poi_range",
    "bike_prox_poi_std",
    "bike_prox_poi_iqr",

    "bike_slope_mean",
    "bike_slope_median",
    "bike_slope_range",
    "bike_slope_std",
    "bike_slope_iqr",

    "bike_circuity",

    "bike_street_len_mean",
    "bike_street_len_median",
    "bike_street_len_range",
    "bike_street_len_std",
    "bike_street_len_iqr",
    "bike_street_density",
    "bike_street_len_total",

    "bike_link_node_ratio",

    "bike_intersection_density",
    "bike_intersection_count",
)


def _add_summary_stats(results_dict, prefix, stats):
    """Store a 5-tuple of statistics under the ``<prefix>_<stat>`` columns.

    ``stats`` is unpacked as (mean, median, range, std, iqr); a tuple of any
    other length raises ValueError.
    """
    stat_mean, stat_median, stat_range, stat_std, stat_iqr = stats
    add_result(results_dict, f"{prefix}_mean", stat_mean)
    add_result(results_dict, f"{prefix}_median", stat_median)
    add_result(results_dict, f"{prefix}_range", stat_range)
    add_result(results_dict, f"{prefix}_std", stat_std)
    add_result(results_dict, f"{prefix}_iqr", stat_iqr)


def _add_structure_indices(results_dict, mode, graph, area):
    """Store circuity, street length, link-node ratio and intersection indices.

    ``mode`` is the column prefix ("walk" or "bike"), ``graph`` the network
    passed to the ``graph_based`` functions and ``area`` the urban-centre area
    used for the densities.
    """
    # Circuity
    add_result(results_dict, f"{mode}_circuity", graph_based.circuity(graph, add_property=True))

    # Street length: five summary statistics followed by the total length
    len_mean, len_median, len_range, len_std, len_iqr, len_total = graph_based.street_length(graph)
    _add_summary_stats(
        results_dict,
        f"{mode}_street_len",
        (len_mean, len_median, len_range, len_std, len_iqr),
    )
    add_result(results_dict, f"{mode}_street_density", len_total / area)
    add_result(results_dict, f"{mode}_street_len_total", len_total)

    # Link-Node ratio
    add_result(results_dict, f"{mode}_link_node_ratio", graph_based.link_node_ratio(graph))

    # Intersection density
    intersection_density, intersection_count = graph_based.intersection_density(graph, area)
    add_result(results_dict, f"{mode}_intersection_density", intersection_density)
    add_result(results_dict, f"{mode}_intersection_count", intersection_count)


# Calculate all indices city by city. The row of each finished city is appended
# to results_temp.csv right away, so an interrupted run keeps the cities that
# were already processed.
for _, capital in capitals.iterrows():
    # Slug name of the city, used for the folders and file names.
    city_name = capital["display_name"]
    # The name to search the city in the GHS dataset. Capital case. Also used for the DEM.
    search_name = capital["name"]
    # The country in which the city is located for searching the GHS dataset. Capital case.
    country = capital["country"]
    print(f"starting {city_name}")

    # extract info from GHS with search_name and country
    city_info = urban_centers_info.loc[
        (urban_centers_info["name"] == search_name) &
        (urban_centers_info["country"] == country)
    ]

    # extract climate info from GHS with search_name and country
    city_climate = urban_centers_climate.loc[
        (urban_centers_climate["name"] == search_name) &
        (urban_centers_climate["country"] == country)
    ]

    # extract geometry from GHS with search_name and country
    city_geom = urban_centers_geoms.loc[
        (urban_centers_geoms["name"] == search_name) &
        (urban_centers_geoms["country"] == country)
    ]

    results_dict = {column: [] for column in RESULT_COLUMNS}

    try:
        Path(f"{data_base_path}/{city_name}/indices").mkdir(parents=True, exist_ok=True)

        # General attributes; the first matching GHS row is used. A city with
        # no match raises IndexError here and is recorded in `errors` below.
        area = city_info["area"].values[0]
        population = city_info["population"].values[0]
        # originally in m2, converted to km2
        builtup_area = city_info["builtup_area"].values[0] * (10 ** -6)
        geoarea = continent_mapping[city_info["continent"].values[0]]
        income_group = city_info["income_group"].values[0]
        climate_class = city_climate["climate_class"].values[0]
        geom = ops.transform(transform, city_geom["geometry"].values[0])

        add_result(results_dict, "city", city_name)
        add_result(results_dict, "area", area)
        add_result(results_dict, "geoarea", geoarea)
        add_result(results_dict, "population", population)
        add_result(results_dict, "builtup_area", builtup_area)
        add_result(results_dict, "income_group", income_group)
        add_result(results_dict, "climate_class", climate_class)

        # Indicators using pedestrian network
        try:
            g_walk = extractor.load_graph(f'{city_name}/graph/walk_{city_name}')
            g_public = extractor.load_graph(f'{city_name}/graph/public_{city_name}')
        except Exception:
            # Without the graphs nothing can be computed: skip the city.
            load_errors.append(city_name)
            continue

        # walking proximity to pois
        _add_summary_stats(
            results_dict,
            "walk_prox_poi",
            proximity.proximity_to_pois(
                g_walk, geom, walk_time=15, parallelize=True, max_partition_size=5000
            ),
        )

        # walking proximity to public transport
        _add_summary_stats(
            results_dict,
            "walk_prox_pub_transport",
            proximity.proximity_to_public_transport(
                g_walk, g_public, distance=400, parallelize=True, max_partition_size=5000
            ),
        )

        # walking circuity, street length, link-node ratio and intersection density
        _add_structure_indices(results_dict, "walk", g_walk, area)

        # save the pedestrian graph with indicators attached
        extractor.save_as_shp(g_walk, f'{city_name}/indices/walk_indices_{city_name}')
        # free memory
        del g_walk
        del g_public

        # Indicators using the cycling network
        try:
            g_bike = extractor.load_graph(f'{city_name}/graph/bike_{city_name}')
        except Exception:
            # NOTE: the walking results computed above are not written for this city.
            load_errors.append(city_name)
            continue

        # cycling proximity to POIs
        _add_summary_stats(
            results_dict,
            "bike_prox_poi",
            proximity.proximity_to_pois(
                g_bike, geom, walk_time=15, parallelize=True, max_partition_size=5000
            ),
        )

        # Cycling slope
        _add_summary_stats(results_dict, "bike_slope", graph_based.steepness(g_bike))

        # cycling circuity, street length, link-node ratio and intersection density
        _add_structure_indices(results_dict, "bike", g_bike, area)

        # save the cycling graph with indicators attached
        extractor.save_as_shp(g_bike, f'{city_name}/indices/bike_indices_{city_name}')
        # free memory
        del g_bike

        # Indicators using the driving network
        try:
            g_drive = extractor.load_graph(f'{city_name}/graph/drive_{city_name}')
        except Exception:
            # NOTE: the walking and cycling results are not written for this city.
            load_errors.append(city_name)
            continue

        orientation_entropy = graph_based.orientation_entropy(g_drive)
        add_result(results_dict, "orientation_entropy", orientation_entropy)

        # free memory
        del g_drive

        results_df = pd.DataFrame(results_dict)

        # Append the row of this city to the results file. Only a missing file
        # counts as "first time"; any other failure is left to the outer
        # handler so that results collected so far are never overwritten.
        try:
            results_df_old = pd.read_csv(f'{metrics_result_path}/results_temp.csv', index_col=0)
        except FileNotFoundError:
            print("First time")
            results_df.to_csv(f'{metrics_result_path}/results_temp.csv')
        else:
            results_df_new = pd.concat([results_df_old, results_df]).reset_index(drop=True)
            results_df_new.to_csv(f'{metrics_result_path}/results_temp.csv')

    except Exception as ex:
        # Record the failing city and continue with the next one.
        print(str(ex))
        print(traceback.format_exc())
        print(city_name)
        errors.append(city_name)


In [None]:
# display the cities with errors, if any:
# "errors" are cities whose calculation failed, "load_errors" are cities
# for which one of the graphs could not be loaded
{"errors": errors, "load_errors": load_errors}