In [1]:
import json
import math
import copy
import os
import time
import numpy as np
from pathlib import Path
import networkx as nx
import osmnx as ox
import pandas as pd
import geopandas as geopd
import matplotlib.pyplot as plt
import geopandas as geopd
import shapely
from shapely import ops
import pyproj
import requests
from itertools import chain
from modules import utils
from networkx import bfs_edges

import modules.network_extractor as net_ex
import modules.networker as netw

In [2]:
import modules.network_extractor as net_ex
from modules.networker import Networker
extractor = net_ex.NetworkExtractor()
extractor.DATA_BASE_PATH = "/home/user/Desktop/JP/street-network-indices/data"
networker = Networker()

folder = "bogota"

In [3]:
import warnings
warnings.filterwarnings("ignore", category=np.RankWarning) 

extractor = net_ex.NetworkExtractor()
extractor.DATA_BASE_PATH = "/home/user/Desktop/JP/street-network-indices/data"
networker = netw.Networker()

In [4]:
urban_centers: geopd.GeoDataFrame

urban_centers = geopd.read_file(
    "/home/user/Desktop/JP/street-network-indices/data/GHS_URBAN_CENTERS/GHS_URBAN_SIMPLIFIED_fixed.gpkg",
    layer='GHS_URBAN_SIMPLIFIED_fixed'    
)
urban_centers = urban_centers.rename(columns={
    "GC_POP_TOT_2025": "population",
    "GC_UCA_KM2_2025": "area",
    "GC_DEV_USR_2025": "continent",
    "GC_UCN_MAI_2025": "name",
    "GC_CNT_GAD_2025": "country"
})
transform = pyproj.Transformer.from_crs("ESRI:54009", "EPSG:4326", always_xy=True).transform

In [5]:
cities = {
    "buenos_aires": {"search": "Buenos Aires", "country": "Argentina"},
    "bogota": {"search": "Bogota", "country": "Colombia"},
    "lima": {"search": "Lima", "country": "Peru"},
    "chicago": {"search": "Chicago", "country": "United States"},
    "ottawa": {"search": "Ottawa", "country": "Canada"},
    "mexico_city": {"search": "Mexico City", "country": "México"},
    "panama_city": {"search": "Panama City", "country": "Panama"},
    "san_salvador": {"search": "San Salvador", "country": "El Salvador"},
    "havana": {"search": "Havana", "country": "Cuba"},
    "milan": {"search": "Milan", "country": "Italy"},
    "madrid": {"search": "Madrid", "country": "Spain"},
    "athens": {"search": "Athens", "country": "Greece"},
    "osaka": {"search": "Osaka", "country": "Japan"},
    "shanghai": {"search": "Shanghai", "country": "China"},
    "hanoi": {"search": "Hanoi", "country": "Vietnam"},
    "dubai": {"search": "Dubai", "country": "United Arab Emirates"},
    "riyadh": {"search": "Riyadh", "country": "Saudi Arabia"},
    "doha": {"search": "Doha", "country": "Qatar"},
    "wellington": {"search": "Wellington", "country": "New Zealand"},
    "sydney": {"search": "Sydney", "country": "Australia"},
    "port_moresby": {"search": "Port Moresby", "country": "Papua New Guinea"},
    "lagos": {"search": "Lagos", "country": "Nigeria"},
    "alexandria": {"search": "Alexandria", "country": "Egypt"},
    "pretoria": {"search": "Pretoria", "country": "South Africa"}
}

In [6]:
stats_dict = {
    "city": [],
    "area": [],
    "population": [],
    "drive_nodes": [],
    "drive_edges": [],
    "bike_nodes": [],
    "bike_edges": [],
    "walk_nodes": [],
    "walk_edges": [],
    "total_time": []
}

def add_stat(stats, stat, value):
    stats[stat].append(value)

# Extract the networks for the cities and save statistics about each one
for folder, city in cities.items():
    try:
        city_info = urban_centers.loc[
            (urban_centers["name"] == city["search"]) &
            (urban_centers["country"] == city["country"])
        ]

        geometry = city_info["geometry"].values[0]
        geom_reprojected = ops.transform(transform, geometry)

        add_stat(stats_dict, "area", city_info["area"].values[0])
        add_stat(stats_dict, "population", int(city_info["population"].values[0]))
        print(f"Total Area {city_info['area'].values[0]}")

        # start timer to check time to download graphs for the city.
        start = time.time()
        city_name = city["search"]
        add_stat(stats_dict, "city", city_name)
        print(f"extracting {city_name}")
        place = city_name
        stat_row = {}

        # Extract driving graph and save statistics
        city_drive = extractor.extract_network("drive", place, geometry=geom_reprojected)
        networker.add_elevation(city_drive, city_name=city_name, cpus=4)
        #add_edge_slope(city_drive)
        ox.add_edge_grades(city_drive)
        ox.add_edge_bearings(city_drive)
        add_stat(stats_dict, "drive_nodes", city_drive.number_of_nodes())
        add_stat(stats_dict, "drive_edges", city_drive.number_of_edges())
        
        # Extract bike graph and save statistics
        city_bike = extractor.extract_network("bike", place, geometry=geom_reprojected)
        networker.add_elevation(city_bike, city_name=city_name, cpus=4)
        #add_edge_slope(city_bike)
        ox.add_edge_grades(city_bike)
        ox.add_edge_bearings(city_bike)
        add_stat(stats_dict, "bike_nodes", city_bike.number_of_nodes())
        add_stat(stats_dict, "bike_edges", city_bike.number_of_edges())

        # Extract pedestrian graph and save statistics
        city_walk = extractor.extract_network("walk", place, geometry=geom_reprojected)
        networker.add_elevation(city_walk, city_name=city_name, cpus=4)
        #add_edge_slope(city_walk)
        ox.add_edge_grades(city_walk)
        ox.add_edge_bearings(city_walk)
        add_stat(stats_dict, "walk_nodes", city_walk.number_of_nodes())
        add_stat(stats_dict, "walk_edges", city_walk.number_of_edges())

        print("Saving the city graphs")
        # Saving the city graphs
        base_folder = f"{extractor.DATA_BASE_PATH}/{folder}/graph"
        Path(base_folder).mkdir(parents=True, exist_ok=True)
        extractor.save_as_graph(city_drive, f"{folder}/graph/{folder}_drive")
        extractor.save_as_graph(city_bike, f"{folder}/graph/{folder}_bike")
        extractor.save_as_graph(city_walk, f"{folder}/graph/{folder}_walk")

        end = time.time()
        total_time = end-start
        stat_row["total_time"] = total_time

        add_stat(stats_dict, "total_time", total_time)
        print(f"{city_name} Finished!")
        print("------------------------------------------------------------")

    except Exception as ex:
        print(ex)
        print(f"error in city {city}")

stats = pd.DataFrame(stats_dict)
stats.to_csv(f"{extractor.DATA_BASE_PATH}/stats.csv")

Total Area 114
extracting Bologna
/home/user/Desktop/JP/street-network-indices/data/DEM/Bologna_DEM.tif: No such file or directory
error in city {'search': 'Bologna', 'country': 'Italy'}


ValueError: All arrays must be of the same length

In [8]:
stats_dict

{'city': ['Bologna'],
 'area': [114],
 'population': [456780],
 'drive_nodes': [],
 'drive_edges': [],
 'bike_nodes': [],
 'bike_edges': [],
 'walk_nodes': [],
 'walk_edges': [],
 'total_time': []}

In [8]:
# Example of loading a graph after downloading
folder = "athens"
g_walk = extractor.load_graph(f"{folder}/graph/{folder}_walk")

In [10]:
g_walk.number_of_edges()

176348