# Accessibility to schools in rural areas

In [1]:
# If using colab
# Takes around 2-3 min
# !pip install "UrbanAccessAnalyzer[osm,plot,h3] @ git+https://github.com/CityScope/UrbanAccessAnalyzer.git"
# !pip install matplotlib mapclassify folium
# !apt-get install -y osmium-tool


# Restart notebook after installing this if needed

In [2]:
import os
from datetime import datetime, date, timedelta, time
import pandas as pd
import geopandas as gpd
from shapely import wkt
import os

import osmnx as ox

import matplotlib.pyplot as plt
import folium

import UrbanAccessAnalyzer.isochrones as isochrones
import UrbanAccessAnalyzer.graph_processing as graph_processing
import UrbanAccessAnalyzer.osm as osm
import UrbanAccessAnalyzer.utils as utils
import UrbanAccessAnalyzer.h3_utils as h3_utils
import UrbanAccessAnalyzer.population as population
import UrbanAccessAnalyzer.poi_utils as poi_utils
import UrbanAccessAnalyzer.plot_helpers as plot_helpers

import zipfile
import numpy as np

## 1 Inputs

In [None]:
city_name = "Arevalo, España"

In [3]:
download_buffer = 5000 # Download area should be larger than the aoi by 'download_buffer' meters
# It should be max(distance_steps) but there is the risk of downloading an area that is too large

distance_steps = [1000,3000,20000] # Distance steps for the isochrones (points in the street network reachable in x distance from any point of interest)
accessibility_values = ['walk','bike','bus'] # Value to assign to every distance step

min_edge_length = 30 # Simplify street graph to avoid edges of less than 'min_edge_length'

h3_resolution = 10 # If you want results in h3 this is the output h3 resolution

kids_only = True # If True the population data is only people under 18 years age

### Results folder

Where do you want to save the results?

In [None]:
results_path = os.path.normpath("output")

In [5]:
city_filename = utils.sanitize_filename(city_name)
city_results_path = os.path.join(results_path,city_filename)
os.makedirs(results_path,exist_ok=True)
os.makedirs(city_results_path,exist_ok=True)

In [6]:
poi_path = os.path.normpath(city_results_path+f"/schools.gpkg")
osm_xml_file = os.path.normpath(city_results_path+f"/streets.osm")
streets_graph_path = os.path.normpath(city_results_path+f"/streets.graphml")
streets_path = os.path.normpath(city_results_path+f"/streets.gpkg")
level_of_service_streets_path = os.path.normpath(city_results_path+f"/level_of_service_streets.gpkg")
population_results_path = os.path.normpath(city_results_path+f"/population.gpkg")

### Area of interest
**Area of interest (aoi)**: Polygon. Geographic area where you want to run your analysis.

**Option 1:** From the internet with the city name

In [7]:
aoi = utils.get_city_geometry(city_name)
geo_suggestions = utils.get_geographic_suggestions_from_string(city_name,user_agent="app")
geo_suggestions

{'country_codes': ['ES'],
 'subdivision_names': ['Castile and León', 'Segovia', 'Soria', 'Ávila'],
 'municipalities': ['Arévalo',
  'Arévalo de la Sierra',
  'Montejo de Arévalo',
  'Nava de Arévalo',
  'San Vicente de Arévalo']}

**Option 2:** Load your own file

In [8]:
# Geographic file (.gpkg, .geojson or .shp)

# aoi = gpd.read_file("")

In [9]:
# csv file with lat/lon columns in geographic coordinates


# df = pd.read_csv("")


# # Create geometry from lon/lat columns
# geometry = gpd.points_from_xy(df["lon"], df["lat"]) # Change column names if needed
# # Convert to GeoDataFrame
# aoi = gpd.GeoDataFrame(
#     df,
#     geometry=geometry,
#     crs="EPSG:4326"  # geographic crs Change if needed
# )

# # OR Parse WKT geometry column
# df["geometry"] = df["geometry"].apply(wkt.loads) # change to match your geometry column name
# # Convert to GeoDataFrame
# aoi = gpd.GeoDataFrame(
#     df,
#     geometry="geometry",
#     crs="EPSG:4326"  # set to whatever CRS the WKT represents
# )


Use UTM coords and create aoi_download with a buffer of X meters. To avoid boundary effects streets and pois should be downloaded for a larger area.

In [10]:
aoi = gpd.GeoDataFrame(geometry=[aoi.union_all()],crs=aoi.crs) # Ensure there is only one polygon
aoi = aoi.to_crs(aoi.estimate_utm_crs()) # Convert to utm

aoi_download = aoi.buffer(download_buffer) # Area to do streets and poi requests 

### Points of interest

**Point of interest (poi):** Point that people want to reach and that serve as the origin of isochrones.

**Isochrone:** All points in the street network that are reachable within a given distance from any point of interest.


**Option 1:** Openstreetmap data with an overpass api query

In [11]:
query = """
[out:xml] [timeout:25];
(
    node["amenity"="school"]( {{bbox}});
    way["amenity"="school"]( {{bbox}});
    relation["amenity"="school"]( {{bbox}});
);
(._;>;);
out body;
"""

In [12]:
# If it fails execute again

poi = osm.overpass_api_query(query,aoi_download)
poi.geometry = poi.geometry.centroid
poi = poi.to_crs(aoi.crs)
poi.to_file(poi_path)

**Option 2:** Load your own file

In [None]:
# Geographic file (.gpkg, .geojson or .shp)

# poi = gpd.read_file("")

In [14]:
# csv file with lat/lon columns in geographic coordinates


# city_name = "your city name"
# df = pd.read_csv("")


# # Create geometry from lon/lat columns
# geometry = gpd.points_from_xy(df["lon"], df["lat"]) # Change column names if needed
# # Convert to GeoDataFrame
# poi = gpd.GeoDataFrame(
#     df,
#     geometry=geometry,
#     crs="EPSG:4326"  # geographic crs Change if needed
# )

# # OR Parse WKT geometry column
# df["geometry"] = df["geometry"].apply(wkt.loads) # change to match your geometry column name
# # Convert to GeoDataFrame
# poi = gpd.GeoDataFrame(
#     df,
#     geometry="geometry",
#     crs="EPSG:4326"  # set to whatever CRS the WKT represents
# )


In [15]:
poi = poi.to_crs(aoi.crs)
poi = poi[poi.geometry.intersects(aoi_download.union_all())]
poi

Unnamed: 0,geometry,type,id,nodes,addr:housenumber,addr:street,amenity,education,name,barrier,...,addr:city,addr:postcode,ref_catastral,email,operator,operator:type,operator:wikidata,phone,wheelchair,building
1,POINT (355948.864 4546495.255),way,306603798,"[3115734989, 3115734992, 3115734988, 311573498...",,,school,school,C.P. La Moraña,,...,,,,,,,,,,
2,POINT (356036.134 4546384.393),way,306603800,"[3115734978, 3115734968, 3115734972, 311573498...",,,school,school,Colegio Amor de Dios,,...,,,,,,,,,,
3,POINT (355669.753 4546339.229),way,306603802,"[3115734976, 5913245095, 3115734967, 311573496...",,,school,school,Colegio Público Los Arévacos,fence,...,,,,,,,,,,
5,POINT (356149.601 4545990.981),way,527116592,"[5125102390, 5125102389, 5125102388, 512510238...",,,school,school,Instituto de Educación Secundaria Adaja,fence,...,,,,,,,,,,
6,POINT (355530.67 4546285.074),way,576397301,"[5528446970, 5528446969, 5528446968, 552844696...",1.0,Paseo San Juan Bosco,school,school,Colegio Los Salesianos,fence,...,Arévalo,5200.0,5665801UL5456N,,,,,,,
7,POINT (355871.736 4546359.625),way,809902849,"[7572073980, 8920136429, 8920100950, 757207398...",22.0,Avenida de Emilio Romero,school,school,Instituto de Educación Secundaria Eulogio Flor...,fence,...,Arévalo,5200.0,,05000427@educa.jcyl.es,Junta de Castilla y León,government,Q3314606,+34 920300221,yes,
8,POINT (355931.348 4546282.894),way,827559197,"[7725989554, 7725989555, 7725989556, 772598955...",,,school,school,Centro de Educación de Personas Adultas de Aré...,,...,,,,,,,,,,yes


Map of your aoi, the download area (aoi_buffer) and the pois

In [16]:
m = aoi_download.explore(
    color="red",
    fill=False,
    style_kwds={"weight": 4, "dashArray": "5,5", "opacity": 1.0},
)

m = plot_helpers.general_map(
    m=m,
    aoi=aoi,
    pois=poi,
)
m

## 2 Street graph

### 2.1 Regionwise file and cropping

- Download best regionwise pbf file. (Covers a large area)

- Crop it to cover our area of interest and save it in .osm format

#### OSMIUM

To download the street network needed for the study online, the **osmium** tool is used.  
It is only available for **Linux** and **Mac** (it works in Google Colab too).  

To install, you can either:  

- Visit [osmium-tool website](https://osmcode.org/osmium-tool/)  
- Or run the command:  
```bash
  sudo apt-get install -y osmium-tool
````

Make sure it is added to your `PATH`.

On **Windows**, you can use **conda-forge** to install it.

---

To avoid using **osmium**, you can manually download the data:

1. Go to [OpenStreetMap Export](https://www.openstreetmap.org/export#map=14/40.23633/-3.76084)
2. Select the bounding box containing your area of interest.
3. Click **Export**.
4. Copy the `.osm` file that is downloaded to your project folder.
5. Set the variable `osm_xml_file` to the path where the `.osm` file is located.

In [None]:
# WARNING: Execute only if osmium is installed
# Select what type of street network you want to load
network_filter = osm.osmium_network_filter("walk+bike+primary")
# Download the region pbf file crop it by aoi and convert to osm format
osm.geofabrik_to_osm(
    osm_xml_file,
    input_file=results_path,
    aoi=aoi_download,
    osmium_filter_args=network_filter,
    overwrite=False
)

File 'output/arevalo__espana/streets.osm' already exists. Skipping conversion.


'output/arevalo__espana/streets.osm'

In [None]:
# Only if you downloaded the street network externally
# osm_xml_file = 'path/to/file.osm'

### 2.2 Load to osmnx

This way the street network is a networkx graph

In [18]:
# Load
G = ox.graph_from_xml(osm_xml_file)
# Project geometry coordinates to UTM system to allow euclidean meassurements in meters (sorry americans)
G = ox.project_graph(G,to_crs=aoi.estimate_utm_crs())
# Save the graph in graphml format to avoid the slow loading process
ox.save_graphml(G,streets_graph_path)

### 2.3 Simplify graph

Edges with length smaler than X meters are deleted and its nodes merged

In [19]:
G = graph_processing.simplify_graph(G,min_edge_length=min_edge_length,min_edge_separation=min_edge_length*2,undirected=True)
# Save the result in graphml format
ox.save_graphml(G,streets_graph_path)

street_edges = ox.graph_to_gdfs(G,nodes=False)
street_edges = street_edges.to_crs(aoi.crs)
street_edges.to_file(streets_path)

# 3 Points of interest

### 3.1 Add Points of interest to graph

In [20]:
G, osmids = graph_processing.add_points_to_graph(
    poi,
    G,
    max_dist=100+min_edge_length, # Maximum distance from point to graph edge to project the point
    min_edge_length=min_edge_length # Minimum edge length after adding the new nodes
)
poi['osmid'] = osmids # Add the ids of the nodes in the graph to points

## 4 Compute isochrones

### 4.1 Isochrones

In [21]:
accessiblity_graph = isochrones.graph(
    G,
    poi,
    distance_steps, # If service_quality_col is None it could be a list of distances
    service_quality_col = None, # If all points have the same quality this could be None
    accessibility_values = accessibility_values, # could be None and it will set to the sorted unique values of the matrix
    min_edge_length = min_edge_length # Do not add new nodes if there will be an edge with less than this length
)
# Save edges as gpkg
accessibility_nodes, accessibility_edges = ox.graph_to_gdfs(accessiblity_graph)
accessibility_edges.to_file(level_of_service_streets_path)

100%|██████████| 3/3 [00:00<00:00,  8.12it/s]


### 4.2 Convert to H3

In [22]:
access_h3_df = h3_utils.from_gdf(
    accessibility_edges,
    resolution=h3_resolution,
    columns=['accessibility'],
    value_order=accessibility_values,
    contain="overlap",
    method="min",
    buffer=10
)

access_h3_df

Unnamed: 0_level_0,accessibility
h3_cell,Unnamed: 1_level_1
8a392a02c30ffff,bus
8a392a02c31ffff,bus
8a392a02c367fff,bus
8a392a02c377fff,bus
8a392a02c387fff,bus
...,...
8a392abaac1ffff,bus
8a392abaac8ffff,bus
8a392abaacaffff,bus
8a392abaacd7fff,bus


See everything on a map

In [23]:
m = plot_helpers.general_map(
    aoi=aoi,
    pois=poi,
    gdfs=[access_h3_df,accessibility_edges],
    cmap="managua",
    column="accessibility"
)
m.save(city_results_path+"/access_map.html")
m

## 5 Population

### 5.1 Download Worldpop tif file

- One file for every country
- 100m pixel size
- tif format
- available from 2000 to 2030
- gender and age

In [24]:
if kids_only:
    population_file = population.download_worldpop_population(
        aoi_download,
        2025,
        folder=results_path,
        resolution="100m",
        dataset="age_structures",
        subset="U18"
    )
else:
    population_file = population.download_worldpop_population(
        aoi_download,
        2025,
        folder=results_path,
        resolution="100m",
    )

Raster population path output/esp_under_age_18_2025_CN_100m_R2025A_v1.zip exists. Skipping download...


In [25]:
# If downloaded the U18 file unzip it
if ".zip" in population_file:
    zip_path = population_file

    # Extract to the same directory as the zip file
    extract_dir = os.path.splitext(zip_path)[0]
    os.makedirs(extract_dir, exist_ok=True)

    # Decompress the zip file
    with zipfile.ZipFile(zip_path, 'r') as zip_ref:
        zip_ref.extractall(extract_dir)

    # Find the .tif file that contains '_T_'
    for file_name in os.listdir(extract_dir):
        if file_name.lower().endswith('.tif') and '_T_' in file_name:
            population_file = os.path.join(extract_dir, file_name)
            break
    else:
        raise FileNotFoundError("No .tif file containing '_T_' found in the zip archive.")

In [26]:
pop_h3_df = h3_utils.from_raster(population_file,aoi=aoi_download,resolution=h3_resolution)
pop_h3_df = pop_h3_df.rename(columns={'value':'population'})

### 5.2 Assign level of service to each population cell

In [27]:
results_h3_df = access_h3_df.merge(pop_h3_df,left_index=True,right_index=True,how='outer')
results_h3_df = h3_utils.to_gdf(results_h3_df).to_crs(aoi.crs)
results_h3_df = results_h3_df[results_h3_df.intersects(aoi.union_all())]
results_h3_df.to_file(population_results_path)
results_h3_df

Unnamed: 0_level_0,accessibility,population,geometry
h3_cell,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
8a392a060027fff,bus,,"POLYGON ((355900.497 4538013.444, 355829.871 4..."
8a392a06002ffff,bus,,"POLYGON ((356030.986 4537990.925, 355960.36 45..."
8a392a060067fff,bus,2.333333e-06,"POLYGON ((356242.86 4538074.374, 356172.236 45..."
8a392a06006ffff,,3.333333e-07,"POLYGON ((356373.344 4538051.854, 356302.721 4..."
8a392a06010ffff,bus,,"POLYGON ((355851.397 4538141.933, 355780.77 45..."
...,...,...,...
8a392aab6d77fff,bus,2.199667e-03,"POLYGON ((354205.276 4549467.026, 354134.645 4..."
8a392aab6d87fff,,1.532633e-02,"POLYGON ((353879.673 4549043.408, 353809.039 4..."
8a392aab6d8ffff,bus,2.094570e-01,"POLYGON ((354010.17 4549020.816, 353939.537 45..."
8a392aab6d9ffff,,1.678529e-01,"POLYGON ((353928.768 4548914.909, 353858.135 4..."


In [32]:
pop_gdf_points = results_h3_df.copy()
pop_gdf_points.geometry = pop_gdf_points.geometry.centroid
pop_gdf_points = pop_gdf_points.dropna(subset=['population'])
pop_gdf_points = pop_gdf_points[pop_gdf_points['population'] > 1]
m = plot_helpers.general_map(
    aoi=aoi,
    pois=poi,
    gdfs=[pop_gdf_points],
    cmap="managua",
    column="accessibility",
    size_column="population"
)
m.save(city_results_path+"/population_map.html")
m

## Statistics

In [None]:
stats_df = results_h3_df.groupby('accessibility', as_index=False)['population'].sum()
total_population = stats_df['population'].sum()
stats_df = pd.concat([stats_df, pd.DataFrame([{'accessibility': 'total population', 'population': total_population}])], ignore_index=True)
stats_df['population %'] = (stats_df['population'] * 100 / total_population).round(2)
stats_df['population'] = stats_df['population'].round(0).astype(int)
stats_df.to_csv(city_results_path + "/stats.csv")
stats_df

Unnamed: 0,accessibility,population,population %
0,bike,367.16012,32.666142
1,bus,38.790884,3.451215
2,walk,718.02661,63.882643
3,total population,1123.977614,100.0


In [None]:
# !zip -r /content/output.zip "{results_path}" # For colab. Export the output folder as zip.

Important files:

- streets.gpkg Has the street geometry as lines (all streets)
- accessibility.gpkg Has the street geometry as lines with the accessibility score (only streets with score > 0)
- population.gpkg Is a grid with population and level of service
- stats.csv Population statistics