# GTFS Handler tutorial

In [None]:
# Only needed on Google Colab: uncomment the lines below to install the dependencies.
# Installation takes around 2-3 min.
# !pip install git+https://github.com/GeomaticsCaminosUPM/pyGTFSHandler.git
# !pip install -v git+https://github.com/GeomaticsCaminosUPM/UrbanAccessAnalyzer.git
# !pip install matplotlib mapclassify folium
# !apt-get install -y osmium-tool

In [None]:
import sys
import os

from pyGTFSHandler.feed import Feed
from pyGTFSHandler.downloaders.spain.NAP import APIClient
import pyGTFSHandler.plot_helper as plot_helper
import pyGTFSHandler.processing_helper as processing_helper
from datetime import datetime, date, timedelta, time
import pandas as pd
import geopandas as gpd

import osmnx as ox

import matplotlib.pyplot as plt

import UrbanAccessAnalyzer.isochrones as isochrones
import UrbanAccessAnalyzer.graph_processing as graph_processing
import UrbanAccessAnalyzer.osm as osm
import UrbanAccessAnalyzer.utils as utils

### 1 Paths and data
#### 1.1 Select your AOI and the region or city name

Example for Spain. Adapt it to the data available for other countries.

In [None]:
# Download the municipal boundaries GeoPackage that the next cell reads to select the AOI.
# curl flags: -L follows redirects, -o sets the name of the local output file.
!curl -L -o recintos_municipales_inspire_peninbal_etrs89.gpkg "https://drive.upm.es/s/opBWkNpHXzJgGXM/download"

In [None]:
# Boundaries file downloaded in the previous cell.
borders_path = "recintos_municipales_inspire_peninbal_etrs89.gpkg"
# Approximate name of the municipality to look for.
city_name = "palma"

# Read the boundaries and reproject them to EPSG:4326.
municipalities_gdf = gpd.read_file(borders_path)
municipalities_gdf = municipalities_gdf.to_crs(4326)

# Fuzzy-match the requested name against the NAMEUNIT column to get the area of interest.
aoi = utils.gdf_fuzzy_match(
    municipalities_gdf,
    city_name,
    column="NAMEUNIT",
)

# From here on city_name holds the matched name, sanitized so it can be used in file paths.
matched_name = aoi["NAMEUNIT"].iloc[0]
city_name = utils.sanitize_filename(matched_name)
print(city_name)
aoi

#### 1.2 File paths

Paths where you want to save the downloaded info

In [None]:
# Root folder for every file related to the selected city or region.
base_path = f"{city_name}"
# GTFS public transport feeds.
gtfs_path = f"{base_path}/gtfs_files"
# Accessibility and service quality results.
results_path = f"{base_path}/level_of_service"

In [None]:
# Create the output folders; folders that already exist are left untouched.
for folder in (base_path, gtfs_path, results_path):
    os.makedirs(folder, exist_ok=True)

### 2 Download GTFS feeds

This example uses Spain's NAP API.

In [None]:
# Credentials for the Spanish NAP API.
# Request your own key at https://nap.transportes.gob.es/ and expose it through the
# NAP_API_KEY environment variable, so the key never has to be written into the notebook.
# NOTE(review): the fallback below is the key that was already published with this
# tutorial. It is kept only so the notebook still runs out of the box; a shared key can
# be revoked or rate limited at any time, so do not rely on it.
api_key = os.environ.get("NAP_API_KEY") or 'b753bf33-9300-4bee-8c3d-8e35009de69c'
api = APIClient(api_key)

#### 2.1 Find Feeds on the API

In [None]:
# Ask the API for the feeds that match the selected region and transport modes.
files = api.find_files(
    region=city_name,  # Name of the region you want to explore
    region_type=["municipality"],  # Type of region: 'municipality', 'urbanarea', 'province' or 'state'
    transport_type=['bus','rail','boat'],  # Transport system: 'bus', 'rail', 'boat' or 'plane'
    # Optional filters, uncomment to use them:
    #file_description='urbano', # Text that the gtfs feed has to include in its name or description
    #start_date='01-10-2024', # The gtfs feed has to include the range of dates specified here
    #end_date='31-10-2024' # Use today for today's date
)

# Print the name of every feed that was found.
for feed_info in files:
    print(feed_info['nombre'])

#### 2.2A Download current active files

In [None]:
# Download the feeds found in the previous step into gtfs_path.
# NOTE(review): overwrite=False presumably skips files that are already on disk; confirm in APIClient.
file_paths = api.download_files(file_ids=files, output_path=gtfs_path, overwrite=False)

#### 2.2B Download historic files


Download past files and stack them into a single file.

In [None]:
# Optional alternative to 2.2A: uncomment this cell to download historic versions of the feeds.
# file_paths = api.download_historic(
#     output_path=gtfs_path,
#     files=files,
#     start_date=datetime(day=1,month=10,year=2024),
#     end_date=datetime(day=30,month=10,year=2024),
#     day_separation=7, # Download one file every x days or more. If there is an update more often it will not be downloaded
#     overwrite=False,
#     aoi=aoi # Area of interest. Crop all files by this aoi.
# )

### 3 GTFS process

#### 3.1 Create the gtfs object

This will do:

- Load all .txt files of all gtfs folders given.
- Select only the stops from stops.txt inside the area of interest.
- Crop all trips in stop_times.txt with the stops inside the aoi + 1 more stop.
- Check the stop_sequence in stop_times.txt.
- Deal correctly with trips starting on one day and ending in the following day: hours always in 0-24 range but those trips are marked as next_day True. New service_ids are created to deal with that.
- If the file has frequencies.txt this is processed too dealing with the next day problem.
- If departure or arrival times are empty they get filled.
- A shape direction column is computed as the mean heading of the vector from the stop coordinates to the mean of the remaining stops' coordinates.
- GTFS shapes are for now computed from the stop coordinates.

In [None]:
# Build the feed from the downloaded files, restricted to the area of interest
# and to the dates between 1 and 30 September 2025.
gtfs = Feed(
    file_paths,
    aoi=aoi,
    # Group into one the stops that are less than this many meters apart.
    # This creates or updates the parent_station column.
    stop_group_distance=200,
    start_date=datetime(2025, 9, 1),
    end_date=datetime(2025, 9, 30),
)

#### 3.2 Service intensity

Service intensity is the number of trips per stop multiplied by the number of stops in the feed. It is computed for every date in the range.

In [None]:
# Compute the service intensity for every date in the range and convert the result to pandas.
service_intensity = gtfs.get_service_intensity_in_date_range(
    start_date=None,  # None: take the feed min date
    end_date=None,  # None: take the feed max date
    date_type=None,  # e.g. holiday, weekday or monday to only consider some dates from the range
).to_pandas()

plot_helper.plot_service_intensity(service_intensity)

Select the most common weekday for our analysis

In [None]:
# Keep only the dates that are neither holidays nor weekends.
weekday_services = service_intensity.loc[
    service_intensity['holiday'].eq(False) & service_intensity['weekend'].eq(False)
]

# Take the row pointed to by most_frequent_row_index and use its date for the analysis.
idx = processing_helper.most_frequent_row_index(weekday_services['service_intensity'])
selected_row = weekday_services.iloc[idx]
selected_weekday = selected_row['date'].to_pydatetime()
selected_weekday

#### 3.3 Service quality

Service quality is evaluated depending on the route type and the mean frequency in the selected time interval.

By default, processing_helper.SERVICE_MATRIX is used to assign the grades.


processing_helper.ROUTE_TYPES_TRANSLATOR translates GTFS route_types to the SERVICE_MATRIX route types, and ROUTE_TYPES sets the priority (importance) order used when a stop has multiple route types.

In [None]:
# Let's see which grade is given by default depending on route type and frequency.

processing_helper.SERVICE_MATRIX # Intervals are in minutes. Stops are graded from 1 (best) to 12 (worst).

In [None]:
# Time window of the analysis.
start_hour = 8  # start at 8:00
end_hour = 20  # end at 20:00

# Compute the service quality for the selected weekday and time window.
# The helper returns the path of a file, which is then loaded as a GeoDataFrame.
service_quality_file = processing_helper.get_service_quality(
    results_path,
    gtfs,
    dates=selected_weekday,
    times=[start_hour, end_hour],
)
service_quality_gdf = gpd.read_file(service_quality_file)

# Build the map of stops and store it as a standalone HTML file.
m = plot_helper.service_quality_map(
    service_quality_gdf,
    start_time=start_hour,
    end_time=end_hour,
)
m.save(f"{results_path}/stops_map.html")
# If the map does not render in the notebook, open the saved file instead:
# import webbrowser
# webbrowser.open(results_path + "/stops_map.html")
m

# Accessibility Tutorial

### Set paths

In [None]:
# OSM street network files for whole regions or countries.
pbf_path = "pbf_files"
# Street network graphs of the selected city.
graph_path = f"{base_path}/street_network"

In [None]:
# Create the street network folders; folders that already exist are left untouched.
for folder in (pbf_path, graph_path):
    os.makedirs(folder, exist_ok=True)

In [None]:
# City OSM street network file.
osm_xml_file = f"{graph_path}/{city_name}.osm"
# Full original city street network in graph format.
full_graph_path = f"{graph_path}/{city_name}_full_graph.graphml"
# Simplified street network.
simplified_graph_path = f"{graph_path}/{city_name}_simplified_graph.graphml"
# Street network with quality and accessibility, as a graph...
level_of_service_graph_path = f"{graph_path}/{city_name}_level_of_service.graphml"
# ...and as separate GeoPackage files for its nodes and edges.
level_of_service_nodes_path = f"{graph_path}/{city_name}_level_of_service_nodes.gpkg"
level_of_service_edges_path = f"{graph_path}/{city_name}_level_of_service_edges.gpkg"

## 1 Download street network

### 1.1 Regionwise file and cropping

- Download best regionwise pbf file. (Covers a large area)

- Crop it to cover our aoi and save it in .osm format

In [None]:
# Select what type of street network you want to load
network_filter = osm.osmium_network_filter("walk+bike+primary")
# Download the region pbf file crop it by aoi and convert to osm format
osm.geofabrik_to_osm(osm_xml_file,input_file=pbf_path,aoi=aoi,osmium_filter_args=network_filter,overwrite=False)

### 1.2 Load to osmnx

This way the street network becomes a networkx graph.

(osmnx could be completely deleted from the process and its functions reimplemented to make it much faster, especially the loading process here)

In [None]:
# Load
G = ox.graph_from_xml(osm_xml_file)
# Project geometry coordinates to UTM system to allow euclidean meassurements in meters (sorry americans)
G = ox.project_graph(G,to_crs=aoi.estimate_utm_crs())
# Save the graph in graphml format to avoid the slow loading process
ox.save_graphml(G,full_graph_path)

### 1.3 Simplify graph

Edges shorter than X are deleted and their nodes merged.

In [None]:
# Threshold below which edges are removed and their nodes merged.
# NOTE(review): presumably in meters, since the graph was projected to UTM; confirm.
min_edge_length = 30

G = graph_processing.simplify_graph(
    G,
    min_edge_length=min_edge_length,
    min_edge_separation=2 * min_edge_length,
    undirected=True,
)
# Store the simplified network in graphml format.
ox.save_graphml(G, simplified_graph_path)

## 2 Service points

- Load the points related to the service you want to evaluate (e.g. public transport stops or hospitals...)

- Add those points to the graph as new nodes

In [None]:
# Service points: the file produced by the service quality step of the GTFS part.
points = gpd.read_file(service_quality_file)
# Preview the first five rows.
points.head()

Select the column of the points GeoDataFrame that has the service quality information

In [None]:
# Column name built from the analysis time window, e.g. "service_quality_8h_20h".
service_quality_col = f"service_quality_{start_hour}h_{end_hour}h"

Add the service points to the graph:

- Project the points to the graph edges

- Add new nodes at these projection points

In [None]:
G, osmids = graph_processing.add_points_to_graph(
    points,
    G,
    # Maximum distance from a point to a graph edge for the point to be projected.
    max_dist=min_edge_length + 100,
    # Minimum edge length after adding the new nodes.
    min_edge_length=min_edge_length,
)
# Keep the ids of the new graph nodes next to the points they belong to.
points['osmid'] = osmids

### 3 Compute isochrones

#### 3.1 Distance steps and level of services

We need a DISTANCE_MATRIX to relate level of service classes to service qualities and distance to the service.

We also need a LEVEL_OF_SERVICES list to order the level of service classes from best to worst (or leave it up to the code if the matrix is very symmetric).

In [None]:
# Use the default matrix shipped with processing_helper and display it.
distance_matrix = processing_helper.DISTANCE_MATRIX
distance_matrix

In [None]:
# Use the default list of level of service classes from processing_helper and display it.
level_of_services = processing_helper.LEVEL_OF_SERVICES
level_of_services

Compute the level of service graph:

- First it computes the isochrones with networkx at node level

- Then it adds new nodes to make the isochrones exact

- The graph has a new property level_of_service with the level of service class

In [None]:
# Compute the level of service of the street network from the service points.
level_of_service_graph = isochrones.graph(
    G,
    points,
    distance_matrix, # If service_quality_col is None it could be a list of distances
    service_quality_col = service_quality_col, # If all points have the same quality this could be None
    level_of_services=level_of_services, # could be None and it will set to the sorted unique values of the matrix
    min_edge_length=min_edge_length # Do not add new nodes if there will be an edge with less than this length
)
# Save the result as graphml. The graph returned by isochrones.graph is the one written
# to disk (not the input G), so the file matches the nodes and edges exported afterwards.
ox.save_graphml(level_of_service_graph,level_of_service_graph_path)

In [None]:
# Save edges and nodes as gpkg
nodes, edges = ox.graph_to_gdfs(level_of_service_graph)
nodes.to_file(level_of_service_nodes_path)
edges.to_file(level_of_service_edges_path)

### Let's visualize all results on a map

In [None]:
# Base layer: street network edges colored by their level of service.
m = edges.explore(column='level_of_service', cmap="RdYlGn_r")

# Optional layer with the graph nodes, uncomment to add it:
# m = nodes.explore(
#     m=m,
#     column='level_of_service',
#     cmap="RdYlGn_r",
#     style_kwds={
#         "radius": 3,
#     },
# )

# Columns of the service points to show on the map.
stop_columns = [
    "stop_id",
    "parent_station",
    "stop_name",
    f"service_quality_{start_hour}h_{end_hour}h",
    f"interval_{start_hour}h_{end_hour}h",
    f"route_names_{start_hour}h_{end_hour}h",
    f"shape_directions_{start_hour}h_{end_hour}h",
    f"route_type_{start_hour}h_{end_hour}h",
    "route_type",
    "geometry",
]

# Marker appearance for the service points.
stop_marker_style = {
    "color": "black",  # Border color
    "weight": 1,  # Border thickness
    "opacity": 1.0,  # Border opacity
    "fillOpacity": 1,
    "radius": 6,
}

# Top layer: service points colored by their service quality, drawn on the same map.
m = points[stop_columns].explore(
    m=m,
    column=f"service_quality_{start_hour}h_{end_hour}h",
    cmap="RdYlGn_r",
    vmin=1,
    vmax=10,
    style_kwds=stop_marker_style,
)

m.save(f"{results_path}/PToffer_map.html")
# If the map does not render in the notebook, open the saved file instead:
# import webbrowser
# webbrowser.open(results_path + "/PToffer_map.html")
m

TODOS:
- gtfs.lf sometimes appears to have repeated rows (Should not affect results)

- sometimes there is a node added in an edge that has the same ls on both sides and the node has a much lower ls (This is inefficient)

- osmnx might not be needed and a custom implementation would be faster. Currently just using graph_to_gdfs and loading the osm file.

- Shape direction might be wrong (180º off)

- implement mobility data API