In [1]:
# !pip install pyrosm
# !pip install folium
# !pip install geojson
# !pip install r5py

In [3]:
import geopandas as gpd
import pandas as pd
import datetime
import time 
import r5py
import os

### 1. Building transportation network using OSM and GTFS data

In [4]:

# Root of the project's data directory. The default is the original author's
# local checkout; set the TRANSIT_DASHBOARD_DATA_DIR environment variable
# (for example to "../../data") to run the notebook on another machine.
DATA_DIR = os.environ.get(
    "TRANSIT_DASHBOARD_DATA_DIR",
    "/Users/max/Desktop/Transit_Dashboard/data",
)
OSM_path = os.path.join(DATA_DIR, "OSM_data", "Toronto.osm.pbf")
GTFS_path = os.path.join(DATA_DIR, "GTFS_data", "raw", "latest_feed_version_2024-10-22.zip")

# Ensure the file paths are correct before starting the network build, so a
# wrong path is reported by name instead of surfacing from inside r5py.
for _input_path in (OSM_path, GTFS_path):
    if not os.path.isfile(_input_path):
        raise FileNotFoundError(f"Required input file not found: {_input_path}")

# Make the R5 transport network using OSM and GTFS, and time how long it takes.
network_start = time.time()
transport_network = r5py.TransportNetwork(OSM_path, [GTFS_path])
network_total_time = time.time() - network_start
print("Transportation network building time", network_total_time, "seconds.")

# Modes of interest for this analysis: public transit plus walking.
transport_modes = [
    r5py.TransportMode.TRANSIT,
    r5py.TransportMode.WALK,
]

Transportion network building time 61.77397012710571 seconds.


### 2. Load Origins and Destinations for travel time calculation.

In [9]:
import geopandas as gpd
import time
import r5py
import datetime

def _ensure_id_column(points, id_col, role, role_plural):
    '''
    Make sure `points` has an 'id' column, copying it from `id_col` if needed.

    An existing 'id' column is left untouched (it takes precedence over `id_col`).
    Raises ValueError when neither 'id' nor `id_col` is present.
    '''
    if "id" in points.columns:
        return points
    if id_col not in points.columns:
        raise ValueError(f"{role} ID column '{id_col}' not found in the {role_plural} file.")
    points['id'] = points[id_col]
    return points


def compute_travel_time_matrix(origins_file_path, destinations_file_path, transport_network, output_path, origin_id_col='', destination_id_col='', transport_modes=None, departure=None):
    
    '''
    This function computes the travel time matrix between origins and destinations using a transport network.
    
    Parameters:
    origins_file_path (str): Path to the GeoJSON file containing the origin points (e.g. census tract centroids).
    destinations_file_path (str): Path to the GeoJSON file containing the destination points.
    transport_network (TransportNetwork): The r5py transport network object.
    output_path (str): Path to save the resulting travel time matrix CSV file. Missing parent
        directories are created before the computation starts.
    origin_id_col (str, optional): The name of the column in the origins file that serves as the ID.
        Only used when the file has no 'id' column.
    destination_id_col (str, optional): The name of the column in the destinations file that serves as the ID.
        Only used when the file has no 'id' column.
    transport_modes (list, optional): r5py transport modes to route with.
        Defaults to [r5py.TransportMode.TRANSIT].
    departure (datetime.datetime, optional): Departure time forwarded to r5py. When omitted, no
        departure is passed and r5py uses its own default (NOTE(review): presumably the current
        time, which may fall outside the GTFS feed's service dates -- confirm for the installed
        r5py version and pass an explicit value for reproducible results).
    
    Returns:
    travel_time_matrix (Dataframe): The result of r5py's compute_travel_times(); the same frame
        that is written to output_path.
    
    Raises:
    ValueError: If a file has no 'id' column and the given ID column is not in it either.
    '''
    
    # Load the GeoJSON files
    origins = gpd.read_file(origins_file_path)
    destinations = gpd.read_file(destinations_file_path)
    
    # r5py identifies points by an 'id' column; derive it from the caller's column when absent
    origins = _ensure_id_column(origins, origin_id_col, "Origin", "origins")
    destinations = _ensure_id_column(destinations, destination_id_col, "Destination", "destinations")
    
    # Create the output directory up front so a missing folder cannot discard
    # the result of a long computation at the very last step.
    # Non-path targets (e.g. file-like buffers) are passed to to_csv unchanged.
    if isinstance(output_path, (str, os.PathLike)):
        output_dir = os.path.dirname(os.fspath(output_path))
        if output_dir:
            os.makedirs(output_dir, exist_ok=True)
    
    computer_options = {
        "origins": origins,
        "destinations": destinations,
        "transport_modes": (
            [r5py.TransportMode.TRANSIT] if transport_modes is None else list(transport_modes)
        ),
    }
    # Only forward a departure the caller actually chose; otherwise keep r5py's default.
    if departure is not None:
        computer_options["departure"] = departure
    
    # Start timer
    start = time.time()
    
    # Compute travel time matrix
    travel_time_matrix = r5py.TravelTimeMatrixComputer(
        transport_network,
        **computer_options,
    ).compute_travel_times()
    
    # End timer
    running_time = time.time() - start
    print(f"Running time: {running_time} seconds")
    
    # Save the results to a CSV file
    travel_time_matrix.to_csv(output_path, index=False)
    return travel_time_matrix


In [None]:
# Census tract to census tract: the same centroid file is used as both the
# origin set and the destination set, with 'CTUID' as the ID column on each side.
ct_centroids_path = "/Users/max/Desktop/Transit_Dashboard/data/census_tract_data/toronto_ct_centroids1.geojson"
compute_travel_time_matrix(
    ct_centroids_path,
    ct_centroids_path,
    transport_network,
    "../results/travel_time_matrix_census_tract.csv",
    origin_id_col='CTUID',
    destination_id_col='CTUID',
)

Running time: 48.828712940216064 seconds


Unnamed: 0,from_id,to_id,travel_time
0,5350128.04,5350128.04,0.0
1,5350128.04,5350363.06,81.0
2,5350128.04,5350363.07,90.0
3,5350128.04,5350378.23,90.0
4,5350128.04,5350378.24,94.0
...,...,...,...
339884,5350017.02,5350210.04,79.0
339885,5350017.02,5350062.03,38.0
339886,5350017.02,5350062.04,33.0
339887,5350017.02,5350017.01,21.0


In [10]:
# Dissemination area centroids (ID column 'DAUID') to healthcare facilities
# (ID column 'ADDRESS_POINT_ID').
da_centroids_path = "/Users/max/Desktop/Transit_Dashboard/data/census_tract_data/toronto_da_centroids.geojson"
healthcare_points_path = "/Users/max/Desktop/Transit_Dashboard/data/key_destination_data/Healthcare.geojson"
compute_travel_time_matrix(
    da_centroids_path,
    healthcare_points_path,
    transport_network,
    "../results/travel_time_matrix_hospitals.csv",
    origin_id_col='DAUID',
    destination_id_col='ADDRESS_POINT_ID',
)

Running time: 265.77284479141235 seconds


Unnamed: 0,from_id,to_id,travel_time
0,35200002,7792696,106.0
1,35200002,9638452,67.0
2,35200002,54248,
3,35200002,6362828,52.0
4,35200002,9035631,107.0
...,...,...,...
374295,35205069,14120064,
374296,35205069,3256200,
374297,35205069,9942216,
374298,35205069,3232727,


In [21]:
import pandas as pd

def get_top_3_destinations(raw_travel_time_matrix_path, output_path, n=3):
    """
    Processes a travel time matrix to find the closest destinations for each origin.

    Parameters:
    raw_travel_time_matrix_path (str): Path to the input CSV file containing the travel time matrix.
        Must have the columns 'from_id', 'to_id' and 'travel_time'.
    output_path (str): Path to save the output CSV file with the closest destinations for each origin.
    n (int, optional): How many destinations to keep per origin. Defaults to 3.

    Returns:
    pd.DataFrame: Columns 'from_id', 'to_id', 'travel_time', holding up to `n` destinations
                  (with the smallest travel time) for each origin, excluding self-loops and
                  pairs with a missing travel time, ordered by origin and then travel time.
                  Origins with no usable destination do not appear in the result.

    Raises:
    ValueError: If `n` is smaller than 1.
    """
    if n < 1:
        raise ValueError(f"n must be at least 1, got {n}.")

    # Load the travel time matrix CSV file
    travel_time_matrix = pd.read_csv(raw_travel_time_matrix_path)

    # Filter out rows where from_id == to_id (self-loops)
    is_self_loop = travel_time_matrix['from_id'] == travel_time_matrix['to_id']
    candidates = travel_time_matrix.loc[~is_self_loop]

    # Rows without a travel time (presumably unreachable pairs) must not count as
    # "closest": NaN sorts last, so without this step an origin with fewer than
    # `n` reachable destinations would be padded with NaN rows.
    candidates = candidates.dropna(subset=['travel_time'])

    # Order by origin, then travel time; 'to_id' breaks ties so the selection is
    # reproducible between runs. head(n) then keeps the first n rows per origin.
    top_destinations = (
        candidates
        .sort_values(['from_id', 'travel_time', 'to_id'])
        .groupby('from_id', sort=False)
        .head(n)
    )

    # Select relevant columns (row order from the sort above is preserved)
    top_destinations = top_destinations[['from_id', 'to_id', 'travel_time']]

    # Save the result to a CSV file without including the index
    top_destinations.to_csv(output_path, index=False)

    return top_destinations

# "TTM" in the file names below is short for Travel Time Matrix.
healthcare_ttm_csv = "../results/travel_time_matrix_hospitals.csv"
top_3_healthcare_csv = "../results/top_3_healthcare_TTM.csv"

top_3 = get_top_3_destinations(healthcare_ttm_csv, top_3_healthcare_csv)

# Read the file that was just written back in and preview its first rows.
travel_time_matrix = pd.read_csv(top_3_healthcare_csv)
display(travel_time_matrix.head(20))

Unnamed: 0,from_id,to_id,travel_time
0,35200002,4616608,51.0
1,35200002,6362828,52.0
2,35200002,9638452,67.0
3,35200003,6362828,45.0
4,35200003,4616608,51.0
5,35200003,9638452,65.0
6,35200004,6362828,44.0
7,35200004,4616608,51.0
8,35200004,9638452,64.0
9,35200005,6362828,41.0
