In [1]:
import pandas as pd
import numpy as np
import os
from itertools import product
from tqdm import tqdm

In [2]:
# ---- Scenario grid ----
# Demand sizes: 100, 150, ..., 600
demand_size_list = list(range(100, 601, 50))
# Demand split ratios (Intra Modal, %): 10, 30, ..., 90
demand_split_ratio_list = list(range(10, 91, 20))
# Random seeds; one demand file exists per (size, split ratio, seed) combination
random_seed_list = [3, 6, 9]

# ---- Network / PT settings ----
# Name of the network; used to build the data paths below
network_name = "11-500"

# Walking speed in m/s, used to turn walking distances into walking times
WALKING_SPEED = 1.33

# Names of the PT networks for which demand files are generated
pt_network_name_list = ["basic", "ring", "diagonal", "mature"]

# ---- Paths ----
# Directory holding the input (total) demand files
demand_dir = f"data/demand/{network_name}/total/"

# Root directory for the generated PT demand files
output_dir = f"data/demand/{network_name}/pt/"

# Network

In [3]:
# Load Network Files
# NOTE: `network_name` is intentionally NOT re-assigned here. It is defined once in the
# configuration cell, where `demand_dir` and `output_dir` are derived from it; redefining
# it in this cell would let the loaded network silently diverge from the demand paths.
# The trailing slash matters: file names are appended to `network_path` by concatenation.
network_path = f"data/network/{network_name}/"
node_df = pd.read_csv(network_path + "nodes.csv")
link_df = pd.read_csv(network_path + "edges.csv")

node_id_list = node_df['node_index'].tolist()
print(f"Number of nodes: {len(node_id_list)}")

# Split the node ids into a "left" and a "right" half by position.
# NOTE(review): this assumes node indices are the contiguous range 0..N-1 — confirm
# against nodes.csv.
num_left_nodes = len(node_id_list) // 2
left_node_id_list = list(range(0, num_left_nodes))
right_node_id_list = list(range(num_left_nodes, len(node_id_list)))
print(f"Number of left nodes: {len(left_node_id_list)}")
print(f"Number of right nodes: {len(right_node_id_list)}")

# Hard-coded mobility node ids for each half (the last five ids of each half for the
# 242-node network shown in the output below).
left_mobility_node_id_list = [116, 117, 118, 119, 120]
right_mobility_node_id_list = [237, 238, 239, 240, 241]

Number of nodes: 242
Number of left nodes: 121
Number of right nodes: 121


In [4]:
# Read the node-to-node distance matrix stored next to the network files
# (`network_path` already ends with a path separator).
distance_matrix = np.load(f"{network_path}dist_matrix.npy")
print(f"Distance matrix shape: {distance_matrix.shape}")

Distance matrix shape: (242, 242)


In [5]:
# Read the node-to-node travel time matrix stored next to the network files
# (`network_path` already ends with a path separator).
tt_matrix = np.load(f"{network_path}tt_matrix.npy")
print(f"Travel time matrix shape: {tt_matrix.shape}")

Travel time matrix shape: (242, 242)


# PT GTFS

In [6]:
def find_all_station_ids(stop_times_filepath, station_aliases=None):
    """Return the sorted, unique integer station ids used in a stop_times file.

    A stop id has the form ``<station>-<suffix>`` (e.g. ``54-1`` -> station ``54``).
    Station tokens listed in ``station_aliases`` are replaced by their numeric node id
    before conversion to ``int``.

    Args:
        stop_times_filepath: Path to a CSV file with a ``stop_id`` column.
        station_aliases: Optional mapping from a non-numeric station token to its
            integer id. Defaults to ``{'A': 120, 'B': 241}``.

    Returns:
        Sorted list of unique station ids as Python ``int``.

    Raises:
        ValueError: If a station token is neither numeric nor a known alias.
    """
    if station_aliases is None:
        station_aliases = {'A': 120, 'B': 241}

    # Read stop_id as text so purely numeric ids (e.g. "54") still support .split().
    stop_times_df = pd.read_csv(stop_times_filepath, dtype={'stop_id': str})

    # De-duplicate AFTER alias replacement: an alias and its numeric id (e.g. 'A' and
    # '120') refer to the same station and must not both end up in the result.
    station_ids = set()
    for stop_id in stop_times_df['stop_id'].unique().tolist():
        # Get station id from stop id: 54-1 -> 54
        station_token = stop_id.split('-')[0]
        station_ids.add(int(station_aliases.get(station_token, station_token)))

    return sorted(station_ids)

In [7]:
def find_nearest_station(node_id, station_id_list, distance_matrix, walking_speed):
    """Find the station(s) closest to ``node_id``.

    If several stations share the minimum distance, all of them are returned in the
    order in which they appear in ``station_id_list``.

    Args:
        node_id: Index of the node in ``distance_matrix``.
        station_id_list: Candidate station ids (indices into ``distance_matrix``).
        distance_matrix: Matrix indexed as ``distance_matrix[node_id][station_id]``.
        walking_speed: Walking speed in distance units per second; must be positive.

    Returns:
        Tuple ``(nearest_station_ids, min_distance, travel_time_in_seconds)`` where the
        travel time is ``min_distance / walking_speed`` truncated to an ``int``.

    Raises:
        ValueError: If ``walking_speed`` is not positive, or if no station could be
            selected (e.g. ``station_id_list`` is empty).
    """
    if walking_speed <= 0:
        raise ValueError(f"walking_speed must be positive, got {walking_speed}")

    # Look up the node's row once instead of re-indexing the matrix for every station.
    distances_from_node = distance_matrix[node_id]

    min_distance = float('inf')
    nearest_station_ids = []
    # Single pass: a strictly smaller distance resets the tie list, an equal one extends it.
    for station_id in station_id_list:
        dist = distances_from_node[station_id]
        if dist < min_distance:
            min_distance = dist
            nearest_station_ids = [station_id]
        elif dist == min_distance:
            nearest_station_ids.append(station_id)

    if not nearest_station_ids:
        # Previously this case surfaced as an OverflowError from int(inf / speed).
        raise ValueError("No nearest station found: station_id_list is empty or has no comparable distances")

    travel_time_in_seconds = int(min_distance / walking_speed)  # in seconds
    return nearest_station_ids, min_distance, travel_time_in_seconds

# Create PT Demand

In [8]:
# Generate one PT demand file per (PT network, demand size, split ratio, seed).
# Each output file is the input demand file plus, for every request:
#   - the nearest station(s) to its start and end node,
#   - walking distance / time to and from those stations,
#   - the departure time at the start station (request time + walk time).
counter = 0
all_demand_combinations = list(product(demand_size_list, demand_split_ratio_list, random_seed_list))

for pt_network_name in pt_network_name_list:
    # Find all station ids for the PT network
    stop_times_filepath = f'data/GTFS/pt/{pt_network_name}_pt_train-10_bus-10/stop_times_fp.txt'
    station_id_list = find_all_station_ids(stop_times_filepath)
    print(f"PT Network: {pt_network_name}, Number of stations: {len(station_id_list)}")

    # Create PT Demand Save Directory
    pt_demand_output_dir = os.path.join(output_dir, pt_network_name)
    os.makedirs(pt_demand_output_dir, exist_ok=True)

    # The nearest station of a node depends only on the node and on this PT network's
    # stations, so each node is looked up once and reused across all demand files.
    nearest_station_by_node = {}

    # Process each demand file
    for demand_size, demand_split_ratio, random_seed in tqdm(all_demand_combinations):
        demand_filepath = os.path.join(demand_dir, f"ds{demand_size}_dsr{demand_split_ratio}_rs{random_seed}.csv")
        demand_df = pd.read_csv(demand_filepath)

        counter += 1

        start_node_ids = demand_df['start'].tolist()
        end_node_ids = demand_df['end'].tolist()

        # Resolve every node that has not been seen yet for this PT network
        for node_id in set(start_node_ids) | set(end_node_ids):
            if node_id not in nearest_station_by_node:
                nearest_station_by_node[node_id] = find_nearest_station(
                    node_id, station_id_list, distance_matrix, WALKING_SPEED
                )

        # One (station_ids, distance, travel_time) tuple per request, in row order
        start_lookups = [nearest_station_by_node[node_id] for node_id in start_node_ids]
        end_lookups = [nearest_station_by_node[node_id] for node_id in end_node_ids]

        # Add the new columns in one shot (same names and order as before). Building
        # whole columns avoids writing floats cell-by-cell into int-initialised columns,
        # which relies on implicit upcasting that pandas has deprecated.
        # Station id lists are stored as text with ';' separators, e.g. "[5; 7]".
        demand_df['start_station_ids'] = [str(ids).replace(",", ";") for ids, _dist, _tt in start_lookups]
        demand_df['end_station_ids'] = [str(ids).replace(",", ";") for ids, _dist, _tt in end_lookups]

        # Distance columns take their dtype from the distance matrix values
        demand_df['walk_start_to_station_distance'] = [dist for _ids, dist, _tt in start_lookups]
        demand_df['walk_start_to_station_tt'] = [tt for _ids, _dist, tt in start_lookups]

        demand_df['walk_station_to_end_distance'] = [dist for _ids, dist, _tt in end_lookups]
        demand_df['walk_station_to_end_tt'] = [tt for _ids, _dist, tt in end_lookups]

        # Departure at the start station = request time + walking time to the station
        demand_df['station_departure_time'] = demand_df['walk_start_to_station_tt'] + demand_df['rq_time']

        # Save the new demand file
        output_filepath = os.path.join(pt_demand_output_dir, f"pt_ds{demand_size}_dsr{demand_split_ratio}_rs{random_seed}.csv")
        demand_df.to_csv(output_filepath, index=False)

print(f"Total scenarios processed: {counter}")


PT Network: basic, Number of stations: 42


100%|██████████| 165/165 [00:06<00:00, 24.27it/s]


PT Network: ring, Number of stations: 82


100%|██████████| 165/165 [00:06<00:00, 25.04it/s]


PT Network: diagonal, Number of stations: 114


100%|██████████| 165/165 [00:07<00:00, 22.11it/s]


PT Network: mature, Number of stations: 138


100%|██████████| 165/165 [00:08<00:00, 20.35it/s]

Total scenarios processed: 660



