In [13]:
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt
import os
from datetime import datetime, timedelta

# Variables

In [2]:
# Bus network layouts covered by the scenarios
bus_network_list = ['basic', 'diagonal', 'ring', 'mature']
# Train headways in minutes: 10 to 30 inclusive, in steps of 5
train_headway_list = list(range(10, 31, 5))

# Network

In [3]:
# Load the node and edge tables of the selected network
network_name = "11-500"
network_path = f"data/network/{network_name}/"
node_df = pd.read_csv(os.path.join(network_path, "nodes.csv"))
link_df = pd.read_csv(os.path.join(network_path, "edges.csv"))

node_id_list = node_df['node_index'].tolist()
num_nodes = len(node_id_list)
print(f"Number of nodes: {num_nodes}")

# Split the node index range into two halves: the first half is "left", the rest is "right"
half_num_nodes = num_nodes // 2
left_node_id_list = list(range(half_num_nodes))
right_node_id_list = list(range(half_num_nodes, num_nodes))
print(f"Number of left nodes: {len(left_node_id_list)}")
print(f"Number of right nodes: {len(right_node_id_list)}")

# Hard-coded mobility node ids for each side
left_mobility_node_id_list = [116, 117, 118, 119, 120]
right_mobility_node_id_list = [237, 238, 239, 240, 241]

Number of nodes: 242
Number of left nodes: 121
Number of right nodes: 121


In [4]:
# Load the precomputed node-to-node distance matrix of the selected network
distance_matrix_file = os.path.join(network_path, "dist_matrix.npy")
distance_matrix = np.load(distance_matrix_file)
print(f"Distance matrix shape: {distance_matrix.shape}")

Distance matrix shape: (242, 242)


In [12]:
# Load the precomputed node-to-node travel time matrix of the selected network
tt_matrix_file = os.path.join(network_path, "tt_matrix.npy")
tt_matrix = np.load(tt_matrix_file)
print(f"Travel time matrix shape: {tt_matrix.shape}")

Travel time matrix shape: (242, 242)


# Create General Data

In [9]:
general_output_path = "data/gtfs/general/"
# exist_ok makes the cell safe to re-run without a separate existence check
os.makedirs(general_output_path, exist_ok=True)

# Nodes that are not stops; the right-side ids are the left-side ids shifted by 121
left_non_stops_node_id_list = [1,2,3,4,6,7,8,13,14,15,17,18,21,22,31,32,33,34,37,42,43,44,45,50,52,53,62,63,65,70,71,72,73,78,81,82,83,84,93,94,97,98,100,101,102,107,108,109,111,112,113,114]
right_non_stops_node_id_list = [i + 121 for i in left_non_stops_node_id_list]
non_stop_node_id_list = left_non_stops_node_id_list + right_non_stops_node_id_list
non_stop_node_ids = set(non_stop_node_id_list)

# Nodes 120 and 241 are exported as stations 'A' and 'B': node id -> (station_id, station_name)
hub_station_labels = {120: ('A', 'A Hub'), 241: ('B', 'B Hub')}

# Every station exposes this many stops, named '<station_id>-0' .. '<station_id>-5'
num_stops_included = 6
station_stop_transfer_times = '[' + ';'.join(['0'] * num_stops_included) + ']'

# Create stations_fp.txt
# Iterate over the columns instead of DataFrame.iterrows(): iterrows() upcasts each row to a
# single dtype, so an integer node_index sitting next to float coordinates would come back as
# a float and end up in the stop ids as e.g. '5.0-0'. The records are collected in a list and
# the frame is built once, rather than concatenating one-row frames inside the loop.
station_records = []
for node_index, pos_x, pos_y in zip(node_df['node_index'], node_df['pos_x'], node_df['pos_y']):
    node_id = int(node_index)
    if node_id in non_stop_node_ids:
        continue  # non-stop nodes do not become stations

    # Regular stations use the node id as both id and name; hubs use their letter labels
    station_id, station_name = hub_station_labels.get(node_id, (node_id, node_id))
    stops_included = "[" + ";".join(f"'{station_id}-{k}'" for k in range(num_stops_included)) + "]"

    station_records.append({
        'station_id': station_id,
        'station_name': station_name,
        'station_lat': pos_y,
        'station_lon': pos_x,
        'stops_included': stops_included,
        'station_stop_transfer_times': station_stop_transfer_times,
        'num_stops_included': num_stops_included,
    })

stations_fp_df = pd.DataFrame(
    station_records,
    columns=['station_id','station_name','station_lat','station_lon','stops_included','station_stop_transfer_times','num_stops_included'],
)

# Save stations_fp.txt
stations_fp_df.to_csv(os.path.join(general_output_path, "stations_fp.txt"), index=False)

In [10]:
# Create stops_fp.txt
def _parse_stops_included(value):
    """Return the stop ids held in one `stops_included` cell as a list of strings.

    Accepts the bracketed, quoted form written to stations_fp.txt
    ("['5-0';'5-1']"), a plain ';'- or ','-separated string, an iterable of
    ids, or a single scalar id. Blank entries are dropped.
    """
    if not isinstance(value, str):
        items = value if hasattr(value, '__iter__') else [value]
        return [str(item) for item in items]

    text = value.strip()
    if text.startswith('[') and text.endswith(']'):
        text = text[1:-1]
    text = text.replace("'", "").replace('"', '').strip()
    if not text:
        return []
    # ';' is the separator used by this notebook; ',' is accepted as a fallback
    separator = ';' if ';' in text else ','
    return [part.strip() for part in text.split(separator) if part.strip()]


stops_per_station = [_parse_stops_included(value) for value in stations_fp_df['stops_included']]

# Store the normalised 'id;id;...' form back on the frame explicitly. Assigning to the row
# yielded by iterrows() is not guaranteed to modify the frame, yet later cells split this
# column on ';' and therefore need the normalised form.
stations_fp_df['stops_included'] = [';'.join(stop_ids) for stop_ids in stops_per_station]

# One row per stop, built in a single construction instead of repeated pd.concat calls
stops_fp_df = pd.DataFrame(
    {'stop_id': [stop_id for stop_ids in stops_per_station for stop_id in stop_ids]},
    columns=['stop_id'],
)
stops_fp_df.to_csv(os.path.join(general_output_path, "stops_fp.txt"), index=False)

In [15]:
# Prepare agency_fp.txt: one (agency_id, agency_name) pair per operator
# NOTE(review): bus_network_list also contains 'mature', which has no agency here — confirm whether that is intended.
agency_rows = [
    (0, 'intercity-express'),
    (1, 'bus-basic'),
    (2, 'bus-ring'),
    (3, 'bus-diagonal'),
]
agency_df = pd.DataFrame(agency_rows, columns=['agency_id', 'agency_name'])
agency_df.to_csv(os.path.join(general_output_path, "agency_fp.txt"), index=False)

In [16]:
# Create calendar_fp.txt: a single service (id 0) flagged as running on all seven weekdays
weekday_columns = ['monday', 'tuesday', 'wednesday', 'thursday', 'friday', 'saturday', 'sunday']
calendar_columns = {
    'service_id': [0],
    'start_date': [20000101],
    'end_date': [20991231],
}
calendar_columns.update({weekday: [1] for weekday in weekday_columns})
calendar_df = pd.DataFrame(calendar_columns)
calendar_df.to_csv(os.path.join(general_output_path, "calendar_fp.txt"), index=False)

In [28]:
# Create street_station_transfers_fp.txt
# Every station is paired with itself as the closest station, with a constant transfer time of 60
all_station_ids = stations_fp_df['station_id'].tolist()

street_station_transfers_fp_df = pd.DataFrame({
    'node_id': all_station_ids,
    'closest_station_id': all_station_ids,
    'street_station_transfer_time': 60
})

# The node_id column must hold network node ids, so the hub labels A and B
# are mapped back to 120 and 241 (closest_station_id keeps the letters)
for hub_label, hub_node_id in (('A', 120), ('B', 241)):
    is_hub = street_station_transfers_fp_df['node_id'] == hub_label
    street_station_transfers_fp_df.loc[is_hub, 'node_id'] = hub_node_id

street_station_transfers_fp_df.to_csv(os.path.join(general_output_path, "street_station_transfers_fp.txt"), index=False)

In [17]:
# Create transfers_fp.txt
# For each station, the transfer time between its stops is 30 seconds
transfer_records = []
for raw_stops_included in stations_fp_df['stops_included']:
    # stops_included may still be in the bracketed, quoted form "['5-0';'5-1';...]" that is
    # written to stations_fp.txt, or already be a plain '5-0;5-1;...' string. Strip the
    # wrapper and quotes so the stop ids match stops_fp.txt in either case.
    stops_included = [
        part.strip(" '\"")
        for part in str(raw_stops_included).strip().strip('[]').split(';')
    ]
    stops_included = [stop_id for stop_id in stops_included if stop_id]

    # Every ordered pair of distinct stops within the station gets a transfer entry
    transfer_records.extend(
        {'from_stop_id': from_stop_id, 'to_stop_id': to_stop_id, 'min_transfer_time': 30}
        for i, from_stop_id in enumerate(stops_included)
        for j, to_stop_id in enumerate(stops_included)
        if i != j
    )

# Build the frame once instead of concatenating one-row frames inside the loops
transfers_fp_df = pd.DataFrame(transfer_records, columns=['from_stop_id', 'to_stop_id', 'min_transfer_time'])
transfers_fp_df.to_csv(os.path.join(general_output_path, "transfers_fp.txt"), index=False)

# Create GTFS for Basic Bus Network

In [23]:
def expand_timetable(base_trip_data: dict, study_start_str: str, study_end_str: str, headway_minutes) -> pd.DataFrame:
    """
    Replicate one base trip at a fixed headway across a study period.

    Copies of the base trip are shifted later by multiples of the headway while
    the first row's departure time is before the study end (exclusive), and
    shifted earlier while the first row's departure time is at or after the
    study start (inclusive). The base trip itself is always kept.

    Parameters:
    - base_trip_data (dict): Column data of the base trip; must provide
      'trip_id', 'arrival_time' and 'departure_time' (times as HH:MM:SS strings).
    - study_start_str (str): Start of the study period (HH:MM:SS).
    - study_end_str (str): End of the study period (HH:MM:SS).
    - headway_minutes (int): Headway between consecutive trips, in minutes.

    Returns:
    - pd.DataFrame: All trips stacked in chronological order, with the time
      columns as datetimes. The base trip is renamed to '<trip_id>-0'; later
      copies are numbered '<trip_id>-1', '<trip_id>-2', ... and the numbering
      continues with the earlier copies, nearest to the base trip first.

    Raises:
    - ValueError: If headway_minutes is not positive (the expansion would
      otherwise never terminate).
    """
    if headway_minutes <= 0:
        raise ValueError(f"headway_minutes must be positive, got {headway_minutes!r}")

    # Parse the study window; strptime and to_datetime use the same default date,
    # so the values are directly comparable
    study_start_time = datetime.strptime(study_start_str, '%H:%M:%S')
    study_end_time = datetime.strptime(study_end_str, '%H:%M:%S')
    headway = timedelta(minutes=headway_minutes)

    # Turn the base trip into a frame with datetime time columns
    base_df = pd.DataFrame(base_trip_data)
    time_columns = ('arrival_time', 'departure_time')
    for column in time_columns:
        base_df[column] = pd.to_datetime(base_df[column], format='%H:%M:%S')

    base_trip_id = base_df['trip_id'].iloc[0]
    first_departure = base_df['departure_time'].iloc[0]

    def _shifted_trip(offset, trip_id):
        """Return a copy of the base trip moved by `offset` and labelled `trip_id`."""
        trip_df = base_df.copy()
        for column in time_columns:
            trip_df[column] = trip_df[column] + offset
        trip_df['trip_id'] = trip_id
        return trip_df

    all_trips = [base_df]
    counter = 1

    # --- Later trips: keep adding the headway until the study end is reached ---
    offset = headway
    while first_departure + offset < study_end_time:
        all_trips.append(_shifted_trip(offset, f"{base_trip_id}-{counter}"))
        offset += headway
        counter += 1

    # --- Earlier trips: keep subtracting the headway until the study start is passed ---
    offset = headway
    while first_departure - offset >= study_start_time:
        # Insert at the front so the list stays in chronological order
        all_trips.insert(0, _shifted_trip(-offset, f"{base_trip_id}-{counter}"))
        offset += headway
        counter += 1

    # Stack all trips generated from this base trip
    expanded_df = pd.concat(all_trips, ignore_index=True)

    # The base trip gets the suffix 0
    expanded_df.loc[expanded_df['trip_id'] == base_trip_id, 'trip_id'] = f"{base_trip_id}-0"
    return expanded_df

In [None]:
# Scenario settings for the basic bus network
BUS_HEADWAY = 10          # minutes between consecutive bus trips
STUDY_START = "00:00:00"  # study period start (HH:MM:SS)
STUDY_END = "08:00:00"    # study period end (HH:MM:SS)

# Output folder for this network / headway combination
basic_output_path = f"data/gtfs/bus/basic/bus_headway_{BUS_HEADWAY}/"
if not os.path.exists(basic_output_path):
    os.makedirs(basic_output_path)

# Create routes_fp.txt: route ids are numbered from 1 in the order of the names below
route_short_names = ['bus-basic-we-left', 'bus-basic-ns-left', 'bus-basic-we-right', 'bus-basic-ns-right']
routes_df = pd.DataFrame({
    'route_id': list(range(1, len(route_short_names) + 1)),
    'route_short_name': route_short_names,
    'route_desc': ['bus'] * len(route_short_names),
})
routes_df.to_csv(os.path.join(basic_output_path, "routes_fp.txt"), index=False)

In [25]:
# Create stop_times_fp.txt
# Each base trip below is replicated every BUS_HEADWAY minutes between STUDY_START and STUDY_END.

# Time profiles shared by the base trips: 11 stops, arrival and departure coincide at every stop
we_base_times = ['00:54:30', '00:56:00', '00:57:30', '00:59:00', '01:00:30', '01:02:00', '01:03:30', '01:05:00', '01:06:30', '01:08:00', '01:09:30']
ns_base_times = ['00:59:30', '01:01:00', '01:02:30', '01:04:00', '01:05:30', '01:07:00', '01:08:30', '01:10:00', '01:11:30', '01:13:00', '01:14:30']

# Stop ids along each route.
# NOTE(review): the hub stops are written as '120-x' / '241-x' here, while the stations table
# renames the stops of those hubs to 'A-x' / 'B-x' — confirm which ids the consumer expects.
we_left_stop_ids = ['54-1', '55-1', '56-1', '57-1', '117-1', '120-1', '118-1', '58-1', '59-1', '60-1', '61-1']
ns_left_stop_ids = ['110-2', '99-2', '88-2', '77-2', '119-2', '120-2', '116-2', '38-2', '27-2', '16-2', '5-2']
we_right_stop_ids = ['175-1', '176-1', '177-1', '178-1', '238-1', '241-1', '239-1', '179-1', '180-1', '181-1', '182-1']
ns_right_stop_ids = ['231-2', '220-2', '209-2', '198-2', '240-2', '241-2', '237-2', '159-2', '148-2', '137-2', '126-2']

ascending_stop_sequence = list(range(1, 12))
# NOTE(review): direction-1 trips keep the same stop order and times as direction 0 and only
# number stop_sequence 11..1, so their times decrease as stop_sequence increases — confirm this
# is what the consumer expects (GTFS expects times to be non-decreasing along stop_sequence).
descending_stop_sequence = ascending_stop_sequence[::-1]


def _base_trip(trip_id, stop_ids, times, stop_sequence):
    """Assemble one base-trip dict; trip_id has the form '<route_id>-<direction>'."""
    return {
        'trip_id': trip_id,
        'stop_id': list(stop_ids),
        'arrival_time': list(times),
        'departure_time': list(times),
        'stop_sequence': list(stop_sequence),
    }


# For w-e direction in the left side
we_left_base_trip_times_0 = _base_trip('1-0', we_left_stop_ids, we_base_times, ascending_stop_sequence)
we_left_base_trip_times_1 = _base_trip('1-1', we_left_stop_ids, we_base_times, descending_stop_sequence)
# For n-s direction in the left side
ns_left_base_trip_times_0 = _base_trip('2-0', ns_left_stop_ids, ns_base_times, ascending_stop_sequence)
ns_left_base_trip_times_1 = _base_trip('2-1', ns_left_stop_ids, ns_base_times, descending_stop_sequence)
# For w-e direction in the right side
we_right_base_trip_times_0 = _base_trip('3-0', we_right_stop_ids, we_base_times, ascending_stop_sequence)
we_right_base_trip_times_1 = _base_trip('3-1', we_right_stop_ids, we_base_times, descending_stop_sequence)
# For n-s direction in the right side
ns_right_base_trip_times_0 = _base_trip('4-0', ns_right_stop_ids, ns_base_times, ascending_stop_sequence)
ns_right_base_trip_times_1 = _base_trip('4-1', ns_right_stop_ids, ns_base_times, descending_stop_sequence)

# Expand every base trip over the study period using the bus headway
base_schedules = [
    we_left_base_trip_times_0,
    we_left_base_trip_times_1,
    ns_left_base_trip_times_0,
    ns_left_base_trip_times_1,
    we_right_base_trip_times_0,
    we_right_base_trip_times_1,
    ns_right_base_trip_times_0,
    ns_right_base_trip_times_1,
]

all_schedules_list = [
    expand_timetable(base_schedule, STUDY_START, STUDY_END, BUS_HEADWAY)
    for base_schedule in base_schedules
]

final_timetable = pd.concat(all_schedules_list, ignore_index=True)

# Write the times back as HH:MM:SS strings
for time_column in ('arrival_time', 'departure_time'):
    final_timetable[time_column] = final_timetable[time_column].dt.strftime('%H:%M:%S')

stop_times_fp_df = final_timetable
stop_times_fp_df.to_csv(os.path.join(basic_output_path, "stop_times_fp.txt"), index=False)

In [27]:
# Create trips_fp.txt
# Trip ids have the form '<route_id>-<direction_id>-<counter>'; the first two parts
# identify the route and direction of each trip. All trips use service 0.
all_trip_ids = final_timetable['trip_id'].unique().tolist()

trip_records = []
for trip_id in all_trip_ids:
    route_part, direction_part = trip_id.split('-')[:2]
    trip_records.append({
        'trip_id': trip_id,
        'route_id': int(route_part),
        'service_id': 0,
        'direction_id': int(direction_part),
    })

# Build the frame once: concatenating one-row frames onto an empty frame is quadratic
# and relies on pandas' deprecated handling of empty frames in concat
trips_fp_df = pd.DataFrame(trip_records, columns=['trip_id','route_id','service_id','direction_id'])

trips_fp_df.to_csv(os.path.join(basic_output_path, "trips_fp.txt"), index=False)