In [2]:
import logging
import partridge as ptg

# capture logs in notebook
logger = logging.getLogger()
logger.setLevel(logging.DEBUG)
logging.debug("test")

# load a GTFS of AC Transit
# path = 'gtfs.zip'
path = 'kr_gtfs.zip'
_date, service_ids = ptg.read_busiest_date(path)
view = {'trips.txt': {'service_id': service_ids}}
feed = ptg.load_feed(path, view)

DEBUG:root:test


In [3]:
import geopandas as gpd
import pyproj
from shapely.geometry import Point

# convert all known stops in the schedule to shapes in a GeoDataFrame
gdf = gpd.GeoDataFrame(
    {"stop_id": feed.stops.stop_id.tolist()},
    geometry=[
        Point(lon, lat)
        for lat, lon in zip(
            feed.stops.stop_lat,
            feed.stops.stop_lon)
    ]
)
gdf = gdf.set_index("stop_id")

# CRS 수정
gdf.crs = 'epsg:4326'

# re-cast to meter-based projection to allow for distance calculations
centroid = gdf.iloc[0].geometry.centroid
aeqd_crs = pyproj.CRS(
    proj='aeqd',
    ellps='WGS84',
    datum='WGS84',

    lat_0=centroid.y,
    lon_0=centroid.x
)

gdf = gdf.to_crs(crs=aeqd_crs)

In [103]:
feed

<partridge.gtfs.Feed at 0x2163d727050>

In [104]:
feed.trips

Unnamed: 0,route_id,service_id,trip_id
0,RR_ACC1_S-1-01-1D,B1,RR_ACC1_S-1-01-1D_Ord001
1,RR_ACC1_S-1-01-1D,B1,RR_ACC1_S-1-01-1D_Ord002
2,RR_ACC1_S-1-01-1D,B1,RR_ACC1_S-1-01-1D_Ord003
3,RR_ACC1_S-1-01-1D,B1,RR_ACC1_S-1-01-1D_Ord004
4,RR_ACC1_S-1-01-1D,B1,RR_ACC1_S-1-01-1D_Ord005
...,...,...,...
8247,RR_ACC1_S-1-WS-1U,B1,RR_ACC1_S-1-WS-1U_Ord246
8248,RR_ACC1_S-1-WS-1U,B1,RR_ACC1_S-1-WS-1U_Ord247
8249,RR_ACC1_S-1-WS-1U,B1,RR_ACC1_S-1-WS-1U_Ord248
8250,RR_ACC1_S-1-WS-1U,B1,RR_ACC1_S-1-WS-1U_Ord249


In [105]:
feed.stops

Unnamed: 0,stop_id,stop_name,stop_lat,stop_lon
0,RS_ACC1_S-1-0150,서울역(지하),37.555870,126.972100
1,RS_ACC1_S-1-0151,시청,37.565710,126.977120
2,RS_ACC1_S-1-0152,종각,37.570160,126.982920
3,RS_ACC1_S-1-0153,종로3가,37.570420,126.992110
4,RS_ACC1_S-1-0154,종로5가,37.570930,127.001850
...,...,...,...,...
722,RS_ACC1_S-1-4925,걸포북변,37.631639,126.705776
723,RS_ACC1_S-1-4926,사우(김포시청),37.620320,126.719766
724,RS_ACC1_S-1-4927,풍무,37.612428,126.732449
725,RS_ACC1_S-1-4928,고촌,37.601213,126.770326


In [66]:
# let's use this example origin and destination
# to find the time it would take to go from one to another
# from_stop_name = "Santa Clara Av & Mozart St"
# to_stop_name = "10th Avenue SB"
from_stop_name = "가천대"
to_stop_name = "수서"

# if we depart at 8:30 AM, schedule should suggest:
# look at all trips from that stop that are after the depart time
departure_secs = 8.5 * 60 * 60

# get all information, including the stop ids, for the start and end nodes
from_stop = feed.stops[feed.stops.stop_name == from_stop_name].head(1).squeeze()
to_stop = feed.stops[feed.stops.stop_name == to_stop_name].head(1).squeeze()

# 특정 정류장을 지정하기 위함
# to_stop = feed.stops[feed.stops.stop_name == to_stop_name].iloc[1].squeeze()

# for check
to_stop2 = feed.stops[feed.stops.stop_name == to_stop_name]

# extract just the stop ids
from_stop_id = from_stop.stop_id
to_stop_id = to_stop.stop_id

In [67]:
from_stop

stop_id      RS_ACC1_S-1-1851
stop_name                 가천대
stop_lat             37.44861
stop_lon             127.1267
Name: 312, dtype: object

In [68]:
to_stop

stop_id      RS_ACC1_S-1-0339
stop_name                  수서
stop_lat              37.4874
stop_lon            127.10199
Name: 90, dtype: object

In [69]:
to_stop2

Unnamed: 0,stop_id,stop_name,stop_lat,stop_lon
90,RS_ACC1_S-1-0339,수서,37.4874,127.10199
148,RS_ACC1_S-1-1030,수서,37.48725,127.10169


In [77]:
from copy import copy
from typing import Any
from typing import Dict
from typing import List

# assume all xfers are 3 minutes
TRANSFER_COST = (3 * 60)

def get_trip_ids_for_stop(feed, stop_id: str, departure_time: int):
    """Takes a stop and departure time and get associated trip ids."""
    mask_1 = feed.stop_times.stop_id == stop_id
    mask_2 = feed.stop_times.departure_time >= departure_time

    # extract the list of qualifying trip ids
    potential_trips = feed.stop_times[mask_1 & mask_2].trip_id.unique().tolist()

    return potential_trips


def stop_times_for_kth_trip(
    from_stop_id: str,
    stop_ids: List[str],
    time_to_stops_orig: Dict[str, Any],
) -> Dict[str, Any]:
    # prevent upstream mutation of dictionary
    time_to_stops = copy(time_to_stops_orig)
    stop_ids = list(stop_ids)
    potential_trips_num = 0

    for i, ref_stop_id in enumerate(stop_ids):
        # how long it took to get to the stop so far (0 for start node)
        # baseline_cost = time_to_stops[ref_stop_id]
        baseline_cost, baseline_transfers = time_to_stops[ref_stop_id]

        # get list of all trips associated with this stop
        potential_trips = get_trip_ids_for_stop(feed, ref_stop_id, departure_secs)
        potential_trips_num += int(len(potential_trips))
        
        for potential_trip in potential_trips:

            # get all the stop time arrivals for that trip
            stop_times_sub = feed.stop_times[feed.stop_times.trip_id == potential_trip]
            stop_times_sub = stop_times_sub.sort_values(by="stop_sequence")

            # get the "hop on" point
            from_her_subset = stop_times_sub[stop_times_sub.stop_id == ref_stop_id]
            from_here = from_her_subset.head(1).squeeze()

            # get all following stops
            stop_times_after_mask = stop_times_sub.stop_sequence >= from_here.stop_sequence
            stop_times_after = stop_times_sub[stop_times_after_mask]

            # for all following stops, calculate time to reach
            arrivals_zip = zip(stop_times_after.arrival_time, stop_times_after.stop_id)
            for arrive_time, arrive_stop_id in arrivals_zip:

                # # 이미 계산된 정류장은 건너뛰기
                # if arrive_stop_id in time_to_stops and ref_stop_id != from_stop_id:
                #     continue

                # 출발 정류장인 경우 arrive_time_adjusted를 0으로 설정
                if ref_stop_id == from_stop_id and arrive_stop_id == from_stop_id:
                    arrive_time_adjusted = 0
                else:
                    # time to reach is diff from start time to arrival (plus any baseline cost)
                    arrive_time_adjusted = arrive_time - departure_secs + baseline_cost

                # # time to reach is diff from start time to arrival (plus any baseline cost)
                # arrive_time_adjusted = arrive_time - departure_secs + baseline_cost

                new_transfers = baseline_transfers + [arrive_stop_id]

                # only update if does not exist yet or is faster
                if arrive_stop_id in time_to_stops:
                    if time_to_stops[arrive_stop_id][0] > arrive_time_adjusted:
                        time_to_stops[arrive_stop_id] = (arrive_time_adjusted, new_transfers)
                else:
                    time_to_stops[arrive_stop_id] = (arrive_time_adjusted, new_transfers)

    print("The final operation of potential trips num: ", potential_trips_num)
    return time_to_stops

def add_footpath_transfers(
    stop_ids: List[str],
    time_to_stops_orig: Dict[str, Any],
    stops_gdf: gpd.GeoDataFrame,
    transfer_cost=TRANSFER_COST,
) -> Dict[str, Any]:
    # prevent upstream mutation of dictionary
    time_to_stops = copy(time_to_stops_orig)
    stop_ids = list(stop_ids)

    # add in transfers to nearby stops
    for stop_id in stop_ids:
        stop_pt = stops_gdf.loc[stop_id].geometry

        # TODO: parameterize? transfer within .2 miles
        meters_in_miles = 1610
        qual_area = stop_pt.buffer(meters_in_miles/5)

        # get all stops within a short walk of target stop
        mask = stops_gdf.intersects(qual_area)

        # time to reach new nearby stops is the transfer cost plus arrival at last stop

        arrive_time_adjusted = time_to_stops[stop_id][0] + TRANSFER_COST
        new_transfers = time_to_stops[stop_id][1] + [stop_id]

        # only update if currently inaccessible or faster than currrent option
        for arrive_stop_id, row in stops_gdf[mask].iterrows():
            if arrive_stop_id in time_to_stops:
                if time_to_stops[arrive_stop_id][0] > arrive_time_adjusted:
                    time_to_stops[arrive_stop_id] = (arrive_time_adjusted, new_transfers)
            else:
                time_to_stops[arrive_stop_id] = (arrive_time_adjusted, new_transfers)

    return time_to_stops

In [78]:
import time
# initialize lookup with start node taking 0 seconds to reach
time_to_stops = {from_stop_id: (0, [])}  # (time, [list of transfers])

# setting transfer limit at 1
TRANSFER_LIMIT = 1
for k in range(TRANSFER_LIMIT + 1):
    logger.info("\nAnalyzing possibilities with {} transfers".format(k))

    # generate current list of stop ids under consideration
    stop_ids = list(time_to_stops.keys())
    logger.info("\tinital qualifying stop ids count: {}".format(len(stop_ids)))

    # update time to stops calculated based on stops accessible
    tic = time.perf_counter()
    time_to_stops = stop_times_for_kth_trip(from_stop_id, stop_ids, time_to_stops)
    toc = time.perf_counter()
    logger.info("\tstop times calculated in {:0.4f} seconds".format(toc - tic))

    added_keys_count = len((time_to_stops.keys())) - len(stop_ids)
    logger.info("\t\t{} stop ids added".format(added_keys_count))

    # now add footpath transfers and update
    tic = time.perf_counter()
    stop_ids = list(time_to_stops.keys())
    time_to_stops = add_footpath_transfers(stop_ids, time_to_stops, gdf)
    toc = time.perf_counter()
    logger.info("\tfootpath transfers calculated in {:0.4f} seconds".format(toc - tic))

    added_keys_count = len((time_to_stops.keys())) - len(stop_ids)
    logger.info("\t\t{} stop ids added".format(added_keys_count))

assert to_stop_id in time_to_stops, "Unable to find route to destination within transfer limit"

time_to_destination = time_to_stops[to_stop_id][0]
transfers = time_to_stops[to_stop_id][1]

logger.info("Time to destination: {} minutes".format(time_to_destination/60))
logger.info("Transfers: {}".format(" -> ".join(transfers)))

INFO:root:
Analyzing possibilities with 0 transfers
INFO:root:	inital qualifying stop ids count: 1
INFO:root:	stop times calculated in 2.6392 seconds
INFO:root:		62 stop ids added
INFO:root:	footpath transfers calculated in 0.0492 seconds
INFO:root:		18 stop ids added
INFO:root:
Analyzing possibilities with 1 transfers
INFO:root:	inital qualifying stop ids count: 81


The final operation of potential trips num:  284


INFO:root:	stop times calculated in 165.4149 seconds
INFO:root:		517 stop ids added


The final operation of potential trips num:  18050


INFO:root:	footpath transfers calculated in 0.4337 seconds
INFO:root:		30 stop ids added
INFO:root:Time to destination: 9.5 minutes
INFO:root:Transfers: RS_ACC1_S-1-1030 -> RS_ACC1_S-1-1030


In [79]:
# 경로 결과 표 출력
import pandas as pd
transfers_info = []

for stop_id in transfers:

    arrival_time = time_to_stops[stop_id][0]
    stop_name = feed.stops.loc[feed.stops['stop_id'] == stop_id, 'stop_name'].iloc[0]
    
    # 결과 정보 추가
    transfers_info.append({'Stop ID': stop_id, 'Stop Name': stop_name, 'Arrival Time': arrival_time/60})


df = pd.DataFrame(transfers_info)
print(df)

            Stop ID Stop Name  Arrival Time
0  RS_ACC1_S-1-1030        수서           6.5
1  RS_ACC1_S-1-1030        수서           6.5


In [64]:
print("Time to destination: {} minutes".format(time_to_destination/60))
# 경로 결과 표 출력
import pandas as pd
transfers_info = []

# 출발지 정보 추가
transfers_info.append({'Stop ID': from_stop_id, 'Stop Name': from_stop_name, 'Arrival Time': 0})

# 이미 방문한 정류장을 기록하기 위한 set
visited_stops = set([from_stop_id])

for stop_id in transfers:

    # 출발지인 경우 생략
    if stop_id == from_stop_id:
        continue

    # # 출발지이거나 이미 방문한 정류장인 경우 생략
    # if stop_id == from_stop_id or stop_id in visited_stops:
    #     continue

    arrival_time = time_to_stops[stop_id][0]
    stop_name = feed.stops.loc[feed.stops['stop_id'] == stop_id, 'stop_name'].iloc[0]
    
    # 결과 정보 추가
    transfers_info.append({'Stop ID': stop_id, 'Stop Name': stop_name, 'Arrival Time': arrival_time/60})
    
    # 현재 정류장을 방문한 것으로 기록
    visited_stops.add(stop_id)

df = pd.DataFrame(transfers_info)
print(df)

Time to destination: 14.5 minutes
            Stop ID Stop Name  Arrival Time
0  RS_ACC1_S-1-1851       가천대           0.0
1  RS_ACC1_S-1-1030        수서           5.5
2  RS_ACC1_S-1-1030        수서           5.5
3  RS_ACC1_S-1-0340      가락시장          11.5
4  RS_ACC1_S-1-0340      가락시장          11.5


In [37]:
print(time_to_stops)

{'RS_ACC1_S-1-1851': (-60.0, ['RS_ACC1_S-1-1851', 'RS_ACC1_S-1-1851']), 'RS_ACC1_S-1-1852': (60.0, ['RS_ACC1_S-1-1851', 'RS_ACC1_S-1-1852']), 'RS_ACC1_S-1-1853': (180.0, ['RS_ACC1_S-1-1851', 'RS_ACC1_S-1-1853']), 'RS_ACC1_S-1-1854': (360.0, ['RS_ACC1_S-1-1851', 'RS_ACC1_S-1-1854']), 'RS_ACC1_S-1-1860': (510.0, ['RS_ACC1_S-1-1851', 'RS_ACC1_S-1-1860']), 'RS_ACC1_S-1-1855': (630.0, ['RS_ACC1_S-1-1851', 'RS_ACC1_S-1-1855']), 'RS_ACC1_S-1-1856': (750.0, ['RS_ACC1_S-1-1851', 'RS_ACC1_S-1-1856']), 'RS_ACC1_S-1-1857': (900.0, ['RS_ACC1_S-1-1851', 'RS_ACC1_S-1-1857']), 'RS_ACC1_S-1-1858': (1050.0, ['RS_ACC1_S-1-1851', 'RS_ACC1_S-1-1858']), 'RS_ACC1_S-1-1859': (1170.0, ['RS_ACC1_S-1-1851', 'RS_ACC1_S-1-1859']), 'RS_ACC1_S-1-1862': (1380.0, ['RS_ACC1_S-1-1851', 'RS_ACC1_S-1-1862']), 'RS_ACC1_S-1-1861': (1530.0, ['RS_ACC1_S-1-1851', 'RS_ACC1_S-1-1861']), 'RS_ACC1_S-1-1863': (1710.0, ['RS_ACC1_S-1-1851', 'RS_ACC1_S-1-1863']), 'RS_ACC1_S-1-1864': (1860.0, ['RS_ACC1_S-1-1851', 'RS_ACC1_S-1-1864']), 

In [46]:
# 경로 결과 표 출력 - 환승 시 중복되는 현상 해결 + 시간 튜님
import pandas as pd
transfers_info = []

# 출발지 정보 추가
transfers_info.append({'Stop ID': from_stop_id, 'Stop Name': from_stop_name, 'Arrival Time': 0})

# 이미 방문한 정류장을 기록하기 위한 set
visited_stops = set([from_stop_id])
visit_cost = 0

for stop_id in transfers:

    # 출발지이거나 이미 방문한 정류장인 경우 생략
    if stop_id == from_stop_id:
        continue

    if stop_id == stop_id in visited_stops:
        visit_cost = 1
        continue

    arrival_time = time_to_stops[stop_id][0]
    stop_name = feed.stops.loc[feed.stops['stop_id'] == stop_id, 'stop_name'].iloc[0]

    if visit_cost:
        arrival_time = arrival_time - 180
    
    # 결과 정보 추가
    transfers_info.append({'Stop ID': stop_id, 'Stop Name': stop_name, 'Arrival Time': arrival_time/60})
    
    # 현재 정류장을 방문한 것으로 기록
    visited_stops.add(stop_id)
    visit_cost = 0

df = pd.DataFrame(transfers_info)
print("Time to destination: {} minutes".format(arrival_time/60))
print(df)

Time to destination: 1.5 minutes
            Stop ID Stop Name  Arrival Time
0  RS_ACC1_S-1-1851       가천대           0.0
1  RS_ACC1_S-1-1031        복정           1.5


In [115]:
# #경로 역추적 알고리즘
# def get_full_path(time_to_stops, from_stop_id, to_stop_id):
#     # 최종 목적지에서 시작하여 경로 역추적
#     current_stop = to_stop_id
#     full_path = [current_stop]
    
#     while current_stop != from_stop_id:
#         _, transfers = time_to_stops.get(current_stop, (None, []))
#         # 이전 환승역을 찾아서 경로에 추가
#         if transfers:
#             current_stop = transfers[-1]
#             full_path.insert(0, current_stop)
#         else:
#             break

#     return full_path

# # 최적 경로의 전체 정류장 목록 추출
# full_path = get_full_path(time_to_stops, from_stop_id, to_stop_id)

# # 경로 결과 표 출력
# import pandas as pd
# path_info = []
# for stop_id in full_path:
#     arrival_time = time_to_stops[stop_id][0]
#     stop_name = feed.stops.loc[feed.stops['stop_id'] == stop_id, 'stop_name'].iloc[0]
#     is_transfer = 'Yes' if stop_id in transfers and stop_id not in [from_stop_id, to_stop_id] else 'No'
#     path_info.append({'Stop ID': stop_id, 'Stop Name': stop_name, 'Arrival Time': arrival_time/60, 'Is Transfer': is_transfer})

# df = pd.DataFrame(path_info)
# print(df)

In [34]:
import pandas as pd

def convert_dict_to_dataframe(time_to_stops):
    """Converts the time_to_stops dictionary to a DataFrame."""
    data = []
    for stop_id, (time, transfers) in time_to_stops.items():
        data.append({"Stop ID": stop_id, "Time to Stop (secs)": time, "Transfers": transfers})
    return pd.DataFrame(data)

df_time_to_stops = convert_dict_to_dataframe(time_to_stops)
print(df_time_to_stops)

              Stop ID  Time to Stop (secs)  \
0    RS_ACC1_S-1-1851                -60.0   
1    RS_ACC1_S-1-1852                 60.0   
2    RS_ACC1_S-1-1853                180.0   
3    RS_ACC1_S-1-1854                360.0   
4    RS_ACC1_S-1-1860                510.0   
..                ...                  ...   
623  RS_ACC1_S-1-2636             -17040.0   
624  RS_ACC1_S-1-4206               3930.0   
625  RS_ACC1_S-1-4929               4180.0   
626  RS_ACC1_S-1-2646               3300.0   
627  RS_ACC1_S-1-2635               2580.0   

                                             Transfers  
0                 [RS_ACC1_S-1-1851, RS_ACC1_S-1-1851]  
1                 [RS_ACC1_S-1-1851, RS_ACC1_S-1-1852]  
2                 [RS_ACC1_S-1-1851, RS_ACC1_S-1-1853]  
3                 [RS_ACC1_S-1-1851, RS_ACC1_S-1-1854]  
4                 [RS_ACC1_S-1-1851, RS_ACC1_S-1-1860]  
..                                                 ...  
623  [RS_ACC1_S-1-1023, RS_ACC1_S-1-1023, RS_ACC