## 한글 인코딩 해결
### 파일의 맨 위에 주석 삽입
### #-*-coding:utf-8-*-

In [40]:
import logging
import partridge as ptg

# capture logs in notebook
logger = logging.getLogger()
logger.setLevel(logging.DEBUG)
logging.debug("test")

# load a GTFS of AC Transit
# path = 'gtfs.zip'
path = 'kr_gtfs.zip'
_date, service_ids = ptg.read_busiest_date(path)
view = {'trips.txt': {'service_id': service_ids}}
feed = ptg.load_feed(path, view)

DEBUG:root:test


In [41]:
import geopandas as gpd
import pyproj
from shapely.geometry import Point

# convert all known stops in the schedule to shapes in a GeoDataFrame
gdf = gpd.GeoDataFrame(
    {"stop_id": feed.stops.stop_id.tolist()},
    geometry=[
        Point(lon, lat)
        for lat, lon in zip(
            feed.stops.stop_lat,
            feed.stops.stop_lon)
    ]
)
gdf = gdf.set_index("stop_id")

# CRS 수정
gdf.crs = 'epsg:4326'

# re-cast to meter-based projection to allow for distance calculations
centroid = gdf.iloc[0].geometry.centroid
aeqd_crs = pyproj.CRS(
    proj='aeqd',
    ellps='WGS84',
    datum='WGS84',


    
    lat_0=centroid.y,
    lon_0=centroid.x
)

gdf = gdf.to_crs(crs=aeqd_crs)

In [42]:
feed

<partridge.gtfs.Feed at 0x1d09ee40250>

In [43]:
feed.trips

Unnamed: 0,route_id,service_id,trip_id
0,RR_ACC1_S-1-01-1D,B1,RR_ACC1_S-1-01-1D_Ord001
1,RR_ACC1_S-1-01-1D,B1,RR_ACC1_S-1-01-1D_Ord002
2,RR_ACC1_S-1-01-1D,B1,RR_ACC1_S-1-01-1D_Ord003
3,RR_ACC1_S-1-01-1D,B1,RR_ACC1_S-1-01-1D_Ord004
4,RR_ACC1_S-1-01-1D,B1,RR_ACC1_S-1-01-1D_Ord005
...,...,...,...
8247,RR_ACC1_S-1-WS-1U,B1,RR_ACC1_S-1-WS-1U_Ord246
8248,RR_ACC1_S-1-WS-1U,B1,RR_ACC1_S-1-WS-1U_Ord247
8249,RR_ACC1_S-1-WS-1U,B1,RR_ACC1_S-1-WS-1U_Ord248
8250,RR_ACC1_S-1-WS-1U,B1,RR_ACC1_S-1-WS-1U_Ord249


In [44]:
feed.stops

Unnamed: 0,stop_id,stop_name,stop_lat,stop_lon
0,RS_ACC1_S-1-0150,서울역(지하),37.555870,126.972100
1,RS_ACC1_S-1-0151,시청,37.565710,126.977120
2,RS_ACC1_S-1-0152,종각,37.570160,126.982920
3,RS_ACC1_S-1-0153,종로3가,37.570420,126.992110
4,RS_ACC1_S-1-0154,종로5가,37.570930,127.001850
...,...,...,...,...
722,RS_ACC1_S-1-4925,걸포북변,37.631639,126.705776
723,RS_ACC1_S-1-4926,사우(김포시청),37.620320,126.719766
724,RS_ACC1_S-1-4927,풍무,37.612428,126.732449
725,RS_ACC1_S-1-4928,고촌,37.601213,126.770326


In [45]:
# let's use this example origin and destination
# to find the time it would take to go from one to another
# from_stop_name = "Santa Clara Av & Mozart St"
from_stop_name = "가천대"
# to_stop_name = "10th Avenue SB"
to_stop_name = "장지"

# QA: we know the best way to connect these two is the 51A -> 1T
# if we depart at 8:30 AM, schedule should suggest:
#     take 51A 8:37 - 8:49
#     make walk connection
#     take 1T 8:56 - 9:03
# total travel time: 26 minutes

# look at all trips from that stop that are after the depart time
departure_secs = 8.5 * 60 * 60

# get all information, including the stop ids, for the start and end nodes
from_stop = feed.stops[feed.stops.stop_name == from_stop_name].head(1).squeeze()
# to_stop = feed.stops[["10th Avenue" in f for f in feed.stops.stop_name]].head(1).squeeze()
to_stop = feed.stops[feed.stops.stop_name == to_stop_name].head(1).squeeze()
# to_stop = feed.stops[["왕십리" in f for f in feed.stops.stop_name]].head(1).squeeze()
# to_stop2 = feed.stops[["가락시장" in f for f in feed.stops.stop_name]]

# extract just the stop ids
from_stop_id = from_stop.stop_id
to_stop_id = to_stop.stop_id

In [46]:
from_stop

stop_id      RS_ACC1_S-1-1851
stop_name                 가천대
stop_lat             37.44861
stop_lon             127.1267
Name: 312, dtype: object

In [47]:
to_stop

stop_id      RS_ACC1_S-1-2820
stop_name                  장지
stop_lat              37.4787
stop_lon            127.12619
Name: 533, dtype: object

In [48]:
from copy import copy
from typing import Any
from typing import Dict
from typing import List

# assume all xfers are 3 minutes
TRANSFER_COST = (5 * 60)

def get_trip_ids_for_stop(feed, stop_id: str, departure_time: int):
    """Takes a stop and departure time and get associated trip ids."""
    mask_1 = feed.stop_times.stop_id == stop_id
    mask_2 = feed.stop_times.departure_time >= departure_time

    # extract the list of qualifying trip ids
    potential_trips = feed.stop_times[mask_1 & mask_2].trip_id.unique().tolist()

    return potential_trips


def stop_times_for_kth_trip(
    from_stop_id: str,
    stop_ids: List[str],
    time_to_stops_orig: Dict[str, Any],
) -> Dict[str, Any]:
    # prevent upstream mutation of dictionary
    time_to_stops = copy(time_to_stops_orig)
    stop_ids = list(stop_ids)
    potential_trips_num = 0

    for i, ref_stop_id in enumerate(stop_ids):
        # how long it took to get to the stop so far (0 for start node)
        # baseline_cost = time_to_stops[ref_stop_id]
        baseline_cost, baseline_transfers = time_to_stops[ref_stop_id]

        # get list of all trips associated with this stop
        potential_trips = get_trip_ids_for_stop(feed, ref_stop_id, departure_secs)
        potential_trips_num += int(len(potential_trips))
        
        for potential_trip in potential_trips:

            # get all the stop time arrivals for that trip
            stop_times_sub = feed.stop_times[feed.stop_times.trip_id == potential_trip]
            stop_times_sub = stop_times_sub.sort_values(by="stop_sequence")

            # get the "hop on" point
            from_her_subset = stop_times_sub[stop_times_sub.stop_id == ref_stop_id]
            from_here = from_her_subset.head(1).squeeze()

            # get all following stops
            stop_times_after_mask = stop_times_sub.stop_sequence >= from_here.stop_sequence
            stop_times_after = stop_times_sub[stop_times_after_mask]

            # for all following stops, calculate time to reach
            arrivals_zip = zip(stop_times_after.arrival_time, stop_times_after.stop_id)
            for arrive_time, arrive_stop_id in arrivals_zip:

                # time to reach is diff from start time to arrival (plus any baseline cost)
                arrive_time_adjusted = arrive_time - departure_secs + baseline_cost

                new_transfers = baseline_transfers + [arrive_stop_id]

                # # only update if does not exist yet or is faster
                # if arrive_stop_id in time_to_stops:
                #     if time_to_stops[arrive_stop_id] > arrive_time_adjusted:
                #         time_to_stops[arrive_stop_id] = arrive_time_adjusted
                # else:
                #     time_to_stops[arrive_stop_id] = arrive_time_adjusted

                # only update if does not exist yet or is faster
                if arrive_stop_id in time_to_stops:
                    if time_to_stops[arrive_stop_id][0] > arrive_time_adjusted:
                        time_to_stops[arrive_stop_id] = (arrive_time_adjusted, new_transfers)
                else:
                    time_to_stops[arrive_stop_id] = (arrive_time_adjusted, new_transfers)

    print("최종 누적 potential trips num: ", potential_trips_num)
    return time_to_stops


def add_footpath_transfers(
    stop_ids: List[str],
    time_to_stops_orig: Dict[str, Any],
    stops_gdf: gpd.GeoDataFrame,
    transfer_cost=TRANSFER_COST,
) -> Dict[str, Any]:
    # prevent upstream mutation of dictionary
    time_to_stops = copy(time_to_stops_orig)
    stop_ids = list(stop_ids)

    # add in transfers to nearby stops
    for stop_id in stop_ids:
        stop_pt = stops_gdf.loc[stop_id].geometry

        # TODO: parameterize? transfer within .2 miles
        meters_in_miles = 1610
        qual_area = stop_pt.buffer(meters_in_miles/5)

        # get all stops within a short walk of target stop
        mask = stops_gdf.intersects(qual_area)

        # time to reach new nearby stops is the transfer cost plus arrival at last stop
        # arrive_time_adjusted = time_to_stops[stop_id] + TRANSFER_COST
        arrive_time_adjusted = time_to_stops[stop_id][0] + TRANSFER_COST
        new_transfers = time_to_stops[stop_id][1] + [stop_id]

        # only update if currently inaccessible or faster than currrent option
        # for arrive_stop_id, row in stops_gdf[mask].iterrows():
        #     if arrive_stop_id in time_to_stops:
        #         if time_to_stops[arrive_stop_id] > arrive_time_adjusted:
        #             time_to_stops[arrive_stop_id] = arrive_time_adjusted
        #     else:
        #         time_to_stops[arrive_stop_id] = arrive_time_adjusted

        for arrive_stop_id, row in stops_gdf[mask].iterrows():
            if arrive_stop_id in time_to_stops:
                if time_to_stops[arrive_stop_id][0] > arrive_time_adjusted:
                    time_to_stops[arrive_stop_id] = (arrive_time_adjusted, new_transfers)
            else:
                time_to_stops[arrive_stop_id] = (arrive_time_adjusted, new_transfers)

    return time_to_stops

In [49]:
import time
# initialize lookup with start node taking 0 seconds to reach
time_to_stops = {from_stop_id: (0, [])}  # (time, [list of transfers])

# setting transfer limit at 1
TRANSFER_LIMIT = 1
for k in range(TRANSFER_LIMIT + 1):
    logger.info("\nAnalyzing possibilities with {} transfers".format(k))

    # generate current list of stop ids under consideration
    stop_ids = list(time_to_stops.keys())
    logger.info("\tinital qualifying stop ids count: {}".format(len(stop_ids)))

    # update time to stops calculated based on stops accessible
    tic = time.perf_counter()
    time_to_stops = stop_times_for_kth_trip(from_stop_id, stop_ids, time_to_stops)
    toc = time.perf_counter()
    logger.info("\tstop times calculated in {:0.4f} seconds".format(toc - tic))

    added_keys_count = len((time_to_stops.keys())) - len(stop_ids)
    logger.info("\t\t{} stop ids added".format(added_keys_count))

    # now add footpath transfers and update
    tic = time.perf_counter()
    stop_ids = list(time_to_stops.keys())
    time_to_stops = add_footpath_transfers(stop_ids, time_to_stops, gdf)
    toc = time.perf_counter()
    logger.info("\tfootpath transfers calculated in {:0.4f} seconds".format(toc - tic))

    added_keys_count = len((time_to_stops.keys())) - len(stop_ids)
    logger.info("\t\t{} stop ids added".format(added_keys_count))

assert to_stop_id in time_to_stops, "Unable to find route to destination within transfer limit"

time_to_destination = time_to_stops[to_stop_id][0]
transfers = time_to_stops[to_stop_id][1]

logger.info("Time to destination: {} minutes".format(time_to_destination/60))
logger.info("Transfers: {}".format(" -> ".join(transfers)))

INFO:root:
Analyzing possibilities with 0 transfers
INFO:root:	inital qualifying stop ids count: 1


INFO:root:	stop times calculated in 2.8242 seconds
INFO:root:		62 stop ids added
INFO:root:	footpath transfers calculated in 0.0456 seconds
INFO:root:		18 stop ids added
INFO:root:
Analyzing possibilities with 1 transfers
INFO:root:	inital qualifying stop ids count: 81


최종 누적 potential trips num:  284


INFO:root:	stop times calculated in 167.1133 seconds
INFO:root:		517 stop ids added


최종 누적 potential trips num:  18050


INFO:root:	footpath transfers calculated in 0.4214 seconds
INFO:root:		30 stop ids added
INFO:root:Time to destination: 13.666666666666666 minutes
INFO:root:Transfers: RS_ACC1_S-1-1031 -> RS_ACC1_S-1-1031 -> RS_ACC1_S-1-2820
