# Accessibility of Ireland's mass vaccine centres

Motivation: there are 37 mass vaccination centres in Ireland. This algorithm aims to measure how accessible they are by public transport.

In [1]:
%reset

Once deleted, variables cannot be recovered. Proceed (y/[n])? y


In [2]:
from os import walk
import pandas as pd
import numpy as np
from haversine import haversine_vector
import time

In [3]:
gtfs_folder = 'GTFS Files'
operators = next(walk(gtfs_folder))[1]

MAX_TRANSFER_DIST = 2.0
MAX_WALK_DIST = 5.0
WALKING_SPEED = 5.0
WALK_EFFORT_FACTOR = 0.5

# Step 1a: import stop location data and identify transfer stops

In [4]:
def import_stops(MAX_TRANSFER_DIST, MAX_WALK_DIST, WALKING_SPEED):
    stops = []

    for operator in operators:
        file = gtfs_folder + '\\' + operator + '\\' + 'stops.txt'
        df = pd.read_csv(file)[['stop_id', 'stop_name', 'stop_lat', 'stop_lon']]
        stops.append(df)

    stops = pd.concat(stops).drop_duplicates('stop_id')  # some stops are used by multiple operators with the same ID, so will be duplicated
    stops['merge_key'] = 1
    print("Number of stops =", len(stops.index))

    transfer_stops = find_nearby_stops(stops)
    transfer_stops = transfer_stops[transfer_stops.transfer_distance < MAX_TRANSFER_DIST]
    transfer_stops['transfer_walk_time'] = transfer_stops['transfer_distance'] / WALKING_SPEED
    transfer_stops = transfer_stops[['stop_id_x', 'stop_name_x', 'stop_id_y', 'stop_name_y', 'transfer_distance', 'transfer_walk_time']]
    return stops, transfer_stops

def find_nearby_stops(stops):
    ## Returns a dataframe containing all pairs of stops that are within walking distance of each other
    ## This function works by dividing the region into cells, each containing 0.1 degrees of longitude and latitude
    ## Stops are paired if they are in the same or neighbouring cell (including diagonal neighbours)
    ## For Ireland, 0.1 degrees longitude is about 6.6 kms, while 0.1 degrees latitude is about 11.1 kms
    ## So this function will identify every pair of stops that are less than 6.6 kms apart
        
    decimals = 1
    round_precision = 0.1  # degrees longitude/latitude
    middle = round_precision / 2

    rounded_stops = stops.copy()
    
    ## lat_ceil, lat_floor, lon_ceil, lon_floor are the lat/lon values of the four edges of the cell
    rounded_stops['lat_ceil'] = rounded_stops['stop_lat'] + middle
    rounded_stops['lat_ceil'] = rounded_stops['lat_ceil'].round(decimals)

    rounded_stops['lon_ceil'] = rounded_stops['stop_lon'] + middle
    rounded_stops['lon_ceil'] = rounded_stops['lon_ceil'].round(decimals)

    rounded_stops['lat_floor'] = rounded_stops['lat_ceil'] - round_precision
    rounded_stops['lon_floor'] = rounded_stops['lon_ceil'] - round_precision

    rounded_stops_copy = rounded_stops.copy()
    
    ## There are 9 cases to consider, corresponding to the 9 cells in a 3x3 grid where a nearby stop could be located
    ## df1: both stops in same cell => (lat_ceil, lon_ceil)_x = (lat_ceil, lon_ceil)_y
    ## df2a/b: cells are north/south neighbours => (lat_ceil, lon_ceil)_x = (lat_floor, lon_ceil)_y
    ## df3a/b: cells are east/west neighbours => (lat_ceil, lon_ceil)_x = (lat_ceil, lon_floor)_y
    ## df4a/b: cells are NE/SW diagonal neighbours => (lat_ceil, lon_ceil)_x = (lat_floor, lon_floor)_y
    ## df5a/b: cells are NW/SE diagonal neighbours => (lat_ceil, lon_floor)_x = (lat_floor, lon_ceil)_y

    
    df1 = pd.merge(rounded_stops, rounded_stops_copy, left_on=['lat_ceil', 'lon_ceil'], right_on=['lat_ceil', 'lon_ceil'])

    df2a = pd.merge(rounded_stops, rounded_stops_copy, left_on=['lat_ceil', 'lon_ceil'], right_on=['lat_floor', 'lon_ceil'])
    df2b = pd.merge(rounded_stops, rounded_stops_copy, left_on=['lat_floor', 'lon_ceil'], right_on=['lat_ceil', 'lon_ceil'])

    df3a = pd.merge(rounded_stops, rounded_stops_copy, left_on=['lat_ceil', 'lon_ceil'], right_on=['lat_ceil', 'lon_floor'])
    df3b = pd.merge(rounded_stops, rounded_stops_copy, left_on=['lat_ceil', 'lon_floor'], right_on=['lat_ceil', 'lon_ceil'])

    df4a = pd.merge(rounded_stops, rounded_stops_copy, left_on=['lat_ceil', 'lon_ceil'], right_on=['lat_floor', 'lon_floor'])
    df4b = pd.merge(rounded_stops, rounded_stops_copy, left_on=['lat_floor', 'lon_floor'], right_on=['lat_ceil', 'lon_ceil'])

    df5a = pd.merge(rounded_stops, rounded_stops_copy, left_on=['lat_ceil', 'lon_floor'], right_on=['lat_floor', 'lon_ceil'])
    df5b = pd.merge(rounded_stops, rounded_stops_copy, left_on=['lat_floor', 'lon_ceil'], right_on=['lat_ceil', 'lon_floor'])

    dfs = [df1, df2a, df2b, df3a, df3b, df4a, df4b, df5a, df5b]

    nearby_stops = pd.concat(dfs)
    nearby_stops = nearby_stops[['stop_id_x', 'stop_name_x', 'stop_lat_x', 'stop_lon_x', 
                                 'stop_id_y', 'stop_name_y', 'stop_lat_y', 'stop_lon_y']]

    del df1, df2a, df2b, df3a, df3b, df4a, df4b, df5a, df5b, dfs, rounded_stops, rounded_stops_copy
    
    stop_x_coords = nearby_stops[['stop_lat_x', 'stop_lon_x']].values
    stop_y_coords = nearby_stops[['stop_lat_y', 'stop_lon_y']].values

    nearby_stops['transfer_distance'] = haversine_vector(stop_x_coords, stop_y_coords)

    del stop_x_coords, stop_y_coords

    nearby_stops = nearby_stops.sort_values('transfer_distance')
    return nearby_stops

stops, transfer_stops = import_stops(MAX_TRANSFER_DIST, MAX_WALK_DIST, WALKING_SPEED)

Number of stops = 12614


In [5]:
stops

Unnamed: 0,stop_id,stop_name,stop_lat,stop_lon,merge_key
0,700000015422,"Belfast, Europa Bus Centre",54.595054,-5.936268,1
1,8220000386,Schoolhouse Hotel,53.337231,-6.239556,1
2,8220000408,Bachelor's Walk,53.347213,-6.260758,1
3,8220000413,The Gresham Hotel,53.351724,-6.261296,1
4,8220000414,The Gresham Hotel,53.351483,-6.260810,1
...,...,...,...,...,...
174,gen:57111:212:0:1,"Mulgannon, Summerhill",52.334758,-6.467826,1
175,gen:57111:2590:0:3,"Drinagh (Wexford), IDA Business Park",52.308049,-6.457324,1
176,gen:57111:8017:0:1,"Drinagh (Wexford), Sinnottstown Lane",52.305878,-6.470129,1
177,gen:57111:8018:0:1,"Whiterock South, Gael Scoil Charman",52.327904,-6.472406,1


# Step 1b: import vaccine centre location data and identify nearby stops

In [6]:
vax_hubs = pd.read_csv('Vaccine_Hubs.txt')
vax_hubs['merge_key'] = 1

vax_hub_stops = pd.merge(stops, vax_hubs, on='merge_key')
stop_coords = vax_hub_stops[['stop_lat', 'stop_lon']].values
vax_hub_coords = vax_hub_stops[['Facility Latitude', 'Facility Longitude']].values

vax_hub_stops['stop_to_vax_hub_distance'] = haversine_vector(stop_coords, vax_hub_coords)
vax_hub_stops['stop_to_vax_hub_time'] = vax_hub_stops['stop_to_vax_hub_distance'] / WALKING_SPEED
vax_hub_stops = vax_hub_stops.sort_values('stop_to_vax_hub_distance')
vax_hub_stops = vax_hub_stops.drop_duplicates('stop_id')
vax_hub_stops

Unnamed: 0,stop_id,stop_name,stop_lat,stop_lon,merge_key,Facility Latitude,Facility Longitude,County,Facility,Address,stop_to_vax_hub_distance,stop_to_vax_hub_time
266744,8220DB007571,"DCU Helix, stop 7571",53.386776,-6.258725,1,53.386560,-6.259032,Dublin,Helix Theatre DCU,DCU Santry,0.031458,0.006292
79073,8370B2368501,"Eglinton Street, stop 236851",51.897310,-8.464863,1,51.897136,-8.465482,Cork,City Hall Cork,"City Hall, Anglesea Street, Cork city",0.046698,0.009340
74483,8360B603331,"West County Hotel, stop 603331",52.831752,-8.980457,1,52.831327,-8.980353,Clare,West County Hotel,"Limerick Road, Ennis",0.047821,0.009564
410200,828000017,Old Post Office Portlaoise,53.034486,-7.302756,1,53.035026,-7.302458,Laois,Midlands Park Hotel,"Jessop St., Portlaoise, Co Laois",0.063307,0.012661
410163,828000016,Old Post Office Portlaoise,53.034460,-7.302905,1,53.035026,-7.302458,Laois,Midlands Park Hotel,"Jessop St., Portlaoise, Co Laois",0.069738,0.013948
...,...,...,...,...,...,...,...,...,...,...,...,...
161935,8470B5304301,"Ballyconneely, stop 530431",53.431157,-10.075430,1,53.843820,-9.239317,Mayo,Breaffy House Resort,"Breaffy, Castlebar, Co Mayo",71.720747,14.344149
359921,700000014719,"Belfast City Centre, Glengall Street",54.595088,-5.937195,1,53.967790,-6.387424,Louth,Fairways Hotel,"Dublin Rd, Haggardstown, Dundalk, Co Louth",75.627856,15.125571
22,700000015422,"Belfast, Europa Bus Centre",54.595054,-5.936268,1,53.967790,-6.387424,Louth,Fairways Hotel,"Dublin Rd, Haggardstown, Dundalk, Co Louth",75.647670,15.129534
456750,gen:31400:8239:0:1,"Belfast City Centre, Jury's Inn",54.596260,-5.934783,1,53.967790,-6.387424,Louth,Fairways Hotel,"Dublin Rd, Haggardstown, Dundalk, Co Louth",75.808460,15.161692


# Step 1c: import data about trip times from the GTFS files

In [7]:
def import_stop_times():
    stop_times_dfs = []

    for operator in operators:
        stop_times = pd.read_csv(gtfs_folder + '\\' + operator + '\\' + 'stop_times.txt')
        stop_times['operator'] = operator
        stop_times_dfs.append(stop_times)

    stop_times = pd.concat(stop_times_dfs)
    stop_times['departure_time'] = pd.to_timedelta(stop_times['departure_time'])
    stop_times['arrival_time'] = pd.to_timedelta(stop_times['arrival_time'])
    
    stop_times['departure_time_hrs'] = stop_times['departure_time'].dt.total_seconds() / (60 * 60)
    stop_times['arrival_time_hrs'] = stop_times['arrival_time'].dt.total_seconds() / (60 * 60)

    stop_times = stop_times.drop_duplicates(['trip_id', 'stop_sequence'])
    print("Number of trip stops =", len(stop_times.index))
    return stop_times

def import_trip_data():
    trip_dfs = []
    todays_date = 20210225

    for operator in operators:
        calendar = pd.read_csv(gtfs_folder + '\\' + operator + '\\' + 'calendar.txt')
        calendar = calendar[(calendar['end_date'] > todays_date) & 
                            (calendar['start_date'] < todays_date)]
        
        trips = pd.read_csv(gtfs_folder + '\\' + operator + '\\' + 'trips.txt')
        trips = pd.merge(trips, calendar, on='service_id')
        trips['operator'] = operator
        trip_dfs.append(trips)

    trips = pd.concat(trip_dfs)

    trips['operational_days'] = trips['monday'] + trips['tuesday'] + trips['wednesday'] + trips['thursday'] + trips['friday'] + trips['saturday'] + trips['sunday']

    del calendar, trip_dfs

    print("Number of trips =", len(trips.index))
    print(sum(trips['operational_days']))
    
    return trips

stop_times = import_stop_times()
trip_data = import_trip_data()
stop_times = pd.merge(stop_times, trip_data, on='trip_id')
stop_times

Number of trip stops = 6142633
Number of trips = 128090
225970


Unnamed: 0,trip_id,arrival_time,departure_time,stop_id,stop_sequence,stop_headsign,pickup_type,drop_off_type,shape_dist_traveled,operator_x,...,tuesday,wednesday,thursday,friday,saturday,sunday,start_date,end_date,operator_y,operational_days
0,9.daily.1-700-y11-2.1.I,0 days 01:25:00,0 days 01:25:00,8240000548,1,,0.0,1.0,0.00,google_transit_aircoach,...,1,1,1,1,1,1,20201221,20211221,google_transit_aircoach,7
1,9.daily.1-700-y11-2.1.I,0 days 01:45:00,0 days 01:45:00,8220DB000047,2,,1.0,0.0,9651.30,google_transit_aircoach,...,1,1,1,1,1,1,20201221,20211221,google_transit_aircoach,7
2,9.daily.1-700-y11-2.1.I,0 days 01:55:00,0 days 01:55:00,8220DB000272,3,,1.0,0.0,11379.45,google_transit_aircoach,...,1,1,1,1,1,1,20201221,20211221,google_transit_aircoach,7
3,9.daily.1-700-y11-2.1.I,0 days 01:58:00,0 days 01:58:00,8220DB000273,4,,1.0,0.0,11867.14,google_transit_aircoach,...,1,1,1,1,1,1,20201221,20211221,google_transit_aircoach,7
4,9.daily.1-700-y11-2.1.I,0 days 02:00:00,0 days 02:00:00,8220DB004530,5,,1.0,0.0,12957.82,google_transit_aircoach,...,1,1,1,1,1,1,20201221,20211221,google_transit_aircoach,7
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3936421,13.MF-BH.14-WX2-y11-3.9.O,0 days 15:29:00,0 days 15:29:00,834000047,21,,0.0,0.0,9012.02,google_transit_wexfordbus,...,1,1,1,1,0,0,20210104,20220104,google_transit_wexfordbus,5
3936422,13.MF-BH.14-WX2-y11-3.9.O,0 days 15:30:00,0 days 15:30:00,8340B3316201,22,,0.0,0.0,9289.60,google_transit_wexfordbus,...,1,1,1,1,0,0,20210104,20220104,google_transit_wexfordbus,5
3936423,13.MF-BH.14-WX2-y11-3.9.O,0 days 15:31:00,0 days 15:31:00,834LL10364,23,,0.0,0.0,9739.56,google_transit_wexfordbus,...,1,1,1,1,0,0,20210104,20220104,google_transit_wexfordbus,5
3936424,13.MF-BH.14-WX2-y11-3.9.O,0 days 15:32:00,0 days 15:32:00,834000055,24,,0.0,0.0,9957.11,google_transit_wexfordbus,...,1,1,1,1,0,0,20210104,20220104,google_transit_wexfordbus,5


# Step 1d: Select a day of the week for analysis

In [8]:
earliest_departure = 13
latest_arrival = 23

days = ['monday', 'tuesday', 'wednesday', 'thursday', 'friday', 'saturday', 'sunday']
day_index = 5 # Saturday
day = days[day_index]
prev_day = days[(day_index-1) % 7]

daytime_stop_times = stop_times[(stop_times.departure_time_hrs > earliest_departure) &
                                (stop_times.arrival_time_hrs < latest_arrival)]
daytime_stop_times = daytime_stop_times[daytime_stop_times[day] == 1]

## We also need to account for trips where the dep/arr times are over 24 hours
prev_daytime_stop_times = stop_times[(stop_times.departure_time_hrs > 24 + earliest_departure) &
                                     (stop_times.arrival_time_hrs < 24 + latest_arrival)]
prev_daytime_stop_times = prev_daytime_stop_times[prev_daytime_stop_times[prev_day] == 1]
prev_daytime_stop_times['departure_time_hrs'] -= 24
prev_daytime_stop_times['arrival_time_hrs'] -= 24

daytime_stop_times = pd.concat([daytime_stop_times, prev_daytime_stop_times])
daytime_stop_times = daytime_stop_times[['trip_id', 'stop_id', 'stop_sequence', 'pickup_type',
                                         'drop_off_type', 'departure_time_hrs', 'arrival_time_hrs']]

arrivals = daytime_stop_times[daytime_stop_times['drop_off_type']==0][['trip_id', 'stop_id', 'stop_sequence', 'arrival_time_hrs']]
departures = daytime_stop_times[daytime_stop_times['pickup_type']==0][['trip_id', 'stop_id', 'stop_sequence', 'departure_time_hrs']]

daytime_stop_times

Unnamed: 0,trip_id,stop_id,stop_sequence,pickup_type,drop_off_type,departure_time_hrs,arrival_time_hrs
66,35.daily.1-700-y11-2.1.I,8240000548,1,0.0,1.0,19.416667,19.416667
67,35.daily.1-700-y11-2.1.I,8220DB000047,2,1.0,0.0,19.750000,19.750000
68,35.daily.1-700-y11-2.1.I,8220DB000272,3,1.0,0.0,19.916667,19.916667
69,35.daily.1-700-y11-2.1.I,8220DB000273,4,1.0,0.0,19.966667,19.966667
70,35.daily.1-700-y11-2.1.I,8220DB004530,5,1.0,0.0,20.000000,20.000000
...,...,...,...,...,...,...,...
3779185,1340.TA.99-13-r11-1.11.O,822GIR0141,23,0.0,0.0,19.966667,19.950000
3779186,1340.TA.99-13-r11-1.11.O,824G003874,24,0.0,0.0,19.983333,19.983333
3779187,1340.TA.99-13-r11-1.11.O,824GIR0140,25,0.0,0.0,20.033333,20.016667
3779188,1340.TA.99-13-r11-1.11.O,824GIR0024,26,0.0,0.0,20.066667,20.050000


# Part 2: the routing algorithm

Next we need to find the journey time to every bus stop from a vaccine centre.

The algorithm used here is a breadth first search, where all possible trips are saved unless they are proven to be suboptimal.

The process is as follows:
1. Find all stops within walking distance of a vaccine centre
2. Find all trips associated with those stops 

In [16]:
vc = transfer_stops['stop_id_x'].value_counts()
rural_stops = vc[vc < 5].index
print("Number of rural stops: ", len(rural_stops))

limited_transfers = transfer_stops[(transfer_stops.transfer_distance < 1) | 
                                   (transfer_stops.stop_id_x.isin(rural_stops)) | 
                                   (transfer_stops.stop_id_y.isin(rural_stops))]
limited_transfers

Number of rural stops:  2950


Unnamed: 0,stop_id_x,stop_name_x,stop_id_y,stop_name_y,transfer_distance,transfer_walk_time
0,700000015422,"Belfast, Europa Bus Centre",700000015422,"Belfast, Europa Bus Centre",0.000000,0.000000
3276321,8240DB001875,"Pinebrook Vale, stop 1875",8240DB001875,"Pinebrook Vale, stop 1875",0.000000,0.000000
3276664,8240DB001876,"Hartstown Comm Sch, stop 1876",8240DB001876,"Hartstown Comm Sch, stop 1876",0.000000,0.000000
3277007,8240DB001877,"St Ciaran's Church, stop 1877",8240DB001877,"St Ciaran's Church, stop 1877",0.000000,0.000000
3911265,8470B531131,"Rosscahill, stop 531131",8470B531131,"Rosscahill, stop 531131",0.000000,0.000000
...,...,...,...,...,...,...
3953697,8470PB000857,"Clonbern, Ballyedmond",8470PB000860,"Clonbern, Clonberne",1.994880,0.398976
3643837,836GIR0003,Ennis,8360B336431,"Culligans Cross, stop 336431",1.995042,0.399008
3643025,8360B336431,"Culligans Cross, stop 336431",836GIR0003,Ennis,1.995042,0.399008
3954101,853000120,St.Mary's Road Buncrana,853000334,"Straboe (Donegal), Saint Mary's Church",1.998743,0.399749


In [9]:
def compute_first_legs_from_facility(vax_hub_stops, facility, MAX_WALK_DIST, WALK_EFFORT_FACTOR):
    ## This finds all trips that can be make with a single leg from the stops closest to a given vaccine centre
    
    ## WALK_EFFORT_FACTOR is a measure of aversion to walking
    ## This tells us how much longer the person would be willing to spend travelling to avoid each minute of walking
    ## For example, a WALK_EFFORT_FACTOR of 0.25 means that an extra 5 minutes travel time would be worth it to avoid 20 minutes of walking
    
    ## Note that this is simply an abstract idea. It does not actually make the trip longer, or make tight connections impossible
    ## It is not the same as reducing walking speed, which generally does increase journey time and minimum connection times

    local_stops = vax_hub_stops[vax_hub_stops.Facility == facility]
    local_stops = local_stops[local_stops.stop_to_vax_hub_distance < MAX_WALK_DIST]
    local_stops = local_stops[['stop_id', 'stop_to_vax_hub_time']]
    
    
    first_legs_from_facility = pd.merge(local_stops, departures, on='stop_id')
    first_legs_from_facility = first_legs_from_facility.rename(columns={'departure_time_hrs': 'trip_1_departure_time', 
                                                                        'stop_id': 'stop_1a_id', 
                                                                        'stop_to_vax_hub_time': 'walk_0_time'})

    first_legs_from_facility = first_legs_from_facility.sort_values('walk_0_time')
    first_legs_from_facility = first_legs_from_facility.drop_duplicates('trip_id')  # some trips will appear multiple times, each starting from a different stop
    
    first_legs_from_facility['journey_dep_time'] = first_legs_from_facility['trip_1_departure_time'] - first_legs_from_facility['walk_0_time']
    first_legs_from_facility['total_walk_time'] = first_legs_from_facility['walk_0_time']
    first_legs_from_facility['adj_journey_dep_time'] = first_legs_from_facility['journey_dep_time'] - WALK_EFFORT_FACTOR * first_legs_from_facility['total_walk_time']
    

    first_legs_from_facility = pd.merge(first_legs_from_facility, arrivals, on='trip_id')
    first_legs_from_facility = first_legs_from_facility[first_legs_from_facility.stop_sequence_y > first_legs_from_facility.stop_sequence_x]

    first_legs_from_facility = first_legs_from_facility.rename(columns={'trip_id': 'trip_1_id', 
                                                                        'arrival_time_hrs': 'trip_1_arrival_time', 
                                                                        'stop_id': 'stop_1b_id'})

    first_legs_from_facility['journey_arrival_time'] = first_legs_from_facility['trip_1_arrival_time']
    first_legs_from_facility['final_stop_id'] = first_legs_from_facility['stop_1b_id']
    

    first_legs_from_facility = first_legs_from_facility[['adj_journey_dep_time', 'total_walk_time', 'journey_dep_time', 'journey_arrival_time', 'final_stop_id', 'walk_0_time', 'stop_1a_id', 'trip_1_id', 'trip_1_departure_time', 'trip_1_arrival_time', 'stop_1b_id']]
       
    first_legs_from_facility = eliminate_early_depart_late_arrive(first_legs_from_facility)
    return first_legs_from_facility

def compute_first_legs(vax_hub_stops, MAX_WALK_DIST, WALK_EFFORT_FACTOR):
    first_legs = []

    facilities = vax_hub_stops['Facility'].unique()
    for facility in facilities:
        first_legs_from_facility = compute_first_legs_from_facility(vax_hub_stops, facility, MAX_WALK_DIST, WALK_EFFORT_FACTOR)
        first_legs_from_facility['departure_facility'] = facility
        first_legs.append(first_legs_from_facility) 
    
    all_first_legs = pd.concat(first_legs)
    all_first_legs = eliminate_early_depart_late_arrive(all_first_legs)
    all_first_legs = remove_walkable_trips(all_first_legs, vax_hub_stops, WALK_EFFORT_FACTOR, threshold=0.1)

    return all_first_legs

def eliminate_early_depart_late_arrive(trips, stop_id_col_name='final_stop_id',
                                       dep_time_col_name='adj_journey_dep_time', 
                                       arr_time_col_name='journey_arrival_time'):
    ## This function eliminates trips where there is another trip which leaves later and arrives at the same place earlier
    ## Start by sorting the df of trips by stop_id, dep_time, and arrival_time
    trips = trips.sort_values([stop_id_col_name, dep_time_col_name, arr_time_col_name],
                               ascending=[False, False, True])
    
    ## The following loop retains the first occurence of every stop_id (i.e. the one with the latest departure time)
    ## Subsequent occurences are ordered in decreasing departure time, 
    ## and are retained only of their arrival time is earlier than the one above
    
    ## This needs to be repeated until all early depart/late arrive trips have been removed
    
    while True:
        initial_len = len(trips.index)
        trips = trips[(~trips[stop_id_col_name].duplicated()) | 
                      (trips[arr_time_col_name].diff() < 0)]
        new_len = len(trips.index)

        if initial_len == new_len:
            return trips

def remove_walkable_trips(trips, vax_hub_stops, WALK_EFFORT_FACTOR, threshold=0.1):
    ## Removes any trips that can be completed faster on foot
    ## Also removes trips that take longer on foot up to a given threshold (in hours)
    
    trips = pd.merge(trips, vax_hub_stops, left_on='final_stop_id', right_on='stop_id')
    direct_to_stop_time = (1 + WALK_EFFORT_FACTOR) * trips['stop_to_vax_hub_time']
    transit_journey_time = trips['journey_arrival_time'] - trips['adj_journey_dep_time']
    trips = trips[direct_to_stop_time > transit_journey_time + threshold]
    trips = trips.drop(columns=vax_hub_stops.columns)
    return trips

first_legs = compute_first_legs(vax_hub_stops, MAX_WALK_DIST, WALK_EFFORT_FACTOR)

In [10]:
first_legs

Unnamed: 0,adj_journey_dep_time,total_walk_time,journey_dep_time,journey_arrival_time,final_stop_id,walk_0_time,stop_1a_id,trip_1_id,trip_1_departure_time,trip_1_arrival_time,stop_1b_id,departure_facility
1,19.270990,0.208229,19.375104,20.066667,gen:57403:7923:0:1,0.208229,8540LL10179,17.Sat.49-M1-y11-5.18.I,19.583333,20.066667,gen:57403:7923:0:1,Hillgrove Hotel
2,17.987657,0.208229,18.091771,18.750000,gen:57403:7923:0:1,0.208229,8540LL10179,14.Sat.49-M1-y11-5.26.I,18.300000,18.750000,gen:57403:7923:0:1,Hillgrove Hotel
3,16.070990,0.208229,16.175104,16.916667,gen:57403:7923:0:1,0.208229,8540LL10179,13.Sat.49-M1-y11-5.17.I,16.383333,16.916667,gen:57403:7923:0:1,Hillgrove Hotel
4,13.937657,0.208229,14.041771,14.666667,gen:57403:7923:0:1,0.208229,8540LL10179,12.Sat.49-M1-y11-5.17.I,14.250000,14.666667,gen:57403:7923:0:1,Hillgrove Hotel
6,14.032825,0.033672,14.049661,14.333333,gen:57402:8244:0:1,0.033672,8530B158221,2.Sat.3-931-y11-2.7.I,14.083333,14.333333,gen:57402:8244:0:1,Letterkenny Institute of Technology
...,...,...,...,...,...,...,...,...,...,...,...,...
160258,17.730086,0.035498,17.747835,18.750000,700000011467,0.035498,8530B1559601,1414944.7.10-32-e19-1.91.I,17.783333,18.750000,700000011467,Letterkenny Institute of Technology
160259,16.582205,0.222974,16.693692,17.666667,700000011467,0.222974,8540B1559201,1414949.7.10-32-e19-1.92.O,16.916667,17.666667,700000011467,Hillgrove Hotel
160260,15.313419,0.035498,15.331168,16.333333,700000011467,0.035498,8530B1559601,1469368.14.10-32-e19-1.91.I,15.366667,16.333333,700000011467,Letterkenny Institute of Technology
160261,14.832205,0.222974,14.943692,15.916667,700000011467,0.222974,8540B1559201,1414943.7.10-32-e19-1.92.O,15.166667,15.916667,700000011467,Hillgrove Hotel


In [14]:
def identify_transfer_stops(trips, connected_stops, transfer_number, vax_hub_stops, WALK_EFFORT_FACTOR):
    ## Extends the trips DataFrame by finding all stops within walking distance of the current stops
    trips = pd.merge(trips, connected_stops, left_on='final_stop_id', right_on='stop_id_x')
    trips = trips.drop(columns=['stop_id_x', 'stop_name_x', 'stop_name_y'])
    
    col_names = {'stop_id_y': 'stop_' + str(transfer_number+1) + 'a_id',
                 'transfer_walk_time': 'transfer_' + str(transfer_number) + '_walk_time',
                 'transfer_distance': 'transfer_' + str(transfer_number) + '_distance'}
    
    prev_arrival_time = 'trip_' + str(transfer_number) + '_arrival_time'
    
    
    trips['final_stop_id'] = trips['stop_id_y']
    trips['journey_arrival_time'] = trips[prev_arrival_time] + trips['transfer_walk_time']
    trips['total_walk_time'] += trips['transfer_walk_time']
    trips['adj_journey_dep_time'] = trips['journey_dep_time'] - WALK_EFFORT_FACTOR * trips['total_walk_time']
    
    trips = trips.rename(columns=col_names)
    
    trips = eliminate_early_depart_late_arrive(trips)
    trips = remove_walkable_trips(trips, vax_hub_stops, WALK_EFFORT_FACTOR, threshold=0.1)
    return trips

In [18]:
def get_departures(stop_origins, departures, leg_number, min_transfer_time, chunk_size=None):
    current_stop_col = 'stop_' + str(leg_number) + 'a_id'
    trip_col = 'trip_' + str(leg_number) + '_id'
    departure_time_col = 'trip_' + str(leg_number) + '_departure_time'
    prev_arrival_time_col = 'trip_' + str(leg_number-1) + '_arrival_time'
    prev_transfer_time_col = 'transfer_' + str(leg_number-1) + '_walk_time'
    
    number_of_paths = len(stop_origins.index)
    if chunk_size == None:
        chunk_size = number_of_paths
        
    combined_departures = pd.DataFrame()
    
    i = 0
    j = min(i + chunk_size, number_of_paths)
    
    while i < number_of_paths:
        print(i, j, number_of_paths)
        df = stop_origins.iloc[range(i, j)]
        
        a = len(df.index)
        
        unmerged = True
        
        while unmerged:
            try:
                df = pd.merge(df, departures, left_on=current_stop_col, right_on='stop_id').drop(columns='stop_id')
                unmerged = False
            except:
                j = int((i + j)/2)
                print("ERROR: ", i, j, number_of_paths)
                df = stop_origins.iloc[range(i, j)]

                a = len(df.index)
                
            
                
        df = df.rename(columns={'trip_id': trip_col,
                                'departure_time_hrs': departure_time_col})
        
        b = len(df.index)
        
        df = df[df[departure_time_col] > df[prev_arrival_time_col] + df[prev_transfer_time_col] + min_transfer_time]
        
        c = len(df.index)
        
        for prev_leg in range(1, leg_number):
            df = df[df[trip_col] != df['trip_' + str(prev_leg) + '_id']]
        d = len(df.index)
        
        
        df = df.sort_values([trip_col, 'adj_journey_dep_time', 'stop_sequence'],
                        ascending=[False, False, True])
        duplicate_vals = True

        while duplicate_vals:
            initial_len = len(df.index)
            df = df[(~df[trip_col].duplicated()) | 
                    (df['stop_sequence'].diff() < -1)]
            new_len = len(df.index)

            if initial_len == new_len:
                duplicate_vals = False
            
            
        e = len(df.index)
        

        combined_departures = pd.concat([combined_departures, df])
        f = len(combined_departures.index)

        
        combined_departures = combined_departures.sort_values([trip_col, 'adj_journey_dep_time', 'stop_sequence'],
                                                              ascending=[False, False, True])
        duplicate_vals = True

        while duplicate_vals:
            initial_len = len(combined_departures.index)
            combined_departures = combined_departures[(~combined_departures[trip_col].duplicated()) | 
                                                      (combined_departures['stop_sequence'].diff() < -1)]
            new_len = len(combined_departures.index)

            if initial_len == new_len:
                duplicate_vals = False
                
                
        g = len(combined_departures.index)
        
        print(a, b, c, d, e, f, g)
        
        i = j
        j = min(i + chunk_size, number_of_paths)
    
    return combined_departures

In [19]:
def get_arrivals(trips, arrivals, leg_number, vax_hub_stops, WALK_EFFORT_FACTOR, chunk_size=None):
    trip_col = 'trip_' + str(leg_number) + '_id'
    arrival_time_col = 'trip_' + str(leg_number) + '_arrival_time'
    arrival_stop_col = 'stop_' + str(leg_number) + 'b_id'
    
    current_leg_arrivals = pd.merge(trips, arrivals, left_on=trip_col, right_on='trip_id')
    current_leg_arrivals = current_leg_arrivals[current_leg_arrivals.stop_sequence_y > current_leg_arrivals.stop_sequence_x]
    current_leg_arrivals = current_leg_arrivals.drop(columns=['trip_id', 'stop_sequence_x', 'stop_sequence_y'])
    current_leg_arrivals = current_leg_arrivals.rename(columns={'arrival_time_hrs': arrival_time_col,
                                                                'stop_id': arrival_stop_col})
    current_leg_arrivals['journey_arrival_time'] = current_leg_arrivals[arrival_time_col]
    current_leg_arrivals['final_stop_id'] = current_leg_arrivals[arrival_stop_col]

    current_leg_arrivals = eliminate_early_depart_late_arrive(current_leg_arrivals)
    current_leg_arrivals = remove_walkable_trips(current_leg_arrivals, vax_hub_stops, WALK_EFFORT_FACTOR, threshold=0.1)

    return current_leg_arrivals

In [20]:
def eliminate_back_track(current_trips, extended_terminated_trips):
    current_trips['terminated'] = False
    for df in extended_terminated_trips:
        df['terminated'] = True
        current_trips = pd.concat([current_trips, df])
    
    current_trips = eliminate_early_depart_late_arrive(current_trips)
    current_trips = current_trips[~current_trips.terminated]
    
    return current_trips

In [27]:
def compute_all_paths(vax_hub_stops, transfer_stops, departures, arrivals):
    MAX_WALK_DIST = 5.0
    MIN_TRANSFER_TIME = 0.05
    WALK_EFFORT_FACTOR = 0.5
    
    legs = []
    
    first_legs = compute_first_legs(vax_hub_stops, MAX_WALK_DIST, WALK_EFFORT_FACTOR)
    print("FIRST LEGS DONE: ", len(first_legs.index))
    legs.append(first_legs)
    
    transfer_number = 1
    trip_number = transfer_number + 1
    
    
    second_leg_origin_stops = identify_transfer_stops(first_legs, transfer_stops, transfer_number, vax_hub_stops, WALK_EFFORT_FACTOR)
    print("SECOND LEGS ORIGINS DONE: ", len(second_leg_origin_stops.index))
    legs.append(second_leg_origin_stops)
    
    min_dep_time = second_leg_origin_stops['journey_arrival_time'].min()
    red_departures = departures[departures.departure_time_hrs > min_dep_time]
    print("Second leg earliest departure: ", min_dep_time)

    second_leg_departures = get_departures(second_leg_origin_stops, red_departures, trip_number, MIN_TRANSFER_TIME, chunk_size=50000)
    print("SECOND LEGS DEPS DONE: ", len(second_leg_departures.index))
    legs.append(second_leg_departures)

    min_dep_time = second_leg_departures['trip_2_departure_time'].min()
    red_arrivals = arrivals[arrivals.arrival_time_hrs > min_dep_time]
    print("Second leg earliest departure: ", min_dep_time)

    second_leg_arrivals = get_arrivals(second_leg_departures, red_arrivals, trip_number, vax_hub_stops, WALK_EFFORT_FACTOR)
    print("SECOND LEGS ARRS DONE: ", len(second_leg_arrivals.index))
    second_leg_arrivals = eliminate_back_track(second_leg_arrivals, [second_leg_origin_stops])
    print("SECOND LEGS ARRS DONE: ", len(second_leg_arrivals.index))
    legs.append(second_leg_arrivals)

    transfer_number += 1
    trip_number += 1
    
    third_leg_origin_stops = find_transfer_stops(second_leg_arrivals, transfer_stops, transfer_number, vax_hub_stops, WALK_EFFORT_FACTOR)
    print("THIRD LEGS ORIGINS DONE: ", len(third_leg_origin_stops.index))
    third_leg_origin_stops = eliminate_back_track(third_leg_origin_stops, [second_leg_origin_stops])
    print("THIRD LEGS ORIGINS DONE: ", len(third_leg_origin_stops.index))
    legs.append(third_leg_origin_stops)
    
    min_dep_time = third_leg_origin_stops['journey_arrival_time'].min()
    red_departures = departures[departures.departure_time_hrs > min_dep_time]
    print("MIN: ", min_dep_time)

    third_leg_departures = get_departures(third_leg_origin_stops, red_departures, trip_number, MIN_TRANSFER_TIME, chunk_size=50000)
    print("THIRD LEGS DEPS DONE: ", len(third_leg_departures.index))
    legs.append(third_leg_departures)

    min_dep_time = third_leg_departures['trip_3_departure_time'].min()
    red_arrivals = arrivals[arrivals.arrival_time_hrs > min_dep_time]
    print("MIN: ", min_dep_time)

    third_leg_arrivals = get_arrivals(third_leg_departures, red_arrivals, trip_number, vax_hub_stops, WALK_EFFORT_FACTOR)
    print("THIRD LEGS ARRS DONE: ", len(third_leg_arrivals.index))
    third_leg_arrivals = eliminate_back_track(third_leg_arrivals, [third_leg_origin_stops, second_leg_origin_stops])
    print("THIRD LEGS ARRS DONE: ", len(third_leg_arrivals.index))
    legs.append(third_leg_arrivals)
    
    transfer_number += 1
    trip_number += 1
    
    fourth_leg_origin_stops = find_transfer_stops(third_leg_arrivals, transfer_stops, transfer_number, vax_hub_stops, WALK_EFFORT_FACTOR)
    
    fourth_leg_origin_stops = eliminate_back_track(fourth_leg_origin_stops, [second_leg_origin_stops, third_leg_origin_stops])
    print("FOURTH LEGS ORIGINS DONE: ", len(third_leg_origin_stops.index))
    legs.append(fourth_leg_origin_stops)
    
    return legs