In [None]:
import pandas as pd
import json

from tqdm import tqdm
from math import sin, cos, sqrt, atan2, radians

from queue import PriorityQueue as PQueue

In [None]:
def parse_time(time):
    """Convert a clock string into the number of seconds since midnight.

    Parameters
    ----------
    time : str
        Time written as ``HH:MM:SS`` or ``H:MM:SS``. The hour field is not
        limited to 0-23, so service-day times such as ``25:30:00`` are
        accepted. Surrounding whitespace is ignored.

    Returns
    -------
    int
        ``hours * 3600 + minutes * 60 + seconds``.

    Raises
    ------
    ValueError
        If the string does not consist of exactly three ':'-separated
        integer fields.
    """
    # Split on ':' instead of slicing fixed columns: timetable feeds such as
    # GTFS allow a single-digit hour ("9:00:00"), which fixed offsets misread.
    hours, minutes, seconds = time.strip().split(':')
    return int(hours) * 3600 + int(minutes) * 60 + int(seconds)

def calc_time(stop1, stop2, speed_mps=1.0):
    """Estimate the walking time in seconds between two stops.

    The great-circle (haversine) distance between the two coordinates is
    computed in metres and divided by the walking speed.

    Parameters
    ----------
    stop1, stop2 : mapping
        Each must provide numeric ``'lat'`` and ``'lon'`` entries in degrees.
    speed_mps : float, optional
        Walking speed in metres per second. The default of 1 m/s is
        deliberately slow: the straight-line distance disregards roads, so a
        slower walking speed keeps the estimate feasible.

    Returns
    -------
    int
        Walking time in whole seconds, truncated towards zero. With the
        default speed this equals the distance in metres.
    """
    # approximate radius of earth in km
    R = 6373.0

    lat1 = radians(stop1['lat'])
    lon1 = radians(stop1['lon'])
    lat2 = radians(stop2['lat'])
    lon2 = radians(stop2['lon'])

    dlon = lon2 - lon1
    dlat = lat2 - lat1

    a = sin(dlat / 2)**2 + cos(lat1) * cos(lat2) * sin(dlon / 2)**2
    # Floating-point rounding can push `a` marginally outside [0, 1] for
    # (near-)antipodal points, which would make sqrt(1 - a) raise ValueError.
    a = min(1.0, max(0.0, a))
    c = 2 * atan2(sqrt(a), sqrt(1 - a))

    # R is in km, so R * c * 1000 is the distance in metres.
    return int(R * c * 1000 / speed_mps)

In [None]:
# Name of the data folder; the processed_*.json inputs are opened from it.
FOLDER = "hsl"

In [None]:
def _load_json(filename):
    """Parse and return the JSON document stored at FOLDER/filename."""
    # JSON text is UTF-8 by specification. Naming the encoding explicitly keeps
    # any non-ASCII text (e.g. in stop names) from being decoded with the
    # platform's locale default.
    with open(FOLDER + '/' + filename, 'r', encoding='utf-8') as f:
        return json.load(f)


# id : {name, lat, lon}
stops = _load_json('processed_stops.json')

# trip_id : [[arr_time1, dep_time1, stop_id1], ...]
trips = _load_json('processed_trips.json')

# stop_id : [[time1, trip_id1, stop_pos1], ...]
stop_data = _load_json('processed_stop_data.json')

In [None]:
# Remove every stop that has no entry in stop_data, reporting the number of
# stops before and after the clean-up.
print('stops before:', len(stops))

# Gather the ids first: a dict must not change size while it is iterated.
to_delete = [stop_id for stop_id in stops if stop_id not in stop_data]
for stop_id in to_delete:
    del stops[stop_id]

print('stops after:', len(stops))

In [None]:
# For each stop, build the list of (walking time, stop id) pairs to every stop
# (itself included), ordered by increasing walking time.
distances = {}

for stop1 in tqdm(stops):
    here = stops[stop1]
    pairs = [(calc_time(here, stops[stop2]), stop2) for stop2 in stops]
    # The sort is stable and compares the walking time only, so entries with
    # equal times keep the iteration order of `stops`. The stop itself is
    # therefore not guaranteed to be entry 0 when another stop is also at
    # walking time 0.
    pairs.sort(key=lambda pair: pair[0])
    distances[stop1] = pairs

In [None]:
# Look up stops by name: print the id and record of every stop whose name
# contains `term` (case-sensitive substring match).
term = 'Aalto'
for stop_id, info in stops.items():
    if term in info['name']:
        print(stop_id, info)

In [None]:
# Earliest-arrival search from `origin`, starting at `starting_time`.
# A priority queue ordered by arrival time is expanded stop by stop; the first
# time a stop is popped is its earliest arrival, which is stored in
# `dist_from_origin` as seconds elapsed since `starting_time`.
origin = '2222603' # Aalto metro station
starting_time = parse_time('09:00:00')

# Walking legs longer than this many seconds (10 min) are not considered.
MAX_WALK_SECONDS = 600

dist_from_origin = {}
seen = set()

q = PQueue()
q.put((starting_time, origin))

# The context manager closes the progress bar even if the search raises.
with tqdm(total=len(stops)) as pbar:
    while not q.empty():
        time, cur = q.get()
        cur = str(cur)

        # Skip stale queue entries (stop already settled) and ids that are
        # not present in `stops`.
        if cur in seen or cur not in stops:
            continue

        seen.add(cur)
        pbar.update(1)
        # queue.PriorityQueue defines no __len__, so len(q) raises TypeError;
        # qsize() is the supported way to read the current size.
        pbar.set_description("Queue size %u" % q.qsize())
        dist_from_origin[cur] = time - starting_time

        # add all unseen stops in 10min walking distance
        # The whole sorted list is scanned instead of skipping entry 0: the
        # stop itself is already in `seen`, and a co-located stop at walking
        # time 0 may sort ahead of it and must not be dropped.
        for walk_time, n_id in distances[cur]:
            if walk_time > MAX_WALK_SECONDS:
                break  # list is sorted by walking time, nothing closer follows
            if n_id not in seen:
                q.put((time + walk_time, str(n_id)))

        # add all trips leaving station

        # could use bin_search here
        for dep_time, trip_id, pos in stop_data[cur]:
            if dep_time < time:
                continue  # departed before we arrived at this stop

            # get all further stops on the trip and add them to the queue
            # NOTE(review): `pos-1` presumably treats stop_pos as 1-based;
            # verify against the preprocessing. With a 0-based pos of 0 the
            # slice [-1:] would yield only the trip's last stop.
            for arr_time, _next_dep, n_id in trips[trip_id][pos-1:]:
                n_id = str(n_id)
                # Settled stops would be discarded when popped anyway; not
                # queueing them keeps the queue smaller.
                if n_id not in seen:
                    q.put((arr_time, n_id))

In [None]:
# Print each reached stop's name followed by its travel time from the origin,
# broken down into hours, minutes and seconds.
for stop_id, elapsed in dist_from_origin.items():
    hours, remainder = divmod(elapsed, 3600)
    minutes = remainder // 60
    seconds = elapsed % 60
    print(stops[stop_id]['name'], hours, minutes, seconds)