# Robust route planner


1. [Helper functions](#Helper-Functions)
2. [Parameters](#Parameters)
3. [Visualisation](#Visualisation)

In [1]:
%%configure
{"conf": {
    "spark.app.name": "my-awesome-group_final",
    "spark.driver.memory": "2g"
}}

ID,YARN Application ID,Kind,State,Spark UI,Driver log,Current session?
9380,application_1589299642358_3960,pyspark,idle,Link,Link,
9389,application_1589299642358_3971,pyspark,idle,Link,Link,
9396,application_1589299642358_3979,pyspark,busy,Link,Link,
9397,application_1589299642358_3980,pyspark,idle,Link,Link,
9399,application_1589299642358_3982,pyspark,idle,Link,Link,
9401,application_1589299642358_3984,pyspark,idle,Link,Link,
9407,application_1589299642358_3991,pyspark,busy,Link,Link,
9408,application_1589299642358_3992,pyspark,idle,Link,Link,
9412,application_1589299642358_3997,pyspark,idle,Link,Link,
9413,application_1589299642358_4000,pyspark,idle,Link,Link,


In [2]:
# Initialization
spark

Starting Spark application


ID,YARN Application ID,Kind,State,Spark UI,Driver log,Current session?
9421,application_1589299642358_4008,pyspark,idle,Link,Link,✔


FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…

SparkSession available as 'spark'.


FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…

<pyspark.sql.session.SparkSession object at 0x7fe787c7b610>

## Helper Functions

In [3]:
import pandas as pd 
from math import sin, cos, sqrt, atan2, radians
from geopy import distance as dist
from pyspark.sql.functions import col
from heapq import *
import math
import time
import numpy as np

FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…

In [4]:
# Convert string time to seconds
def compute_seconds(time_string):
    """
    Given time_string in the form of hh:mm:ss, computes total number of seconds
    """

    split = time_string.split(':')
    return int(split[0])*3600 + int(split[1])*60 + int(split[2])

assert compute_seconds('02:00:01') == 7201

# Convert time in seconds to string
def compute_string(seconds):
    """
    Given seconds returns string corresponding to the time of the day
    """
    hour = seconds // 3600
    minute = (seconds - hour*3600) // 60
    second = seconds % 60
    return '%02d:%02d:%02d'%(hour, minute, second)

assert compute_string(300) == '00:05:00' and compute_string(43503) == '12:05:03'

# Compute difference between time strings
def compute_duration_between(time_string1, time_string2):
    """
    Computes how much time passed from time_string1 to time_string2
    """
    return compute_seconds(time_string2) - compute_seconds(time_string1)

assert compute_duration_between('08:00:00', '08:05:00') == 300

FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…

In [5]:
# cell to communicate with hdfs
import subprocess, pickle

def run_cmd(args_list):
    """Run linux commands."""
    print('Running system command: {0}'.format(' '.join(args_list)))    
    proc = subprocess.Popen(args_list,                            
                            stdout=subprocess.PIPE,                            
                            stderr=subprocess.PIPE)    
    s_output, s_err = proc.communicate()    
    s_return =  proc.returncode
    return s_return, s_output, s_err


def save_hdfs(localPath, hdfsPath):
    
    (ret, out, err)= run_cmd(['hdfs','dfs','-put','-f', localPath, hdfsPath])
    if err:
        print(err)
    else:
        print('Success')
        
def read_hdfs(hdfsPath):
    
    (ret, out, err)= run_cmd(['hdfs','dfs','-cat', hdfsPath])
    if err:
        print(err)
    else:
        print('Success')
    return pickle.loads(out)

FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…

In [6]:
# Graph station node
class StationNode:
    """
    Class for station node, that will keep a list of arrival/departures given by trips as well
    as walking distances
    """
    
    def __init__(self, station_id):
    
        self.station_id = station_id
        self.departures = dict()
        self.arrivals = dict()  
        self.walkable_stations = dict()
 
    def add_walkable_station(self, stop_id, duration):
        """
        Add walkable station and duration to get there
        """
        if stop_id not in self.walkable_stations:
            self.walkable_stations[stop_id] = duration

    def add_arrival(self, time, arrival):
        """
        Arrivals are in the form of
        """
        if time not in self.arrivals:
            self.arrivals[time] = []
            
        self.arrivals[time].append(arrival)
        
    def add_departure(self, time, departure):
        """
        Departure are in the form of
        """
        if time not in self.departures:
            self.departures[time] = []
            
        self.departures[time].append(departure)
        
    def to_tuple(self):
        """
        Convert class object to tuple
        """
        return (self.station_id, self.departures, self.arrivals, self.walkable_stations)
    
    def from_tuple(self, class_tuple):
        """
        Read class fields from tuple
        """
        self.station_id = class_tuple[0]
        self.departures = class_tuple[1]
        self.arrivals = class_tuple[2]
        self.walkable_stations = class_tuple[3]
        
        return self
        
def dictnodes_tolist(station_nodes):
    """
    Convert dictionary of StationNodes to list of node tuples
    """
    tolist = []
    for stop_id in station_nodes:
        try:
            tolist.append(station_nodes[stop_id].to_tuple())
        except:
            raise Exception(stop_id)
        
    return tolist
        
def list_todictnodes(fromlist):
    """
    Convert list of node tuples into dictionary of StationNodes
    """
    todict = dict()
    for node_tuple in fromlist:
        todict[node_tuple[0]] = StationNode(node_tuple[0]).from_tuple(node_tuple)
        
    return todict

FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…

Predictions

In [7]:
from geopy import distance as dist
import pandas as pd
from os import path
import math

from geopy import distance as dist
import pandas as pd
from os import path
import math


# Prediction function
def transfer_success_probability(stop1_id, trip1_id, trip1_arrival_time, stop2_id, trip2_id, trip2_departure_time,  verbose=False, norm=True):
    # Extract time in seconds and day period from time string
    trip1_arr_seconds, trip1_day_period = get_time_info(trip1_arrival_time)
    trip2_dep_seconds, trip2_day_period = get_time_info(trip2_departure_time)

    # Compute the transfer time surplus
    schedule_difference = trip2_dep_seconds - trip1_arr_seconds
    
    if schedule_difference > 3600:
        return 1
    minimum_transfer_time = 120
    walking_time = compute_walk_time(stop1_id, stop2_id)

    transfer_time_surplus = schedule_difference - walking_time - minimum_transfer_time

    avg_arrival_delay, std_arrival_delay = get_prediction_data(stop1_id, trip1_id, trip1_day_period, is_arrival=True)
    
    if trip2_id == 'Terminus':
        avg_departure_delay = 0
        std_departure_delay = 0
        transfer_time_surplus += minimum_transfer_time # Do not require the mininum two minutes for terminus transfer
    else:
        avg_departure_delay, std_departure_delay = get_prediction_data(stop2_id, trip2_id, trip2_day_period, is_arrival=False)

    if verbose:
        print("Schedule difference: {}".format(schedule_difference))
        print("Walking time: {}".format(walking_time))
        print("Transfer_time_surplus: {}".format(transfer_time_surplus))
        print("Mean arrival delay: {}".format(avg_arrival_delay))
        print("Mean departure delay: {}".format(avg_departure_delay))
        
    if norm:
        return compute_uncertainty_norm(avg_arrival_delay, std_arrival_delay, avg_departure_delay, std_departure_delay, transfer_time_surplus)

    return compute_uncertainty(avg_arrival_delay, avg_departure_delay, transfer_time_surplus)


# Prediction function for terminus
def terminus_success_probability(last_stop_id,
                                 last_trip_id,
                                 last_trip_arrival_time,
                                 terminus_stop_id,
                                 terminus_time,
                                 verbose=False):
    return transfer_success_probability(last_stop_id,
                                        last_trip_id,
                                        last_trip_arrival_time,
                                        terminus_stop_id,
                                        'Terminus',
                                        terminus_time,
                                        verbose)

# Return day period given the hour
def get_day_period(hour):
    if hour < 10:
        return 1
    elif hour < 16:
        return 2
    elif hour < 19:
        return 3
    else:
        return 4


    return dist.distance((lat1, lon1), (lat2, lon2)).km * 1200  # 1200 secondes to make 1 km (corresponds to 50m/min)


# Return time in seconds and day period from time string
def get_time_info(time_string):
    """
    Given time_string in the form of hh:mm:ss, computes total number of seconds
    """
    split = time_string.split(':')
    seconds = int(split[0]) * 3600 + int(split[1]) * 60 + int(split[2])
    day_period = get_day_period(int(split[0]))

    return seconds, day_period

# Load a file from pickle or hdfs accordingly
def get_pred_file(hdfs_path):
    file_name = hdfs_path.split("/")[-1]
    if path.exists(file_name):
        return pd.read_pickle(file_name)
    else:
        loaded_df = pd.DataFrame(read_hdfs(hdfs_path))
        loaded_df.to_pickle(file_name)
        return pd.DataFrame(loaded_df)

# Return the prediction parameters given a trip and station
def get_prediction_data(stop_id, trip_id, day_period, is_arrival):

    delay_column = ['mean_arrival_delay', 'std_arrival_delay'] if is_arrival else ['mean_departure_delay','std_departure_delay']

    prediction_param = full_pred[(full_pred['stop_id'] == stop_id)
                                 & (full_pred['trip_id'] == trip_id)
                                 & (full_pred['day_period'] == day_period)][delay_column]

    if len(prediction_param) == 0:
        prediction_param = stop_pred[(stop_pred['stop_id'] == stop_id)
                                     & (stop_pred['day_period'] == day_period)][delay_column]

    if len(prediction_param) == 0:
        prediction_param = period_pred[(period_pred['day_period'] == day_period)][delay_column]

    return prediction_param.values[0]


# Compute the cdf of the exponential distrubution of delay time loss
def compute_uncertainty(avg_arr_delay, avg_dep_delay, transfer_time_surplus):
        
    if transfer_time_surplus <= 0:
        if avg_dep_delay == 0:
            return 0
        return avg_dep_delay / (avg_arr_delay + avg_dep_delay) * math.exp(transfer_time_surplus / avg_dep_delay)
    else:
        if avg_arr_delay == 0:
            return 1
        return 1 - avg_arr_delay / (avg_arr_delay + avg_dep_delay) * math.exp(-transfer_time_surplus / avg_arr_delay)

from scipy.stats import norm

# Compute the cdf of normal approximation of delay time loss
def compute_uncertainty_norm(avg_arr_delay, std_arr_delay, avg_dep_delay, std_dep_delay, transfer_time_surplus):
    
    avg_time_loss = avg_arr_delay - avg_dep_delay
    std_time_loss = math.sqrt(math.pow(avg_arr_delay,2)  + math.pow(avg_dep_delay,2))
    
    return norm.cdf(transfer_time_surplus, avg_time_loss, std_time_loss)



# Compute the duration to walk between two stops
def compute_walk_time(stop_id_1, stop_id_2):
    
    stop1 = stops[stops.index == stop_id_1]
    
    if len(stop1) == 0:
        print('Stop1 is missing, can\'t compute walk time for {}'.format(stop_id_1))
        return 0
    
    lat1 = float(stop1['stop_lat'].values[0])
    lon1 = float(stop1['stop_lon'].values[0])
    

    stop2 = stops[stops.index == stop_id_2]
    
    if len(stop2) == 0:
        print('Stop2 is missing, can\'t compute walk time for {}'.format(stop_id_2))
        return 0
    
    lat2 = float(stop2['stop_lat'].values[0])
    lon2 = float(stop2['stop_lon'].values[0])

    return dist.distance((lat1, lon1), (lat2, lon2)).km * 1200  # 1200 secondes to make 1 km (corresponds to 50m/min)

FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…

In [8]:
# Filter and sort results based on probabilities    
def get_res_proba(answ):
    arr = []
    res = get_answ(answ)
    for r in res:
        proba, full_trip_info, transfers_info = journey_success_probability(r[1])
        arr.append([proba, full_trip_info, transfers_info, r[0]])
    return arr

# Extract trips for algorithm output
def get_answ(answ):
    result = []
    refined = []
    for i,tp in enumerate(answ):
        path = deepcopy(tp[1])
        path.reverse()
        path = [(p[0], compute_string(p[1]), p[2], p[3]) for p in path]
        refined.append((tp[0]/60, path))
        
        result.append((tp[0]/60, combine_same_trips(path)))
    return result

# write just one value if the path is same:
def combine_same_trips(path):
    renewed_path = []
    for connection in path:
        #print(connection)
        if (len(renewed_path) > 0) and (renewed_path[-1][2] == connection[2]):
            renewed_path[-1] = (renewed_path[-1][0],
                                renewed_path[-1][1],
                                renewed_path[-1][2],
                                compute_string(compute_seconds(renewed_path[-1][3]) + compute_seconds(connection[3])))
        else:
            renewed_path.append(connection)
    return renewed_path

# Compute the success probability for each transfer of a journey
def journey_success_probability(result):
    full_trip_info = []
    transfers_info = []
    
    for i, trip in enumerate(result):
    
        if trip[2] == 'walk':
            if len(full_trip_info) != 0:
                full_trip_info[-1][2] = trip[0]
                
        elif trip[2] == 'Terminus':
        
            if full_trip_info[-1][2] == None:
                full_trip_info[-1][2] = trip[0]
            
            
            # Terminus transfer:
            transfers_info.append([full_trip_info[-1][2], full_trip_info[-1][4], full_trip_info[- 1][3], trip[0], trip[2], trip[1], None])
                
            proba = 1
            
            for tr in transfers_info:
                tr_proba = transfer_success_probability(stop1_id= tr[0], 
                                             trip1_id=tr[1], 
                                             trip1_arrival_time=tr[2], 
                                             stop2_id=tr[3], 
                                             trip2_id=tr[4], 
                                             trip2_departure_time=tr[5])
                
                tr[6] = tr_proba
                proba = tr_proba * proba
        else:
        
            if len(full_trip_info) != 0:
                if full_trip_info[-1][2] == None:
                    full_trip_info[-1][2] = trip[0]
                transfers_info.append([full_trip_info[-1][2], full_trip_info[-1][4], full_trip_info[- 1][3], trip[0], trip[2], trip[1], None])
            
            full_trip_info.append([trip[0], trip[1], None, add_string_time(trip[1],trip[3]), trip[2]])
        
    return proba, full_trip_info, transfers_info

# Sum two string time
def add_string_time(t1, t2):
    return compute_string(compute_seconds(t1) + compute_seconds(t2))


# Print nicely the trip planner output
def pretty_print(probas, trip_info, transfers_info, duration, threshold):
    outstring = u'\n#################################################################\n\n'
    
    for i in range(len(probas)):
        if probas[i] < threshold:
            continue
        outstring += u'\n Option {}'.format(i+1)
        outstring +=u'\n#################################################################\n'
        trip_time = math.ceil(duration[i])
        outstring += u'Trip Time: {}\n'.format(trip_time)
        outstring += u'Trip success proba: {}\n'.format(probas[i])
        outstring += u'-----------\n'
        outstring += u'|Directions|\n'
        outstring += u'------------\n\n'
        outstring += u'---------------------------------------------------------------------------------------------------------------\n'
        outstring += u'|{:>20}|{:>30}|{:>20}|{:>20}|{:>30}|{:>20}|{:>30}|\n'.format('Start Stop', 'Start Name', 'Departure Time', 'End Stop', 'End Name', 'Arrival Time',  'Trip Name')
        outstring += u'---------------------------------------------------------------------------------------------------------------\n'
        for trip in trip_info[i]:
            outstring += u'|{:>20}|{:>30}|{:>20}|{:>20}|{:>30}|{:>20}|{:>30}|\n'.format(trip[0], stops.loc[trip[0]][u'stop_name'], trip[1], trip[2],stops.loc[trip[2]][u'stop_name'], trip[3], trip[4])
        outstring += u'---------------------------------------------------------------------------------------------------------------\n'
        outstring += u'-----------\n'
        outstring += u'|Transfers|\n'
        outstring += u'------------\n\n'
        outstring += u'---------------------------------------------------------------------------------------------------------------\n'
        outstring += u'|{:>20}|{:>30}|{:>20}|{:>20}|{:>30}|{:>20}|{:>20}|\n'.format('Arrival Stop', 'Arrival Trip Name', 'Arrival Time', 'Departure Stop', 'Departure Trip Name', 'Departure Time','Success Proba')
        outstring += u'---------------------------------------------------------------------------------------------------------------\n'
        for tr in transfers_info[i]:
            outstring += u'|{:>20}|{:>30}|{:>20}|{:>20}|{:>30}|{:>20}|{:>20}|\n'.format(tr[0], tr[1], tr[2], tr[3], tr[4], tr[5], tr[6])
        outstring += u'---------------------------------------------------------------------------------------------------------------\n'
        outstring +=u'\n#################################################################\n'
        
    return outstring
        
        
    
# Extract the routes, tranfers, time and probablities using routing algorithm output and uncertainty interval.
def get_result_sorted(answ, uncertainty_threshold):
    res = get_res_proba(answ)
    
    probas = [r[0] for r in res]
    trip_info = [r[1] for r in res]
    transfers_info = [r[2] for r in res]
    duration = [r[3] for r in res]
    
    printable = pretty_print(probas, trip_info, transfers_info, duration, uncertainty_threshold)
    
    return printable
    

FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…

In [9]:
from copy import deepcopy

# Routing algorithm
def k_shortest(start_stop, end_stop,sbb_network, all_stops, K, T, count_u, threshold=None, remove_same=True):
    """
    Implementation of k shortest path given the threshold if threshold is not None
    K path starts from destinations and finds path to source backwards
    
    Pierre return probability of transfer given start_station, end_station, departure_time, arrival_time(latest)
    """
    # set of shortest paths from s to t
    P = []

    # containing path, maintaining heap data structure
    h_paths = [] 

    # initialize number of shortes path found to each node to 0
    for node in all_stops:
        count_u[node] = 0
        
    # min cost of the path
    min_cost = []

    # insert path from last stop with cost 0   
    # we have ( cost_of_path(time it lasts), 
    #  [(station, time_at_station_seconds, route/walk/terminus, duration, probability_of_success) ..])
    heappush(h_paths, (0, [(end_stop, T, 'Terminus', '00:00:00', 1)]))

    # while we have possible paths and found shortest paths from start+stop aren't already K
    while h_paths and count_u[start_stop] < K:

        C, shortes_path_T = heappop(h_paths)

        # read the first node and its corresponding time on shortest path
        first_node_on_path = shortes_path_T[-1][0]
        first_node_time = shortes_path_T[-1][1]
        trip = shortes_path_T[-1][2]
        succ_prob = shortes_path_T[-1][4]
        #print('stop', first_node_on_path,'time', first_node_time, start_stop)

        # update number of paths
        count_u[first_node_on_path] += 1

        if first_node_on_path == start_stop:
            if C in min_cost and remove_same:
                count_u[first_node_on_path] -= 1
                continue
            min_cost.append(C)
            P.append((C, shortes_path_T))

        if count_u[first_node_on_path] <= K:

            # return Node of the first stop on the path
            u_node = sbb_network[first_node_on_path]

            # iterate over stations that can reach the first stop on the path by transport
            for time_edge in u_node.arrivals:
                
                # get the time these stations arrive at the first stop of our path
                arrival_time = compute_seconds(time_edge)
                
                
                # traverse all the incoming stations for time_edge time
                for station_edge in u_node.arrivals[time_edge]:
                    
                    
                    # transfer time to the new trip (possible platform) of the same station, 0 if the same trip
                    transfer_time = 2 * 60 if station_edge[2] != trip and trip != 'Terminus' else 0
                    
                    # check if one can make with this station_edge
                    if  arrival_time > first_node_time - transfer_time:
                        continue
                        
                    updated_probe=1
                    # check that probability of making will still be above the threshold
                    if threshold is not None and station_edge[2] != trip:

                        
                        if trip == 'Terminus':
                            # prev_stop_id, prev_stop_tripid, arrival_time, final_stop, final_arrival_time
                            n_proba = terminus_success_probability(end_stop, station_edge[2], time_edge, end_stop, compute_string(T))
                            
                        elif trip != 'walk':
                            # prev_stop_id, prev_stop_tripid, arrival_time, depart_stop, dep_trip, departure_time
                            n_proba = transfer_success_probability(first_node_on_path, station_edge[2], time_edge, first_node_on_path,trip,compute_string(first_node_time))
                            
                        else:
                            n_proba = transfer_success_probability(first_node_on_path, station_edge[2], time_edge, shortes_path_T[-2][0],shortes_path_T[-2][2],compute_string(shortes_path_T[-2][1]))
                            
                            
                        
                        # compute cumulated probability of success
                        if succ_prob * n_proba < threshold:
                            continue
                        
                        updated_probe = succ_prob * n_proba
                        
                    departure_time = compute_seconds(station_edge[1])

                    new_path = deepcopy(shortes_path_T)
                    new_path.append((station_edge[0], departure_time, station_edge[2], 
                                     compute_string(arrival_time-departure_time), updated_probe))

                    # insert new nodes into the heap
                    heappush(h_paths, 
                             (C + first_node_time - departure_time, 
                             new_path))  

            # iterate over walkable station, assume 2 mn delay for connection         
            for station_id in u_node.walkable_stations:
                # if it is not the same station and previous wasnt also walk
                if station_id != first_node_on_path and trip != 'walk':

                    new_path = deepcopy(shortes_path_T)
                    new_path.append((station_id, first_node_time - u_node.walkable_stations[station_id] - 2*60, 
                                     'walk', compute_string(u_node.walkable_stations[station_id]),succ_prob))

                    # insert new nodes into the heap
                    heappush(h_paths, 
                             (C + u_node.walkable_stations[station_id] + 2*60, 
                             new_path))  
    return P
                

FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…

## Parameters

In [10]:
# Load the graph model
G = read_hdfs('/user/lortkipa/graph_untested_classTuples.pkl')
G = list_todictnodes(G)

FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…

Running system command: hdfs dfs -cat /user/lortkipa/graph_untested_classTuples.pkl
Success

In [11]:
# Load stops and their names
stops = read_hdfs('/user/lortkipa/filtered_stops_Premoved.pkl')
stops = stops.set_index('stop_id')

FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…

Running system command: hdfs dfs -cat /user/lortkipa/filtered_stops_Premoved.pkl
Success

In [12]:
# Load prediction dataframes
period_pred = get_pred_file('/user/lortkipa/period_prediction_df.pkl')
stop_pred = get_pred_file('/user/lortkipa/stop_prediction_df.pkl')
full_pred = get_pred_file('/user/lortkipa/full_prediction_df.pkl')

FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…

Running system command: hdfs dfs -cat /user/lortkipa/period_prediction_df.pkl
Success
Running system command: hdfs dfs -cat /user/lortkipa/stop_prediction_df.pkl
Success
Running system command: hdfs dfs -cat /user/lortkipa/full_prediction_df.pkl
Success

## Evaluation

## Test 1
start =  Zürich HB (8503000) to Zürich

end = Auzelg (8591049)

arrival = '12:30:00'

Possible Solutions:

<b>Route 1:</b>

20.TA.26-9-A-j19-1.2.H: 8503000:0:41/42 at 12:07:00 ~ 8503310:0:3 at 12:17:00

Walking: 8503310:0:3 ~ 8590620

168.TA.26-12-A-j19-1.2.H: 8590620 at 12:23:00 ~ 8591049 at 12:29:00


<b>Route 2:</b>

32.TA.80-159-Y-j19-1.8.H: 8503000:0:5 at 12:05:00 ~ 8503006:0:6 at 12:11:00

Walking: 8503006:0:6 ~ 8580449

1914.TA.26-11-A-j19-1.27.R: 8580449 at 12:15:00 ~ 8591049 at 12:24:00

In [13]:
# General function that computes the best trips to take given users parameters
def compute_journey(start_stop, end_stop, end_time, threshold):
    T = compute_seconds(end_time)
    K = 10
    uncertainty_threshold = threshold
    # disctionary for number of shortest paths found from each node
    count_u = dict()

    # SBB network used for the algorithm
    sbb_network = G
    # all possible nodes/stations
    all_stops = G.keys()

    answ = k_shortest(start_stop, end_stop, sbb_network, all_stops, K, T, count_u, threshold=None, remove_same=False)
    
    res = get_res_proba(answ)
    
    probas = [r[0] for r in res]
    
    if (sum([int(p > threshold) for p in probas]) == 0):
        K = 2
        answ = k_shortest(start_stop, end_stop, sbb_network, all_stops, K, T, count_u,threshold = threshold, remove_same=False)
        
    
    return get_result_sorted(answ, threshold)
    

FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…

In [14]:
# Test the function with an example
res = compute_journey('8503000', '8591049', '12:30:00', 0.2)
print(res)

FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…


#################################################################


 Option 1
#################################################################
Trip Time: 23.0
Trip success proba: 0.568759410313
-----------
|Directions|
------------

---------------------------------------------------------------------------------------------------------------
|          Start Stop|                    Start Name|      Departure Time|            End Stop|                      End Name|        Arrival Time|                     Trip Name|
---------------------------------------------------------------------------------------------------------------
|             8503000|                     Zürich HB|            12:07:00|             8503310|                    Glattbrugg|            12:16:00|        20.TA.26-9-A-j19-1.2.H|
|             8590620|           Glattbrugg, Bahnhof|            12:23:00|             8591049|                Zürich, Auzelg|            12:29:00|      168.TA.26-12-A-j19-1.2.H|
----

# Visualisation

In [15]:
%%local
from IPython.display import display
from IPython import get_ipython
from IPython.display import clear_output
from ipywidgets import widgets, Layout, Box, Button, Label, Dropdown, FloatText, Text


form_item_layout = Layout(
    display='flex',
    flex_flow='row',
    justify_content='space-between'
)

tolerance_data = widgets.IntSlider(min=0, max=100)
origin_data = Text(value='8503000')
destination_data = Text(value='8591049')
time_data = Text(value='12:30:00')



tolerance = Box([Label(value='Tolerance'), tolerance_data], layout=form_item_layout)
origin =   Box([Label(value='Origin ID'), origin_data],  layout=form_item_layout)
destination =  Box([Label(value='Destination ID'), destination_data],  layout=form_item_layout)
time =   Box([Label(value='Maximum Arrival time (hh:mm:ss)'), time_data],  layout=form_item_layout)
search =  Button(description='Search')
search1 = Box([Label(value=''), search],  layout=form_item_layout)
elements = widgets.VBox(children=[tolerance, origin, destination, time, search1], layout=Layout(
    display='flex',
    flex_flow='column',
    border='solid 2px',
    align_items='stretch',
    width='50%'
))
display(elements)

output = widgets.Output()


@output.capture()
def on_button_clicked(b):
    clear_output(wait=True)
    ipy = get_ipython()
    print("Selected options are:\n - Tolerance: {}\n - Origin: {}\n - Destination: {}\n - Arrival time: {}".format(tolerance_data.value/100, 
                                                                                                                   origin_data.value, 
                                                                                                                   destination_data.value, 
                                                                                                                   time_data.value))
    
          
    start_stop =  origin_data.value  
    end_stop = destination_data.value  
    T = time_data.value 
    uncertainty = str(tolerance_data.value/100)
          
    ipy.push('T')
    ipy.push('start_stop')
    ipy.push('end_stop')
    ipy.push('uncertainty')
    
    ipy.run_cell_magic("send_to_spark", "-i T -n T -t str", " ")
    ipy.run_cell_magic("send_to_spark", "-i start_stop -n start_stop -t str", " ")
    ipy.run_cell_magic("send_to_spark", "-i end_stop -n end_stop -t str", " ")
    ipy.run_cell_magic("send_to_spark", "-i uncertainty -n uncertainty -t str", " ")
    
    ipy.run_cell_magic(
        "spark", '',
        "print(compute_journey(start_stop, end_stop, T, float(uncertainty)))"
    )

search.on_click(on_button_clicked)
display(output)

VBox(children=(Box(children=(Label(value='Tolerance'), IntSlider(value=0)), layout=Layout(display='flex', flex…

Output()