In [2]:
from scipy.stats import gamma, lognorm
import pandas as pd
import numpy as np

import matplotlib.pylab as plt
import seaborn as sns

from pyscripts.test_utils import toy_schedule
from pyscripts.planner import *

%matplotlib inline
%load_ext autoreload
%autoreload 2

schedule = pd.read_pickle("data/schedule_clean.pkl")
pairwise_distances = pd.read_pickle("data/pairwise_distance.pkl")

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [3]:
import getpass
import pyspark
from pyspark.sql import SparkSession

# Spark configuration for a YARN-backed session; the application name is
# suffixed with the current user name.
conf = (
    pyspark.conf.SparkConf()
    .setMaster('yarn')
    .setAppName('twitter-{0}'.format(getpass.getuser()))
    .set('spark.executor.memory', '4g')
    .set('spark.executor.instances', '6')
    .set('spark.port.maxRetries', '100')
)
sc = pyspark.SparkContext.getOrCreate(conf)
conf = sc.getConf()

spark = SparkSession(sc)

# Every line of the metadata file is read as a single 'value' string.
metadata = spark.read.text('/datasets/project/metadata/BFKOORD_GEO')

# Left of " % ": space-separated numeric fields; right of it: the stop name.
split_col = pyspark.sql.functions.split(metadata['value'], " % ")
split_left = pyspark.sql.functions.split(split_col.getItem(0), " +")

# NOTE(review): the projection cell feeds the 'latitude' column in as the
# x/easting input and the maps use x ~ 960000 (about 8.6 degrees east), so field 1
# looks like the geographic longitude and field 2 the latitude, i.e. the two
# labels below appear to be swapped. Confirm against the file format before
# renaming, because downstream cells rely on the current labels.
metadata = (
    metadata
    .withColumn('longitude', split_left.getItem(2))
    .withColumn('latitude', split_left.getItem(1))
    .withColumn('stop', split_col.getItem(1))
    .drop('value')
)

metadataPandas = metadata.toPandas()

# Row(s) describing the reference station used by the distance filter.
zurich_coord = metadataPandas.loc[metadataPandas['stop'] == 'Zürich HB']

from math import sin, cos, sqrt, atan2, radians

def filter_distance(lat1, lon1, lat2, lon2):
    """Return the great-circle distance between two points, in kilometres.

    Uses the haversine formula with an approximate earth radius of 6373 km.

    Parameters
    ----------
    lat1, lon1 : float
        Latitude and longitude of the first point, in degrees.
    lat2, lon2 : float
        Latitude and longitude of the second point, in degrees.

    Returns
    -------
    float
        Distance along the sphere's surface in kilometres.
    """
    # approximate radius of earth in km
    R = 6373.0

    lat1 = radians(lat1)
    lon1 = radians(lon1)
    lat2 = radians(lat2)
    lon2 = radians(lon2)

    dlon = lon2 - lon1
    dlat = lat2 - lat1

    a = sin(dlat / 2)**2 + cos(lat1) * cos(lat2) * sin(dlon / 2)**2
    # Floating-point rounding can push `a` marginally outside [0, 1] for
    # (near-)antipodal points, which would make sqrt(1 - a) raise ValueError.
    a = min(1.0, max(0.0, a))
    c = 2 * atan2(sqrt(a), sqrt(1 - a))

    return R * c

# keep only stations inside 10km
if zurich_coord.empty:
    raise ValueError("'Zürich HB' was not found in the station metadata")

# NOTE(review): the metadata columns appear to be mislabelled: the projection
# cell feeds 'latitude' in as the x/easting input and the maps use x ~ 960000
# (about 8.6 degrees east). The column called 'longitude' therefore holds the
# geographic latitude and vice versa, and the haversine needs them in their
# true roles. If the labels are ever corrected upstream, swap these two names.
TRUE_LAT_COLUMN = 'longitude'
TRUE_LON_COLUMN = 'latitude'

# Convert the reference coordinates once instead of once per row, and avoid
# float() on a one-row Series (deprecated in recent pandas).
zurich_row = zurich_coord.iloc[0]
zurich_lat = float(zurich_row[TRUE_LAT_COLUMN])
zurich_lon = float(zurich_row[TRUE_LON_COLUMN])

mask = metadataPandas.apply(
    lambda row: filter_distance(
        zurich_lat,
        zurich_lon,
        float(row[TRUE_LAT_COLUMN]),
        float(row[TRUE_LON_COLUMN])) <= 10,
    axis=1)

# .copy() so that later column assignments act on an independent frame rather
# than on a slice of the unfiltered one.
metadataPandas = metadataPandas[mask].copy()

stops = metadataPandas.stop

In [5]:
from pyproj import Proj, transform

# The coordinate columns come out of the Spark text split as strings; make them numeric.
metadataPandas['longitude'] = metadataPandas['longitude'].astype('float')
metadataPandas['latitude'] = metadataPandas['latitude'].astype('float')

# Re-project from EPSG:4326 to EPSG:3857 (the mercator coordinates used by the
# map figures below) and keep the result next to the station data.
x_coordinates, y_coordinates = transform(
    Proj(init='epsg:4326'),
    Proj(init='epsg:3857'),
    metadataPandas['latitude'].values,
    metadataPandas['longitude'].values,
)
metadataPandas['x'], metadataPandas['y'] = x_coordinates, y_coordinates

In [17]:
import yaml
import datetime

from bokeh.layouts import column
from bokeh.models import ColumnDataSource, Slider, HoverTool, LabelSet
from bokeh.plotting import figure
from bokeh.themes import Theme
from bokeh.io import show, output_notebook
from bokeh.application.handlers import FunctionHandler
from bokeh.application import Application
from bokeh.models.widgets import Dropdown, Select, MultiSelect, TextInput, Toggle, RadioButtonGroup, Div

from bokeh.sampledata.sea_surface_temperature import sea_surface_temperature

# Configure Bokeh to render its output inline in the notebook.
output_notebook()

# the function that handles the map plotting
def plot_path(doc):
    """Build the interactive journey-planner UI and add it to the Bokeh document.

    The UI is a column of widgets (departure/destination selectors, a
    departure-vs-arrival switch, a time field, a probability-threshold
    slider and a "Go!" button), a status line and a tile map. Pressing the
    button runs ``Planner.compute_plan`` with the current parameters and draws
    the stops of the proposed trip (circles and labels) and the connections
    between them (a line) on the map.

    The widget callbacks store their values in the notebook-level globals
    ``real_global_dep``, ``real_global_arr``, ``real_global_time_flag``,
    ``real_global_time`` and ``real_global_threshold``, which must be
    initialised before the application is shown. ``schedule``,
    ``pairwise_distances`` and ``metadataPandas`` are read from the notebook
    namespace as well.

    Parameters
    ----------
    doc
        The Bokeh document handed in by ``FunctionHandler``; the layout is
        attached to it with ``doc.add_root``.
    """
    from bokeh.tile_providers import STAMEN_TERRAIN_RETINA

    # Columns that we need to plot and/or visualize as labels. The same schema
    # is used for the initially empty source and for the blank frame that
    # resets the map, so the hover fields always exist.
    route_columns = ['x', 'y', 'stop', 'departure_time', 'arrival_time']
    source = ColumnDataSource(pd.DataFrame(columns=route_columns))

    hover = HoverTool(tooltips=[
        ("Stop", "@stop"),
        ('Departure Time', "@departure_time"),
        ('Arrival Time', "@arrival_time")
    ])
    tools = ["pan", "wheel_zoom", "reset", "save", hover]

    # to show the stop names on the map
    labels = LabelSet(x='x', y='y', text='stop', level='glyph',
                      source=source, render_mode='canvas')

    plot = figure(y_range=(5977000, 6028000), x_range=(960000, 960001),
                  x_axis_type="mercator", y_axis_type="mercator",
                  plot_width=950, plot_height=500, tools=tools)
    plot.add_layout(labels)
    plot.add_tile(STAMEN_TERRAIN_RETINA)

    # to show stops (circles) and connections (lines)
    plot.circle('x', 'y', source=source, line_width=4)
    plot.line('x', 'y', source=source, line_width=2)

    # a text element showing the current computational state of the algorithm
    computation_state = Div(text="")

    def set_state(color, label, detail=""):
        """Show ``label`` (bold, in ``color``) plus optional ``detail`` in the status line."""
        computation_state.text = (
            "<font size='3'><b>Computation State</b></font>: "
            "<font color='{0}'><b>{1}</b>{2}</font>".format(color, label, detail)
        )

    # wrapper for calling the algorithm to compute the path from the departure station to the destination
    def compute_plan_wrapper(departure_station, destination_station, time, probability_threshold):
        # flag == 1 means the given time is a desired arrival time -> plan in reverse
        pl = Planner(schedule, pairwise_distances=pairwise_distances,
                     reverse=(real_global_time_flag == 1))
        set_state('orange', 'WORKING ON')
        path = pl.compute_plan(departure_station, destination_station, time, probability_threshold)

        # `is not None` rather than `!= None`: an identity test is never
        # evaluated element-wise, whatever container the planner returns.
        proposed_trip = pl.edges.loc[path] if path is not None else None
        stops_trip = proposed_trip['dep_node'].copy() if proposed_trip is not None else None

        if stops_trip is None or stops_trip.shape[0] == 0:
            set_state('red', 'TERMINATED', ' (no path found)')
            return

        set_state('green', 'TERMINATED')

        dep_times = list(proposed_trip['dep_time'])
        arr_times = list(proposed_trip['arr_time'])
        # One row per stop: the first stop reuses its departure time as the
        # arrival time and the final stop reuses its arrival time as departure.
        arrival_times = [dep_times[0]] + arr_times
        departure_times = dep_times + [arr_times[-1]]

        # the departure nodes of every edge plus the arrival node of the last edge
        stops_trip['last'] = proposed_trip['arr_node'].values[-1]

        trip = pd.DataFrame({
            'stop': stops_trip,
            'departure_time': departure_times,
            'arrival_time': arrival_times,
        }).reset_index()
        # keep only the third space-separated token of the value's string form
        # (the clock part when the string looks like "<n> days hh:mm:ss")
        trip['arrival_time'] = trip.arrival_time.map(lambda x: str(x).split(' ')[2])
        trip['departure_time'] = trip.departure_time.map(lambda x: str(x).split(' ')[2])

        # attach the map coordinates of every stop
        path_stops = trip.merge(metadataPandas, on='stop')

        # replace the data shown on the map
        source.data = ColumnDataSource(data=path_stops).data

    # callback for setting a new departure station
    def callback_departure(attr, old, new):
        global real_global_dep
        real_global_dep = new

    # callback for setting a new destination
    def callback_destination(attr, old, new):
        global real_global_arr
        real_global_arr = new

    # callback for setting whether the provided time is desired departure or desired arrival
    def callback_time_flag(attr, old, new):
        global real_global_time_flag
        real_global_time_flag = new

    # callback for setting a new departure/arrival time
    def callback_time(attr, old, new):
        global real_global_time
        try:
            datetime.datetime.strptime(new, '%H:%M:%S')
        except ValueError:
            raise ValueError("Incorrect data format, should be hh:mm:ss")
        # the time is stored as an offset of <weekday> days plus the clock time
        day = datetime.datetime.today().weekday()
        real_global_time = pd.Timedelta(str(day) + " day " + new)

    # callback for setting a probability threshold
    def callback_threshold(attr, old, new):
        global real_global_threshold
        real_global_threshold = new

    # callback for executing the algorithm (the argument passed by the button is not used)
    def callback_execute_algorithm(a):
        set_state('orange', 'INITIALIZING')

        # re-initialize the map with a single blank placeholder row
        blank = pd.DataFrame(
            {'x': [0], 'y': [0], 'stop': [''], 'departure_time': [''], 'arrival_time': ['']},
            columns=route_columns)
        source.data = ColumnDataSource(blank).data

        parameters = (real_global_dep, real_global_arr, real_global_time, real_global_threshold)
        if all(value is not None for value in parameters):
            compute_plan_wrapper(*parameters)
        else:
            set_state('red', 'PARAMETERS ERROR')

    # define the user interface for executing the algorithm
    stops_in_schedule = np.unique(list(schedule.dep_node.unique()) + list(schedule.arr_node.unique()))
    sorted_stops = sorted(stops_in_schedule)

    # departure station selection menu
    departure = Select(title="Departure Station", options=sorted_stops)
    departure.on_change('value', callback_departure)

    # destination selection menu
    destination = Select(title="Destination Station", options=sorted_stops)
    destination.on_change('value', callback_destination)

    # choosing whether the provided time is arrival or departure time
    radio_button_group = RadioButtonGroup(
        labels=["Departure Time", "Desired Arrival Time"], active=0)
    radio_button_group.on_change('active', callback_time_flag)

    # providing the arrival/departure time
    text_input = TextInput(value="12:00:00", title="Time")
    text_input.on_change('value', callback_time)

    # selecting the probability threshold for allowing paths
    slider = Slider(start=0, end=1, value=0, step=0.1, title="Threshold")
    slider.on_change('value', callback_threshold)

    # start the algorithm
    button = Toggle(label="Go!", button_type="success")
    button.on_click(callback_execute_algorithm)

    doc.add_root(column(departure, destination, radio_button_group, text_input,
                        slider, button, computation_state, plot))
    
# Wrap plot_path in a Bokeh application; it is displayed with show(app, ...) in the next cell.
handler = FunctionHandler(plot_path)
app = Application(handler)

In [18]:
# Initial values of the globals that plot_path's callbacks overwrite and that
# its "Go!" handler reads; they must exist before the app is shown.
real_global_dep = 'Adliswil'
real_global_arr = 'Adliswil'
# 0 = the time is a departure time, 1 = desired arrival time (index of the
# selected radio button; plot_path switches the Planner to reverse when it is 1)
real_global_time_flag = 0
# NOTE(review): this default has a day component of 0, whereas callback_time
# prefixes the current weekday number as the day component; confirm that is intended.
real_global_time = pd.Timedelta('12:00:00')
real_global_threshold = 0

# NOTE: notebook_url is hard-coded to one specific host and port; adjust it when running elsewhere.
show(app, notebook_url="http://10.90.38.21:8821")

In [93]:
import yaml
import datetime

from bokeh.layouts import column
from bokeh.models import ColumnDataSource, Slider, HoverTool, LabelSet
from bokeh.plotting import figure
from bokeh.themes import Theme
from bokeh.io import show, output_notebook
from bokeh.application.handlers import FunctionHandler
from bokeh.application import Application
from bokeh.models.widgets import Dropdown, Select, MultiSelect, TextInput, Toggle, RadioButtonGroup, Div

from bokeh.sampledata.sea_surface_temperature import sea_surface_temperature

# Configure Bokeh to render inline in the notebook (already called in the
# path-planner cell as well; repeated here).
output_notebook()

# the function that handles the map plotting
def plot_isochrone_map(doc):
    """Build the interactive isochrone-map UI and add it to the Bokeh document.

    The UI is a column of widgets (departure selector, start-time field,
    time-range field in minutes, probability-threshold slider and a "Show!"
    button), a status line and a tile map. Pressing the button runs
    ``Planner.compute_isocrone`` and draws one translucent circle per reached
    stop, with a radius proportional to the time left at that stop.

    The widget callbacks store their values in the notebook-level globals
    ``real_global_dep``, ``real_global_time``, ``real_global_time_range`` and
    ``real_global_threshold``, which must be initialised before the
    application is shown. ``schedule``, ``pairwise_distances`` and
    ``metadataPandas`` are read from the notebook namespace as well.

    Parameters
    ----------
    doc
        The Bokeh document handed in by ``FunctionHandler``; the layout is
        attached to it with ``doc.add_root``.
    """
    from bokeh.tile_providers import STAMEN_TERRAIN_RETINA

    # Columns that we need to plot. The same schema is used for the initially
    # empty source and for the blank frame that resets the map.
    iso_columns = ['x', 'y', 'stop', 'arr_time', 'delta']
    source = ColumnDataSource(pd.DataFrame(columns=iso_columns))

    tools = ["pan", "wheel_zoom", "reset", "save"]

    plot = figure(y_range=(5977000, 6028000), x_range=(960000, 960001),
                  x_axis_type="mercator", y_axis_type="mercator",
                  plot_width=950, plot_height=500, tools=tools)
    plot.add_tile(STAMEN_TERRAIN_RETINA)

    # one borderless, translucent disc per reached stop; 'delta' is its radius
    plot.circle('x', 'y', source=source, line_width=0, radius='delta', alpha=0.4, line_alpha=0)

    # a text element showing the current computational state of the algorithm
    computation_state = Div(text="")

    def set_state(color, label, detail=""):
        """Show ``label`` (bold, in ``color``) plus optional ``detail`` in the status line."""
        computation_state.text = (
            "<font size='3'><b>Computation State</b></font>: "
            "<font color='{0}'><b>{1}</b>{2}</font>".format(color, label, detail)
        )

    # wrapper for calling the algorithm that finds the stops reachable within the time range
    def compute_plan_wrapper(departure_station, start_time, range_minutes, probability_threshold):
        set_state('orange', 'WORKING ON')

        final_time = start_time + pd.Timedelta(minutes=range_minutes)
        pl = Planner(schedule, pairwise_distances=pairwise_distances, reverse=False)
        reached = pl.compute_isocrone(departure_station, start_time, final_time, probability_threshold)

        if reached.shape[0] == 0:
            set_state('red', 'TERMINATED', ' (no stations found)')
            return

        set_state('green', 'TERMINATED')

        # Whole minutes left at each stop. total_seconds() is used so that
        # full hours are counted too; the previous `(seconds // 60) % 60`
        # discarded them, shrinking the discs for ranges of an hour or more.
        spare_minutes = (final_time - reached['arr_time']).map(
            lambda spare: int(spare.total_seconds() // 60))
        # radius of the disc: 100 units per spare minute
        reached['delta'] = spare_minutes * 100

        reached = reached.reset_index()
        # assumes reset_index() yields exactly these four columns in this order; verify against the planner
        reached.columns = ['stop', 'arr_day', 'arr_time', 'delta']

        # attach the map coordinates of every reached stop
        reached_stops = reached.merge(metadataPandas, on='stop')

        source.data = ColumnDataSource(data=reached_stops).data

    # callback for setting a new departure station
    def callback_departure(attr, old, new):
        global real_global_dep
        real_global_dep = new

    # callback for setting a new start time
    def callback_time(attr, old, new):
        global real_global_time
        try:
            datetime.datetime.strptime(new, '%H:%M:%S')
        except ValueError:
            raise ValueError("Incorrect data format, should be hh:mm:ss")
        # the time is stored as an offset of <weekday> days plus the clock time
        day = datetime.datetime.today().weekday()
        real_global_time = pd.Timedelta(str(day) + " day " + new)

    # callback for setting the time range in minutes (int() raises ValueError on non-numeric input)
    def callback_time_range(attr, old, new):
        global real_global_time_range
        real_global_time_range = int(new)

    # callback for setting a probability threshold
    def callback_threshold(attr, old, new):
        global real_global_threshold
        real_global_threshold = new

    # callback for executing the algorithm (the argument passed by the button is not used)
    def callback_execute_algorithm(a):
        set_state('orange', 'INITIALIZING')

        # re-initialize the map with a single blank placeholder row (zero radius)
        blank = pd.DataFrame(
            {'x': [0], 'y': [0], 'stop': [''], 'arr_time': [''], 'delta': [0]},
            columns=iso_columns)
        source.data = ColumnDataSource(blank).data

        parameters = (real_global_dep, real_global_time, real_global_time_range, real_global_threshold)
        if all(value is not None for value in parameters):
            compute_plan_wrapper(*parameters)
        else:
            set_state('red', 'PARAMETERS ERROR')

    # define the user interface for executing the algorithm
    stops_in_schedule = np.unique(list(schedule.dep_node.unique()) + list(schedule.arr_node.unique()))
    sorted_stops = sorted(stops_in_schedule)

    # departure station selection menu
    departure = Select(title="Departure Station", options=sorted_stops)
    departure.on_change('value', callback_departure)

    # providing the start time
    time_input = TextInput(value="12:00:00", title="Time")
    time_input.on_change('value', callback_time)

    # providing the time for the area of the isochrone map, in minutes
    # (kept in its own variable: sharing one name with the time field meant
    # the time field was never added to the layout)
    range_input = TextInput(value="10", title="Time range (minutes)")
    range_input.on_change('value', callback_time_range)

    # selecting the probability threshold for allowing paths
    slider = Slider(start=0, end=1, value=0, step=0.1, title="Threshold")
    slider.on_change('value', callback_threshold)

    # start the algorithm
    button = Toggle(label="Show!", button_type="success")
    button.on_click(callback_execute_algorithm)

    doc.add_root(column(departure, time_input, range_input, slider, button,
                        computation_state, plot))
    
# Wrap plot_isochrone_map in a Bokeh application; it is displayed with show(app_isochrone, ...) in the next cell.
handler_isochrone = FunctionHandler(plot_isochrone_map)
app_isochrone = Application(handler_isochrone)

In [94]:
# Initial values of the globals that plot_isochrone_map's callbacks overwrite
# and that its "Show!" handler reads. real_global_dep, real_global_time and
# real_global_threshold are the same names the path-planner cell uses, so
# running this cell resets them for that app too.
real_global_dep = 'Adliswil'
real_global_time = pd.Timedelta('12:00:00')
# size of the isochrone window, in minutes
real_global_time_range = 5
real_global_threshold = 0

# NOTE: notebook_url is hard-coded to one specific host and port; adjust it when running elsewhere.
show(app_isochrone, notebook_url="http://10.90.38.21:8821")

In [32]:
# Scratch check: an empty DataFrame reports shape (0, 0), so `shape[0] > 0`
# is a valid "has rows" test. Not needed by the rest of the notebook.
pd.DataFrame().shape



(0, 0)