# Statistical information, mean and std:

This notebook computes the mean and standard deviation (std) of the arrival time for each train_type, hour and station. Note: this notebook takes approximately 20 minutes to run.

### Set up Spark:

In [1]:
%%configure
{"conf": {
    "spark.app.name": "dslab-group_final"
}}

ID,YARN Application ID,Kind,State,Spark UI,Driver log,Current session?
7933,application_1589299642358_2451,pyspark,idle,Link,Link,
7992,application_1589299642358_2514,pyspark,idle,Link,Link,
7994,application_1589299642358_2517,pyspark,idle,Link,Link,
8002,application_1589299642358_2525,pyspark,idle,Link,Link,
8008,application_1589299642358_2531,pyspark,busy,Link,Link,
8018,application_1589299642358_2541,pyspark,idle,Link,Link,
8021,application_1589299642358_2544,pyspark,idle,Link,Link,
8027,application_1589299642358_2548,pyspark,idle,Link,Link,
8033,application_1589299642358_2553,pyspark,busy,Link,Link,
8037,application_1589299642358_2557,pyspark,busy,Link,Link,


In [2]:
%%local
import ipywidgets as widgets
import pandas as pd
import fuzzy_pandas as fpd

In [3]:
%%local
import time
import os
# Current user's name, read from the JUPYTERHUB_USER environment variable
# (raises KeyError if it is not set). It is sent to the Spark session below
# and used there to build the per-user "/user/<username>/..." input path.
username = os.environ['JUPYTERHUB_USER']

In [4]:
%%send_to_spark -i username -t str -n username

Starting Spark application


ID,YARN Application ID,Kind,State,Spark UI,Driver log,Current session?
8069,application_1589299642358_2593,pyspark,idle,Link,Link,✔


FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…

SparkSession available as 'spark'.


FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…

Successfully passed 'username' as 'username' to Spark kernel

In [5]:
# Distinct stop names read from the user's nodes.orc file; exported to the
# local kernel afterwards so the station search can match against them.
stops_zurich = (
    spark.read.format('orc')
    .load("/user/{}/nodes.orc".format(username))
    .select('stop_name')
    .distinct()
)

FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…

In [6]:
%%spark -o stops_zurich

FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…

FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…

In [7]:
%%local
# Hard-coded sample route that find_route_button displays as a stand-in for a
# real routing result. Each edge is [start_station, end_station, attrs];
# display_path reads attrs['time'] and attrs['duration'] and prints
# time and time + duration (units are not stated in this notebook — TODO confirm).
example_graph = [['Lausanne', 'Berne', {'duration': 100, 'time': 500}],
                 ['Berne', 'Lucerne', {'duration': 200, 'time': 600}],
                 ['Lucerne', 'Zürich', {'duration': 50, 'time': 800}]]

In [8]:
%%local
def search_station(station):
    """Return the known stop names that fuzzily match ``station``.

    The query is wrapped in a one-row DataFrame and fuzzy-merged against the
    ``stop_name`` column of ``stops_zurich`` using the Jaro method with a 0.8
    threshold, ignoring case, non-alphabetic characters, non-latin characters
    and word order.

    Parameters
    ----------
    station : str
        Free-text station name to look up.

    Returns
    -------
    list
        The ``stop_name`` values of the rows kept by the fuzzy merge.
    """
    query = pd.DataFrame([station], columns=['station'])
    merge_options = dict(
        left_on='station',
        right_on='stop_name',
        ignore_case=True,
        ignore_nonalpha=True,
        ignore_nonlatin=True,
        ignore_order_words=True,
        keep='match',
        threshold=0.8,
        method='jaro',
    )
    matched = fpd.fuzzy_merge(query, stops_zurich, **merge_options)
    return matched['stop_name'].to_list()

In [9]:
%%local
def search_station_departure(sender):
    """Refresh the departure proposals from the departure search text.

    Observer callback for the ``depart_station`` text widget. The ``sender``
    argument is ignored; the current text is read from the widget itself and
    the matches from ``search_station`` become the options of
    ``depart_proposals``.
    """
    depart_proposals.options = search_station(depart_station.value)

In [10]:
%%local
def search_station_arrival(sender):
    """Refresh the arrival proposals from the arrival search text.

    Observer callback for the ``arrive_station`` text widget. The ``sender``
    argument is ignored; the current text is read from the widget itself and
    the matches from ``search_station`` become the options of
    ``arrive_proposals``.
    """
    arrive_proposals.options = search_station(arrive_station.value)

In [11]:
%%local
# Sentinel text shown in the "selected station" labels while nothing is chosen.
# find_route_button compares the label values against it to detect a missing
# departure or arrival station.
no_station_selected = "None selected"

In [12]:
%%local
def select_station_departure(sender):
    """Mirror the chosen departure proposal into the selected-station label.

    Observer callback for ``depart_proposals``. Only notifications whose
    ``sender['name']`` is ``'label'`` are handled; every other notification is
    ignored. A new label of ``None`` resets the display to
    ``no_station_selected``.

    Parameters
    ----------
    sender : dict-like
        Change notification; ``'name'`` is always read, ``'new'`` only for
        ``'label'`` notifications.
    """
    if sender['name'] != 'label':
        return

    chosen = sender['new']
    selected_depart_station.value = no_station_selected if chosen is None else chosen

In [13]:
%%local
def select_station_arrival(sender):
    """Mirror the chosen arrival proposal into the selected-station label.

    Observer callback for ``arrive_proposals``. Only notifications whose
    ``sender['name']`` is ``'label'`` are handled; every other notification is
    ignored. A new label of ``None`` resets the display to
    ``no_station_selected``.

    Parameters
    ----------
    sender : dict-like
        Change notification; ``'name'`` is always read, ``'new'`` only for
        ``'label'`` notifications.
    """
    if sender['name'] != 'label':
        return

    chosen = sender['new']
    selected_arrival_station.value = no_station_selected if chosen is None else chosen

In [26]:
%%local
def find_route_button(button):
    """Validate the form and display a route when "Find route" is clicked.

    Click handler for ``search_button``. The inputs are checked in order
    (departure station, arrival station, date, time); the first failing check
    is shown through ``report_error`` and the handler returns without touching
    the results. When everything is valid the error is cleared, the progress
    message is shown while the path is obtained, and the path is rendered
    with ``display_path``.

    Parameters
    ----------
    button : object
        The clicked button, as passed by ``on_click``. Not used.

    Notes
    -----
    The route is still a placeholder: ``example_graph`` is displayed after a
    two-second pause, so the parsed stations, time and confidence are read
    but not consumed yet.
    """
    # --- Parse arguments ---------------------------------------------------
    # Local names differ from the module-level `depart_station` /
    # `arrive_station` text widgets on purpose, so those are not shadowed.
    depart_name = selected_depart_station.value
    if depart_name == no_station_selected:
        report_error("No departure station selected")
        return

    arrive_name = selected_arrival_station.value
    if arrive_name == no_station_selected:
        report_error("No arrival station selected")
        return

    date = date_picker.value
    if date is None:
        report_error("No date selected")
        return

    # weekday() counts from Monday = 0, so 5 and 6 are Saturday and Sunday.
    if date.weekday() > 4:
        report_error("Date is a weekend day, please select a week day")
        return

    hour_text = hour_picker.value
    if hour_text is None or hour_text == "":
        report_error("No hour selected")
        return

    parts = hour_text.split(':')
    try:
        if len(parts) != 2:
            # Same exception type int() raises, so one handler covers both a
            # wrong number of fields and non-numeric fields.
            raise ValueError("expected exactly one ':' separator")
        hour = int(parts[0])
        minute = int(parts[1])
    except ValueError:
        report_error("Invalid hour format, use HH:MM")
        return

    if hour not in range(8, 21):
        report_error("Invalid hour, valid range: [8,20]")
        return

    if minute not in range(0, 60):
        report_error("Invalid minute, valid range: [0,59]")
        return

    # Read for the future routing call; not used by the placeholder below.
    confidence = confidence_picker.value

    report_error(None)

    # --- Calculate path ----------------------------------------------------
    results.children = []
    progress_bar.layout = widgets.Layout(display='block')
    try:
        path = example_graph  # Change to actual path (run on cluster)
        time.sleep(2)
    finally:
        # Hide the progress message even if the path computation fails.
        progress_bar.layout = widgets.Layout(display='none')

    # --- Display path ------------------------------------------------------
    display_path(path)

In [27]:
%%local
def display_path(path):
    """Render a path as one HTML line per edge inside ``results``.

    Parameters
    ----------
    path : iterable
        Edges indexed as ``edge[0]`` (start), ``edge[1]`` (end) and
        ``edge[2]`` (a mapping with ``'time'`` and ``'duration'`` keys).

    Each edge is shown as ``t<time> <start> => t<time + duration> <end>`` and
    the resulting widgets replace the current children of ``results``.
    """
    def render(edge):
        # Build the widget for a single edge of the path.
        start, end, info = edge[0], edge[1], edge[2]
        time = info['time']
        duration = info['duration']
        return widgets.HTML(value=f"t{time} {start} => t{time+duration} {end}")

    results.children = [render(edge) for edge in path]

In [28]:
%%local
def report_error(error_message):
    """Show ``error_message`` in the ``error`` widget, or clear it.

    Parameters
    ----------
    error_message : str or None
        Text to display in bold red, prefixed with ``Error: ``. ``None``
        empties the widget.
    """
    error.value = (
        ""
        if error_message is None
        else "<b style='color:red;'>Error: " + error_message + "</b>"
    )

In [32]:
%%local
# Builds every widget of the route-search form and wires up its callbacks.
# The handlers defined earlier look these widgets up by their module-level
# names, so the names below must not change.

# Shared style: let each description label take the width its text needs.
style = {'description_width': 'initial'}

# Search station
# Free-text search boxes. observe() is called without a `names=` filter, so the
# handler is registered for every trait change of the widget; the handlers
# ignore the notification and simply re-read `.value`.
depart_station = widgets.Text(description = 'Search departure station',
                              layout=widgets.Layout(width='40%'),
                              style=style)
depart_station.observe(search_station_departure)
arrive_station = widgets.Text(description = 'Search arrival station',
                              layout=widgets.Layout(width='40%'),
                              style=style)
arrive_station.observe(search_station_arrival)


# Proposals
# List boxes filled by the search handlers. Their observers also receive every
# trait change and filter on sender['name'] == 'label' themselves.
depart_proposals = widgets.Select(description = 'Found stations',
                                  layout=widgets.Layout(width='40%', height='200px'),
                                  style=style)
depart_proposals.observe(select_station_departure)
arrive_proposals = widgets.Select(description = 'Found stations',
                                  layout=widgets.Layout(width='40%', height='200px'),
                                  style=style)
arrive_proposals.observe(select_station_arrival)


# Stations
# Labels holding the currently selected stations; they start at the
# `no_station_selected` sentinel, which find_route_button checks for.
selected_depart_station = widgets.Label(value = no_station_selected, style=style)
selected_box_depart_station = widgets.HBox([widgets.Label(value = "Selected depart station: ", style=style),
                                             selected_depart_station], layout=widgets.Layout(width='40%'))
selected_arrival_station = widgets.Label(value = no_station_selected, style=style)
selected_box_arrival_station = widgets.HBox([widgets.Label(value = "Selected arrival station: ", style=style),
                                             selected_arrival_station], layout=widgets.Layout(width='40%'))



# Options
# Date, time (free text, validated as HH:MM by find_route_button), confidence
# and the button that triggers the search.
date_picker = widgets.DatePicker(
                    description='Pick a Date',
                    disabled=False,
                    layout=widgets.Layout(width='20%')
                )
hour_picker = widgets.Text(description = 'Arrival time',
                            placeholder='HH:MM',
                            layout=widgets.Layout(width='20%'),
                            style=style
                          )
# Integer slider from 0 to 99, default 90; the value is only read once per
# release (continuous_update=False). Presumably a percentage — TODO confirm.
confidence_picker = widgets.IntSlider(
            value=90,
            min=0,
            max=99,
            step=1,
            description='Confidence:',
            disabled=False,
            continuous_update=False,
            orientation='horizontal',
            readout=True,
            readout_format='d',
            layout=widgets.Layout(width='25%'),
            style=style
        )
search_button = widgets.Button(
            description='Find route',
            disabled=False,
            button_style='', # 'success', 'info', 'warning', 'danger' or ''
            tooltip='Find route',
            icon='check', # (FontAwesome names without the `fa-` prefix)
            layout=widgets.Layout(width='15%')
        )
search_button.on_click(find_route_button)


# Error
# Empty until report_error writes a message into it.
error = widgets.HTML(value="")


# Empty 50px-high element, placed in the final layout as a vertical spacer.
padding = widgets.HTML(value="", layout=widgets.Layout(height='50px'))

# Progress bar
# Static text, hidden by default (display='none'); find_route_button shows it
# while the route is being computed and hides it again afterwards.
progress_bar = widgets.HTML(value="Finding best route...", layout=widgets.Layout(display='none'))

# Result
# Container whose children are replaced by display_path.
results = widgets.VBox([])

In [33]:
%%local
# Assemble the form: each HBox is one horizontal row of related inputs.
stations = widgets.HBox(children=[depart_station, arrive_station])
proposals = widgets.HBox(children=[depart_proposals, arrive_proposals])
selected_stations = widgets.HBox(
    children=[selected_box_depart_station, selected_box_arrival_station]
)
options = widgets.HBox(
    children=[date_picker, hour_picker, confidence_picker, search_button]
)

# Stack the rows top to bottom, followed by the error line, a spacer, the
# progress message and the results container.
layout = widgets.VBox(
    children=[
        stations,
        proposals,
        selected_stations,
        options,
        error,
        padding,
        progress_bar,
        results,
    ]
)

In [34]:
%%local
layout

VBox(children=(HBox(children=(Text(value='', description='Search departure station', layout=Layout(width='40%'…