# Statistical information: mean and std

This notebook computes the mean and standard deviation (std) of the arrival time for each train_type, hour, and station. Note: this notebook takes approximately 20 minutes to run.

### Set up Spark:

In [1]:
%%configure
{"conf": {
    "spark.app.name": "dslab-group_final"
}}

ID,YARN Application ID,Kind,State,Spark UI,Driver log,Current session?
7933,application_1589299642358_2451,pyspark,idle,Link,Link,
7978,application_1589299642358_2498,pyspark,idle,Link,Link,
7987,application_1589299642358_2508,pyspark,idle,Link,Link,
7992,application_1589299642358_2514,pyspark,idle,Link,Link,
7994,application_1589299642358_2517,pyspark,idle,Link,Link,
7998,application_1589299642358_2521,pyspark,busy,Link,Link,
8002,application_1589299642358_2525,pyspark,idle,Link,Link,
8004,application_1589299642358_2527,pyspark,idle,Link,Link,
8008,application_1589299642358_2531,pyspark,busy,Link,Link,
8012,application_1589299642358_2535,pyspark,idle,Link,Link,


In [2]:
%%local
import ipywidgets as widgets
import pandas as pd
import fuzzy_pandas as fpd

In [3]:
%%local
import os
# JupyterHub login of the current user, read from the environment of the local kernel.
# A later cell forwards it to the Spark session (%%send_to_spark), where it is used to
# build the per-user HDFS path. Raises KeyError if JUPYTERHUB_USER is not set.
username = os.environ['JUPYTERHUB_USER']

In [None]:
%%send_to_spark -i username -t str -n username

Starting Spark application


In [None]:
# Distinct stop names taken from the per-user `nodes.orc` table; exported to the
# local kernel by a later cell so the widgets can search them.
stops_zurich = (
    spark.read
    .format('orc')
    .load("/user/{}/nodes.orc".format(username))
    .select('stop_name')
    .distinct()
)

In [None]:
%%spark -o stops_zurich

In [None]:
%%local
# Widget demo: integer slider over 0..10 in steps of 1, initially at 3.
widgets.IntSlider(min=0, max=10, step=1, description='Slider:', value=3)

In [None]:
%%local
# Widget demo: three mutually exclusive toggle buttons, each with its own tooltip.
widgets.ToggleButtons(options=['Slow', 'Regular', 'Fast'],
                      description='Speed:',
                      disabled=False,
                      # button_style accepts 'success', 'info', 'warning', 'danger' or ''
                      button_style='',
                      tooltips=['Description of slow', 'Description of regular', 'Description of fast'])
                      # icons=['check'] * 3  (left disabled)

In [None]:
%%local
# Widget demo: a plain, enabled date picker.
widgets.DatePicker(description='Pick a Date', disabled=False)

In [None]:
%%local
# Widget demo: an enabled button with a check-mark icon.
widgets.Button(description='Click me',
               disabled=False,
               # button_style accepts 'success', 'info', 'warning', 'danger' or ''
               button_style='',
               tooltip='Click me',
               # FontAwesome icon name, written without the `fa-` prefix
               icon='check')

In [None]:
%%local
# Widget demo: a text box whose content is mirrored into a selection list.
search_text = widgets.Text(description='Search')
search_result = widgets.Select(description='Select table')


def search_action(sender):
    """Observer callback: offer 'Test' and the current search text as the selectable options.

    `sender` is the change notification passed by the widget; it is not inspected.
    """
    search_result.options = ['Test', search_text.value]


# Registered without a trait filter, so the callback runs on every trait change.
search_text.observe(search_action)

In [None]:
%%local
search_text

In [None]:
%%local
search_result

In [None]:
%%local
# Scratch check of the fuzzy matching: look up the fragment 'lau' among the stop names.
test = pd.DataFrame(['lau'], columns=['station'])

print(test)

# Jaro similarity with a 0.9 threshold; keep='match' keeps only rows that matched.
findings = fpd.fuzzy_merge(
    test,
    stops_zurich,
    left_on='station',
    right_on='stop_name',
    ignore_case=True,
    ignore_nonalpha=True,
    ignore_nonlatin=True,
    keep='match',
    threshold=0.9,
    method='jaro',
)
findings['stop_name'].to_list()

In [None]:
%%local
def search_station(station):
    """Return the stop names from `stops_zurich` that fuzzily match `station`.

    The query is wrapped in a one-row frame and fuzzy-merged against the
    `stop_name` column using the Jaro method with a 0.8 threshold, ignoring
    case, non-alphabetic and non-latin characters, and word order.

    Args:
        station: the text to look up.

    Returns:
        A list with the `stop_name` value of every matching row.
    """
    query_frame = pd.DataFrame([station], columns=['station'])
    normalisation = dict(ignore_case=True,
                         ignore_nonalpha=True,
                         ignore_nonlatin=True,
                         ignore_order_words=True)
    hits = fpd.fuzzy_merge(query_frame, stops_zurich,
                           left_on='station', right_on='stop_name',
                           keep='match', threshold=0.8, method='jaro',
                           **normalisation)
    return hits['stop_name'].to_list()

In [None]:
%%local
def search_station_departure(sender):
    """Observer callback: refresh the departure proposals from the departure search box.

    `sender` (the change notification) is ignored; the current text of
    `depart_station` is looked up with `search_station` and the result becomes
    the option list of `depart_proposals`.
    """
    depart_proposals.options = search_station(depart_station.value)

In [None]:
%%local
def search_station_arrival(sender):
    """Observer callback: refresh the arrival proposals from the arrival search box.

    `sender` (the change notification) is ignored; the current text of
    `arrive_station` is looked up with `search_station` and the result becomes
    the option list of `arrive_proposals`.
    """
    arrive_proposals.options = search_station(arrive_station.value)

In [None]:
    # NOTE(review): leftover debugging fragment. It is an indented body with no
    # enclosing `def` and no `%%local` magic, and `sender` is not defined anywhere
    # in this cell — presumably it was cut out of an observer callback; confirm
    # and delete or restore.
    # When a 'label' change with a non-None previous value arrives, it dumps the
    # keys of the change notification and its 'name', 'old', 'new', 'owner' and
    # 'type' entries, separated by blank lines.
    if(sender['name'] == 'label' and sender['old'] != None):
        print('#### VALUE CHANGED ####')
        print(sender.keys())
        print()
        print()
        print("name")
        print(sender['name'])
        print()
        print()
        print("old")
        print(sender['old'])
        print()
        print()
        print("new")
        print(sender['new'])
        print()
        print()
        print("owner")
        print(sender['owner'])
        print()
        print()
        print("type")
        print(sender['type'])
        print('#### END VALUE CHANGED ####')
        print()
        print()
        print()
        print()
        print()

In [None]:
%%local
# Sentinel text shown in the "selected station" labels while nothing is picked;
# find_route_button compares the label values against it to detect a missing selection.
no_station_selected = "None selected"

In [None]:
%%local
def select_station_departure(sender):
    """Observer callback: mirror the chosen departure proposal into its label.

    Only notifications for the 'label' trait are handled; every other trait
    change is ignored.

    Args:
        sender: change notification, read through its 'name' and 'new' keys.
            A 'new' of None (selection cleared) resets the label to the
            `no_station_selected` sentinel.
    """
    if sender['name'] != 'label':
        return
    chosen = sender['new']
    # Identity check: None is a singleton, so `is` is the correct comparison.
    selected_depart_station.value = no_station_selected if chosen is None else chosen

In [None]:
%%local
def select_station_arrival(sender):
    """Observer callback: mirror the chosen arrival proposal into its label.

    Only notifications for the 'label' trait are handled; every other trait
    change is ignored.

    Args:
        sender: change notification, read through its 'name' and 'new' keys.
            A 'new' of None (selection cleared) resets the label to the
            `no_station_selected` sentinel.
    """
    if sender['name'] != 'label':
        return
    chosen = sender['new']
    # Identity check: None is a singleton, so `is` is the correct comparison.
    selected_arrival_station.value = no_station_selected if chosen is None else chosen

In [None]:
%%local
def _parse_arrival_time(text):
    """Parse an 'HH:MM' string into (hour, minute); raise ValueError if it is not acceptable."""
    parts = text.split(':')
    if len(parts) != 2:
        raise ValueError("expected exactly one ':' separator")
    # int() raises ValueError itself on non-numeric pieces.
    hour = int(parts[0])
    minute = int(parts[1])
    if hour not in range(8, 21):
        raise ValueError("hour must be between 8 and 20")
    if minute not in range(0, 60):
        raise ValueError("minute must be between 0 and 59")
    return hour, minute


def find_route_button(button):
    """Click handler of the "Find route" button: validate the form inputs.

    Checks, in order, that a departure station and an arrival station are
    selected, that a date is picked and is a week day (Monday-Friday), and that
    the arrival time is written as HH:MM with the hour in 8..20 and the minute
    in 0..59. The first failing check is shown through `report_error` and the
    handler returns.

    Args:
        button: the clicked button instance passed by the widget; not used.
    """
    # Local names differ from the module-level search widgets to avoid shadowing them.
    departure_name = selected_depart_station.value
    if departure_name == no_station_selected:
        report_error("No departure station selected")
        return

    arrival_name = selected_arrival_station.value
    if arrival_name == no_station_selected:
        report_error("No arrival station selected")
        return

    date = date_picker.value
    # The picker holds None until the user chooses a date.
    if date is None:
        report_error("No date selected")
        return

    # weekday() counts Monday as 0, so 5 and 6 are Saturday and Sunday.
    if date.weekday() > 4:
        report_error("Date is a weekend day, please select a week day")
        return

    try:
        hour, minute = _parse_arrival_time(hour_picker.value)
    except ValueError:
        report_error("Invalid hour format, use HH:MM")
        return

    # NOTE(review): the validated values (stations, date, hour, minute) are not used
    # any further here; the route search itself is presumably still to be wired in.

In [None]:
%%local
def report_error(error_message):
    """Display `error_message` in bold red, prefixed with "Error: ", in the `error` HTML widget."""
    opening = "<b style='color:red;'>Error: "
    closing = "</b>"
    error.value = opening + error_message + closing

In [None]:
%%local
# Let descriptions take the width they need instead of being truncated.
style = {'description_width': 'initial'}

# Search station
# The handlers are registered for the `value` trait only: without a `names` filter
# they would run on every trait change of the text box, repeating the fuzzy station
# search even though the typed text has not changed.
depart_station = widgets.Text(description = 'Search departure station',
                              layout=widgets.Layout(width='40%'),
                              style=style)
depart_station.observe(search_station_departure, names='value')
arrive_station = widgets.Text(description = 'Search arrival station',
                              layout=widgets.Layout(width='40%'),
                              style=style)
arrive_station.observe(search_station_arrival, names='value')


# Proposals
def _build_proposal_list():
    """Create one 'Found stations' selection list (40% wide, 200px tall)."""
    return widgets.Select(description = 'Found stations',
                          layout=widgets.Layout(width='40%', height='200px'),
                          style=style)


# One list per direction; each reports selection changes to its own handler.
depart_proposals = _build_proposal_list()
depart_proposals.observe(select_station_departure)
arrive_proposals = _build_proposal_list()
arrive_proposals.observe(select_station_arrival)


# Stations
# Each row is a fixed caption followed by a label holding the current choice;
# the labels start at the `no_station_selected` sentinel.
selected_depart_station = widgets.Label(value=no_station_selected, style=style)
selected_box_depart_station = widgets.HBox(
    children=[
        widgets.Label(value="Selected depart station: ", style=style),
        selected_depart_station,
    ],
    layout=widgets.Layout(width='40%'),
)

selected_arrival_station = widgets.Label(value=no_station_selected, style=style)
selected_box_arrival_station = widgets.HBox(
    children=[
        widgets.Label(value="Selected arrival station: ", style=style),
        selected_arrival_station,
    ],
    layout=widgets.Layout(width='40%'),
)



# Options
# Travel date; read by find_route_button.
date_picker = widgets.DatePicker(description='Pick a Date',
                                 disabled=False,
                                 layout=widgets.Layout(width='20%'))

# Free-text arrival time; the placeholder shows the expected HH:MM form.
hour_picker = widgets.Text(description = 'Arrival hour',
                           placeholder='HH:MM',
                           layout=widgets.Layout(width='20%'),
                           style=style)

# Confidence slider: 0..99 in steps of 1, initially 90; with continuous_update
# off the value is only updated once the user finishes dragging.
confidence_picker = widgets.IntSlider(value=90,
                                      min=0,
                                      max=99,
                                      step=1,
                                      description='Confidence:',
                                      disabled=False,
                                      continuous_update=False,
                                      orientation='horizontal',
                                      readout=True,
                                      readout_format='d',
                                      layout=widgets.Layout(width='25%'),
                                      style=style)

# Button that triggers the input validation in find_route_button.
search_button = widgets.Button(description='Find route',
                               disabled=False,
                               # button_style accepts 'success', 'info', 'warning', 'danger' or ''
                               button_style='',
                               tooltip='Find route',
                               # FontAwesome icon name, written without the `fa-` prefix
                               icon='check',
                               layout=widgets.Layout(width='15%'))
search_button.on_click(find_route_button)


# Error
# Initially empty HTML area; report_error writes its message here.
error = widgets.HTML(value="")

In [None]:
%%local
# Assemble the form: each HBox is one row, stacked top to bottom in the VBox.
stations = widgets.HBox(children=[depart_station, arrive_station])
proposals = widgets.HBox(children=[depart_proposals, arrive_proposals])
selected_stations = widgets.HBox(children=[selected_box_depart_station,
                                           selected_box_arrival_station])
options = widgets.HBox(children=[date_picker, hour_picker, confidence_picker, search_button])
layout = widgets.VBox(children=[
    stations,
    proposals,
    selected_stations,
    options,
    error,
])

In [None]:
%%local
layout

In [None]:
%%local
report_error("Test")

In [None]:
%%local
date_picker.value.weekday()

In [None]:
%%local
date_picker.value