In [2]:
import dash
from dash.dependencies import Input, Output
import dash_html_components as html
import dash_core_components as dcc
import geopandas as gpd
import jupyterlab_dash
import os
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import requests

In [2]:
# Load the recorded stop arrivals; `arrival_time` is parsed into datetimes on read.
arrival_times = pd.read_csv(
    'paralel_arrival_times_consecutive.csv',
    parse_dates=['arrival_time'],
)
arrival_times.head()

Unnamed: 0,cod_issue,cod_stop,cod_line,eta_date,arrival_time
0,8__622____2_17:00:00_2_-__17_8__622___,8_4311,8__622___,15,2020-02-15 17:45:53+01:00
1,5308413,8_1333,8__653___,15,2020-02-15 17:46:15+01:00
2,5308274,8_10577,8__562___,15,2020-02-15 17:46:15+01:00
3,8__661____2_17:45:00_1_-__21_8__661___,8_1338,8__661___,15,2020-02-15 17:46:20+01:00
4,5308386,8_08458,8__564___,15,2020-02-15 17:46:27+01:00


In [3]:
 def get_trip_times(cod_stop_from='8_1336', cod_stop_to = '8_06277'):
     """Compute travel times between two stops from the global `arrival_times`.

     Arrivals at either stop are grouped by (cod_line, cod_issue, eta_date).
     A group yields a trip only when it holds exactly two rows and, in
     chronological order, the first is at `cod_stop_from` and the second at
     `cod_stop_to`. Groups with more than two rows are reported on stdout and
     skipped; trips lasting two hours or more are discarded.

     Parameters
     ----------
     cod_stop_from : str
         `cod_stop` value of the departure stop.
     cod_stop_to : str
         `cod_stop` value of the destination stop.

     Returns
     -------
     pandas.DataFrame
         Columns: cod_issue, departure_time, arrival_time, trip_time, cod_line.
         Empty (same columns) when no trip matches.
     """
     trip_columns = [
         'cod_issue',
         'departure_time',
         'arrival_time',
         'trip_time',
         'cod_line'
     ]

     # Keep only arrivals at one of the two endpoints. The stable sort makes the
     # "first row = departure, second row = arrival" pairing below depend on the
     # timestamps rather than on the incidental row order of the input frame.
     at_endpoints = arrival_times['cod_stop'].isin([cod_stop_from, cod_stop_to])
     selected_arrival_times = arrival_times[at_endpoints].sort_values(
         'arrival_time', kind='mergesort')

     trip_rows = []
     grouped = selected_arrival_times.groupby(['cod_line', 'cod_issue', 'eta_date'])
     for (cod_line, cod_issue, _eta_date), group in grouped:
         if len(group) > 2:
             # More than one arrival at an endpoint for the same issue/day:
             # ambiguous pairing, report it and move on.
             print("Error: repeated cod_issue")
             print(group)
             continue
         if len(group) != 2:
             # The vehicle was seen at only one of the two stops.
             continue

         arrival_from = group.iloc[0]
         arrival_to = group.iloc[1]
         if (arrival_from['cod_stop'] != cod_stop_from
                 or arrival_to['cod_stop'] != cod_stop_to):
             # Trip in the opposite direction (or two rows at the same stop).
             continue

         departure_time = arrival_from['arrival_time']
         arrival_time = arrival_to['arrival_time']
         # Both rows share cod_line because it is part of the group key.
         trip_rows.append([
             cod_issue,
             departure_time,
             arrival_time,
             arrival_time - departure_time,
             cod_line
         ])

     trip_times = pd.DataFrame(trip_rows, columns=trip_columns)
     if trip_times.empty:
         return trip_times
     return trip_times[trip_times['trip_time'] < pd.Timedelta(2, unit='h')]

In [4]:
# Number of complete trips per line for the default pair of stops.
get_trip_times().loc[:, 'cod_line'].value_counts()

Error: repeated cod_issue
       cod_issue cod_stop   cod_line  eta_date              arrival_time
674655   5307639   8_1336  8__656___         7 2020-03-07 10:15:03+01:00
674915   5307639  8_06277  8__656___         7 2020-03-07 10:26:59+01:00
675222   5307639  8_06277  8__656___         7 2020-03-07 10:39:00+01:00
Error: repeated cod_issue
       cod_issue cod_stop   cod_line  eta_date              arrival_time
450801   5308363   8_1336  8__656___        29 2020-02-29 17:31:18+01:00
451029   5308363  8_06277  8__656___        29 2020-02-29 17:42:14+01:00
451263   5308363  8_06277  8__656___        29 2020-02-29 17:51:00+01:00


8__656___     1223
8__658___      602
8_N_901___      41
Name: cod_line, dtype: int64

In [5]:
# Stops table (CSV export), cached on disk so the download happens only once.
# NOTE: the cache file keeps its existing '.json' name although it holds CSV,
# so already-downloaded copies are still picked up.
file_path = 'm8_Estaciones.json'

if not os.path.exists(file_path):
  url = 'https://opendata.arcgis.com/datasets/19884a02ac044270b91fa478d80f7858_0.csv?outSR=%7B%22latestWkid%22%3A25830%2C%22wkid%22%3A25830%7D'
  r = requests.get(url, timeout=60)
  # Fail loudly instead of caching an HTTP error page as if it were data.
  r.raise_for_status()
  # Write the raw bytes: no platform-dependent re-encoding or newline translation.
  with open(file_path, 'wb') as f:
    f.write(r.content)

# The payload is UTF-8 (the original code decoded it as such), so read it
# explicitly as UTF-8 rather than with the platform default encoding.
est = pd.read_csv(file_path, encoding='utf-8')

In [6]:
# Stops layer as GeoJSON, cached on disk so the download happens only once.
file_path = 'm8_Estaciones.geojson'

if not os.path.exists(file_path):
  url = 'https://opendata.arcgis.com/datasets/19884a02ac044270b91fa478d80f7858_0.geojson'
  r = requests.get(url, timeout=60)
  # Fail loudly instead of caching an HTTP error page as if it were data.
  r.raise_for_status()
  # Write the raw bytes: no platform-dependent re-encoding or newline translation.
  with open(file_path, 'wb') as f:
    f.write(r.content)

est_geojson = gpd.read_file(file_path)
# Keep only the stops that actually appear in the arrival records.
selected_est_geojson = est_geojson[est_geojson['IDESTACION']
                                       .isin(arrival_times['cod_stop'].unique())]

In [7]:
# Display the stops whose IDESTACION appears in `arrival_times['cod_stop']`.
selected_est_geojson

Unnamed: 0,OBJECTID,IDESTACION,FECHAACTUAL,MODO,CODIGOESTACION,DENOMINACION,OBSERVACIONES,SITUACION,CODIGOCTMESTACIONREDMETRO,CODIGOEMPRESA,...,FECHAFIN,X,Y,GRADOACCESIBILIDAD,SITUACIONCALLE,DENOMINACION_SAE,INTERURBANOS_CODIGOEMT_CRTM,INTERURBANOS_CODIGOEMT_EMPRESA,LINEAS,geometry
45,8046,8_3840,20190912,8,3840,AV.OSA MAYOR-PLEYADES,,S,,,...,,433596,4479218,N,,AV.OSA MAYOR-PLEYADES,,3840,"656, N906",POINT (-3.78449 40.45910)
50,8051,8_4789,20190912,8,4789,GOLONDRINA-VELETA,,S,,,...,,433677,4478845,N,,GOLONDRINA-VELETA,,4789,657,POINT (-3.78349 40.45575)
51,8052,8_4790,20190912,8,4790,GOLONDRINA-GTA.CIRILO MARTIN MARTIN,,S,,,...,,433833,4478845,N,,GOLONDRINA-GTA.CIRILO MARTIN MARTIN,,4790,657,POINT (-3.78165 40.45576)
154,8155,8_20349,20190912,8,20349,BADAJOZ-PALENCIA,,S,,,...,,429123,4474582,N,,BADAJOZ-PALENCIA,,,656A,POINT (-3.83672 40.41697)
155,8156,8_50018,20190912,8,50018,FERNANDO LAZARO CARRETER-CARLOS GARDIEL,,S,,,...,,432722,4479330,N,,FERNANDO LAZARO CARRETER-CARLOS GARDIEL,,,656A,POINT (-3.79479 40.46005)
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8023,7531,8_15646,20190912,8,15646,AV.CASABLANCA-DIEGO DE VELÁZQUEZ,,S,,,...,,433098,4472700,T,,AV.CASABLANCA-DIEGO DE VELÁZQUEZ,,,"658, N902",POINT (-3.78967 40.40035)
8027,7535,8_15760,20190912,8,15760,VALLADOLID-OVIEDO,,S,,,...,,429495,4474344,T,,VALLADOLID-OVIEDO,,,656A,POINT (-3.83231 40.41486)
8043,7551,8_10585,20190912,8,10585,ANA TERESA-PLEYADES,,S,,,...,,433583,4479460,T,,ANA TERESA-PLEYADES,,,"656, 657, 656A, N901",POINT (-3.78467 40.46128)
8107,7615,8_10502,20190912,8,10502,CºHUERTAS-COLEGIO,,S,,,...,,430782,4476995,T,,CºHUERTAS-COLEGIO,,,"562, 564, 656A",POINT (-3.81743 40.43885)


In [8]:
def get_stop_name(cod_stop):
    """Return a display label for a stop: "<DENOMINACION> (<CODIGOESTACION>)".

    The stop is looked up in the global `est` table by its IDESTACION value;
    the first matching row is used. When the stop is not present in `est`,
    the code itself is returned so callers building UI labels do not crash.

    Parameters
    ----------
    cod_stop : str
        IDESTACION value of the stop.

    Returns
    -------
    str
        Human-readable label, or `str(cod_stop)` when the stop is unknown.
    """
    matches = est[est['IDESTACION'] == cod_stop]
    if matches.empty:
        return str(cod_stop)
    stop_row = matches.iloc[0]
    # format() stringifies both fields, so a non-string DENOMINACION (e.g. NaN)
    # no longer raises on concatenation.
    return "{} ({})".format(stop_row['DENOMINACION'], stop_row['CODIGOESTACION'])

In [9]:
viewer = jupyterlab_dash.AppViewer()
app = dash.Dash(__name__)

cod_stops = sorted(arrival_times['cod_stop'].unique())

# Both dropdowns offer the same stops, so build the option list once instead of
# repeating the per-stop name lookup for each dropdown.
stop_options = [{'label': get_stop_name(cod_stop), 'value': cod_stop} for cod_stop in cod_stops]

# Layout: origin and destination selectors, then the trip-time boxplot and the map.
app.layout = html.Div(
    html.Div([
        dcc.Dropdown(
            id='from-dropdown',
            options=stop_options,
            value='8_1336'
        ),
        dcc.Dropdown(
            id='to-dropdown',
            options=stop_options,
            value='8_06277'
        ),
        dcc.Graph(id="boxplot"),
        dcc.Graph(id="map")
    ])
)

@app.callback(Output("boxplot", "figure"), [Input('from-dropdown', 'value'),
                                          Input('to-dropdown', 'value')])
def make_figure(cod_stop_from, cod_stop_to):
    """Build the boxplot of trip duration (minutes) per departure hour.

    Parameters
    ----------
    cod_stop_from, cod_stop_to : str or None
        Current values of the 'from-dropdown' and 'to-dropdown' components.

    Returns
    -------
    plotly.graph_objects.Figure
        One box per departure hour, coloured by line; an empty, titled figure
        when no trip connects the selected stops.
    """
    trip_times = get_trip_times(cod_stop_from, cod_stop_to)
    if trip_times.empty:
        # The "figure" property needs a figure; the previous `return 0` was not one.
        empty_fig = go.Figure()
        empty_fig.update_layout(title_text="No trips found between the selected stops")
        return empty_fig

    departure = trip_times['departure_time']
    return px.box(
        x=departure.dt.hour,
        # total_seconds() yields float seconds on every pandas version, unlike
        # astype('timedelta64[s]'), which returns timedeltas on pandas >= 2.
        y=trip_times['trip_time'].dt.total_seconds() / 60,
        color=trip_times['cod_line'],
        points="all",
        hover_data=[
            departure.dt.day_name(),
            departure.dt.minute,
            departure.dt.day
        ],
        title="Trip time through the day"
    )
    
    
@app.callback(Output("map", "figure"), [Input('from-dropdown', 'value'),
                                          Input('to-dropdown', 'value')])
def make_figure(cod_stop_from, cod_stop_to):
    """Build the stop map with the selected origin and destination highlighted.

    All rows of `selected_est_geojson` are drawn as points; the two selected
    stops get an additional large red marker each. A selection that is empty
    or has no matching row in `selected_est_geojson` is simply not highlighted.

    Parameters
    ----------
    cod_stop_from, cod_stop_to : str or None
        Current values of the 'from-dropdown' and 'to-dropdown' components.

    Returns
    -------
    plotly.graph_objects.Figure
        Mapbox scatter figure using the "open-street-map" style.
    """
    fig = px.scatter_mapbox(
        selected_est_geojson,
        lat=selected_est_geojson['geometry'].y,
        lon=selected_est_geojson['geometry'].x,
        hover_data=['IDESTACION'],
        height=300
    )

    for hover_prefix, cod_stop in (("From: ", cod_stop_from), ("To: ", cod_stop_to)):
        matches = selected_est_geojson[selected_est_geojson['IDESTACION'] == cod_stop]
        if matches.empty:
            # Cleared dropdown (None) or stop without geometry: nothing to mark.
            continue
        stop_point = matches.iloc[0]['geometry']
        fig.add_trace(
            go.Scattermapbox(
                mode='markers',
                lat=[stop_point.y],
                lon=[stop_point.x],
                marker=go.scattermapbox.Marker(
                    size=17,
                    color='rgb(255, 0, 0)',
                    opacity=0.7
                ),
                hovertext=hover_prefix + cod_stop,
                hoverinfo='text',
                showlegend=False
            )
        )

    fig.update_layout(
        mapbox_style="open-street-map",
        margin={"r":0,"t":0,"l":0,"b":0}
    )
    return fig


# Hand the Dash app to the jupyterlab_dash AppViewer created at the top of this cell.
viewer.show(app)

## TODO

* Label the plot axes
* Plot map with shape and stops

In [28]:
# Last 50 arrivals of line 8__656___ at stop 8_23 with eta_date 6.
arrival_times[
    (arrival_times['cod_line'] == '8__656___')
    & (arrival_times['cod_stop'] == '8_23')
    & (arrival_times['eta_date'] == 6)
].tail(50)

Unnamed: 0,cod_issue,cod_stop,cod_line,eta_date,arrival_time
648893,5306605,8_23,8__656___,6,2020-03-06 14:48:54+01:00
649117,5306618,8_23,8__656___,6,2020-03-06 14:54:45+01:00
649528,5306624,8_23,8__656___,6,2020-03-06 15:03:32+01:00
650429,5306629,8_23,8__656___,6,2020-03-06 15:25:13+01:00
650635,5306642,8_23,8__656___,6,2020-03-06 15:30:00+01:00
651061,5306733,8_23,8__656___,6,2020-03-06 15:40:00+01:00
651501,5306799,8_23,8__656___,6,2020-03-06 15:50:00+01:00
651661,5306975,8_23,8__656___,6,2020-03-06 15:54:30+01:00
652090,5307077,8_23,8__656___,6,2020-03-06 16:03:51+01:00
652663,5307195,8_23,8__656___,6,2020-03-06 16:17:59+01:00
