## Import data

In [282]:
import pandas as pd
# Load the status snapshots and the static station metadata, attach every status
# row to its station, and express the report timestamps in Oslo local time.
station_status = pd.read_csv('station_status.csv')
station_information = pd.read_csv('station_information.csv')

# Key the status rows by station id so they can be joined onto the metadata.
# Column names that occur in both frames get '_information' appended on the metadata side.
status_by_station = station_status.reset_index().set_index('station_id')
stations = station_information.join(
    status_by_station, on='station_id', lsuffix='_information'
).reset_index()

# 'last_reported' is parsed as seconds since the Unix epoch, marked as UTC,
# and then converted to the Europe/Oslo time zone.
reported_utc = pd.to_datetime(stations['last_reported'], unit='s').dt.tz_localize('UTC')
stations['last_reported'] = reported_utc.dt.tz_convert('Europe/Oslo')

# Keep each station's rows together and in chronological order.
stations = stations.sort_values(by=['station_id', 'last_reported'])

## Graph a random station

In [283]:
# Pick a random station and graph its available bikes and docks over time.
import numpy as np
import plotly.io as pio
pio.templates.default = "plotly_dark"
import plotly.express as px
import plotly.offline as py

# Draw a random row *position* and read the station id there with .iloc.
# Plain `series[n]` is a label lookup, which only returns a single value while
# the index happens to contain every integer 0..len-1 exactly once.
# NOTE: rows (not stations) are sampled uniformly, so stations with more rows are
# more likely to be picked, and the draw is unseeded, so the chosen station can
# differ on every run.
random_row = np.random.randint(len(stations))
random_station_id = stations['station_id'].iloc[random_row]
selected_station = stations[stations['station_id'] == random_station_id]
station_name = selected_station['name'].iloc[0]

fig = px.line(
    selected_station, x='last_reported', y=['num_bikes_available', 'num_docks_available'],
)
fig.update_layout(
    title='Bysykkel station ' + station_name + ' bike and dock availability',
    xaxis_title='Date / Time', yaxis_title='Number',
    legend_title='Metric',
)

# Replace the raw column names with readable labels in the legend and the hover text.
trace_names = {'num_bikes_available': 'Available bikes', 'num_docks_available': 'Available docks'}
fig.for_each_trace(lambda t: t.update(
        name = trace_names[t.name],
        legendgroup = trace_names[t.name],
        hovertemplate = t.hovertemplate.replace(t.name, trace_names[t.name])
    )
)
fig

## Changes in station bike availability by hour of day

In [284]:
# Graph, per hour of day, how many times the number of bikes at a station changed.

# NOTE: despite its name, this flag compares each row with the NEXT row (shift(-1)).
# The column name is kept because the absolute-change cell further down reads it.
stations['same_station_as_previous_row'] = stations['station_id'] == stations['station_id'].shift(-1)

# A change is counted only when the next row belongs to the same station AND its
# bike count differs. The comparison is computed separately because `*` binds
# tighter than `!=`: written inline without parentheses, the expression compares
# (flag * bikes) with the next row's bikes, which counts station boundaries and
# the final row (compared against NaN) as changes.
bikes_differ_from_next_row = stations['num_bikes_available'] != stations['num_bikes_available'].shift(-1)
stations['change_in_bikes'] = stations['same_station_as_previous_row'] & bikes_differ_from_next_row

# Sum the boolean flags per hour of 'last_reported' to get the number of changes.
changed_by_hour = stations.groupby(stations.last_reported.dt.hour).change_in_bikes.sum()
fig = px.line(
    changed_by_hour, y='change_in_bikes', labels={'change_in_bikes': 'Changes in station bike availability', 'last_reported':'Hour of day'}
)
fig.update_layout(
    title='Changes in station bike availability',
    xaxis_title='Hour of day',
)
fig


In [285]:
## Absolute changes in bike numbers by hour

In [286]:
# Graph the total absolute change in the number of bikes at stations by hour of day.

# Magnitude of the difference between each row and the next row (shift(-1)).
bike_delta = (stations['num_bikes_available'] - stations['num_bikes_available'].shift(-1)).abs()
# The 'same_station_as_previous_row' flag (set in the previous cell; it compares
# with the next row) zeroes the difference where the next row is another station.
stations['change_in_bikes'] = stations['same_station_as_previous_row'] * bike_delta

# Sum the magnitudes per hour of 'last_reported'; the NaN on the last row is skipped by sum().
changed_by_hour = stations.groupby(stations.last_reported.dt.hour).change_in_bikes.sum()
fig = px.line(
    changed_by_hour, y='change_in_bikes', labels={'change_in_bikes': 'Total absolute change in available bikes', 'last_reported': 'Hour of day'}
)
fig.update_layout(title='Absolute change in availability of bikes')
fig

In [287]:
# Build an hour-by-hour stacked bar chart of the average number of available
# bikes and docks for the station selected earlier (random_station_id).
import plotly.graph_objects as go

# Mean bikes / docks per station and hour of day, rounded to whole numbers.
stations_heatmap = stations.groupby(
    [stations.station_id, stations.last_reported.dt.hour],
).agg(
    mean_available_bikes=('num_bikes_available', 'mean'),
    mean_available_docks=('num_docks_available', 'mean'),
).round(0)
stations_heatmap.index.names = ['station_id', 'hour_of_day']

station = stations_heatmap.loc[[random_station_id]]
hours = station.index.get_level_values('hour_of_day')
# End of each hourly bucket, shown in the hover text ("between 7:00 and 8:00").
# Derived from the hours that actually exist for this station so it stays
# aligned with `y` even when some hour of the day has no rows (23 wraps to 0).
custom_data = ((hours + 1) % 24).to_numpy()

fig = go.Figure(data=[
    go.Bar(
        x=station.mean_available_bikes,
        y=hours,
        name='Average available bikes',
        orientation='h',
        hovertemplate='Average number of bikes between %{y}:00 and %{customdata}:00 - %{x}<extra></extra>',
        customdata=custom_data,
    ),
    go.Bar(
        x=station.mean_available_docks,
        y=hours,
        name='Average available docks',
        orientation='h',
        # This trace shows docks, so the hover text must say "docks", not "bikes".
        hovertemplate='Average number of docks between %{y}:00 and %{customdata}:00 - %{x}<extra></extra>',
        customdata=custom_data,
    ),
])
fig.update_layout(
    title='Average dock and bike availability at '+ selected_station['name'].iloc[0],
    xaxis_title='Count',
    yaxis_title='Hour of day',
    barmode='stack',
)

# Centre the closing-hours note horizontally on the station's capacity.
capacity = selected_station['capacity'].iloc[0]
annotation = 'The Bysykkel system is closed between 01:00 and 05:00'
fig.add_annotation(
    x=(capacity/2), y=3, yanchor='auto', height=50, text=annotation, showarrow=False,
    bgcolor='#000000',
)

# Hour 0 at the top. autorange='reversed' makes Plotly compute the axis range
# itself, so an explicit range=[0, 23] alongside it has no effect and is omitted.
fig.update_yaxes(autorange='reversed', tickvals=[0, 4, 8, 12, 16, 20])

fig

In [288]:
# Build an hour by hour, day by day heatmap of number of available bikes and docks 
# def percentile(n):
#     def percentile_(x):
#         return np.percentile(x, n)
#     percentile_.__name__ = 'percentile_%s' % n
#     return percentile_

# stations_heatmap = stations.groupby(
#     [stations.station_id, stations.last_reported.dt.day_of_week, stations.last_reported.dt.hour],
# ).agg(
#     mean_available_bikes=('num_bikes_available', 'mean'), 
#     mean_available_docks=('num_docks_available', 'mean')
# )
# stations_heatmap.index.names = ['station_id', 'day_of_week', 'hour_of_day']

# station = stations_heatmap.loc[[617]]
# station
