## Import data

In [13]:
import pandas as pd
# Load the station status and station information CSV files.
station_information = pd.read_csv('station_information.csv')
station_status = pd.read_csv('station_status.csv')

# Join the status rows onto the station information by station_id. Columns that exist
# in both files get an '_information' suffix on the station information side.
status_by_station = station_status.reset_index().set_index('station_id')
stations = station_information.join(
    status_by_station,
    on='station_id',
    lsuffix='_information',
).reset_index()

# last_reported is parsed as seconds since the epoch, marked as UTC and then
# converted to Oslo local time.
reported_utc = pd.to_datetime(stations['last_reported'], unit='s').dt.tz_localize('UTC')
stations['last_reported'] = reported_utc.dt.tz_convert('Europe/Oslo')

# Keep each station's rows together, in chronological order.
stations = stations.sort_values(by=['station_id', 'last_reported'])

## Overall statistics

Let's see how many bikes are parked in the system at any given time, and how many docks there are in total.

In [14]:

# Print summary statistics for the total number of available bikes in the system.
# Rows that share a last_reported timestamp are summed into one total per timestamp.
# GroupBy.sum() takes no column argument: its first positional parameter is
# numeric_only, so the flag is spelled out here and the column is selected afterwards.
total_bikes = stations.groupby('last_reported').sum(numeric_only=True)
print(total_bikes['num_bikes_available'].describe())
print('Average number of parked bikes: ' + str(total_bikes['num_bikes_available'].mean()))
print('Total system capacity: ' + str(station_information['capacity'].sum()))


count    2388.000000
mean     2154.332077
std        68.509592
min      1905.000000
25%      2108.000000
50%      2159.000000
75%      2202.250000
max      2311.000000
Name: num_bikes_available, dtype: float64
Average number of parked bikes: 2154.3320770519263
Total system capacity: 5783


## Graph a random station

In [15]:
# let's pick a random station and graph its available bikes and docks
import numpy as np
import plotly.io as pio
pio.templates.default = "plotly_dark"
import plotly.express as px
import plotly.offline as py

# Pick a random integer below the number of rows and use the station id stored under
# that index label; stations with more status rows are more likely to be chosen.
random_row = np.random.randint(len(stations))
random_station_id = stations['station_id'][random_row]
selected_station = stations.loc[stations['station_id'] == random_station_id]
station_name = selected_station['name'].iloc[0]

# One line per metric over time for the chosen station.
fig = px.line(
    selected_station,
    x='last_reported',
    y=['num_bikes_available', 'num_docks_available'],
)
fig.update_layout(
    title='Bysykkel station ' + station_name + ' bike and dock availability',
    xaxis_title='Date / Time',
    yaxis_title='Number',
    legend_title='Metric',
)

# Readable labels that replace the raw column names in the legend and hover text.
trace_names = {
    'num_bikes_available': 'Available bikes',
    'num_docks_available': 'Available docks',
}


def _relabel_trace(trace):
    """Rename a trace, its legend group and its hover text to the readable label."""
    label = trace_names[trace.name]
    return trace.update(
        name=label,
        legendgroup=label,
        hovertemplate=trace.hovertemplate.replace(trace.name, label),
    )


fig.for_each_trace(_relabel_trace)
fig

## Changes in station bike availability by hour of day

In [16]:
# Graph how often the number of bikes at a station changes by the hour of day.
# NOTE: despite its name, this flag compares each row with the NEXT row (shift(-1)).
# Rows are sorted by station_id and last_reported, so it is False on the last row of
# every station.
stations['same_station_as_previous_row'] = stations['station_id'] == stations['station_id'].shift(-1)
# A change only counts when the next row belongs to the same station. `*` binds tighter
# than `!=`, so the comparison is computed on its own and then combined with `&`;
# multiplying the flag into the left-hand side would compare 0 against the next count
# at station boundaries and report those rows as changes.
bikes_differ_from_next_row = stations['num_bikes_available'] != stations['num_bikes_available'].shift(-1)
stations['change_in_bikes'] = stations['same_station_as_previous_row'] & bikes_differ_from_next_row
# Summing the boolean column counts the changed rows per hour of day.
changed_by_hour = stations.groupby(stations.last_reported.dt.hour).change_in_bikes.sum()
fig = px.line(
    changed_by_hour, y='change_in_bikes', labels={'change_in_bikes': 'Changes in station bike availability', 'last_reported':'Hour of day'}
)
fig.update_layout(
    title='Changes in station bike availability',
    xaxis_title='Hour of day',
)
fig


## Proportional changes in bike numbers by hour

In [17]:
# Graph the proportion of bike changes that happen in the system by hour of day.
# change_in_bikes holds the absolute difference between each row and the next one.
# Multiplying by the boolean same-station flag (set in the previous cell) drops
# differences that span two different stations.
stations['change_in_bikes'] = stations['same_station_as_previous_row'] * abs(
    stations['num_bikes_available'] - stations['num_bikes_available'].shift(-1)
)

# Group by hour and sum the changes
changed_by_hour = stations.groupby(stations.last_reported.dt.hour)['change_in_bikes'].sum()
# Calculate the proportion of changes for each hour
proportion_changes_by_hour = changed_by_hour / changed_by_hour.sum() * 100

fig = px.line(
    proportion_changes_by_hour.round(0), y='change_in_bikes', labels={'change_in_bikes': 'Percentage of bike availability changes<br>', 'last_reported': 'Hour of day'}
)
# The subtitle reports the number of distinct report timestamps and stations, and the date range covered.
subtitle = '<br><sup>Based on {0} station statuses from {1} stations between {2} and {3}</sup>'.format(
    stations['last_reported'].nunique(),  stations['station_id'].nunique(), stations['last_reported'].min().strftime('%Y-%m-%d'), stations['last_reported'].max().strftime('%Y-%m-%d')
)
fig.update_layout(title='Proportion of availability changes of bikes'+subtitle)
fig.update_xaxes(tickvals=[0, 4, 8, 12, 16, 20])
fig.write_image('./charts/bysykkel-system-availability-changes-by-hour.png')

fig

## Heatmap of bike and dock availability by hour of day per station

How often will you find an empty bike station? How often will you want to park your bike at a station only to find all the docks full?

In [18]:
# Build an hour by hour heatmap of the proportion of times where there is at least one available bike and dock at a given station 
import plotly.graph_objects as go

# For every (station name, station id, hour of day) group, compute the percentage of
# status rows with at least one bike / one dock available, the number of rows, and the
# first and last report time.
stations_heatmap = stations.groupby(
    [stations.name, stations.station_id, stations.last_reported.dt.hour],
).agg(
    proportion_non_zero_bikes=('num_bikes_available', lambda x: (x != 0).mean() * 100), 
    proportion_non_zero_docks=('num_docks_available', lambda x: (x != 0).mean() * 100),
    # 'count' tallies the reports directly, rather than counting non-zero timestamps
    count=('last_reported', 'count'),
    start=('last_reported', 'min'),
    end=('last_reported', 'max'),
).round(0)

stations_heatmap.index.names = ['station_name', 'station_id', 'hour_of_day']

# build a graph with the first station
station = stations_heatmap.loc[[stations_heatmap.index.levels[0][0]]]
station_hours = station.index.get_level_values('hour_of_day')
# Each bar covers one hour, so the hover text ends at the following hour. Deriving the
# end hours from the hours actually present keeps customdata aligned with y.
custom_data = (station_hours + 1).tolist()
fig = go.Figure(data=[
    go.Bar(
        x=station.proportion_non_zero_bikes,
        y=station_hours,
        name='Bikes',
        orientation='h',
        hovertemplate='One or more bikes between %{y}:00 and %{customdata}:00 - %{x}%<extra></extra>',
        customdata=custom_data,
    ), 
    go.Bar(
        x=station.proportion_non_zero_docks,
        y=station_hours,
        name='Docks',
        hovertemplate='One or more docks between %{y}:00 and %{customdata}:00 - %{x}%<extra></extra>',
        orientation='h',
        customdata=custom_data,
    ),
])

# The figures in the title (row count and date range) come from the first station only.
title = 'How often is there at least one dock/bike available at OsloBysykkel.no stations<br><sup>Based on {0} station statuses between {1} and {2}</sup>'.format(
    station['count'].sum(), station['start'].iloc[0].strftime('%Y-%m-%d'), station['end'].iloc[0].strftime('%Y-%m-%d')
)
title += '<br><sup>See github.com/kalli/bysykkel-data for more information.</sup>'

fig.update_layout(
    title=title,
    xaxis_title='Percentage of times when there is at least one bike/dock available',
    yaxis_title='Hour of day',
    yaxis=dict(range=[0, 23]),
    barmode='group',
)

# Add an annotation about when the system is closed
annotation = 'The Bysykkel system is closed between 01:00 and 05:00'
fig.add_annotation(
    x=50, y=3, height=50, text=annotation, showarrow=False, 
    bgcolor='#000000',
)
fig['layout']['yaxis']['autorange'] = 'reversed'

fig.update_yaxes(range=[0, 23])
fig.update_yaxes(tickvals=[0, 4, 8, 12, 16, 20])
fig.update_xaxes(range=[0, 100])

# Build a dropdown so that the user can select information for a specific station
all_station_data = []
# The loop variable has its own name so the `station` frame used for the initial chart
# is not overwritten.
for station_info in station_information.itertuples():
    try:
        station_chart = stations_heatmap.loc[station_info.name]
    except KeyError:
        # Stations listed in station_information that have no rows in the heatmap.
        print(station_info.name + ' station not found')
        continue
    chart_hours = station_chart.index.get_level_values('hour_of_day')
    hours = chart_hours.tolist()
    end_hours = (chart_hours + 1).tolist()
    all_station_data.append(
        dict(
            # Update x, y and customdata together (one entry per trace) so the bars and
            # hover text stay aligned when a station reports a different set of hours.
            args=[{
                'x': [station_chart.proportion_non_zero_bikes, station_chart.proportion_non_zero_docks],
                'y': [hours, hours],
                'customdata': [end_hours, end_hours],
            }],
            label=station_info.name,
            method='update',
        )
    )
all_station_data.sort(key=lambda x: x['label'])
fig.update_layout(
    updatemenus=[
        dict(
            buttons=all_station_data,
            direction="down",
            pad={"r": 10, "t": 10},
            showactive=True,
        ),
    ]
)
fig.write_html('./charts/bysykkel-station-hour-by-hour-availability.html')

Skovveien station not found
Linaaes gate station not found
St. Olavs gate station not found
Ullevål sykehus station not found
Majorstuen T-bane station not found
Søndre gate station not found
Hammersborg Torg station not found
Marienlyst station not found
