In [13]:
import pandas as pd
import numpy as np
from plotnine import *
import plotly.express as px
from mizani.formatters import comma_format, percent_format
from tqdm.notebook import tqdm
from datetime import datetime
import pytz
import requests
import json
import os
from IPython.display import display, clear_output
import cv2

pd.options.display.max_columns = 500
%load_ext blackcellmagic

The blackcellmagic extension is already loaded. To reload it, use:
  %reload_ext blackcellmagic


### Using the PubliBike API using `requests`

Lesson of the day: Read documentation before you start investing time into something that you would have learned does not actually work because of rate limits.

In [37]:
response = requests.get("https://api.publibike.ch/v1/public/partner/stations")
content = response.json()

In [38]:
stations = pd.json_normalize(content["stations"], sep="_").assign(
    timestamp=pd.to_datetime(datetime.now().strftime("%Y-%m-%d %H:%M"))
)
# get datetime col to front
stations = pd.concat([stations["timestamp"], stations.drop("timestamp", axis=1)], axis=1)

stations.head()

Unnamed: 0,timestamp,id,latitude,longitude,name,address,zip,city,vehicles,sponsors,is_virtual_station,capacity,state_id,state_name,network_id,network_name,network_background_img,network_logo_img,network_sponsors
0,2023-10-17 19:15:00,841,46.938447,7.480803,Wittigkofen,Jupiterstrasse 1,3015,Bern,"[{'id': 4507, 'name': '503043', 'ebike_battery...",[],True,80,1,Active,5,Bern,,https://www.publibike.ch/static-content/Netz5.svg,[]
1,2023-10-17 19:15:00,629,46.009397,8.931591,Centro Giovanile,Via Dott. Giov. Polar. 34,6932,Breganzona,[],[],True,80,3,Active (empty),3,Sottoceneri (TI),,,[]
2,2023-10-17 19:15:00,540,47.403822,8.546464,Berninaplatz,Berninastrasse 45,8057,Zürich,"[{'id': 3923, 'name': '103291', 'ebike_battery...",[],True,22,1,Active,6,Zürich,,https://www.publibike.ch/static-content/Netz6.svg,[]
3,2023-10-17 19:15:00,169,46.42238,6.258527,"Gland, Les Tuillières",Rue des Tuillières 1,1196,Gland,"[{'id': 1309, 'name': '101080', 'ebike_battery...",[],True,24,1,Active,9,Région de Nyon,,https://www.publibike.ch/static-content/Netz9.svg,[]
4,2023-10-17 19:15:00,839,46.528778,6.614258,Parc de Valency,Av. d'Echallens,1004,Lausanne,"[{'id': 5938, 'name': '503805', 'ebike_battery...","[{'id': 11, 'name': 'Sponsor 11', 'image': 'ht...",True,21,1,Active,2,Lausanne-Morges,,,[]


In [39]:
bike_df_list = []
sponsor_df_list = []

for station_id, vehicle_info, sponsor_info in zip(
    stations["id"], stations["vehicles"], stations["sponsors"]
):
    # Expand the bike information into separate df to large back onto the main df later
    bike_df = pd.json_normalize(vehicle_info, sep="_").assign(station_id=station_id)
    bike_df_list.append(bike_df)

    # same for the sponsors
    sponsor_df = pd.json_normalize(sponsor_info, sep="_").assign(station_id=station_id)
    sponsor_df_list.append(sponsor_df)

In [40]:
bikes = pd.concat(bike_df_list).rename(columns={"id": "bike_id", "name": "bike_name"})
bikes.head()

Unnamed: 0,bike_id,bike_name,ebike_battery_level,type_id,type_name,station_id
0,4507.0,503043,17.0,2.0,E-Bike,841
1,1064.0,101493,,1.0,Bike,841
2,1131.0,500461,21.0,2.0,E-Bike,841
3,4033.0,502603,55.0,2.0,E-Bike,841
4,4884.0,500543,19.0,2.0,E-Bike,841


In [41]:
sponsors = pd.concat(sponsor_df_list).rename(columns={"id": "sponsor_id", "name": "sponsor_name"})
sponsors

Unnamed: 0,station_id,sponsor_id,sponsor_name,image,url
0,839,11.0,Sponsor 11,https://www.publibike.ch/static-content/Sponso...,
0,83,22.0,Sponsor 22,https://www.publibike.ch/static-content/Sponso...,
0,647,26.0,Sponsor 26,https://www.publibike.ch/static-content/Sponso...,
0,18,14.0,Sponsor 14,https://www.publibike.ch/static-content/Sponso...,
0,487,1.0,Sponsor 01,https://www.publibike.ch/static-content/Sponso...,
...,...,...,...,...,...
0,769,26.0,Sponsor 26,https://www.publibike.ch/static-content/Sponso...,
0,233,26.0,Sponsor 26,https://www.publibike.ch/static-content/Sponso...,
0,640,26.0,Sponsor 26,https://www.publibike.ch/static-content/Sponso...,
0,644,26.0,Sponsor 26,https://www.publibike.ch/static-content/Sponso...,


Let's join these dataframes back together for final analysis:

In [42]:
df = (
    stations.merge(bikes, how="left", left_on="id", right_on="station_id")
    .drop("station_id", axis=1)
    .merge(sponsors, how="left", left_on="id", right_on="station_id")
    .drop(
        [
            "station_id",
            "vehicles",
            "sponsors",
            "network_background_img",
            "network_logo_img",
            "network_sponsors",
            "image",
            "url",
        ],
        axis=1,
    )
    .rename(columns={"id": "station_id"})
)
df.head()

Unnamed: 0,timestamp,station_id,latitude,longitude,name,address,zip,city,is_virtual_station,capacity,state_id,state_name,network_id,network_name,bike_id,bike_name,ebike_battery_level,type_id,type_name,sponsor_id,sponsor_name
0,2023-10-17 19:15:00,841,46.938447,7.480803,Wittigkofen,Jupiterstrasse 1,3015,Bern,True,80,1,Active,5,Bern,4507.0,503043,17.0,2.0,E-Bike,,
1,2023-10-17 19:15:00,841,46.938447,7.480803,Wittigkofen,Jupiterstrasse 1,3015,Bern,True,80,1,Active,5,Bern,1064.0,101493,,1.0,Bike,,
2,2023-10-17 19:15:00,841,46.938447,7.480803,Wittigkofen,Jupiterstrasse 1,3015,Bern,True,80,1,Active,5,Bern,1131.0,500461,21.0,2.0,E-Bike,,
3,2023-10-17 19:15:00,841,46.938447,7.480803,Wittigkofen,Jupiterstrasse 1,3015,Bern,True,80,1,Active,5,Bern,4033.0,502603,55.0,2.0,E-Bike,,
4,2023-10-17 19:15:00,841,46.938447,7.480803,Wittigkofen,Jupiterstrasse 1,3015,Bern,True,80,1,Active,5,Bern,4884.0,500543,19.0,2.0,E-Bike,,


### Explain GitHub Actions Miner

TBD: Another day

### Analyse time deltas between runs from title names

In [43]:
import re
# Use regular expression to extract numbers
filename_dates = [re.findall(r'\d+', filename)[0] for filename in os.listdir("Data/")]

In [44]:
pd.DataFrame({"timestamp": pd.to_datetime(filename_dates)}).assign(delta=lambda x: x["timestamp"] - x["timestamp"].shift()).describe()

Unnamed: 0,timestamp,delta
count,3158,3157
mean,2023-10-04 03:13:45.955984896,0 days 00:12:10.488438390
min,2023-09-20 20:13:31,0 days 00:00:45
25%,2023-09-27 10:48:32.500000,0 days 00:08:53
50%,2023-10-04 02:54:16.500000,0 days 00:09:08
75%,2023-10-10 18:53:23.500000,0 days 00:13:42
max,2023-10-17 12:49:23,0 days 01:13:09
std,,0 days 00:07:10.819656362


### Read in the separate `csv` files and answer questions

In [14]:
# Directory containing the CSV files
directory = "./Data/"

# Initialize an empty list to store DataFrames
dfs = []

# Loop through files in the directory
for filename in tqdm(os.listdir(directory)):
    if filename.endswith(".csv"):
        # Construct the full file path
        file_path = os.path.join(directory, filename)

        tmp_file = (
            pd.read_csv(file_path)
            .drop(
                [
                    "station_name",
                    "address",
                    "zip",
                    "city",
                    "is_virtual_station",
                    "network_id",
                    "sponsor_id",
                    "sponsor_name",
                    "bike_type_name",
                ],
                axis=1,
            )
            .assign(
                station_id=lambda x: pd.to_numeric(x["station_id"], downcast="float"),
                station_capacity=lambda x: pd.to_numeric(
                    x["station_capacity"], downcast="float"
                ),
                bike_id=lambda x: pd.to_numeric(x["bike_id"], downcast="float"),
                bike_name=lambda x: x["bike_name"].astype("category"),
                ebike_battery_level=lambda x: pd.to_numeric(
                    x["ebike_battery_level"], downcast="float"
                ),
                bike_type_id=lambda x: pd.to_numeric(
                    x["bike_type_id"], downcast="float"
                ),
                latitude=lambda x: pd.to_numeric(x["latitude"], downcast="float"),
                longitude=lambda x: pd.to_numeric(x["longitude"], downcast="float"),
            )
        )

        # Read the CSV file into a DataFrame and append it to the list
        dfs.append(tmp_file)

# Concatenate all DataFrames in the list into one final DataFrame
final_df = pd.concat(dfs, axis=0).reset_index(drop=True)

  0%|          | 0/3436 [00:00<?, ?it/s]

In [15]:
final_df = final_df.assign(timestamp=lambda x: pd.to_datetime(x["timestamp"]).dt.tz_localize(pytz.UTC).dt.tz_convert("Europe/Zurich").dt.round("5 min"), 
                           day = lambda x: x["timestamp"].dt.date,
                           hour = lambda x: x["timestamp"].dt.hour)

### Visualising the Distribution of Bikes in City Over One Day

- separate for business days and the weekend

### Zurich: `plotly`

In [16]:
zurich_data = (
    final_df.filter(
        [
            "timestamp",
            "station_id",
            "latitude",
            "longitude",
            "station_capacity",
            "state_name",
            "bike_name",
            "network_name",
            "ebike_battery_level",
            "day",
            "hour",
        ]
    )
    .query("network_name == 'Zürich'")
    .reset_index(drop=True)
)

Problem: Might have multiple observations for the same time on a given day (scheduling on GitHub was shite), might have to drop those values...

In [17]:
zurich_data = (
    zurich_data
    .assign(time=lambda x: x.timestamp.dt.time)
    .drop_duplicates(["timestamp", "station_id", "bike_name"])
    .groupby(["timestamp", "station_id"])
     .apply(lambda x: pd.Series({
         "time": x["time"].iloc[0],
         "latitude": x["latitude"].iloc[0],
         "longitude": x["longitude"].iloc[0],
         "station_capacity": x["station_capacity"].iloc[0],
         "state_name": x["state_name"].iloc[0],
         "bike_count": x["bike_name"].nunique()
     }))
    .reset_index()
)

In [18]:
zurich_data = (
    zurich_data
    .query("timestamp.dt.weekday in [0, 1, 2, 3, 4]")
    .assign(
        day=lambda x: x.timestamp.dt.date.pipe(pd.to_datetime),
        bike_frac=lambda x: x.bike_count / x.groupby(["day", "time"])["bike_count"].transform("sum"),
    )
    .groupby(["time", "station_id"])
    .apply(lambda x: pd.Series({
        "bike_count": x.bike_count.mean(),
        "latitude": x.latitude.iloc[0],
        "longitude": x.longitude.iloc[0],
        "bike_frac": x.bike_frac.mean()
    }))
    .reset_index()
)

In [19]:
plot_data=zurich_data.copy()

In [20]:
dict(lat=plot_data["latitude"].median(), lon=plot_data["longitude"].median())

{'lat': 47.38279724121094, 'lon': 8.535449981689453}

Issue: the size is relative to the other values in the dataframe: Therefore, include all data for each plot, but make the opacity zero for the times that are currently not shown, but a scale for the values that currently shown:

In [22]:
timestamp = plot_data.time.unique()[150]

min_in = plot_data.bike_frac.min()
max_in = plot_data.bike_frac.max()
min_out = 0.2
max_out = 1

def draw_publibike_map(timestamp, dataset):
    
    tmp_df = dataset.copy().assign(
        opacity_value=lambda x: np.where(
            x["time"] == timestamp,
            ((x.bike_frac - min_in) / (max_in - min_in)) * (max_out - min_out) + min_out,
            0,
        )
    )

    fig = px.scatter_mapbox(
        tmp_df,
        lat="latitude",
        lon="longitude",
        center=dict(lat=47.38, lon=8.53),
        color_discrete_sequence=["fuchsia"],
        size="bike_frac",
        width=700*1.5,
        height=600*1.5,
        zoom=13,
        opacity=tmp_df["opacity_value"]
)

    fig.update_layout(
        mapbox_style="carto-darkmatter",
        plot_bgcolor='black',
        paper_bgcolor='black',
        annotations=[
            dict(
                text=str(timestamp),
                showarrow=False,
                xref="paper",
                yref="paper",
                x=0.05,
                y=0.95,
                font=dict(size=40, color='#e5e5e5', family='Arial, sans-serif')
            )
        ]
    )
    fig.update_layout(margin={"r": 0, "l": 0, "b": 0, "t": 0})
    fig.update_traces(marker=dict(sizemode='diameter', size=tmp_df["bike_frac"]*0.25))  # Adjust the size value as needed

    return fig


fig = draw_publibike_map(timestamp, plot_data)
fig.write_image("test.png", scale=2)

In [23]:
import time
import os, shutil
import warnings

png_path = "Images_Zurich/"

os.makedirs(png_path, exist_ok=True)

for filename in os.listdir(png_path):
    file_path = os.path.join(png_path, filename)
    try:
        if os.path.isfile(file_path) or os.path.islink(file_path):
            os.unlink(file_path)
        elif os.path.isdir(file_path):
            shutil.rmtree(file_path)
    except Exception as e:
        print("Failed to delete %s. Reason: %s" % (file_path, e))

unique_times = plot_data.time.unique()

for idx, time_idx in enumerate(tqdm(unique_times)):
    fig = draw_publibike_map(
        timestamp=time_idx,
        # Smooth the data by averaging four values 
        dataset=plot_data.sort_values(["station_id", "time"]).assign(
            bike_frac=lambda x: x.groupby("station_id")["bike_frac"].transform(
                lambda x: x.rolling(4, min_periods=1).mean()
            )
        ),
    )
    fig.write_image(f"{png_path}frame{idx+1:03d}.png")
    
    del fig
    
    time.sleep(1)

  0%|          | 0/273 [00:00<?, ?it/s]

In [24]:
def convert_pngs_to_mp4(png_path, output_mp4_path, frames_per_second):
    # Get the list of PNG files in the specified directory
    frames = [f for f in os.listdir(png_path) if f.endswith('.png')]
    
    # Sort frames to ensure the correct order
    frames.sort()

    # Read the first frame to get dimensions
    first_frame = cv2.imread(os.path.join(png_path, frames[0]))
    height, width, _ = first_frame.shape

    # Create VideoWriter object
    video_writer = cv2.VideoWriter(output_mp4_path, cv2.VideoWriter_fourcc(*'mp4v'), frames_per_second, (width, height))

    # Iterate over frames and write to video
    for frame_name in tqdm(frames):
        frame_path = os.path.join(png_path, frame_name)
        frame = cv2.imread(frame_path)
        video_writer.write(frame)

    # Release the VideoWriter
    video_writer.release()

png_path = "Images_Zurich/"
output_mp4_path = "test.mp4"
frames_per_second = 15

convert_pngs_to_mp4(png_path, output_mp4_path, frames_per_second)

  0%|          | 0/273 [00:00<?, ?it/s]

### Bern: `plotly`

In [25]:
bern_data = (
    final_df.filter(
        [
            "timestamp",
            "station_id",
            "latitude",
            "longitude",
            "station_capacity",
            "state_name",
            "bike_name",
            "network_name",
            "ebike_battery_level",
            "day",
            "hour",
        ]
    )
    .query("network_name == 'Bern'")
    .reset_index(drop=True)
)

Problem: Might have multiple observations for the same time on a given day (scheduling on GitHub was shite), might have to drop those values...

In [26]:
bern_data = (
    bern_data
    .assign(time=lambda x: x.timestamp.dt.time)
    .drop_duplicates(["timestamp", "station_id", "bike_name"])
    .groupby(["timestamp", "station_id"])
     .apply(lambda x: pd.Series({
         "time": x["time"].iloc[0],
         "latitude": x["latitude"].iloc[0],
         "longitude": x["longitude"].iloc[0],
         "station_capacity": x["station_capacity"].iloc[0],
         "state_name": x["state_name"].iloc[0],
         "bike_count": x["bike_name"].nunique()
     }))
    .reset_index()
)

In [27]:
bern_data = (
    bern_data
    .query("timestamp.dt.weekday in [0, 1, 2, 3, 4]")
    .assign(
        day=lambda x: x.timestamp.dt.date.pipe(pd.to_datetime),
        bike_frac=lambda x: x.bike_count / x.groupby(["day", "time"])["bike_count"].transform("sum"),
    )
    .groupby(["time", "station_id"])
    .apply(lambda x: pd.Series({
        "bike_count": x.bike_count.mean(),
        "latitude": x.latitude.iloc[0],
        "longitude": x.longitude.iloc[0],
        "bike_frac": x.bike_frac.mean()
    }))
    .reset_index()
)

In [28]:
plot_data=bern_data.copy()

In [29]:
dict(lat=plot_data["latitude"].median(), lon=plot_data["longitude"].median())

{'lat': 46.94757843017578, 'lon': 7.440463066101074}

Issue: the size is relative to the other values in the dataframe: Therefore, include all data for each plot, but make the opacity zero for the times that are currently not shown, but a scale for the values that currently shown:

In [30]:
timestamp = plot_data.time.unique()[150]

min_in = plot_data.bike_frac.min()
max_in = plot_data.bike_frac.max()
min_out = 0.2
max_out = 1

def draw_publibike_map(timestamp, dataset):
    
    tmp_df = dataset.copy().assign(
        opacity_value=lambda x: np.where(
            x["time"] == timestamp,
            ((x.bike_frac - min_in) / (max_in - min_in)) * (max_out - min_out) + min_out,
            0,
        )
    )

    fig = px.scatter_mapbox(
        tmp_df,
        lat="latitude",
        lon="longitude",
        center=dict(lat=46.95, lon=7.445),
        color_discrete_sequence=["fuchsia"],
        size="bike_frac",
        width=700*1.5,
        height=600*1.5,
        zoom=13,
        opacity=tmp_df["opacity_value"]
)

    fig.update_layout(
        mapbox_style="carto-darkmatter",
        plot_bgcolor='black',
        paper_bgcolor='black',
        annotations=[
            dict(
                text=str(timestamp),
                showarrow=False,
                xref="paper",
                yref="paper",
                x=0.05,
                y=0.95,
                font=dict(size=40, color='#e5e5e5', family='Arial, sans-serif')
            )
        ]
    )
    fig.update_layout(margin={"r": 0, "l": 0, "b": 0, "t": 0})
    fig.update_traces(marker=dict(sizemode='diameter', size=tmp_df["bike_frac"]*0.25))  # Adjust the size value as needed

    return fig


fig = draw_publibike_map(timestamp, plot_data)
fig.write_image("test.png", scale=2)

In [31]:
import time
import os, shutil
import warnings

png_path = "Images_Bern/"

os.makedirs(png_path, exist_ok=True)

for filename in os.listdir(png_path):
    file_path = os.path.join(png_path, filename)
    try:
        if os.path.isfile(file_path) or os.path.islink(file_path):
            os.unlink(file_path)
        elif os.path.isdir(file_path):
            shutil.rmtree(file_path)
    except Exception as e:
        print("Failed to delete %s. Reason: %s" % (file_path, e))

unique_times = plot_data.time.unique()

for idx, time_idx in enumerate(tqdm(unique_times)):
    fig = draw_publibike_map(
        timestamp=time_idx,
        # Smooth the data by averaging four values 
        dataset=plot_data.sort_values(["station_id", "time"]).assign(
            bike_frac=lambda x: x.groupby("station_id")["bike_frac"].transform(
                lambda x: x.rolling(4, min_periods=1).mean()
            )
        ),
    )
    fig.write_image(f"{png_path}frame{idx+1:03d}.png")
    
    del fig
    
    time.sleep(1)

  0%|          | 0/273 [00:00<?, ?it/s]

In [32]:
def convert_pngs_to_mp4(png_path, output_mp4_path, frames_per_second):
    # Get the list of PNG files in the specified directory
    frames = [f for f in os.listdir(png_path) if f.endswith('.png')]
    
    # Sort frames to ensure the correct order
    frames.sort()

    # Read the first frame to get dimensions
    first_frame = cv2.imread(os.path.join(png_path, frames[0]))
    height, width, _ = first_frame.shape

    # Create VideoWriter object
    video_writer = cv2.VideoWriter(output_mp4_path, cv2.VideoWriter_fourcc(*'mp4v'), frames_per_second, (width, height))

    # Iterate over frames and write to video
    for frame_name in tqdm(frames):
        frame_path = os.path.join(png_path, frame_name)
        frame = cv2.imread(frame_path)
        video_writer.write(frame)

    # Release the VideoWriter
    video_writer.release()

png_path = "Images_Bern/"
output_mp4_path = "test_bern.mp4"
frames_per_second = 15

convert_pngs_to_mp4(png_path, output_mp4_path, frames_per_second)

  0%|          | 0/273 [00:00<?, ?it/s]

In [33]:
final_df.value_counts("network_name")

network_name
Bern                5322804
Zürich              4055696
Sottoceneri (TI)    2025161
Fribourg            1354128
Lausanne-Morges     1047661
Région de Nyon       834512
Chur                 188417
Name: count, dtype: int64

### Fribourg: `plotly`

In [34]:
fribourg_data = (
    final_df.filter(
        [
            "timestamp",
            "station_id",
            "latitude",
            "longitude",
            "station_capacity",
            "state_name",
            "bike_name",
            "network_name",
            "ebike_battery_level",
            "day",
            "hour",
        ]
    )
    .query("network_name == 'Fribourg'")
    .reset_index(drop=True)
)

Problem: Might have multiple observations for the same time on a given day (scheduling on GitHub was shite), might have to drop those values...

In [35]:
fribourg_data = (
    fribourg_data
    .assign(time=lambda x: x.timestamp.dt.time)
    .drop_duplicates(["timestamp", "station_id", "bike_name"])
    .groupby(["timestamp", "station_id"])
     .apply(lambda x: pd.Series({
         "time": x["time"].iloc[0],
         "latitude": x["latitude"].iloc[0],
         "longitude": x["longitude"].iloc[0],
         "station_capacity": x["station_capacity"].iloc[0],
         "state_name": x["state_name"].iloc[0],
         "bike_count": x["bike_name"].nunique()
     }))
    .reset_index()
)

In [36]:
fribourg_data = (
    fribourg_data
    .query("timestamp.dt.weekday in [0, 1, 2, 3, 4]")
    .assign(
        day=lambda x: x.timestamp.dt.date.pipe(pd.to_datetime),
        bike_frac=lambda x: x.bike_count / x.groupby(["day", "time"])["bike_count"].transform("sum"),
    )
    .groupby(["time", "station_id"])
    .apply(lambda x: pd.Series({
        "bike_count": x.bike_count.mean(),
        "latitude": x.latitude.iloc[0],
        "longitude": x.longitude.iloc[0],
        "bike_frac": x.bike_frac.mean()
    }))
    .reset_index()
)

In [37]:
plot_data=fribourg_data.copy()

In [38]:
dict(lat=plot_data["latitude"].median(), lon=plot_data["longitude"].median())

{'lat': 46.80180358886719, 'lon': 7.150041103363037}

Issue: the size is relative to the other values in the dataframe: Therefore, include all data for each plot, but make the opacity zero for the times that are currently not shown, but a scale for the values that currently shown:

In [39]:
timestamp = plot_data.time.unique()[150]

min_in = plot_data.bike_frac.min()
max_in = plot_data.bike_frac.max()
min_out = 0.2
max_out = 1

def draw_publibike_map(timestamp, dataset):
    
    tmp_df = dataset.copy().assign(
        opacity_value=lambda x: np.where(
            x["time"] == timestamp,
            ((x.bike_frac - min_in) / (max_in - min_in)) * (max_out - min_out) + min_out,
            0,
        )
    )

    fig = px.scatter_mapbox(
        tmp_df,
        lat="latitude",
        lon="longitude",
        center=dict(lat=46.80, lon=7.15),
        color_discrete_sequence=["fuchsia"],
        size="bike_frac",
        width=700*1.5,
        height=600*1.5,
        zoom=13,
        opacity=tmp_df["opacity_value"]
)

    fig.update_layout(
        mapbox_style="carto-darkmatter",
        plot_bgcolor='black',
        paper_bgcolor='black',
        annotations=[
            dict(
                text=str(timestamp),
                showarrow=False,
                xref="paper",
                yref="paper",
                x=0.05,
                y=0.95,
                font=dict(size=40, color='#e5e5e5', family='Arial, sans-serif')
            )
        ]
    )
    fig.update_layout(margin={"r": 0, "l": 0, "b": 0, "t": 0})
    fig.update_traces(marker=dict(sizemode='diameter', size=tmp_df["bike_frac"]*0.25))  # Adjust the size value as needed

    return fig


fig = draw_publibike_map(timestamp, plot_data)
fig.write_image("test.png", scale=2)

In [40]:
import time
import os, shutil
import warnings

png_path = "Images_Fribourg/"

os.makedirs(png_path, exist_ok=True)

for filename in os.listdir(png_path):
    file_path = os.path.join(png_path, filename)
    try:
        if os.path.isfile(file_path) or os.path.islink(file_path):
            os.unlink(file_path)
        elif os.path.isdir(file_path):
            shutil.rmtree(file_path)
    except Exception as e:
        print("Failed to delete %s. Reason: %s" % (file_path, e))

unique_times = plot_data.time.unique()

for idx, time_idx in enumerate(tqdm(unique_times)):
    fig = draw_publibike_map(
        timestamp=time_idx,
        # Smooth the data by averaging four values 
        dataset=plot_data.sort_values(["station_id", "time"]).assign(
            bike_frac=lambda x: x.groupby("station_id")["bike_frac"].transform(
                lambda x: x.rolling(4, min_periods=1).mean()
            )
        ),
    )
    fig.write_image(f"{png_path}frame{idx+1:03d}.png")
    
    del fig
    
    time.sleep(1)

  0%|          | 0/273 [00:00<?, ?it/s]

In [41]:
def convert_pngs_to_mp4(png_path, output_mp4_path, frames_per_second):
    # Get the list of PNG files in the specified directory
    frames = [f for f in os.listdir(png_path) if f.endswith('.png')]
    
    # Sort frames to ensure the correct order
    frames.sort()

    # Read the first frame to get dimensions
    first_frame = cv2.imread(os.path.join(png_path, frames[0]))
    height, width, _ = first_frame.shape

    # Create VideoWriter object
    video_writer = cv2.VideoWriter(output_mp4_path, cv2.VideoWriter_fourcc(*'mp4v'), frames_per_second, (width, height))

    # Iterate over frames and write to video
    for frame_name in tqdm(frames):
        frame_path = os.path.join(png_path, frame_name)
        frame = cv2.imread(frame_path)
        frame = cv2.resize(frame, (width, height))
        video_writer.write(frame)

    # Release the VideoWriter
    video_writer.release()

png_path = "Images_Fribourg/"
output_mp4_path = "test_fribourg.mp4"
frames_per_second = 15

convert_pngs_to_mp4(png_path, output_mp4_path, frames_per_second)

  0%|          | 0/273 [00:00<?, ?it/s]

In [42]:
final_df.value_counts("network_name")

network_name
Bern                5322804
Zürich              4055696
Sottoceneri (TI)    2025161
Fribourg            1354128
Lausanne-Morges     1047661
Région de Nyon       834512
Chur                 188417
Name: count, dtype: int64

### Further ideas:

- See if ETH/UZH students mostly use electric bikes to get up the hill
- See how battery levels develop for electric bikes and see when they get changed