In [None]:
import json
import os
import re
from collections import Counter
from datetime import datetime

### Get sorted paths of snapshots

In [2]:
folder_path = "data/"

# Snapshot file names carry their capture time as YYYYMMDD_HHMMSS right after
# the "nextbike_data_" prefix; the capture group isolates that timestamp.
pattern = re.compile(r"nextbike_data_(\d{8}_\d{6})")

files_with_dates = []

for filename in os.listdir(folder_path):
    hit = pattern.match(filename)
    if hit is None:
        # Not a snapshot file: ignore it silently.
        continue
    try:
        captured_at = datetime.strptime(hit.group(1), "%Y%m%d_%H%M%S")
    except ValueError:
        # The digits matched the pattern but do not form a real date/time.
        print(f"Skipping file with invalid datetime: {filename}")
        continue
    files_with_dates.append((captured_at, os.path.join(folder_path, filename)))

# Tuples compare by timestamp first (then by path on ties), so this orders
# the snapshots chronologically.
files_with_dates.sort()
sorted_paths = [snapshot_path for _, snapshot_path in files_with_dates]

### Helper functions

In [3]:
def extract_station_data(data):
    """Index the places of the first city of the first country by their uid.

    Args:
        data: Parsed snapshot; must provide ``data["countries"][0]["cities"][0]``.

    Returns:
        dict mapping each place ``uid`` to ``{"name": <place name>,
        "bikes": <set of the "number" values in the place's "bike_list">}``.
        A city without ``"places"`` yields an empty dict and a place without
        ``"bike_list"`` yields an empty bike set.

    Raises:
        KeyError / IndexError: If the country/city path, or a place's
            ``"uid"`` / ``"name"``, is missing.
    """
    first_city = data["countries"][0]["cities"][0]
    by_uid = {}
    for place in first_city.get("places", []):
        record = {"name": place["name"]}
        record["bikes"] = {entry["number"] for entry in place.get("bike_list", [])}
        by_uid[place["uid"]] = record
    return by_uid

In [4]:
def load_state(path):
    """Read one snapshot file and return its parsed JSON content.

    Args:
        path: Path of the JSON snapshot file.

    Returns:
        The decoded JSON document, exactly as produced by ``json.load``.

    Raises:
        OSError: If the file cannot be opened.
        ValueError: If the content is not valid UTF-8 or not valid JSON
            (``json.JSONDecodeError`` / ``UnicodeDecodeError``).
    """
    # Decode explicitly as UTF-8 rather than with the platform default
    # encoding: station names contain non-ASCII characters (e.g. "Straße"),
    # which would otherwise be mangled or fail on non-UTF-8 locales.
    with open(path, "r", encoding="utf-8") as f:
        return json.load(f)

In [17]:
def extract_timestamp_from_path(path):
    """Return the capture time encoded in a snapshot file name.

    The timestamp is taken from the ``nextbike_data_<YYYYMMDD_HHMMSS>`` prefix
    of the base name, using the same pattern as the snapshot discovery step,
    so any suffix after the timestamp (``.json``, ``_berlin.json``, ``.json.bak``,
    ...) is tolerated. Names without that prefix fall back to the previous
    behavior of stripping ``"nextbike_data_"`` and ``".json"``.

    Args:
        path: Path (or bare file name) of a snapshot file.

    Returns:
        datetime parsed with the format ``%Y%m%d_%H%M%S``.

    Raises:
        ValueError: If no valid timestamp can be parsed from the file name.
    """
    filename = os.path.basename(path)
    match = re.match(r"nextbike_data_(\d{8}_\d{6})", filename)
    if match:
        datetime_str = match.group(1)
    else:
        # Legacy path: keeps names that parsed before (e.g. without the prefix) working.
        datetime_str = filename.replace("nextbike_data_", "").replace(".json", "")
    return datetime.strptime(datetime_str, "%Y%m%d_%H%M%S")

### Main loop, only for one city (Berlin)

In [None]:
# Slice end index into sorted_paths: only a sample of the data is replayed
# (there are over 10_000 snapshot files). Set to None to process all of them.
MAX_SNAPSHOTS = 500

bike_movements = []
disappeared_bikes = {}  # {bike_id: {"from_station": name, "departure_time": timestamp}}

# Fail with a clear message instead of an opaque IndexError on sorted_paths[0].
if not sorted_paths:
    raise FileNotFoundError("No snapshot files found; nothing to compare.")

# The first snapshot is the baseline that the next snapshot is diffed against.
previous_path = sorted_paths[0]
previous_state = extract_station_data(load_state(previous_path))
previous_timestamp = extract_timestamp_from_path(previous_path)

for current_path in sorted_paths[1:MAX_SNAPSHOTS]:
    try:
        current_state = extract_station_data(load_state(current_path))
        current_timestamp = extract_timestamp_from_path(current_path)

        # Check for disappeared bikes across all stations
        for station_id, station_info in previous_state.items():
            # NOTE(review): stations missing from the current snapshot are
            # skipped, so their bikes are not marked as departed here —
            # confirm this is intended.
            if station_id in current_state:
                current_bikes = current_state[station_id]["bikes"]
                disappeared = station_info["bikes"] - current_bikes
                for bike in disappeared:
                    # Keep the first recorded departure of a bike that is still missing.
                    if bike not in disappeared_bikes:
                        disappeared_bikes[bike] = {
                            "from_station": station_info["name"],
                            "departure_time": previous_timestamp
                        }

        # Try to locate disappeared bikes in the current state
        for new_station_info in current_state.values():
            # We do not process temporary stations; those stations are created when a
            # bike is rented but not returned (can be seen in bike_movements_old.ipynb).
            if re.match(r"^BIKE \d+$", new_station_info["name"]):
                continue
            for bike in new_station_info["bikes"]:
                # Pop on the first match so a bike is resolved exactly once per
                # snapshot. Previously a bike listed at two stations was deleted
                # twice; the resulting KeyError was swallowed below, which left
                # previous_state stale and the other found bikes unresolved
                # (and therefore recorded again on the next snapshot).
                origin = disappeared_bikes.pop(bike, None)
                if origin is None:
                    continue
                movement = {
                    "bike_id": bike,
                    "from_station": origin["from_station"],
                    "to_station": new_station_info["name"],
                    "departure_time": origin["departure_time"],
                    "arrival_time": current_timestamp
                }
                bike_movements.append(movement)
                print(f"Bike {bike} moved from {movement['from_station']} at {movement['departure_time']} "
                      f"to {movement['to_station']} at {movement['arrival_time']}")

        previous_state = current_state
        previous_timestamp = current_timestamp
        print("-" * 20)
    except Exception as e:
        # Best effort: report the snapshot and move on. previous_state and
        # previous_timestamp are not advanced, so the next snapshot is compared
        # against the last one that was processed successfully.
        print(f"Error processing path {current_path}: {e}")


--------------------
--------------------
Bike 13531 moved from Volksbühne (U Rosa-Luxemburg-Platz) | BONUS-Station: Return(Rückgabe) here=15 mins free at 2025-04-02 19:45:14 to Volksbühne (U Rosa-Luxemburg-Platz) | BONUS-Station: Return(Rückgabe) here=15 mins free at 2025-04-02 19:47:18
--------------------
--------------------
Bike 16357 moved from virtuell - Schillerstraße/Krumme Straße at 2025-04-02 19:44:13 to Jelbi Schillerstraße/Wilmersdorfer Straße (CHA/SC) at 2025-04-02 19:49:21
Bike 10382 moved from virtuell - U Heinrich-Heine-Straße (Abstellfläche-Mikromobilität) at 2025-04-02 19:45:14 to virtuell - Annenstraße at 2025-04-02 19:49:21
--------------------
Bike 19769 moved from S Rummelsburg | BONUS-Station: Return(Rückgabe) here=15 mins free at 2025-04-02 19:46:16 to S Nöldnerplatz | BONUS-Station: Return(Rückgabe) here=15 mins free at 2025-04-02 19:50:23
Bike 10461 moved from virtuell - Bouchéstraße/Karl-Kunger-Straße at 2025-04-02 19:46:16 to virtuell - Kiehlufer/Bouchéstra

### Movement count for each bike

In [36]:
# Number of recorded movements per bike. Counter replaces the hand-rolled
# defaultdict tally; like defaultdict(int) it yields 0 for unknown bikes.
bike_movement_counts = Counter(movement["bike_id"] for movement in bike_movements)

# (bike_id, count) pairs, busiest bikes first; bikes with equal counts keep
# the order in which they were first encountered.
bike_movement_summary = bike_movement_counts.most_common()

for bike_id, count in bike_movement_summary[:10]:
    print(f"Bike {bike_id} moved {count} times")

Bike 16204 moved 7 times
Bike 100064 moved 5 times
Bike 19155 moved 5 times
Bike 16968 moved 5 times
Bike 15180 moved 5 times
Bike 10914 moved 5 times
Bike 14425 moved 5 times
Bike 17778 moved 5 times
Bike 14328 moved 5 times
Bike 19473 moved 5 times


### Movements for selected bike

In [35]:
target_bike_id = "16204"

# Collect every recorded trip of the selected bike, in detection order.
target_bike_movements = []
for move in bike_movements:
    if move["bike_id"] == target_bike_id:
        target_bike_movements.append(move)

# One line per trip: where and when it left, where and when it arrived.
for move in target_bike_movements:
    departure_part = f"Bike {move['bike_id']} departed from {move['from_station']} at {move['departure_time']} "
    arrival_part = f"and arrived at {move['to_station']} at {move['arrival_time']}"
    print(departure_part + arrival_part)


Bike 16204 departed from S+U Neukölln | BONUS-Station: Return(Rückgabe) here=15 mins free at 2025-04-02 20:32:34 and arrived at virtuell - Oderstraße/Siegfriedstraße at 2025-04-02 20:41:47
Bike 16204 departed from virtuell - Oderstraße/Siegfriedstraße at 2025-04-02 21:01:18 and arrived at virtuell - Oderstraße/Siegfriedstraße at 2025-04-02 21:23:58
Bike 16204 departed from virtuell - Oderstraße/Siegfriedstraße at 2025-04-02 21:43:29 and arrived at S Ostbahnhof | BONUS-Station: Return(Rückgabe) here=15 mins free at 2025-04-02 22:13:15
Bike 16204 departed from S Ostbahnhof | BONUS-Station: Return(Rückgabe) here=15 mins free at 2025-04-02 22:34:50 and arrived at Köpenicker Straße/Eisenbahnstraße | BONUS-Station: Return(Rückgabe) here=15 mins free at 2025-04-02 22:42:02
Bike 16204 departed from Köpenicker Straße/Eisenbahnstraße | BONUS-Station: Return(Rückgabe) here=15 mins free at 2025-04-03 01:10:14 and arrived at virtuell - Leipziger Straße/Jerusalemer Straße at 2025-04-03 01:30:49
Bike