In [1]:
import datetime
import time
import requests
import pandas as pd
import hopsworks
from functions import utils
import json
import os
import git
import warnings
warnings.filterwarnings("ignore")

In [2]:
from dotenv import load_dotenv
load_dotenv()
HOPSWORKS_API_KEY = os.getenv("HOPSWORKS_API_KEY")
os.environ["HOPSWORKS_API_KEY"] = HOPSWORKS_API_KEY

## Get the necessary data from Hopsworks

In [3]:
project = hopsworks.login()
fs = project.get_feature_store() 
# secrets = utils.secrets_api(project.name)

CITY = "dublin"
STATION = "HEUSTON BRIDGE (NORTH)"

# latitude =
# longitude =

today = datetime.datetime.now()

2025-01-31 18:24:19,650 INFO: Initializing external client


2025-01-31 18:24:19,655 INFO: Base URL: https://c.app.hopsworks.ai:443


2025-01-31 18:24:21,709 INFO: Python Engine initialized.



Logged in to project, explore it here https://c.app.hopsworks.ai:443/p/1207494


In [4]:
# Retrieve feature groups
bike_fg = fs.get_feature_group(
    name='bike_data',
    version=1,
)
weather_fg = fs.get_feature_group(
    name='weather_data',
    version=1,
)


## Clone and pull the repository with the bike data

In [5]:
# Configuration
REPO_URL = "https://github.com/MaxHalford/bike-sharing-history"
CLONE_DIR = "./bike_data/bike-sharing-history"
TARGET_CITY = "dublin"
FILE_NAME = "jcdecaux.geojson"

In [6]:
if not os.path.exists(CLONE_DIR):
    print("Cloning repository...")
    git.Repo.clone_from(REPO_URL, CLONE_DIR)
repo = git.Repo(CLONE_DIR)

# go to main and pull the latest changes
repo.git.checkout("main", force=True)
repo.remotes.origin.pull()

[<git.remote.FetchInfo at 0x22b996191c0>,
 <git.remote.FetchInfo at 0x22b99619120>]

## Get the latest datetime present in the bike data

In [7]:
# last_bike_datetime = "2025-01-06 15:06:11 UTC"

bike_df = bike_fg.read()

last_bike_datetime = bike_df["datetime"].max()
last_bike_datetime = last_bike_datetime.strftime("%Y-%m-%d %H:%M:%S %Z")

last_bike_datetime

Reading data from Hopsworks, using Hopsworks Feature Query Service.   

Reading data from Hopsworks, using Hopsworks Feature Query Service..   

Reading data from Hopsworks, using Hopsworks Feature Query Service...   

Finished: Reading data from Hopsworks, using Hopsworks Feature Query Service (1.15s) 


'2025-01-28 00:00:00 UTC'

## Loop through the commits and convert the bike data into a dataframe

In [8]:
results = {}

# Populate the results dict with the stations
data_file = os.path.join(CLONE_DIR, "data/stations/" + TARGET_CITY + "/" + FILE_NAME)
if os.path.exists(data_file):
    with open(data_file, "r") as f:
        data = f.read()
        data = json.loads(data)
        for feature in data["features"]:
            name = feature["properties"]["name"]
            results[name] = []

In [9]:
start_time = datetime.datetime.now()
last_day_and_hour = None

for commit in repo.iter_commits():
    # Stop when we reach the earliest bike date
    if commit.committed_datetime <= datetime.datetime.strptime(last_bike_datetime, "%Y-%m-%d %H:%M:%S %Z").replace(tzinfo=datetime.timezone.utc):
        print("breaking at: ", commit.committed_datetime)
        break

    # Skip commits from today
    if commit.committed_datetime > today.replace(tzinfo=datetime.timezone.utc).replace(hour=0, minute=0, second=0, microsecond=0):
        first_commited_datetime = repo.commit().committed_datetime
        continue

    day_and_hour = commit.committed_datetime.replace(minute=0, second=0, microsecond=0)
    # print("day_and_hour: ", day_and_hour, " - last_day_and_hour: ", last_day_and_hour)
    if day_and_hour == last_day_and_hour:
        continue
    last_day_and_hour = day_and_hour

    # Get the data for the commit
    print("Processing commit: ", commit.hexsha, " - ", commit.committed_datetime)
    try:
        repo.git.checkout(commit.hexsha, force=True)
    except Exception as e:
        print("Error checking out commit: ", e)
        break

    data_file = os.path.join(CLONE_DIR, "data/stations/" + TARGET_CITY + "/" + FILE_NAME)
    if os.path.exists(data_file):
        with open(data_file, "r") as f:
            data = f.read()
            data = json.loads(data)
            for feature in data["features"]:
                try:
                    results[feature["properties"]["name"]].append([feature["properties"]["available_bikes"], commit.committed_datetime])
                except KeyError:
                    continue

print(results)


Processing commit:  b8642d5f958e6fe3ba79455bbb96bf2035f7898a  -  2025-01-30 23:49:00+00:00


Processing commit:  7ae0b63c218807f65d97228ad20c15b62710bfb5  -  2025-01-30 22:50:17+00:00


Processing commit:  6f0abfd68af8e023aa534aab07e8b00791cb60bc  -  2025-01-30 21:48:59+00:00


Processing commit:  63e73056184244174da7b364d231d1e9f18297b6  -  2025-01-30 20:49:10+00:00


Processing commit:  74f784432c6656e28af3bd385d4a6428fc5f4374  -  2025-01-30 19:49:04+00:00


Processing commit:  47a6b3eac69c84bf21f8852b3b1d5ffbb34730d2  -  2025-01-30 18:49:08+00:00


Processing commit:  797355535cb3698b22fd652363fc63ba38e3ab09  -  2025-01-30 17:48:59+00:00


Processing commit:  c46e829f00de570ac0f51cb667ba1441aa29fa75  -  2025-01-30 16:50:03+00:00


Processing commit:  9f3b4d91aee182fbf9f294af49d2a91a9ff44724  -  2025-01-30 15:49:14+00:00


Processing commit:  e00b063c193b3a8b90fe6c2c4211b5f60afdf2d1  -  2025-01-30 14:49:05+00:00


Processing commit:  624aa1ebf791537686b3b56c51f9ed396bddde87  -  2025-01-30 13:49:11+00:00


Processing commit:  51b932d8026020eed353c37e1defd5272271eaf6  -  2025-01-30 12:59:21+00:00


Processing commit:  b8601e2101d0dbfeb9cd2f440489b7f4ce53c220  -  2025-01-30 11:49:04+00:00


Processing commit:  8c38912b10162abc4fbfa304b26c0948649c8c96  -  2025-01-30 10:49:05+00:00


Processing commit:  670cc21b91df0880f305e19673508142c948b2ce  -  2025-01-30 09:48:58+00:00


Processing commit:  66e8597ce1827c80ae9930d122cc67c312ecffe1  -  2025-01-30 08:48:58+00:00


Processing commit:  dc4cfea44002ec14f530415bbd777de86b13fa7b  -  2025-01-30 07:49:02+00:00


Processing commit:  b599075ecb50e6eb6f257385e00a317840d67e0e  -  2025-01-30 06:49:16+00:00


Processing commit:  194ed10e3c30d6b53a630cf874a8718eb81964f5  -  2025-01-30 05:48:54+00:00


Processing commit:  023c94eca383a0be5a630893ea4383471325a210  -  2025-01-30 04:49:14+00:00


Processing commit:  7e6cd39d143ec33c6608d89f71f7729fea02cd61  -  2025-01-30 03:49:04+00:00


Processing commit:  2a3e212c18a373642ade5400cf3badb0f28a63e0  -  2025-01-30 02:55:32+00:00


Processing commit:  c908a3f03bdb4a004fe9c20afe740e80f9ec97e9  -  2025-01-30 01:33:11+00:00


Processing commit:  69f23dbe971f835984b366d1bcbe97a7910efe9a  -  2025-01-30 00:49:34+00:00


Processing commit:  d80a31610c17399b21e096f19aeb41384f65d19e  -  2025-01-29 23:49:05+00:00


Processing commit:  084ff47766f0bf3602bfa93f67b3e4ac1fe89bf1  -  2025-01-29 22:49:10+00:00


Processing commit:  be8ec5eac0a4b9b85ff34c3553abca33fbcff6d6  -  2025-01-29 21:49:12+00:00


Processing commit:  05ec692bf8172df47b293c0dd9e92a367c1c65e2  -  2025-01-29 20:49:07+00:00


Processing commit:  bf8cff3a646f626bac4f2af8f89afef071469ec3  -  2025-01-29 19:49:13+00:00


Processing commit:  1943b54f9918593c084d7bedde76774882af8ba4  -  2025-01-29 18:49:14+00:00


Processing commit:  d74375c9102a3621f3a62eea5682382d97daa26f  -  2025-01-29 17:49:17+00:00


Processing commit:  da4a743c637b375ad16db875c359d6bca6a75f91  -  2025-01-29 16:49:27+00:00


Processing commit:  05f5dbbf081f42676e1607989c332bc5cdb5dc6b  -  2025-01-29 15:49:03+00:00


Processing commit:  2a5eac7847538c76595b13a1a0daeeb2eaafe752  -  2025-01-29 14:49:14+00:00


Processing commit:  25c84fc552e5647847efd64ff26b2f4e711efe2f  -  2025-01-29 13:49:09+00:00


Processing commit:  a16989852a61c02344a3a401ccca538f880c189e  -  2025-01-29 12:58:32+00:00


Processing commit:  ff67b3a7290b73650912ce654bd3590608595f13  -  2025-01-29 11:49:11+00:00


Processing commit:  73f0a758c5d89850c393c015640524c7c070871a  -  2025-01-29 10:49:04+00:00


Processing commit:  726a6df933972dc212f709037d3ba3ba1e5d73bb  -  2025-01-29 09:49:23+00:00


Processing commit:  6d5ca618783b0a1df8c3aaf4e3a8c7abd8565c36  -  2025-01-29 08:49:03+00:00


Processing commit:  ea5a7d9343ae5099a9f1dff552af22d060e75da9  -  2025-01-29 07:48:55+00:00


Processing commit:  a964eb65eb4bfef4f6dc75933b50568869c79f48  -  2025-01-29 06:49:15+00:00


Processing commit:  11855f68cd1d8eb1cbdd7910c587a5b92db9f830  -  2025-01-29 05:49:01+00:00


Processing commit:  9b8d045771d31287942419e1cf292166391b2d8a  -  2025-01-29 04:49:03+00:00


Processing commit:  8409bd02d10d8838e401e1684072c404092e384f  -  2025-01-29 03:48:53+00:00


Processing commit:  57686fc2f3711f4c2df93eb9a8bcf53b485e34ee  -  2025-01-29 02:55:54+00:00


Processing commit:  a204be1e69872101a06fc0bcbc98f3496cc58124  -  2025-01-29 01:33:23+00:00


Processing commit:  86f4053909bf98e674d5f0ce334da73145f895be  -  2025-01-29 00:49:45+00:00


Processing commit:  55eb3cb41e21f6eac159b573f8fbef2fdda6bb39  -  2025-01-28 23:49:12+00:00


Processing commit:  8d0bcf1f46e8da575d11ecaad70c322a80ce54cd  -  2025-01-28 22:49:05+00:00


Processing commit:  1a3250f98c439f3cad5bdae14c1e7f9eb06f9bea  -  2025-01-28 21:49:16+00:00


Processing commit:  0d4cc3dd0a889b3fdcf0af75c12547e77089abce  -  2025-01-28 20:48:56+00:00


Processing commit:  10fa540b1a6a71521abcfd886afde595a18993cb  -  2025-01-28 19:49:15+00:00


Processing commit:  55997a5ecd9d090c465e6cdbd7905184ac54c46b  -  2025-01-28 18:49:48+00:00


Processing commit:  5265846553e426e7a4125c5b0f5dbc3f9af36e6c  -  2025-01-28 17:49:05+00:00


Processing commit:  4d99ca4b0501a9e04c1a56ba55103e2e5161a6a2  -  2025-01-28 16:49:30+00:00


Processing commit:  0852bc196c2c9ed2c4040bdcca1683baaf615633  -  2025-01-28 15:49:00+00:00


Processing commit:  1d68002dc5f6e3d4b72df8716288864698ec9da8  -  2025-01-28 14:49:09+00:00


Processing commit:  76db8a3e2f498e417b2dab41d21a982520dceddb  -  2025-01-28 13:49:07+00:00


Processing commit:  679618bc9e05ebfed095d1370c22c13297010362  -  2025-01-28 12:59:34+00:00


Processing commit:  d1c62651c7697d6452421e671399ecf1183df1dd  -  2025-01-28 11:48:57+00:00


Processing commit:  cfc4c0490244682ac38a4d7500077b7ac06ea0a6  -  2025-01-28 10:49:06+00:00


Processing commit:  59a5216231e7f799d286e713b61e237dcb82ef65  -  2025-01-28 09:49:03+00:00


Processing commit:  46020217b974f8d85f87c8ebd2b6608e233bc1b7  -  2025-01-28 08:48:58+00:00


Processing commit:  ca5cb5e21306ebbaef5b3c4017f62a7ca160a87a  -  2025-01-28 07:49:10+00:00


Processing commit:  44eb70f78fc8b6624eec986e5e4a97a0e5aefac9  -  2025-01-28 06:49:14+00:00


Processing commit:  47b35caab9ec1a7d88c6079df4451d88e10dd1d7  -  2025-01-28 05:49:26+00:00


Processing commit:  960d8c11f36f2dcfa63a8c7756f9d0ff13cf5fe1  -  2025-01-28 04:49:03+00:00


Processing commit:  e81055b5d817c13a07b84dcc96a03b0c4cf339f4  -  2025-01-28 03:49:04+00:00


Processing commit:  fa9c461caa401783242b5e4f66a8196b404d433c  -  2025-01-28 02:56:18+00:00


Processing commit:  4b590d9954609f82f0d542a2e1131271e2ff5052  -  2025-01-28 01:33:32+00:00


Processing commit:  48871f75d734ae002e5a14fa10ac7880001ee5e4  -  2025-01-28 00:49:43+00:00


breaking at:  2025-01-27 23:49:07+00:00
{'CLARENDON ROW': [[14, datetime.datetime(2025, 1, 30, 23, 49, tzinfo=<git.objects.util.tzoffset object at 0x0000022B997963E0>)], [15, datetime.datetime(2025, 1, 30, 22, 50, 17, tzinfo=<git.objects.util.tzoffset object at 0x0000022B99796AA0>)], [20, datetime.datetime(2025, 1, 30, 21, 48, 59, tzinfo=<git.objects.util.tzoffset object at 0x0000022B99796410>)], [20, datetime.datetime(2025, 1, 30, 20, 49, 10, tzinfo=<git.objects.util.tzoffset object at 0x0000022B99796290>)], [20, datetime.datetime(2025, 1, 30, 19, 49, 4, tzinfo=<git.objects.util.tzoffset object at 0x0000022B997B64A0>)], [23, datetime.datetime(2025, 1, 30, 18, 49, 8, tzinfo=<git.objects.util.tzoffset object at 0x0000022B997BBA60>)], [14, datetime.datetime(2025, 1, 30, 17, 48, 59, tzinfo=<git.objects.util.tzoffset object at 0x0000022B998F9060>)], [19, datetime.datetime(2025, 1, 30, 16, 50, 3, tzinfo=<git.objects.util.tzoffset object at 0x0000022B99902620>)], [22, datetime.datetime(2025,

In [10]:
# turn results into a dataframe
df_bike_today = pd.DataFrame()

for station, values in results.items():
    if len(values) > 0:
        df = pd.DataFrame(values, columns=["num_bikes_available", "datetime"])
        df["datetime"] = pd.to_datetime(df["datetime"], utc=True)
        df["station"] = station.replace(" ", "_")
        df_bike_today = pd.concat([df_bike_today, df])

# if empty, do nothing
if not df_bike_today.empty:
    df_bike_today.dropna(inplace=True)
    df_bike_today["num_bikes_available"] = df_bike_today["num_bikes_available"].astype("float32")
    df_bike_today = df_bike_today[df_bike_today['station'].isin([STATION.replace(" ", "_")])]
    df_bike_today['datetime'] = pd.to_datetime(df_bike_today['datetime'], utc=True).dt.floor('H') + datetime.timedelta(hours=1)
df_bike_today


Unnamed: 0,num_bikes_available,datetime,station
0,37.0,2025-01-31 00:00:00+00:00,HEUSTON_BRIDGE_(NORTH)
1,37.0,2025-01-30 23:00:00+00:00,HEUSTON_BRIDGE_(NORTH)
2,38.0,2025-01-30 22:00:00+00:00,HEUSTON_BRIDGE_(NORTH)
3,40.0,2025-01-30 21:00:00+00:00,HEUSTON_BRIDGE_(NORTH)
4,40.0,2025-01-30 20:00:00+00:00,HEUSTON_BRIDGE_(NORTH)
...,...,...,...
67,40.0,2025-01-28 05:00:00+00:00,HEUSTON_BRIDGE_(NORTH)
68,40.0,2025-01-28 04:00:00+00:00,HEUSTON_BRIDGE_(NORTH)
69,40.0,2025-01-28 03:00:00+00:00,HEUSTON_BRIDGE_(NORTH)
70,40.0,2025-01-28 02:00:00+00:00,HEUSTON_BRIDGE_(NORTH)


## Fetch the weather data for the same time period

In [11]:
forecast_df = utils.get_hourly_weather_forecast(CITY)
forecast_df = forecast_df.rename(columns={'date_x': 'datetime'})
forecast_df = forecast_df.drop(columns=['date_y', 'date_only'])
forecast_df.dropna(inplace=True)

print(forecast_df.empty)

forecast_df

features: {'hourly': ['temperature_2m', 'apparent_temperature', 'rain', 'snowfall', 'wind_speed_10m'], 'daily': ['daylight_duration', 'rain_sum']}
params: {'latitude': 53.35, 'longitude': -6.26, 'hourly': ['temperature_2m', 'apparent_temperature', 'rain', 'snowfall', 'wind_speed_10m'], 'daily': ['daylight_duration', 'rain_sum']}


Coordinates 53.5°N -6.25°E
Elevation 11.0 m asl
Timezone None None
Timezone difference to GMT+0 0 s
False


Unnamed: 0,datetime,temperature_2m,apparent_temperature,rain,snowfall,wind_speed_10m,daylight_duration,rain_sum,city
0,2025-01-31 00:00:00+00:00,6.95,2.110960,0.4,0.0,25.929996,32065.236328,0.400000,dublin
1,2025-01-31 01:00:00+00:00,6.75,1.755430,0.0,0.0,26.932714,32065.236328,0.400000,dublin
2,2025-01-31 02:00:00+00:00,6.35,1.483531,0.0,0.0,25.233406,32065.236328,0.400000,dublin
3,2025-01-31 03:00:00+00:00,5.90,1.232717,0.0,0.0,23.224882,32065.236328,0.400000,dublin
4,2025-01-31 04:00:00+00:00,5.45,0.968557,0.0,0.0,21.325253,32065.236328,0.400000,dublin
...,...,...,...,...,...,...,...,...,...
235,2025-02-09 19:00:00+00:00,5.20,0.061158,0.1,0.0,23.806318,34126.535156,5.699998,dublin
236,2025-02-09 20:00:00+00:00,5.20,-0.020061,0.1,0.0,24.363251,34126.535156,5.699998,dublin
237,2025-02-09 21:00:00+00:00,5.20,-0.084013,0.1,0.0,24.590923,34126.535156,5.699998,dublin
238,2025-02-09 22:00:00+00:00,5.15,-0.177298,0.1,0.0,24.826952,34126.535156,5.699998,dublin


## Insert the bike and weather data into Hopsworks

In [12]:
if df_bike_today.empty:
    print("No bike data available for today")
else:
    bike_fg.insert(df_bike_today)

Uploading Dataframe: 0.00% |                                      | Rows 0/72 | Elapsed Time: 00:00 | Remaining Time: ?

Uploading Dataframe: 1.39% |▍                                 | Rows 1/72 | Elapsed Time: 00:01 | Remaining Time: 01:19

Uploading Dataframe: 100.00% |███████████████████████████████| Rows 72/72 | Elapsed Time: 00:01 | Remaining Time: 00:00




Launching job: bike_data_1_offline_fg_materialization


Job started successfully, you can follow the progress at 
https://c.app.hopsworks.ai:443/p/1207494/jobs/named/bike_data_1_offline_fg_materialization/executions


In [13]:
if forecast_df.empty:
    print("No weather forecast available for today")
else:
    weather_fg.insert(forecast_df, write_options={"wait_for_job": True})


Uploading Dataframe: 0.00% |                                     | Rows 0/240 | Elapsed Time: 00:00 | Remaining Time: ?

Uploading Dataframe: 69.17% |████████████████████▊         | Rows 166/240 | Elapsed Time: 00:01 | Remaining Time: 00:00

Uploading Dataframe: 100.00% |█████████████████████████████| Rows 240/240 | Elapsed Time: 00:01 | Remaining Time: 00:00




Launching job: weather_data_1_offline_fg_materialization


Job started successfully, you can follow the progress at 
https://c.app.hopsworks.ai:443/p/1207494/jobs/named/weather_data_1_offline_fg_materialization/executions


2025-01-31 18:25:51,023 INFO: Waiting for execution to finish. Current state: SUBMITTED. Final status: UNDEFINED


2025-01-31 18:26:16,584 INFO: Waiting for execution to finish. Current state: RUNNING. Final status: UNDEFINED


2025-01-31 18:28:43,413 INFO: Waiting for execution to finish. Current state: AGGREGATING_LOGS. Final status: SUCCEEDED


2025-01-31 18:28:43,579 INFO: Waiting for log aggregation to finish.


2025-01-31 18:29:02,193 INFO: Execution finished successfully.
