In [1]:
import sys
from pathlib import Path

def is_google_colab() -> bool:
    """Report whether the notebook is running inside Google Colab.

    The check looks for the substring ``"google.colab"`` in the string form
    of the active IPython shell object returned by ``get_ipython()``.

    Returns:
        True when the substring is present, False otherwise.
    """
    return "google.colab" in str(get_ipython())

def clone_repository() -> None:
    """Clone the mlfs-book repository and change into the checkout.

    Relies on IPython shell (`!`) and line (`%`) magics, so it is only usable
    from an IPython/Jupyter kernel.

    NOTE(review): there is no guard for the `mlfs-book` directory already
    existing, so re-running this in the same session presumably fails or
    nests a second clone -- confirm before relying on re-runs.
    """
    # Shell out to git to fetch the project sources into ./mlfs-book.
    !git clone https://github.com/featurestorebook/mlfs-book.git
    # Move the kernel's working directory into the clone so later relative
    # paths (e.g. pyproject.toml) resolve inside it.
    %cd mlfs-book

def install_dependencies() -> None:
    """Install `uv` with pip, then use `uv` to install the project's requirements.

    The requirements are read from `pyproject.toml` via a relative path, so
    the current working directory must contain that file when this is called.
    Relies on IPython shell (`!`) magics.
    """
    # Get (or upgrade to) the latest uv release.
    !pip install --upgrade uv
    # Install everything declared in pyproject.toml, including all extras;
    # --system asks uv to install without a virtual environment.
    !uv pip install --all-extras --system --requirement pyproject.toml

# Resolve the project root, depending on where the notebook is running.
if is_google_colab():
    # On Colab the sources and dependencies are fetched first. The clone step
    # changes into the checkout, so the working directory is the project root.
    clone_repository()
    install_dependencies()
    root_dir = str(Path().absolute())
    print("Google Colab environment")
else:
    root_dir = Path().absolute()
    # If the kernel was started in .../notebooks/energy_price or .../notebooks,
    # walk up so that root_dir points at the directory above `notebooks`.
    if root_dir.name == 'energy_price':
        root_dir = root_dir.parent
    if root_dir.name == 'notebooks':
        root_dir = root_dir.parent
    root_dir = str(root_dir)
    print("Local environment")

# Make the project root importable so the `mlfs` package can be used from the notebook.
if root_dir not in sys.path:
    sys.path.append(root_dir)
print(f"Added the following directory to the PYTHONPATH: {root_dir}")

# Load the settings from the file <root_dir>/.env
from mlfs import config
settings = config.HopsworksSettings(_env_file=f"{root_dir}/.env")

Local environment
Added the following directory to the PYTHONPATH: /Users/alexanderdahm/Documents/GitHub/mlfs-book-proj
HopsworksSettings initialized!


### <span style='color:#ff5f27'> 📝 Imports </span>

In [2]:
import datetime
import time
import requests
import pandas as pd
import hopsworks
from mlfs.energy_price import util
from mlfs import config
import json
import os
import warnings
warnings.filterwarnings("ignore")






In [3]:
class Sensor:
    """A named location in Sweden described by its city and coordinates.

    Attributes:
        name: Identifier given by the caller.
        country: Always "sweden".
        city: City name given by the caller.
        street: Set to the same value as ``city``.
        url: Always the empty string.
        lat: Latitude, coerced to float.
        lon: Longitude, coerced to float.
        csv: CSV path given by the caller; defaults to the empty string.
    """

    def __init__(self, name, city, lat, lon, csv=""):
        self.name = name
        self.country = "sweden"
        # No separate street is given; the city name is stored in both fields.
        self.city = self.street = city
        # No explicit AQICN URL is provided for these cities.
        self.url = ""
        # Accept anything float() can parse (numbers or numeric strings).
        self.lat, self.lon = float(lat), float(lon)
        self.csv = csv

# Locations to build sensors for: a city name plus its latitude/longitude.
cities = [
    {"name": "flasjon", "lat": 62.760350390111626, "lon": 13.715986496712969},
    {"name": "hudiksvall", "lat": 61.790862930411194, "lon": 17.15754858778168},
    {"name": "ange", "lat": 62.54989082316923, "lon": 15.751547550392734},
    {"name": "solleftea", "lat": 63.159587742988755, "lon": 17.2655114712721},
    {"name": "umea", "lat": 63.81702480736613, "lon": 20.18691175826482},
]

# One Sensor per city, named "sensor<position in cities>", with an empty CSV path.
sensorList = [
    Sensor(f"sensor{idx}", c["name"], c["lat"], c["lon"], "")
    for idx, c in enumerate(cities)
]

# Echo every sensor so the configuration can be checked in the cell output.
for sensor in sensorList:
    print(f"Processing sensor: {sensor.name} located at {sensor.street}, {sensor.city}, {sensor.country} with coordinates ({sensor.lat}, {sensor.lon}), csv path {sensor.csv}")


Processing sensor: sensor0 located at flasjon, flasjon, sweden with coordinates (62.760350390111626, 13.715986496712969), csv path 
Processing sensor: sensor1 located at hudiksvall, hudiksvall, sweden with coordinates (61.790862930411194, 17.15754858778168), csv path 
Processing sensor: sensor2 located at ange, ange, sweden with coordinates (62.54989082316923, 15.751547550392734), csv path 
Processing sensor: sensor3 located at solleftea, solleftea, sweden with coordinates (63.159587742988755, 17.2655114712721), csv path 
Processing sensor: sensor4 located at umea, umea, sweden with coordinates (63.81702480736613, 20.18691175826482), csv path 


In [4]:
# Log in to Hopsworks with the Python engine and keep the project handle.
project = hopsworks.login(engine="python")
# Feature store handle; used below to look up the feature groups.
fs = project.get_feature_store() 
# Secrets API handle; not referenced by the cells visible in this section.
secrets = hopsworks.get_secrets_api()
# Current date on the machine running the kernel; not referenced by the cells visible in this section.
today = datetime.date.today()

2026-01-03 20:07:06,128 INFO: Initializing external client
2026-01-03 20:07:06,128 INFO: Base URL: https://c.app.hopsworks.ai:443
2026-01-03 20:07:07,631 INFO: Python Engine initialized.

Logged in to project, explore it here https://c.app.hopsworks.ai:443/p/1290388


### <span style="color:#ff5f27;"> 🔮 Get references to the Feature Groups </span>

In [5]:
# Handles to version 1 of the two feature groups this notebook inserts into.
energy_price_fg = fs.get_feature_group(name='energy_price', version=1)
weather_fg = fs.get_feature_group(name='weather', version=1)


In [6]:
# Fetch the latest energy prices and the per-city weather forecasts, then
# insert both into their feature groups.

# Per-city daily weather frames; merged on "date" into one wide frame further down.
all_weather_data = []
# Bound up front so the display expression at the end of this cell cannot raise
# NameError (or show a stale frame from an earlier run) when no weather frames
# were collected.
combined_weather_df = pd.DataFrame()

# Get new energy price data (the result is consumed as a date -> price mapping)
price_history_dict = util.get_energy_price(date=None)
energy_price_df = pd.DataFrame(list(price_history_dict.items()), columns=['date', 'sek'])
energy_price_df['sek'] = energy_price_df['sek'].astype('float32')
energy_price_df['zone'] = "SE2"
# Parse as UTC, then drop the timezone so the column is timezone-naive.
energy_price_df["date"] = pd.to_datetime(energy_price_df["date"], utc=True).dt.tz_convert(None)
energy_price_df = energy_price_df.sort_values("date").reset_index(drop=True)

print(energy_price_df)
print(energy_price_df.dtypes)


# Loop over each sensor and add their daily data
for sensor in sensorList:
    # country, street and aqicn_url are not used further in this cell.
    country = sensor.country
    city = sensor.city
    street = sensor.street
    aqicn_url = sensor.url
    latitude = sensor.lat
    longitude = sensor.lon

    # Get the weather forecast for this location, indexed by timestamp so it
    # can be filtered by time of day.
    hourly_df = util.get_hourly_weather_forecast(city, latitude, longitude)
    hourly_df = hourly_df.set_index("date")

    # We will only make 1 daily prediction, so we will replace the hourly forecasts with a single daily forecast
    # We only want the daily weather data, so only get weather at 12:00
    daily_df = hourly_df.between_time('11:59', '12:01')
    daily_df = daily_df.reset_index()
    # Truncate the timestamps to calendar dates, then convert back to datetimes
    # so every city shares the same midnight "date" key for the merge.
    daily_df['date'] = pd.to_datetime(daily_df['date']).dt.date
    daily_df['date'] = pd.to_datetime(daily_df['date'])
    # Rename columns to include city name (exclude 'date')
    daily_df = daily_df.rename(columns={col: f"{col}_{city}" for col in daily_df.columns if col != "date"})
    all_weather_data.append(daily_df)

# Inserted without wait=True, so the cell does not block on this insert.
energy_price_fg.insert(energy_price_df)


if all_weather_data:
    # Outer-merge on "date" so a date missing for one city still keeps the
    # other cities' values (the gaps become NaN).
    combined_weather_df = all_weather_data[0]
    for df in all_weather_data[1:]:
        combined_weather_df = pd.merge(combined_weather_df, df, on="date", how="outer")
    weather_fg.insert(combined_weather_df, wait=True)

combined_weather_df


         date        sek zone
0  2025-12-24   5.150000  SE2
1  2025-12-25   1.210000  SE2
2  2025-12-26   2.320000  SE2
3  2025-12-27   1.250000  SE2
4  2025-12-28   2.390000  SE2
5  2025-12-29   2.710000  SE2
6  2025-12-30  31.770000  SE2
7  2025-12-31  63.189999  SE2
8  2026-01-01  13.410000  SE2
9  2026-01-02  23.010000  SE2
10 2026-01-03  62.389999  SE2
11 2026-01-04  93.070000  SE2
date    datetime64[ns]
sek            float32
zone            object
dtype: object
Coordinates 62.75¬∞N 13.75¬∞E
Elevation 478.0 m asl
Timezone None None
Timezone difference to GMT+0 0 s
Coordinates 61.75¬∞N 17.25¬∞E
Elevation 65.0 m asl
Timezone None None
Timezone difference to GMT+0 0 s
Coordinates 62.5¬∞N 15.75¬∞E
Elevation 165.0 m asl
Timezone None None
Timezone difference to GMT+0 0 s
Coordinates 63.25¬∞N 17.25¬∞E
Elevation 66.0 m asl
Timezone None None
Timezone difference to GMT+0 0 s
Coordinates 63.75¬∞N 20.25¬∞E
Elevation 18.0 m asl
Timezone None None
Timezone difference to GMT+0 0 s
2026-01-03 

Uploading Dataframe: 100.00% |‚ñà| Rows 12/12 | Elapsed Time: 00:00 | Remaining Ti


Launching job: energy_price_1_offline_fg_materialization
Job started successfully, you can follow the progress at 
https://c.app.hopsworks.ai:443/p/1290388/jobs/named/energy_price_1_offline_fg_materialization/executions


Uploading Dataframe: 100.00% |‚ñà| Rows 7/7 | Elapsed Time: 00:00 | Remaining Time


Launching job: weather_1_offline_fg_materialization
Job started successfully, you can follow the progress at 
https://c.app.hopsworks.ai:443/p/1290388/jobs/named/weather_1_offline_fg_materialization/executions
2026-01-03 20:07:39,000 INFO: Waiting for execution to finish. Current state: INITIALIZING. Final status: UNDEFINED
2026-01-03 20:07:42,229 INFO: Waiting for execution to finish. Current state: RUNNING. Final status: UNDEFINED
2026-01-03 20:09:12,219 INFO: Waiting for execution to finish. Current state: AGGREGATING_LOGS. Final status: SUCCEEDED
2026-01-03 20:09:12,396 INFO: Waiting for log aggregation to finish.
2026-01-03 20:09:21,087 INFO: Execution finished successfully.


Unnamed: 0,date,temperature_2m_mean_flasjon,precipitation_sum_flasjon,wind_speed_10m_max_flasjon,wind_direction_10m_dominant_flasjon,temperature_2m_mean_hudiksvall,precipitation_sum_hudiksvall,wind_speed_10m_max_hudiksvall,wind_direction_10m_dominant_hudiksvall,temperature_2m_mean_ange,...,wind_speed_10m_max_ange,wind_direction_10m_dominant_ange,temperature_2m_mean_solleftea,precipitation_sum_solleftea,wind_speed_10m_max_solleftea,wind_direction_10m_dominant_solleftea,temperature_2m_mean_umea,precipitation_sum_umea,wind_speed_10m_max_umea,wind_direction_10m_dominant_umea
0,2026-01-03,-15.85,0.0,7.072878,14.743609,-8.95,0.8,27.002399,359.236115,-12.85,...,14.408997,12.99463,-14.8,0.0,10.805999,358.090881,-13.25,0.0,22.608458,9.16228
1,2026-01-04,-20.549999,0.0,5.771239,273.576263,-5.55,0.0,21.203358,40.179176,-13.45,...,7.28055,351.469299,-17.35,0.0,7.636753,315.000092,-13.75,0.0,15.034041,343.30069
2,2026-01-05,-20.0,0.0,6.214563,259.992096,-17.299999,0.0,8.669949,274.76355,-21.799999,...,5.241679,254.054535,-23.450001,0.0,3.563818,315.000092,-13.15,0.0,10.24578,288.435028
3,2026-01-06,-22.049999,0.0,2.16,270.0,-6.5,0.1,13.684735,1.5074,-16.549999,...,0.509117,315.000092,-17.049999,0.0,3.893995,303.690094,-15.85,0.0,13.493999,9.210952
4,2026-01-07,-14.35,0.3,8.78872,34.9921,-7.2,0.0,4.213692,199.9832,-10.0,...,11.113451,24.904745,-9.75,0.2,17.072504,42.436188,-9.7,0.1,19.826164,29.357658
5,2026-01-08,-10.85,0.2,8.350138,82.568687,-0.6,0.0,13.570615,158.198532,-5.25,...,10.182337,98.13002,-3.9,0.3,8.699793,114.443947,-3.55,0.5,20.883102,125.882233
6,2026-01-09,-6.75,0.5,16.595179,77.471199,-1.95,0.5,1.13842,341.564941,-3.7,...,20.545246,78.887009,-5.85,0.1,20.063339,80.706772,-8.55,0.0,20.316889,97.124924


---