In [None]:
from pathlib import Path
import sys
import datetime
import pandas as pd
import warnings
warnings.filterwarnings("ignore")

from dotenv import load_dotenv
import hopsworks

# --- Paths -------------------------------------------------------------
# The project root is the parent of the current working directory
# (the notebook is expected to run from notebooks/).
root_dir = Path("..").resolve()

# Make the project root importable so the `src` package can be resolved.
project_root = str(root_dir)
if project_root not in sys.path:
    sys.path.append(project_root)

# --- Environment -------------------------------------------------------
# Read variables from the .env file located at the project root.
env_path = root_dir / ".env"
load_dotenv(env_path)

# --- Project code ------------------------------------------------------
# These imports must stay below the sys.path adjustment above.
from src.config import ElectricitySettings
from src import util

settings = ElectricitySettings()

# --- Hopsworks ---------------------------------------------------------
# Authenticate and obtain a handle to the named feature store.
project = hopsworks.login()
fs = project.get_feature_store(name="scalableproject_featurestore")


print(
    "Successfully logged in to Hopsworks project:",
    settings.HOPSWORKS_PROJECT,
)


ElectricitySettings initialized
2025-12-09 17:21:41,159 INFO: Closing external client and cleaning up certificates.


2025-12-09 17:21:41,164 INFO: Connection closed.
2025-12-09 17:21:41,165 INFO: Initializing external client
2025-12-09 17:21:41,165 INFO: Base URL: https://eu-west.cloud.hopsworks.ai:443
2025-12-09 17:21:42,273 INFO: Python Engine initialized.

Logged in to project, explore it here https://eu-west.cloud.hopsworks.ai:443/p/127
Successfully logged in to Hopsworks project: ScalableProject


In [10]:
# Handles to the two version-1 feature groups used in the cells below
electricity_prices_fg = fs.get_feature_group(name="electricity_prices", version=1)
weather_hourly_fg = fs.get_feature_group(name="weather_hourly", version=1)


In [11]:
# Configuration: price area and location used by the fetch cells below
PRICE_AREA = "SE3"  # Stockholm / Södra Mellansverige
CITY = "Stockholm"
LATITUDE, LONGITUDE = 59.3251, 18.0711  # Stockholm coordinates

# Disabled alternative: look the coordinates up from the city name
#LATITUDE, LONGITUDE = util.get_city_coordinates(CITY)

# Calendar date at execution time (local clock of the machine running the kernel)
today = datetime.date.today()

In [12]:
# Fetch today's hourly electricity prices for PRICE_AREA using
# util.get_today_electricity_prices().
df_prices = util.get_today_electricity_prices(PRICE_AREA)

# Keep only SEK prices (we do not use EUR or exchange_rate).
# .copy() detaches the column selection from the fetched frame, so columns can
# be added to df_prices later without chained-assignment problems (which would
# otherwise go unnoticed because warnings are globally ignored in this notebook).
df_prices = df_prices[['timestamp', 'date', 'hour', 'price_area', 'price_sek']].copy()

# Insert new data; the return value of insert() is displayed as the cell output.
electricity_prices_fg.insert(df_prices, storage="online", wait=True)

Fetching electricity prices from 2025-12-09 to 2025-12-09 for SE3...
Fetched 24 hourly price records across 1 day(s)
2025-12-09 17:21:44,918 INFO: 	2 expectation(s) included in expectation_suite.
Validation succeeded.
Validation Report saved successfully, explore a summary at https://eu-west.cloud.hopsworks.ai:443/p/127/fs/74/fg/28


Uploading Dataframe: 100.00% |██████████| Rows 24/24 | Elapsed Time: 00:00 | Remaining Time: 00:00


Launching job: electricity_prices_1_offline_fg_materialization
Job started successfully, you can follow the progress at 
https://eu-west.cloud.hopsworks.ai:443/p/127/jobs/named/electricity_prices_1_offline_fg_materialization/executions
2025-12-09 17:21:56,550 INFO: Waiting for execution to finish. Current state: SUBMITTED. Final status: UNDEFINED
2025-12-09 17:22:02,796 INFO: Waiting for execution to finish. Current state: RUNNING. Final status: UNDEFINED
2025-12-09 17:23:54,090 INFO: Waiting for execution to finish. Current state: AGGREGATING_LOGS. Final status: SUCCEEDED
2025-12-09 17:23:54,265 INFO: Waiting for log aggregation to finish.
2025-12-09 17:24:02,990 INFO: Execution finished successfully.


Online data ingestion progress: 0.00% |          | Rows 0/24

(Job('electricity_prices_1_offline_fg_materialization', 'PYSPARK'),
 {
   "success": true,
   "results": [
     {
       "success": true,
       "expectation_config": {
         "expectation_type": "expect_column_values_to_be_between",
         "kwargs": {
           "column": "hour",
           "min_value": 0,
           "max_value": 23
         },
         "meta": {
           "expectationId": 20
         }
       },
       "result": {
         "element_count": 24,
         "missing_count": 0,
         "missing_percent": 0.0,
         "unexpected_count": 0,
         "unexpected_percent": 0.0,
         "unexpected_percent_total": 0.0,
         "unexpected_percent_nonmissing": 0.0,
         "partial_unexpected_list": []
       },
       "meta": {
         "ingestionResult": "INGESTED",
         "validationTime": "2025-12-09T04:21:44.000917Z"
       },
       "exception_info": {
         "raised_exception": false,
         "exception_message": null,
         "exception_traceback": null


In [None]:
# Fetch and insert tomorrow's day-ahead prices (available after ~13:00)
df_prices_tomorrow = util.get_tomorrow_electricity_prices(PRICE_AREA)

# Before the day-ahead prices are published the fetch may come back empty.
# Guard the column selection and the insert, in the same way the weather
# inserts in this notebook are guarded, instead of inserting zero rows and
# then reporting them as "inserted".
# NOTE(review): assumes the helper returns an empty DataFrame (not None and not
# an exception) when no prices are available — confirm against util.
if len(df_prices_tomorrow):
    df_prices_tomorrow = df_prices_tomorrow[['timestamp', 'date', 'hour', 'price_area', 'price_sek']]
    electricity_prices_fg.insert(df_prices_tomorrow, storage="online", wait=True)
    print(f"Inserted tomorrow's prices: {len(df_prices_tomorrow)} rows for {df_prices_tomorrow['date'].iloc[0].date()}")
else:
    print("No day-ahead prices for tomorrow available yet; nothing inserted.")


In [None]:
# Fetch the hourly weather forecast and insert tomorrow's rows
forecast_days = 2  # today + tomorrow
forecast_df = util.get_hourly_weather_forecast(
    latitude=LATITUDE,
    longitude=LONGITUDE,
    city=CITY,
    forecast_days=forecast_days,
)
# Normalise 'date' to pandas datetimes so the .dt accessor can be used below.
forecast_df['date'] = pd.to_datetime(forecast_df['date'])

# Only `import datetime` is in scope in this notebook, so timedelta has to be
# reached through the module; the bare name `timedelta` raises NameError.
tomorrow = today + datetime.timedelta(days=1)
forecast_tomorrow = forecast_df[forecast_df['date'].dt.date == tomorrow].copy()

# Skip the insert when the response contains no rows dated tomorrow.
if len(forecast_tomorrow):
    weather_hourly_fg.insert(forecast_tomorrow, storage="online", wait=True)
    print(f"Inserted forecast for tomorrow: {len(forecast_tomorrow)} rows for {tomorrow}")
else:
    print("No forecast rows for tomorrow found in response.")

# Replace/refresh yesterday's forecast with actual outcome
# NOTE(review): whether insert() overwrites the earlier forecast rows depends on
# the feature group's primary key — confirm against the feature group definition.
yesterday = today - datetime.timedelta(days=1)
actual_weather_yesterday = util.get_hourly_historical_weather(
    latitude=LATITUDE,
    longitude=LONGITUDE,
    start_date=str(yesterday),
    end_date=str(yesterday),
    city=CITY,
)
actual_weather_yesterday['date'] = pd.to_datetime(actual_weather_yesterday['date'])

# Skip the insert when no historical rows came back for yesterday.
if len(actual_weather_yesterday):
    weather_hourly_fg.insert(actual_weather_yesterday, storage="online", wait=True)
    print(f"Inserted actual weather for yesterday: {len(actual_weather_yesterday)} rows for {yesterday}")
else:
    print("No actual weather rows for yesterday.")


In [None]:
# Read the electricity_prices feature group back from Hopsworks; the result is
# sorted by timestamp and inspected later in the notebook.
historical_df = electricity_prices_fg.read()


Finished: Reading data from Hopsworks, using Hopsworks Feature Query Service (1.98s) 


In [18]:
# Put the price history in chronological order, renumber the rows from 0,
# and display the 24 most recent rows.
historical_df = (
    historical_df
    .sort_values(by="timestamp")
    .reset_index(drop=True)
)
historical_df.tail(24)

Unnamed: 0,timestamp,date,hour,price_area,price_sek
27179,2025-12-08 23:00:00,2025-12-08,23,SE3,0.39248
27180,2025-12-09 00:00:00,2025-12-09,0,SE3,0.4193
27181,2025-12-09 01:00:00,2025-12-09,1,SE3,0.24445
27182,2025-12-09 02:00:00,2025-12-09,2,SE3,0.23497
27183,2025-12-09 03:00:00,2025-12-09,3,SE3,0.25566
27184,2025-12-09 04:00:00,2025-12-09,4,SE3,0.47096
27185,2025-12-09 05:00:00,2025-12-09,5,SE3,0.70985
27186,2025-12-09 06:00:00,2025-12-09,6,SE3,0.89802
27187,2025-12-09 07:00:00,2025-12-09,7,SE3,1.05347
27188,2025-12-09 08:00:00,2025-12-09,8,SE3,1.11531


In [17]:
# Treat the stored timestamps as UTC and convert them to Stockholm local time.
# NOTE(review): assumes df_prices['timestamp'] is timezone-naive and expressed
# in UTC — confirm against util; tz_localize raises on tz-aware input.
ts_local = df_prices['timestamp'].dt.tz_localize('UTC').dt.tz_convert('Europe/Stockholm')

# assign() returns a new frame instead of writing columns into df_prices in
# place. df_prices is a column selection of the fetched frame, and in-place
# writes on such a selection trigger pandas' chained-assignment warning, which
# this notebook silences globally.
df_prices = df_prices.assign(
    date_local=ts_local.dt.date,
    hour_local=ts_local.dt.hour,
)

print(len(df_prices[df_prices['date_local'] == today]))  # should be 24

24
