## Daily feature pipeline

### <span style='color:#ff5f27'> 📝 Imports </span>

In [1]:
import datetime
import time
import requests
import pandas as pd
import hopsworks
from functions import util
from functions import fetch_data
import json
import os
import warnings
warnings.filterwarnings("ignore")

In [None]:
# Log in to Hopsworks. The API key is taken from the HOPSWORKS_API_KEY
# environment variable, so it must be exported before this cell runs
# (os.environ[...] raises KeyError when the variable is missing).
project = hopsworks.login(
    project="ML_Project_Electricity",
    api_key_value=os.environ["HOPSWORKS_API_KEY"],
)

# Handle to the project's feature store; the cells below use it to look up
# the feature groups.
fs = project.get_feature_store()

print("Project name:", project.name)


2025-01-08 14:07:50,519 INFO: Initializing external client
2025-01-08 14:07:50,519 INFO: Base URL: https://c.app.hopsworks.ai:443
2025-01-08 14:07:50,519 INFO: Base URL: https://c.app.hopsworks.ai:443
2025-01-08 14:07:51,696 INFO: Python Engine initialized.

Logged in to project, explore it here https://c.app.hopsworks.ai:443/p/1207495
Project name: ML_Project_Electricity


### <span style="color:#ff5f27;"> 🔮 Get references to the Feature Groups </span>

In [6]:
# Look up version 1 of every feature group this pipeline reads from and
# writes to. The names are resolved in the order listed, and the results are
# unpacked into one handle per feature group.
sthlm_weather_fg, malmo_weather_fg, se3_fg, se4_fg = (
    fs.get_feature_group(name=fg_name, version=1)
    for fg_name in (
        'stockholm_weather',
        'malmo_weather',
        'se3_electricity_prices',
        'se4_electricity_prices',
    )
)

---

## Retrieve the most recent electricity price data (for tomorrow)

In [7]:
# Fetch tomorrow's electricity prices for both price areas through
# fetch_data.get_tomorrows_electricity_prices (SE3 first, then SE4).
se3_current_prices, se4_current_prices = (
    fetch_data.get_tomorrows_electricity_prices(price_area)
    for price_area in ('SE3', 'SE4')
)

# Preview the first rows of the SE4 result.
se4_current_prices.head()


Unnamed: 0,time,pricearea,spotpriceeur
0,2025-01-08 23:00:00+00:00,SE4,33.87
1,2025-01-09 00:00:00+00:00,SE4,31.84
2,2025-01-09 01:00:00+00:00,SE4,31.32
3,2025-01-09 02:00:00+00:00,SE4,31.89
4,2025-01-09 03:00:00+00:00,SE4,33.69


In [8]:
# Pull the full contents of each feature group into a pandas DataFrame.
# Read order: SE3 prices, SE4 prices, Stockholm weather, Malmo weather.
# NOTE(review): the two weather frames are not referenced again in the cells
# visible here - confirm whether those reads are still needed.
se3_df, se4_df, sthlm_weather_df, malmo_weather_df = (
    feature_group.read()
    for feature_group in (se3_fg, se4_fg, sthlm_weather_fg, malmo_weather_fg)
)

Finished: Reading data from Hopsworks, using Hopsworks Feature Query Service (2.02s) 
Finished: Reading data from Hopsworks, using Hopsworks Feature Query Service (1.20s) 
Finished: Reading data from Hopsworks, using Hopsworks Feature Query Service (1.78s) 
Finished: Reading data from Hopsworks, using Hopsworks Feature Query Service (1.48s) 


In [9]:
def merge_prices_with_rolling(history_df, new_prices_df):
    """Append freshly fetched prices to the history and add rolling averages.

    Parameters
    ----------
    history_df : pd.DataFrame
        Prices already stored in the feature group; must contain the
        'time' and 'spotpriceeur' columns.
    new_prices_df : pd.DataFrame
        Newly fetched prices with the same 'time' / 'spotpriceeur' columns.

    Returns
    -------
    pd.DataFrame
        A new frame sorted by 'time' with one row per timestamp and the
        columns 'spot_price_rolling' (mean over 24*7 rows) and
        'spot_price_rolling_3h' (mean over 3 rows) recomputed.
    """
    merged = (
        pd.concat([history_df, new_prices_df], axis=0)
        # The same hour can appear twice when this cell is re-run or when the
        # pipeline already ran today. Keep the last occurrence so the freshly
        # fetched row wins; otherwise duplicated hours would distort the
        # rolling windows and the later tail() slice.
        .drop_duplicates(subset='time', keep='last')
        .sort_values('time')
    )

    # Rolling average over the last 7 days (24 * 7 rows; assumes one row per
    # hour - TODO confirm against the fetch module).
    merged['spot_price_rolling'] = merged['spotpriceeur'].rolling(window=24*7).mean()

    # Rolling average over the last 3 rows (3 hours under the same assumption).
    merged['spot_price_rolling_3h'] = merged['spotpriceeur'].rolling(window=3).mean()

    return merged


# Merge the historical electricity prices with the most recent prices and
# recompute the rolling features for both price areas.
se3_df = merge_prices_with_rolling(se3_df, se3_current_prices)
se4_df = merge_prices_with_rolling(se4_df, se4_current_prices)


In [10]:
# Show the last rows of the SE3 frame; it was sorted by 'time' above, so
# these are the most recent prices together with their rolling averages.
se3_df.tail()

Unnamed: 0,time,pricearea,spotpriceeur,spot_price_rolling,spot_price_rolling_3h
19,2025-01-09 18:00:00+00:00,SE3,127.59,22.614167,134.183333
20,2025-01-09 19:00:00+00:00,SE3,119.99,23.247917,128.956667
21,2025-01-09 20:00:00+00:00,SE3,102.52,23.808036,116.7
22,2025-01-09 21:00:00+00:00,SE3,97.85,24.362738,106.786667
23,2025-01-09 22:00:00+00:00,SE3,68.53,24.746369,89.633333


## Insert the newly retrieved values into the feature groups

In [11]:
# Insert the new electricity prices into the feature store.
#
# se3_df / se4_df are sorted by time, so the freshly fetched rows sit at the
# end of each frame (assumes the fetched prices are newer than everything
# already stored). Slice by the number of rows actually fetched instead of a
# hard-coded 24: a day does not always have 24 hourly prices (daylight-saving
# changes give 23 or 25), and when nothing was fetched a fixed tail(24) would
# re-insert old history rows.
se3_new_rows = se3_df.tail(len(se3_current_prices))
se4_new_rows = se4_df.tail(len(se4_current_prices))

# Skip the insert entirely when there is nothing new to upload.
if not se3_new_rows.empty:
    se3_fg.insert(se3_new_rows)
if not se4_new_rows.empty:
    se4_fg.insert(se4_new_rows)

Uploading Dataframe: 100.00% |██████████| Rows 24/24 | Elapsed Time: 00:01 | Remaining Time: 00:00


Launching job: se3_electricity_prices_1_offline_fg_materialization
Job started successfully, you can follow the progress at 
https://c.app.hopsworks.ai:443/p/1207495/jobs/named/se3_electricity_prices_1_offline_fg_materialization/executions


Uploading Dataframe: 100.00% |██████████| Rows 24/24 | Elapsed Time: 00:01 | Remaining Time: 00:00


Launching job: se4_electricity_prices_1_offline_fg_materialization
Job started successfully, you can follow the progress at 
https://c.app.hopsworks.ai:443/p/1207495/jobs/named/se4_electricity_prices_1_offline_fg_materialization/executions


(Job('se4_electricity_prices_1_offline_fg_materialization', 'SPARK'), None)

## Retrieve fresh weather data

In [12]:
# Fetch the hourly weather forecast for each city (latitude, longitude) and
# discard every row that contains a missing value.
sthlm_forecast_df = fetch_data.get_hourly_weather_forecast(59.3294, 18.0687).dropna()  # Stockholm
malmo_forecast_df = fetch_data.get_hourly_weather_forecast(55.6059, 13.0007).dropna()  # Malmo

# Display the cleaned Malmo forecast.
malmo_forecast_df

Coordinates 59.32889938354492°N 18.072357177734375°E
Elevation 24.0 m asl
Timezone b'Europe/Berlin' b'CET'
Timezone difference to GMT+0 3600 s
Coordinates 55.60652542114258°N 13.002044677734375°E
Elevation 12.0 m asl
Timezone b'Europe/Berlin' b'CET'
Timezone difference to GMT+0 3600 s


Unnamed: 0,time,temperature,precipitation,cloud_cover,wind_speed_10m,date,sunshine_duration,weekday,month,hour
0,2025-01-07 23:00:00+00:00,3.524,0.0,100,34.200001,2025-01-07,0.000000,1,1,23
1,2025-01-08 00:00:00+00:00,3.874,0.0,100,36.360001,2025-01-08,2285.459717,2,1,0
2,2025-01-08 01:00:00+00:00,3.724,0.0,100,37.079998,2025-01-08,2285.459717,2,1,1
3,2025-01-08 02:00:00+00:00,3.674,0.0,100,35.639999,2025-01-08,2285.459717,2,1,2
4,2025-01-08 03:00:00+00:00,3.624,0.0,100,34.919998,2025-01-08,2285.459717,2,1,3
...,...,...,...,...,...,...,...,...,...,...
140,2025-01-13 19:00:00+00:00,4.100,0.1,100,27.609911,2025-01-13,0.000000,0,1,19
141,2025-01-13 20:00:00+00:00,4.100,0.1,100,28.916763,2025-01-13,0.000000,0,1,20
142,2025-01-13 21:00:00+00:00,4.150,0.1,100,29.869154,2025-01-13,0.000000,0,1,21
143,2025-01-13 22:00:00+00:00,4.350,0.0,100,29.871325,2025-01-13,0.000000,0,1,22


## Insert weather forecast data into the weather feature groups

In [13]:
# Insert the new weather forecast into the feature store.
# Each forecast frame (rows with missing values already dropped) is written
# to its city's weather feature group: Stockholm first, then Malmo.
sthlm_weather_fg.insert(sthlm_forecast_df)
malmo_weather_fg.insert(malmo_forecast_df)

Uploading Dataframe: 100.00% |██████████| Rows 145/145 | Elapsed Time: 00:01 | Remaining Time: 00:00


Launching job: stockholm_weather_1_offline_fg_materialization
Job started successfully, you can follow the progress at 
https://c.app.hopsworks.ai:443/p/1207495/jobs/named/stockholm_weather_1_offline_fg_materialization/executions


Uploading Dataframe: 100.00% |██████████| Rows 145/145 | Elapsed Time: 00:01 | Remaining Time: 00:00


Launching job: malmo_weather_1_offline_fg_materialization
Job started successfully, you can follow the progress at 
https://c.app.hopsworks.ai:443/p/1207495/jobs/named/malmo_weather_1_offline_fg_materialization/executions


(Job('malmo_weather_1_offline_fg_materialization', 'SPARK'), None)

## END