# Feature Pipeline

### Imports

In [1]:
import requests
import pandas as pd

#ignore warnings
import warnings
warnings.filterwarnings('ignore')

### Parsing new data

#### Hourly weather measurements from Open-Meteo

In [2]:
# Earlier request covering 2022-01-01 to 2023-12-31, kept commented out for reference.
# NOTE(review): unlike the active URL it has no `timezone=auto`, so its timestamps are
# presumably not in local time — confirm before reusing it.
#url = ("https://archive-api.open-meteo.com/v1/archive?latitude=57.048&longitude=9.9187&start_date=2022-01-01&end_date=2023-12-31&hourly=temperature_2m,relative_humidity_2m,precipitation,rain,snowfall,weather_code,cloud_cover,wind_speed_10m,wind_gusts_10m")


In [3]:
# Open-Meteo archive request, split into adjacent literals (one per query section)
# so the location, date window and requested variables are easy to scan and edit.
url = (
    'https://archive-api.open-meteo.com/v1/archive'
    '?latitude=57.048&longitude=9.9187'
    '&start_date=2024-01-01&end_date=2024-04-08'
    '&hourly=temperature_2m,relative_humidity_2m,precipitation,rain,snowfall,'
    'weather_code,cloud_cover,wind_speed_10m,wind_gusts_10m'
    '&timezone=auto'
)

In [4]:
# Download the hourly weather archive.
# The timeout keeps the cell from hanging forever on a stalled connection, and
# raise_for_status() stops the pipeline on an HTTP error instead of letting an
# error payload surface later as a missing 'hourly' key.
response = requests.get(url, timeout=30)
response.raise_for_status()
print(response)

data = response.json()

# The 'hourly' entry of the payload is loaded directly into a DataFrame.
weather_df = pd.DataFrame(data['hourly'])

<Response [200]>


In [5]:
# Parse the timestamps first and derive the calendar date from the parsed values.
# Slicing the raw string (`.str[:10]`) only works while 'time' is still text, so the
# original cell raised on a second run; pd.to_datetime on an already-parsed column
# is a no-op, which makes this cell safe to re-execute.
weather_df['time'] = pd.to_datetime(weather_df['time'])

# Plain date objects (not timestamps), matching the previous `.dt.date` result.
weather_df['date'] = weather_df['time'].dt.date

# Move 'date' to the front; pop + insert works whether or not the column already existed.
weather_df.insert(0, 'date', weather_df.pop('date'))

In [6]:
# Preview the prepared weather frame ('date' first, then 'time' and the measurements).
weather_df

Unnamed: 0,date,time,temperature_2m,relative_humidity_2m,precipitation,rain,snowfall,weather_code,cloud_cover,wind_speed_10m,wind_gusts_10m
0,2024-01-01,2024-01-01 00:00:00,4.8,95,1.8,1.8,0.0,61,100,23.6,49.0
1,2024-01-01,2024-01-01 01:00:00,4.9,95,1.2,1.2,0.0,55,100,21.6,43.2
2,2024-01-01,2024-01-01 02:00:00,4.8,96,0.6,0.6,0.0,53,100,18.4,39.2
3,2024-01-01,2024-01-01 03:00:00,4.3,96,0.8,0.8,0.0,53,100,16.7,33.8
4,2024-01-01,2024-01-01 04:00:00,4.4,97,0.3,0.3,0.0,51,100,15.4,30.2
...,...,...,...,...,...,...,...,...,...,...,...
2371,2024-04-08,2024-04-08 19:00:00,14.9,70,0.0,0.0,0.0,3,100,1.3,15.1
2372,2024-04-08,2024-04-08 20:00:00,13.6,72,0.0,0.0,0.0,3,100,4.1,5.8
2373,2024-04-08,2024-04-08 21:00:00,12.6,77,0.0,0.0,0.0,3,100,4.0,6.1
2374,2024-04-08,2024-04-08 22:00:00,11.2,85,0.0,0.0,0.0,1,29,6.2,7.2


#### Hourly electricity spot prices (DK1) from Energi Data Service

In [7]:
# Energi Data Service request for the Elspotprices dataset, split into adjacent
# literals: time window, price-area filter (URL-encoded JSON) and sort order.
# NOTE(review): the recorded output ends at 2024-04-07T23:00, so `end` appears to be
# exclusive — this window stops one day before the weather request's end_date.
url = (
    'https://api.energidataservice.dk/dataset/Elspotprices'
    '?offset=0'
    '&start=2024-01-01T00:00&end=2024-04-08T00:00'
    '&filter=%7B%22PriceArea%22:[%22DK1%22]%7D'
    '&sort=HourUTC%20DESC'
)

In [8]:
# Fetch the spot-price records.
# The timeout prevents an indefinite hang, and raise_for_status() fails fast on an
# HTTP error instead of letting an error payload surface as a missing 'records' key.
electricity_response = requests.get(url, timeout=30)
electricity_response.raise_for_status()

data = electricity_response.json()

# The 'records' entry of the payload is loaded directly into a DataFrame.
electricity_df = pd.DataFrame(data['records'])

In [9]:
# Reshape the raw records in one chain:
#   1. add the price divided by 1000 (presumably DKK/MWh -> DKK/kWh, per the column name),
#   2. drop the columns that are no longer needed,
#   3. rename the local-time column to 'time',
#   4. cut the last three characters off each timestamp string
#      (the displayed result is minute-resolution text such as 2024-04-07T23:00).
electricity_df = (
    electricity_df
    .assign(SpotPriceDKK_KWH=lambda frame: frame['SpotPriceDKK'] / 1000)
    .drop(columns=['SpotPriceEUR', 'HourUTC', 'SpotPriceDKK'])
    .rename(columns={'HourDK': 'time'})
    .assign(time=lambda frame: frame['time'].astype(str).str[:-3])
)
electricity_df

Unnamed: 0,time,PriceArea,SpotPriceDKK_KWH
0,2024-04-07T23:00,DK1,0.31886
1,2024-04-07T22:00,DK1,0.34078
2,2024-04-07T21:00,DK1,0.35958
3,2024-04-07T20:00,DK1,0.35645
4,2024-04-07T19:00,DK1,0.34399
...,...,...,...
2346,2024-01-01T04:00,DK1,-0.00022
2347,2024-01-01T03:00,DK1,0.03086
2348,2024-01-01T02:00,DK1,0.19874
2349,2024-01-01T01:00,DK1,0.20978


In [10]:
# Parse the timestamps first and derive the calendar date from the parsed values.
# Slicing the raw string (`.str[:10]`) only works while 'time' is still text, so the
# original cell raised on a second run; pd.to_datetime on an already-parsed column
# is a no-op, which makes this cell safe to re-execute.
electricity_df['time'] = pd.to_datetime(electricity_df['time'])

# Plain date objects (not timestamps), matching the previous `.dt.date` result.
electricity_df['date'] = electricity_df['time'].dt.date

# Move 'date' to the front; pop + insert works whether or not the column already existed.
electricity_df.insert(0, 'date', electricity_df.pop('date'))
electricity_df

Unnamed: 0,date,time,PriceArea,SpotPriceDKK_KWH
0,2024-04-07,2024-04-07 23:00:00,DK1,0.31886
1,2024-04-07,2024-04-07 22:00:00,DK1,0.34078
2,2024-04-07,2024-04-07 21:00:00,DK1,0.35958
3,2024-04-07,2024-04-07 20:00:00,DK1,0.35645
4,2024-04-07,2024-04-07 19:00:00,DK1,0.34399
...,...,...,...,...
2346,2024-01-01,2024-01-01 04:00:00,DK1,-0.00022
2347,2024-01-01,2024-01-01 03:00:00,DK1,0.03086
2348,2024-01-01,2024-01-01 02:00:00,DK1,0.19874
2349,2024-01-01,2024-01-01 01:00:00,DK1,0.20978


### Connecting to Hopsworks Feature Store

In [11]:
# NOTE(review): this import sits mid-notebook; the other imports are in the first cell.
import hopsworks

# Log in to Hopsworks; the recorded output shows a successful connection to a project.
# NOTE(review): presumably credentials come from the environment or an interactive
# prompt — confirm before running unattended.
project = hopsworks.login()

# Handle to the project's feature store, used below to look up the feature groups.
fs = project.get_feature_store()

Connected. Call `.close()` to terminate connection gracefully.







Logged in to project, explore it here https://c.app.hopsworks.ai:443/p/550040
Connected. Call `.close()` to terminate connection gracefully.


In [12]:
# Look up the two existing feature groups (version 1) that receive the new rows.
weather_fg = fs.get_feature_group(name="weather_measurements", version=1)
electricity_fg = fs.get_feature_group(name="electricity_prices", version=1)

### Uploading new data to the Feature Store

In [13]:
# Insert the prepared weather rows into the weather feature group.
# The recorded output shows the upload followed by the launch of an offline
# materialization job.
weather_fg.insert(weather_df)

Uploading Dataframe: 100.00% |██████████| Rows 2376/2376 | Elapsed Time: 00:07 | Remaining Time: 00:00


Launching job: weather_measurements_1_offline_fg_materialization
Job started successfully, you can follow the progress at 
https://c.app.hopsworks.ai/p/550040/jobs/named/weather_measurements_1_offline_fg_materialization/executions


(<hsfs.core.job.Job at 0x13478ded0>, None)

In [14]:
# Insert the prepared electricity price rows into the electricity feature group.
# The recorded output shows the upload followed by the launch of an offline
# materialization job.
electricity_fg.insert(electricity_df)

Uploading Dataframe: 100.00% |██████████| Rows 2351/2351 | Elapsed Time: 00:06 | Remaining Time: 00:00


Launching job: electricity_prices_1_offline_fg_materialization
Job started successfully, you can follow the progress at 
https://c.app.hopsworks.ai/p/550040/jobs/named/electricity_prices_1_offline_fg_materialization/executions


(<hsfs.core.job.Job at 0x134728210>, None)