## Load imports

In [47]:
import os
import time
from datetime import datetime, timedelta, date

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import hopsworks
from entsoe import EntsoePandasClient

#### Helper functions (timestamp)

In [48]:
# # functions for replacing date and time with timestamp (seconds since 1970-01-01)

# def entsoe_timestamp_2_time(x):
#     dt_obj = datetime.strptime(str(x), '%Y-%m-%d %H:%M:%S')
#     dt_obj = dt_obj.timestamp() * 1000
#     return int(dt_obj)

# def weather_timestamp_2_time(x, i):
#     dt_obj = datetime.strptime(str(x), '%Y-%m-%d %H:%M:%S')
#     dt_obj = dt_obj + timedelta(hours=i)
#     dt_obj = dt_obj.timestamp() * 1000

#     return int(dt_obj)

## Fetch & Parse data

In [77]:
## Day to fetch, as YYYYMMDD (prediction and actual data are available for it).
# Pinned to a past date so earlier days can be retrieved; for a live run use
# yesterday instead:
#   date_from = (datetime.now() - timedelta(days=1)).date().strftime('%Y%m%d')
date_from = "20230102"

# The query window ends on the following day.
date_to = f"{datetime.strptime(date_from, '%Y%m%d') + timedelta(days=1):%Y%m%d}"

date_from, date_to

('20230102', '20230103')

### Entsoe API

In [78]:
# ENTSO-E client. The API token is read from the ENTSOE_API_KEY environment
# variable so that it is never stored in the notebook; a missing variable
# fails immediately with a KeyError instead of an opaque API error later.
# NOTE(review): the token that used to be hard-coded in this cell has been
# exposed and should be revoked and re-issued.
client = EntsoePandasClient(api_key=os.environ["ENTSOE_API_KEY"])

# Query window, expressed in Stockholm local time, and the area code that is
# passed to every ENTSO-E query below.
start = pd.Timestamp(date_from, tz='Europe/Stockholm')
end = pd.Timestamp(date_to, tz='Europe/Stockholm')
country_code = 'SE_3'

In [79]:
## Query ENTSO-E for the selected area and time window

# Day-ahead prices
df_day_price = client.query_day_ahead_prices(
    country_code,
    start=start,
    end=end,
)

# Generation per production type
df_generation_per_prod = client.query_generation(
    country_code,
    start=start,
    end=end,
    psr_type=None,
)

# Actual load (consumption)
df_load = client.query_load(
    country_code,
    start=start,
    end=end,
)

In [80]:
# Combine the three ENTSO-E results on their shared time index: generation
# first, then the price series (named "day_ahead_price"), then the load.
df_entsoe = (
    df_generation_per_prod
    .join(df_day_price.rename("day_ahead_price"))
    .join(df_load)
)

In [81]:
# Turn the time index into a regular "DateTime" column ...
df_entsoe_clean = df_entsoe.reset_index().rename(columns={'index': 'DateTime'})

# ... and store it as an int64 epoch timestamp in milliseconds
# (the raw int64 values are nanoseconds, hence the division by 10^6).
df_entsoe_clean['DateTime'] = df_entsoe_clean['DateTime'].values.astype('int64') // 1_000_000


In [82]:
# DateTime holds epoch milliseconds. For the 2023-01-02 window the first row,
# 1672614000000, is 2023-01-01 23:00 UTC, i.e. midnight Stockholm local time.
df_entsoe_clean.head() # Stockholm was at GMT+1 on this date

Unnamed: 0,DateTime,Fossil Gas,Hydro Water Reservoir,Nuclear,Other,Solar,Wind Onshore,day_ahead_price,Actual Load
0,1672614000000,0.0,597.0,5798.0,557.0,0.0,869.0,57.91,9632.0
1,1672617600000,0.0,578.0,5797.0,583.0,0.0,930.0,51.67,9491.0
2,1672621200000,0.0,558.0,5796.0,578.0,0.0,926.0,52.86,9388.0
3,1672624800000,0.0,553.0,5798.0,579.0,0.0,912.0,44.16,9290.0
4,1672628400000,0.0,552.0,5796.0,585.0,0.0,871.0,50.08,9256.0


### SMHI

In [83]:
import json
from urllib.request import urlopen
from pandas import json_normalize

In [84]:
## Fetch observations from SMHI (parameter 1, station 71420, latest months).
# The "value" column of this feed is what the notebook later uses as temperature.
# NOTE(review): the "latest-months" period presumably only reaches a few months
# back; a much older date_from would leave the filtered frame empty - confirm.
url = "https://opendata-download-metobs.smhi.se/api/version/latest/parameter/1/station/71420/period/latest-months/data.json"

# Parse the JSON payload inside a context manager so the HTTP response is
# closed as soon as it has been read (a bare urlopen() leaves it open).
with urlopen(url) as response:
    data_json = json.load(response)

# One row per observation, flattened from the "value" list of the payload.
df_smhi_data = json_normalize(data_json['value'])

# First and last ENTSO-E timestamps (epoch ms) bound the day of interest.
timeseries_from = df_entsoe_clean["DateTime"].iloc[0]
timeseries_to = df_entsoe_clean["DateTime"].iloc[-1]

# Keep only the observations inside that interval (both bounds inclusive).
df_smhi_data = df_smhi_data.loc[df_smhi_data['date'].between(timeseries_from, timeseries_to)]

# reset_index() keeps the old row labels as an "index" column; it is dropped
# together with the other unused columns when the final frame is built.
df_smhi_data = df_smhi_data.reset_index().rename(columns={'date': 'DateTime'})


In [85]:
# Sanity check of the filtered SMHI frame: row count and column dtypes.
# Note that "value" is still an object column here; it is cast to float when
# the final feature frame is built.
df_smhi_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 24 entries, 0 to 23
Data columns (total 4 columns):
 #   Column    Non-Null Count  Dtype 
---  ------    --------------  ----- 
 0   index     24 non-null     int64 
 1   DateTime  24 non-null     int64 
 2   value     24 non-null     object
 3   quality   24 non-null     object
dtypes: int64(2), object(2)
memory usage: 896.0+ bytes


## Combine & clean final data

In [86]:
# Join ENTSO-E and SMHI rows on the shared epoch-ms timestamp, drop the
# columns that are not used as features, cast the SMHI reading to float and
# give the remaining columns their feature names.
df_feature_data = (
    df_entsoe_clean
    .merge(df_smhi_data, how='inner', on='DateTime')
    .drop(columns=["Fossil Gas", "index", "quality"])
    .astype({"value": float})
    .rename(columns={
        "Hydro Water Reservoir": "hydro_water_reservoir",
        "Wind Onshore": "wind_onshore",
        "Actual Load": "total_load",
        "value": "temperature",
    })
)
df_feature_data.head()

Unnamed: 0,DateTime,hydro_water_reservoir,Nuclear,Other,Solar,wind_onshore,day_ahead_price,total_load,temperature
0,1672614000000,597.0,5798.0,557.0,0.0,869.0,57.91,9632.0,7.4
1,1672617600000,578.0,5797.0,583.0,0.0,930.0,51.67,9491.0,7.5
2,1672621200000,558.0,5796.0,578.0,0.0,926.0,52.86,9388.0,7.4
3,1672624800000,553.0,5798.0,579.0,0.0,912.0,44.16,9290.0,7.7
4,1672628400000,552.0,5796.0,585.0,0.0,871.0,50.08,9256.0,7.6


## Add to feature group

In [87]:
# Final look at the merged feature frame before it is inserted into the
# feature group below.
df_feature_data

Unnamed: 0,DateTime,hydro_water_reservoir,Nuclear,Other,Solar,wind_onshore,day_ahead_price,total_load,temperature
0,1672614000000,597.0,5798.0,557.0,0.0,869.0,57.91,9632.0,7.4
1,1672617600000,578.0,5797.0,583.0,0.0,930.0,51.67,9491.0,7.5
2,1672621200000,558.0,5796.0,578.0,0.0,926.0,52.86,9388.0,7.4
3,1672624800000,553.0,5798.0,579.0,0.0,912.0,44.16,9290.0,7.7
4,1672628400000,552.0,5796.0,585.0,0.0,871.0,50.08,9256.0,7.6
5,1672632000000,597.0,5796.0,638.0,0.0,830.0,70.73,9402.0,7.5
6,1672635600000,691.0,5796.0,745.0,0.0,797.0,102.56,10154.0,7.5
7,1672639200000,840.0,5797.0,794.0,0.0,754.0,138.19,11148.0,7.1
8,1672642800000,882.0,5795.0,861.0,0.0,638.0,145.98,11597.0,7.2
9,1672646400000,911.0,5795.0,830.0,0.0,515.0,147.05,11628.0,6.8


In [72]:
# NOTE(review): hopsworks is already imported in the first cell; this repeat
# import is harmless but redundant.
import hopsworks

# Log in to Hopsworks and get a handle to the project's feature store.
project = hopsworks.login() 
fs = project.get_feature_store() 

Connection closed.
Connected. Call `.close()` to terminate connection gracefully.

Logged in to project, explore it here https://c.app.hopsworks.ai:443/p/4247
Connected. Call `.close()` to terminate connection gracefully.


In [88]:
# Look up version 1 of the feature group, creating it if it does not exist yet.
new_electricity_data_fg = fs.get_or_create_feature_group(
    name="new_electricity_data_fg",
    version=1,
)

In [89]:
# Upload the merged rows to the feature group. As the captured output shows,
# the call also launches an offline backfill job on Hopsworks.
new_electricity_data_fg.insert(df_feature_data)

Uploading Dataframe: 100.00% |██████████| Rows 24/24 | Elapsed Time: 00:01 | Remaining Time: 00:00


Launching offline feature group backfill job...
Backfill Job started successfully, you can follow the progress at 
https://c.app.hopsworks.ai/p/4247/jobs/named/new_electricity_data_fg_1_offline_fg_backfill/executions


(<hsfs.core.job.Job at 0x7fad00bc7670>, None)

### Modal script for future daily features
For retrieving daily data through scheduled scripts, Modal is used: the following function is deployed to Modal and scheduled to run once per day (`modal.Period(days=1)`).

In [None]:
import os
import modal
    
# Set to True to run the pipeline in-process instead of on Modal.
LOCAL = False

if not LOCAL:
    stub = modal.Stub()
    # "scikit-learn" is the real distribution name; the "sklearn" alias on PyPI
    # is deprecated and pip refuses to install it, which breaks the image build.
    # NOTE(review): the pipeline functions below rely on an ENTSO-E client;
    # confirm whether entsoe-py also has to be installed in this image.
    image = modal.Image.debian_slim().pip_install(
        ["hopsworks==3.0.4", "joblib", "seaborn", "scikit-learn", "dataframe-image"]
    )

    @stub.function(
        image=image,
        schedule=modal.Period(days=1),
        secret=modal.Secret.from_name("abyel-hopsworks-secret"),
    )
    def f():
        """Modal entry point: run the feature pipeline `g` on the daily schedule."""
        g()

def get_entsoe_data():
    """Fetch ENTSO-E prices, generation and load and return them as one frame.

    Reads the module-level ``client``, ``country_code``, ``start`` and ``end``.

    Returns:
        pandas.DataFrame: the generation frame joined with a
        ``day_ahead_price`` column and the load frame, with the time index
        moved into a ``DateTime`` column holding epoch milliseconds.
    """
    # Day-ahead prices, generation per production type and actual load.
    df_day_price = client.query_day_ahead_prices(country_code, start=start, end=end)
    df_generation_per_prod = client.query_generation(country_code, start=start, end=end, psr_type=None)
    df_load = client.query_load(country_code, start=start, end=end)

    # Join everything on the shared time index.
    df_entsoe = df_generation_per_prod.join(df_day_price.rename("day_ahead_price"))
    df_entsoe = df_entsoe.join(df_load)

    # Move the index into a "DateTime" column and convert it from
    # nanoseconds to milliseconds since the epoch.
    df_entsoe_clean = df_entsoe.reset_index()
    df_entsoe_clean = df_entsoe_clean.rename(columns={'index': 'DateTime'})
    df_entsoe_clean['DateTime'] = df_entsoe_clean.DateTime.values.astype('int64') // 10 ** 6

    # The frame was previously built and then discarded, so callers got None.
    return df_entsoe_clean

def get_prediction_data():
    """
    Fetches the recent prediction and actual data for electricity price and weather.

    This is still a placeholder. The previous body was left over from another
    template: it built nothing and returned an undefined name, so every call
    failed with a NameError. It now fails explicitly instead.

    Raises:
        NotImplementedError: always, until the fetch logic is written.
    """
    raise NotImplementedError(
        "get_prediction_data() is still a placeholder: fetching the electricity "
        "price and weather data has not been implemented yet."
    )


def g():
    """Feature pipeline step: fetch the newest rows and add them to the feature group.

    Logs in to Hopsworks, looks up version 1 of the
    ``electricity_data_updated_fg`` feature group, obtains the new rows from
    ``get_prediction_data()`` and inserts them without waiting for the
    ingestion job to finish.
    """
    import hopsworks

    project = hopsworks.login()
    fs = project.get_feature_store()

    electricity_data_fg = fs.get_feature_group(name="electricity_data_updated_fg", version=1)

    # get_prediction_data() takes no arguments; the earlier call passed an
    # undefined `passenger_id` and could never succeed.
    new_electricity_df = get_prediction_data()

    # Preview the rows that are about to be inserted.
    print(new_electricity_df.head(5))

    # Insert the freshly fetched rows. Previously the feature group object
    # itself was handed to its own insert() and the new frame was never used.
    electricity_data_fg.insert(new_electricity_df, write_options={"wait_for_job": False})

if __name__ == "__main__":
    # Go through the Modal stub unless LOCAL asks for an in-process run.
    if not LOCAL:
        with stub.run():
            f()
    else:
        g()

### Old code for uploading model

In [None]:
# Old cell kept for reference: same login sequence as in the
# "Add to feature group" section above.
import hopsworks

# As the captured output shows, login() asked for an API key interactively here.
project = hopsworks.login() 

# Handle to the project's feature store.
fs = project.get_feature_store() 

Copy your Api Key (first register/login): https://c.app.hopsworks.ai/account/api/generated

Paste it here: ··········
Connected. Call `.close()` to terminate connection gracefully.

Logged in to project, explore it here https://c.app.hopsworks.ai:443/p/4247
Connected. Call `.close()` to terminate connection gracefully.




Connected. Call `.close()` to terminate connection gracefully.


In [None]:
# `mr` was never defined anywhere in this notebook, so this cell failed with a
# NameError on a fresh kernel; get the model registry from the logged-in
# project first.
mr = project.get_model_registry()

# Register the model's metadata in the registry.
# NOTE(review): the model is named/described as an LSTM but registered through
# the sklearn flavour - confirm this is intended.
model = mr.sklearn.create_model(
    name="lstm_model",
    description="LSTM test.",
)

In [None]:
# Upload the pickled model file to the registry entry created above.
# NOTE(review): absolute path, presumably from a Colab session - confirm the
# file exists in the environment where this is run.
model.save('/content/first_model.pickle')

  0%|          | 0/6 [00:00<?, ?it/s]

Model created, explore it at https://c.app.hopsworks.ai:443/p/4247/models/lstm_model/1


Model(name: 'lstm_model', version: 1)