In [1]:
%load_ext autoreload
%autoreload 2

In [None]:
import numpy as np
import pandas as pd

import time
import json
import requests
from datetime import datetime

pd.set_option("display.max_rows", None)
pd.set_option("display.max_columns", 100)
pd.options.display.float_format = '{:.2f}'.format
np.set_printoptions(formatter={'float': lambda x: "{0:0.2f}".format(x)})

In [3]:
#I. PIPELINE

## accessing the open energy data from Energinet Denmark, through their public API
def get_PowerSystem_data():

    url = 'https://api.energidataservice.dk/dataset/PowerSystemRightNow?start=now-P1Y&end=now&sort=Minutes1DK'
    response = requests.get(url)

    if response.status_code != 200:
        print("Request failed with status code:", response.status_code)

    selected_records = response.json()['records'][::5]
    df = pd.DataFrame(selected_records).fillna(0)

    total_generation = df.ProductionGe100MW + df.ProductionLt100MW + df.SolarPower + df.OffshoreWindPower + df.OnshoreWindPower

    net_imports = df.Exchange_DK1_DE + df.Exchange_DK1_NL + df.Exchange_DK1_GB + df.Exchange_DK1_NO + df.Exchange_DK1_SE + \
                    df.Exchange_DK1_DK2 + df.Exchange_DK2_DE + df.Exchange_DK2_SE + df.Exchange_Bornholm_SE

    imbalance = df.ImbalanceDK1 + df.ImbalanceDK2

    df['Demand'] = total_generation + net_imports - imbalance
    df['Renewables'] = df.SolarPower + df.OffshoreWindPower + df.OnshoreWindPower
    actionable = df['Demand'] - df['Renewables']

    return df

In [4]:
data = get_PowerSystem_data()
data.tail(5)

Unnamed: 0,Minutes1UTC,Minutes1DK,CO2Emission,ProductionGe100MW,ProductionLt100MW,SolarPower,OffshoreWindPower,OnshoreWindPower,Exchange_Sum,Exchange_DK1_DE,Exchange_DK1_NL,Exchange_DK1_GB,Exchange_DK1_NO,Exchange_DK1_SE,Exchange_DK1_DK2,Exchange_DK2_DE,Exchange_DK2_SE,Exchange_Bornholm_SE,aFRR_ActivatedDK1,aFRR_ActivatedDK2,mFRR_ActivatedDK1,mFRR_ActivatedDK2,ImbalanceDK1,ImbalanceDK2,Demand,Renewables
105006,2024-11-04T09:48:00,2024-11-04T10:48:00,146.36,1207.0,475.01,458.26,60.18,90.48,2758.88,-357.19,691.16,-228.61,1587.79,-10.0,34.38,-69.8,1133.75,11.78,-73.5,-37.82,-497.58,-79.82,276.12,197.48,4610.59,608.92
105007,2024-11-04T09:53:00,2024-11-04T10:53:00,140.16,1152.31,467.21,477.47,56.99,91.02,2809.65,-402.45,691.16,-228.61,1594.7,124.0,-46.74,-120.44,1139.31,11.98,-63.68,-45.6,-460.76,-76.64,378.55,191.62,4437.74,625.48
105008,2024-11-04T09:58:00,2024-11-04T10:58:00,133.19,1124.58,468.77,471.21,69.77,90.06,2794.14,-442.82,691.31,-258.73,1600.74,258.0,-131.19,-210.28,1144.35,11.57,-64.4,-45.69,-423.58,-73.82,353.04,200.55,4333.75,631.04
105009,2024-11-04T10:03:00,2024-11-04T11:03:00,127.18,1090.08,463.22,458.71,62.2,87.72,2858.39,-368.18,688.25,-311.42,1608.69,387.0,-210.03,-299.13,1141.68,11.5,42.4,-59.6,-284.99,-6.41,21.77,140.41,4648.11,608.63
105010,2024-11-04T10:08:00,2024-11-04T11:08:00,121.61,1047.32,446.41,448.76,63.1,76.89,2965.63,-313.57,688.25,-334.0,1618.28,523.0,-293.24,-388.19,1160.66,11.2,29.08,-37.87,-247.76,-3.64,-110.67,106.24,4759.3,588.75


In [None]:
#II. PREDICT RENEWABLE ENERGY PRODUCTION

##demand forecast

from xgboost import XGBRegressor
from sklearn.metrics import root_mean_squared_error
from sklearn.model_selection import train_test_split, GridSearchCV

In [None]:
def predict_renewables(data):

    series = data.Renewables

    window_size = 10
    n_steps = 5

    ## function to create features and labels
    def create_multi_step_features(data, window_size, n_steps):
        X, y = [], []
        for i in range(len(data) - window_size - n_steps):
            X.append(data[i:i + window_size])
            y.append(data[i + window_size:i + window_size + n_steps])
        return np.array(X), np.array(y)

    ## create the features and labels
    X, y = create_multi_step_features(series, window_size, n_steps)

    train_size = int(len(X) * 0.8)
    X_train, X_test = X[:train_size], X[train_size:]
    y_train, y_test = y[:train_size], y[train_size:]

    param_grid = {
        'n_estimators': [50, 100, 150],
        'learning_rate': [0.01, 0.1, 0.2],
        'max_depth': [4, 6, 8],
        'subsample': [0.8, 1.0],
        'colsample_bytree': [0.8, 1.0]
    }

    print("Renewables Forecast XGBoost results:")

    best_xgb_models = []
    for step in range(n_steps):
        xgb_model = XGBRegressor(objective='reg:squarederror')
        grid_search = GridSearchCV(estimator=xgb_model, param_grid=param_grid, 
                                   scoring='neg_mean_squared_error', verbose=1, n_jobs=-1)
        
        grid_search.fit(X_train, y_train[:, step])

        best_xgb_model = grid_search.best_estimator_
        best_xgb_models.append(best_xgb_model)

        print(f"Best parameters for step {step+1}: {grid_search.best_params_}")

    y_pred = []
    for best_xgb_model in best_xgb_models:
        y_pred.append(best_xgb_model.predict(X_test))

    y_pred = np.array(y_pred).T

    for step in range(n_steps):
        mse = root_mean_squared_error(y_test[:, step], y_pred[:, step])
        print(f"Mean Squared Error for step {step+1}: {mse}")

    print(" "*20)
    print("*"*20)

    return best_xgb_models

In [9]:
from joblib import dump
dump(predict_renewables(data), 'XGBoost_regressor_renewables.joblib')
print("Model saved successfully.")

Renewables Forecast XGBoost results:
Fitting 5 folds for each of 108 candidates, totalling 540 fits
Best parameters for step 1: {'colsample_bytree': 1.0, 'learning_rate': 0.1, 'max_depth': 4, 'n_estimators': 150, 'subsample': 0.8}
Fitting 5 folds for each of 108 candidates, totalling 540 fits
Best parameters for step 2: {'colsample_bytree': 1.0, 'learning_rate': 0.1, 'max_depth': 4, 'n_estimators': 150, 'subsample': 1.0}
Fitting 5 folds for each of 108 candidates, totalling 540 fits
Best parameters for step 3: {'colsample_bytree': 1.0, 'learning_rate': 0.1, 'max_depth': 6, 'n_estimators': 100, 'subsample': 1.0}
Fitting 5 folds for each of 108 candidates, totalling 540 fits
Best parameters for step 4: {'colsample_bytree': 1.0, 'learning_rate': 0.1, 'max_depth': 6, 'n_estimators': 150, 'subsample': 1.0}
Fitting 5 folds for each of 108 candidates, totalling 540 fits
Best parameters for step 5: {'colsample_bytree': 1.0, 'learning_rate': 0.1, 'max_depth': 6, 'n_estimators': 100, 'subsample'