In [5]:
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [6]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

import time
import json
import requests
from datetime import datetime

# Notebook-wide display configuration (applies to every cell executed afterwards).
# Row limit is set to None, column limit to 100.
pd.set_option("display.max_rows", None)
pd.set_option("display.max_columns", 100)
# Render floats with two decimals in both pandas and NumPy output.
pd.options.display.float_format = '{:.2f}'.format
np.set_printoptions(formatter={'float': lambda x: "{0:0.2f}".format(x)})
# Activate seaborn's theme for all matplotlib figures.
sns.set_theme()

In [7]:
#I. PIPELINE

## Access the open energy data from Energinet (Denmark) through its public Energi Data Service API
def get_PowerSystem_data(timeout=(10, 300)):
    """Download one year of Danish power-system data and derive demand columns.

    Queries the ``PowerSystemRightNow`` dataset of the Energi Data Service API
    for the window ``now-P1Y`` .. ``now`` (sorted by ``Minutes1DK``), keeps
    every fifth record, replaces missing values with 0 and adds two columns:

    * ``Demand``     -- total generation + exchange sum - imbalance
    * ``Renewables`` -- solar + offshore wind + onshore wind

    Parameters
    ----------
    timeout : float or tuple, optional
        Passed to ``requests.get``; the default is a 10 s connect timeout and
        a 300 s read timeout so a stalled connection cannot hang the kernel.

    Returns
    -------
    pandas.DataFrame
        The down-sampled API records plus the ``Demand`` and ``Renewables``
        columns.

    Raises
    ------
    requests.HTTPError
        If the API does not answer with status code 200.
    ValueError
        If the response contains no records.
    """
    url = 'https://api.energidataservice.dk/dataset/PowerSystemRightNow?start=now-P1Y&end=now&sort=Minutes1DK'
    response = requests.get(url, timeout=timeout)

    # Fail here instead of printing and then crashing on a payload that has
    # no 'records' key.
    if response.status_code != 200:
        raise requests.HTTPError(
            f"Request failed with status code: {response.status_code}",
            response=response,
        )

    # Keep every fifth record to thin out the downloaded series.
    selected_records = response.json()['records'][::5]
    if not selected_records:
        raise ValueError("The API returned no records for the requested period.")

    df = pd.DataFrame(selected_records).fillna(0)

    total_generation = (
        df.ProductionGe100MW
        + df.ProductionLt100MW
        + df.SolarPower
        + df.OffshoreWindPower
        + df.OnshoreWindPower
    )

    # Sum of all interconnector exchange columns.
    # NOTE(review): treated as net imports (positive = import) -- confirm the
    # sign convention against the dataset documentation.
    net_imports = (
        df.Exchange_DK1_DE
        + df.Exchange_DK1_NL
        + df.Exchange_DK1_GB
        + df.Exchange_DK1_NO
        + df.Exchange_DK1_SE
        + df.Exchange_DK1_DK2
        + df.Exchange_DK2_DE
        + df.Exchange_DK2_SE
        + df.Exchange_Bornholm_SE
    )

    imbalance = df.ImbalanceDK1 + df.ImbalanceDK2

    df['Demand'] = total_generation + net_imports - imbalance
    df['Renewables'] = df.SolarPower + df.OffshoreWindPower + df.OnshoreWindPower

    return df

In [8]:
# Fetch the power-system data once; `data` is reused by the modelling cells below.
data = get_PowerSystem_data()
# Last expression of the cell: display the final five rows as a quick sanity check.
data.tail(5)

Unnamed: 0,Minutes1UTC,Minutes1DK,CO2Emission,ProductionGe100MW,ProductionLt100MW,SolarPower,OffshoreWindPower,OnshoreWindPower,Exchange_Sum,Exchange_DK1_DE,Exchange_DK1_NL,Exchange_DK1_GB,Exchange_DK1_NO,Exchange_DK1_SE,Exchange_DK1_DK2,Exchange_DK2_DE,Exchange_DK2_SE,Exchange_Bornholm_SE,aFRR_ActivatedDK1,aFRR_ActivatedDK2,mFRR_ActivatedDK1,mFRR_ActivatedDK2,ImbalanceDK1,ImbalanceDK2,Demand,Renewables
105006,2024-11-04T09:14:00,2024-11-04T10:14:00,156.0,1290.68,509.6,421.25,38.66,88.79,2650.84,-355.05,690.77,-228.61,1543.42,13.0,0.0,-69.89,1045.49,11.71,-0.28,-35.94,-539.26,-67.23,260.48,277.03,4462.31,548.7
105007,2024-11-04T09:19:00,2024-11-04T10:19:00,154.97,1292.53,497.89,435.3,47.53,90.76,2656.9,-384.26,690.77,-228.61,1581.73,-80.0,69.43,-69.99,1134.97,12.29,-7.85,-28.35,-528.4,-72.6,350.97,205.73,4533.64,573.59
105008,2024-11-04T09:24:00,2024-11-04T10:24:00,154.9,1295.79,499.79,440.0,38.37,90.3,2663.49,-392.2,690.77,-228.61,1581.55,-80.0,80.3,-69.91,1150.11,11.78,-0.34,-29.26,-520.0,-81.0,346.38,201.98,4559.68,568.67
105009,2024-11-04T09:29:00,2024-11-04T10:29:00,154.52,1289.15,499.47,441.55,41.88,89.21,2696.37,-378.66,690.77,-228.61,1581.36,-80.0,79.11,-69.94,1170.12,11.33,-32.04,-29.2,-520.0,-81.0,307.2,184.62,4644.92,572.64
105010,2024-11-04T09:34:00,2024-11-04T10:34:00,153.93,1285.88,494.59,446.31,49.78,92.24,2678.41,-385.06,690.77,-228.61,1581.18,-80.0,80.15,-69.9,1157.94,12.09,-20.2,-29.52,-520.0,-81.0,354.5,178.82,4594.04,588.33


In [None]:
#II. PREDICT DEMAND

from xgboost import XGBRegressor
from sklearn.metrics import root_mean_squared_error
from sklearn.model_selection import train_test_split, GridSearchCV

In [12]:
def predict_demand(data, window_size=10, n_steps=5):
    """Train one grid-searched XGBoost regressor per forecast step on ``Demand``.

    The ``Demand`` column is turned into sliding windows: each sample uses
    ``window_size`` consecutive values as features and the following
    ``n_steps`` values as targets. The first 80 % of the samples
    (chronological order) are used for the grid search, the remaining 20 %
    for the printed hold-out error of every step.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame with a numeric ``Demand`` column.
    window_size : int, optional
        Number of past observations used as features (default 10).
    n_steps : int, optional
        Number of future observations to predict (default 5).

    Returns
    -------
    list
        ``n_steps`` fitted estimators (``GridSearchCV.best_estimator_``); the
        model at index ``k`` predicts the value ``k + 1`` steps ahead.

    Raises
    ------
    ValueError
        If ``window_size``/``n_steps`` are not positive or the series is too
        short to build a usable train/test split.
    """
    if window_size < 1 or n_steps < 1:
        raise ValueError("window_size and n_steps must be positive integers.")

    # Work on a plain NumPy array so slicing is purely positional and cheap.
    series = np.asarray(data.Demand, dtype=float)

    ## Function to create features and labels
    def create_multi_step_features(values, window_size, n_steps):
        # "+ 1" keeps the last complete window/target pair, which the original
        # range bound silently dropped.
        n_samples = len(values) - window_size - n_steps + 1
        features = [values[i:i + window_size] for i in range(n_samples)]
        targets = [values[i + window_size:i + window_size + n_steps]
                   for i in range(n_samples)]
        return np.array(features), np.array(targets)

    n_samples = len(series) - window_size - n_steps + 1
    train_size = int(n_samples * 0.8)
    # GridSearchCV's default 5-fold CV needs at least 5 training samples, and
    # the hold-out part must not be empty.
    if train_size < 5 or train_size >= n_samples:
        raise ValueError(
            f"Demand series of length {len(series)} is too short for "
            f"window_size={window_size} and n_steps={n_steps}."
        )

    # Create the features and labels
    X, y = create_multi_step_features(series, window_size, n_steps)

    # Chronological split: no shuffling, the test set is the most recent data.
    X_train, X_test = X[:train_size], X[train_size:]
    y_train, y_test = y[:train_size], y[train_size:]

    param_grid = {
        'n_estimators': [50, 100, 150],
        'learning_rate': [0.01, 0.1, 0.2],
        'max_depth': [4, 6, 8],
        'subsample': [0.8, 1.0],
        'colsample_bytree': [0.8, 1.0]
    }

    print("Demand Forecast XGBoost results:")

    # NOTE(review): the default CV splitter is not time-aware; consider
    # passing cv=TimeSeriesSplit(...) so validation folds never precede their
    # training data.
    best_xgb_models = []
    for step in range(n_steps):
        xgb_model = XGBRegressor(objective='reg:squarederror')
        grid_search = GridSearchCV(estimator=xgb_model, param_grid=param_grid,
                                   scoring='neg_mean_squared_error', verbose=1, n_jobs=-1)

        # One independent model per horizon: column `step` of the target matrix.
        grid_search.fit(X_train, y_train[:, step])

        best_xgb_models.append(grid_search.best_estimator_)

        print(f"Best parameters for step {step+1}: {grid_search.best_params_}")

    # Shape (n_test_samples, n_steps): one column of predictions per model.
    y_pred = np.column_stack([model.predict(X_test) for model in best_xgb_models])

    for step in range(n_steps):
        # The metric is RMSE; the original label called it "Mean Squared Error".
        rmse = root_mean_squared_error(y_test[:, step], y_pred[:, step])
        print(f"Root Mean Squared Error for step {step+1}: {rmse}")

    print(" "*20)
    print("*"*20)

    return best_xgb_models

In [13]:
# Train the per-step demand models and persist the returned list with joblib.
from joblib import dump
# predict_demand runs a full grid search for every forecast step, so this cell is slow.
dump(predict_demand(data), 'XGBoost_regressor_demand.joblib')
print("Model saved successfully.")

Demand Forecast XGBoost results:
Fitting 5 folds for each of 108 candidates, totalling 540 fits
Best parameters for step 1: {'colsample_bytree': 1.0, 'learning_rate': 0.1, 'max_depth': 4, 'n_estimators': 100, 'subsample': 0.8}
Fitting 5 folds for each of 108 candidates, totalling 540 fits
Best parameters for step 2: {'colsample_bytree': 1.0, 'learning_rate': 0.1, 'max_depth': 4, 'n_estimators': 150, 'subsample': 1.0}
Fitting 5 folds for each of 108 candidates, totalling 540 fits
Best parameters for step 3: {'colsample_bytree': 1.0, 'learning_rate': 0.1, 'max_depth': 4, 'n_estimators': 150, 'subsample': 1.0}
Fitting 5 folds for each of 108 candidates, totalling 540 fits
Best parameters for step 4: {'colsample_bytree': 1.0, 'learning_rate': 0.1, 'max_depth': 4, 'n_estimators': 150, 'subsample': 0.8}
Fitting 5 folds for each of 108 candidates, totalling 540 fits
Best parameters for step 5: {'colsample_bytree': 1.0, 'learning_rate': 0.1, 'max_depth': 4, 'n_estimators': 150, 'subsample': 0.