In [None]:
! pip install openstef==3.4.72 jupyter==1.0



In Google Colab, the numpy version has to be set to 1.26.4 and the pandas version has to be set to 1.5.3 for compatibility reasons.

In [None]:
from IPython import get_ipython

# Check if running in Google Colab by probing whether `google.colab` can be imported.
try:
  import google.colab  # noqa: F401 -- imported only to test for its presence
  IN_COLAB = True
except ImportError:
  # Catch only the failed import: a bare `except` would also swallow
  # KeyboardInterrupt/SystemExit and hide unrelated errors.
  IN_COLAB = False

# Workshop part 3 | Learn how to perform a backtest
In the third part of this workshop, we will perform a backtest for the same location as the first two parts.

The learning points are:
- What a backtest is and how it works on a high level;
- Hands-on experience with evaluating a model using a backtest;
- Being able to understand the results of a backtest.

A backtest is the evaluation of the model on historical data. Essentially, it is a way of testing how OpenSTEF would have performed if it had been used in the past. 

In [13]:
import pandas as pd 
import openstef
from openstef.data_classes.model_specifications import ModelSpecificationDataClass
from openstef.data_classes.prediction_job import PredictionJobDataClass 
from openstef.pipeline.train_create_forecast_backtest import train_model_and_forecast_back_test
import openstef.metrics.metrics as openstef_metrics

# Route every pandas `.plot()` call through plotly.
pd.set_option("plotting.backend", "plotly")

## Define the prediction job
As in workshop parts 1 and 2, a prediction job has to be defined. As we are making a backtest for the same location, we can use the exact same prediction job.

In [14]:
# Describe the training/prediction task as a 'prediction_job'. The settings are
# the same as the ones used in the first exercise.
pj = PredictionJobDataClass(
    id=288,
    model="xgb",
    quantiles=[0.10, 0.30, 0.50, 0.70, 0.90],
    forecast_type="demand",
    lat=52.0,
    lon=5.0,
    horizon_minutes=15,
    resolution_minutes=15,
    name="workshop_exercise_3",
    save_train_forecasts=True,
)

# The model specifications are linked to the prediction job through its id.
modelspecs = ModelSpecificationDataClass(id=pj["id"])

If you are working with Google Colab, just upload the data in the 'Files' section of Google Colab. You can find this in the left toolbar, the fifth item from the top.

In [15]:
# In Colab the uploaded file sits under /content; otherwise it is read from the
# repository's data folder. The first column becomes a parsed datetime index.
input_data = pd.read_csv(
    "/content/input_data_sun_heavy.csv" if IN_COLAB else "../data/input_data_sun_heavy.csv",
    index_col=0,
    parse_dates=True,
)

## Perform the backtest
The prediction job and input data have been provided above, so now a backtest can be performed. 

As you can see, one of the inputs of the pipeline is 'training_horizons', which is set to 0.25 and 47.0. This means that the backtest is made predicting both 0.25 hours (which is 15 minutes) and 47 hours into the future. 

Exercise: 
- How many pipelines do you need to train a model and make a backtest? 

In [16]:
# Number of folds passed on to the backtest pipeline.
n_folds = 1

# Run the backtest pipeline for both training horizons (0.25 and 47.0) and keep
# the forecast, the model and the data splits it returns.
(
    forecast,
    model,
    train_data,
    validation_data,
    test_data,
) = train_model_and_forecast_back_test(
    pj,
    modelspecs=modelspecs,
    input_data=input_data,
    training_horizons=[0.25, 47.0],
    n_folds=n_folds,
)

[2m2025-04-16 15:07:48[0m [[32m[1minfo     [0m] [1mFound 22 values of constant load (repeated values), converted to NaN value.[0m [36mcleansing_step[0m=[35mrepeated_values[0m [36mfrac_values[0m=[35m0.0006278359635855141[0m [36mnum_values[0m=[35m22[0m [36mpj_id[0m=[35m288[0m
[2m2025-04-16 15:07:48[0m [[32m[1minfo     [0m] [1mRemoved 22 NaN values         [0m [36mnum_removed_values[0m=[35m22[0m
[0]	validation_0-rmse:1.49706	validation_1-rmse:1.49304
[1]	validation_0-rmse:1.17673	validation_1-rmse:1.17598
[2]	validation_0-rmse:0.96552	validation_1-rmse:0.98380
[3]	validation_0-rmse:0.82959	validation_1-rmse:0.85964
[4]	validation_0-rmse:0.74240	validation_1-rmse:0.79956
[5]	validation_0-rmse:0.68852	validation_1-rmse:0.76081
[6]	validation_0-rmse:0.64753	validation_1-rmse:0.74064
[7]	validation_0-rmse:0.61690	validation_1-rmse:0.72323
[8]	validation_0-rmse:0.59580	validation_1-rmse:0.71132
[9]	validation_0-rmse:0.57680	validation_1-rmse:0.70236
[10]	valid

## Evaluate the results 
Below, the results from the backtest are plotted. With these plots, answer the questions of the exercise below. 

Exercise: answer the following questions: 
- When is the model uncertain? Why? 
- What difference do you see between the horizons? 

Bonus: look at the differences between the two time horizons using metrics. You can use the built-in metrics package of OpenSTEF. See the documentation website [here](https://openstef.github.io/openstef/openstef.metrics.html). For example, look at the difference in mean absolute error under the plot below.


In [17]:
# Columns drawn in every figure: the five quantiles, the realised values and the forecast.
plot_columns = [
    'quantile_P10', 'quantile_P30', 'quantile_P50', 'quantile_P70', 'quantile_P90',
    'realised', 'forecast',
]

# Iterating a bare `set` yields the horizons in arbitrary order; sort them so the
# figures always appear from the shortest to the longest horizon.
for horizon in sorted(set(forecast.horizon)):
    fig = forecast.loc[forecast.horizon == horizon, plot_columns].plot(
        title=f"Horizon: {horizon}")
    # Quantiles above P10: thin green lines, each filled down to the previous trace
    # so that the quantiles together form a shaded band.
    fig.update_traces(
         line=dict(color="green", width=1), fill='tonexty', fillcolor='rgba(0, 255, 0, 0.1)',
         selector=lambda x: 'quantile' in x.name and x.name != 'quantile_P10')
    # P10 is the lower edge of the band, so it gets the line style but no fill.
    fig.update_traces(
         line=dict(color="green", width=1),
         selector=lambda x: 'quantile_P10' == x.name)
    # Realised values in red.
    fig.update_traces(
         line=dict(color="red", width=2),
         selector=lambda x: 'realised' in x.name)
    # Forecast in blue.
    fig.update_traces(
         line=dict(color="blue", width=2),
         selector=lambda x: 'forecast' in x.name)
    fig.show()

In [18]:
# Print the mean absolute error per horizon. The horizons are sorted because
# iterating a bare `set` yields them in arbitrary order.
for horizon in sorted(set(forecast.horizon)):
    # Select the rows of this horizon once and reuse them for both columns.
    horizon_rows = forecast.loc[forecast.horizon == horizon]
    mean_absolute_error = openstef_metrics.mae(horizon_rows['realised'], horizon_rows['forecast'])
    print(horizon, mean_absolute_error)


0.25 0.2088470445691373
47.0 0.3733396021995016
