In [None]:
! pip install openstef==3.4.72 jupyter==1.0



In Google Colab, numpy has to be pinned to version 1.26.4 and pandas to version 1.5.3 for compatibility reasons. If Google Colab warns that you should restart the session, you can cancel the warning and continue.

In [None]:
from IPython import get_ipython

# Detect whether the notebook runs in Google Colab: the `google.colab` package
# can only be imported inside a Colab runtime.
try:
    import google.colab  # noqa: F401 -- imported only to probe for the Colab runtime
    IN_COLAB = True
except ImportError:
    # Only a failed import means "not Colab"; any other error should surface
    # instead of being silently swallowed by a bare `except`.
    IN_COLAB = False

if IN_COLAB:
    shell = get_ipython()
    # get_ipython() returns None outside an interactive IPython session.
    if shell is not None:
        # Pin numpy/pandas to the versions this workshop requires on Colab.
        shell.system('pip install numpy==1.26.4 pandas==1.5.3 --force-reinstall')

# Workshop part 2 | Learn how to make a forecast
In this second part of the workshop, we will use the model trained in the first part and make a forecast with it. 

Note: if you were not able to train the model in the first part, we have trained one for you. It is in this folder: ``mlflow_trained_models``. It should automatically work in this tutorial.  

The learning points are:
- Hands-on experience with using a trained model; 
- What data is required to make a forecast;
- Hands-on experience using the forecast pipeline;
- How the model gets automatically loaded;
- How the predictions compare to the measurements.

In [38]:
# Import required packages.
import pandas as pd 
import numpy as np 

from openstef.data_classes.prediction_job import PredictionJobDataClass
from openstef.pipeline.train_model import train_model_pipeline
from openstef.pipeline.create_forecast import create_forecast_pipeline
import openstef
# Make pandas' .plot() calls draw with plotly by default.
pd.options.plotting.backend = "plotly"

## Define the prediction job
The same as in workshop part 1, a prediction job has to be defined. As we are making a forecast for the model we trained in part 1, we can use the exact same prediction job. 

In [39]:
# The 'prediction_job' bundles the properties used for training and forecasting.
# It is the same job as in the first exercise.
pj = PredictionJobDataClass(
    id=288,
    model="xgb",
    quantiles=[0.10, 0.30, 0.50, 0.70, 0.90],
    forecast_type="demand",
    lat=52.0,
    lon=5.0,
    horizon_minutes=2880,
    resolution_minutes=15,
    name="workshop_exercise_2",
    save_train_forecasts=True,
)

## Prepare the input data
Some further preparation of the input data is required before making a forecast: the data is split into a train and a test data set. 

Exercise: 
- Why do we split the dataset into train and test? 
- Why do we set the 'load' (the realised values) to nan (unknown) for the 'to_forecast_data'? 

If you are working with Google Colab, just upload the data in the 'Files' section on Google Colab. You can find this at the left toolbar, the fifth item from the top. 

In [40]:
# On Colab the uploaded CSV is read from /content; otherwise from the ../data folder.
csv_path = (
    "/content/input_data_sun_heavy.csv"
    if IN_COLAB
    else "../data/input_data_sun_heavy.csv"
)
input_data = pd.read_csv(csv_path, index_col=0, parse_dates=True)

# Hold out the final 192 rows for testing and train on everything before them.
n_test_rows = 192
train_data = input_data.iloc[:-n_test_rows, :]
test_data = input_data.iloc[-n_test_rows:, :]

In [41]:
# Timestamps of the held-out period for which a forecast will be made.
forecast_index = test_data.index

# Keep the measured load of that period so it can be compared with the forecast.
realised = input_data.loc[forecast_index, "load"].copy(deep=True)

# Work on a deep copy so `input_data` stays intact, and clear the load for the
# period to forecast: those values are treated as unknown.
to_forecast_data = input_data.copy(deep=True)
to_forecast_data.loc[forecast_index, "load"] = np.nan

In [None]:
# Train a model on the training split and store it under ./mlflow_trained_models.
# On Google Colab, storing and retrieving the model from the previous workshop is
# more difficult, so it is trained again here. On your own device this step is not
# needed, as OpenSTEF is able to store and automatically retrieve your trained model.
training_options = dict(
    # NOTE(review): presumably forces training regardless of the age of an
    # existing model -- confirm against the OpenSTEF documentation.
    check_old_model_age=False,
    mlflow_tracking_uri="./mlflow_trained_models",
    artifact_folder="./mlflow_artifacts",
)

(
    train_data_model,
    validation_data_model,
    test_data_model,
) = train_model_pipeline(pj, train_data, **training_options)

[2m2025-04-16 15:16:59[0m [[32m[1minfo     [0m] [1mModel successfully loaded with MLflow[0m
[2m2025-04-16 15:16:59[0m [[32m[1minfo     [0m] [1mFound 22 values of constant load (repeated values), converted to NaN value.[0m [36mcleansing_step[0m=[35mrepeated_values[0m [36mfrac_values[0m=[35m0.0006312950156388993[0m [36mnum_values[0m=[35m22[0m [36mpj_id[0m=[35m288[0m
[2m2025-04-16 15:16:59[0m [[32m[1minfo     [0m] [1mRemoved 22 NaN values         [0m [36mnum_removed_values[0m=[35m22[0m
[0]	validation_0-rmse:1.45413	validation_1-rmse:1.39974
[1]	validation_0-rmse:1.13780	validation_1-rmse:1.11427
[2]	validation_0-rmse:0.93262	validation_1-rmse:0.93406
[3]	validation_0-rmse:0.79670	validation_1-rmse:0.82264
[4]	validation_0-rmse:0.71373	validation_1-rmse:0.75898
[5]	validation_0-rmse:0.65689	validation_1-rmse:0.71956
[6]	validation_0-rmse:0.61923	validation_1-rmse:0.69671
[7]	validation_0-rmse:0.59182	validation_1-rmse:0.68115
[8]	validation_0-rmse:0

## Make the prediction
Now that the prediction job has been defined, a model has been trained and the input data is prepared, a forecast can be made. 

Exercise: 
- What input do you need to make a forecast?
- How long did it take to make a forecast?

Bonus: look up the correct pipeline on the OpenSTEF [website](https://openstef.github.io/openstef/user_guides.html).




In [43]:
# Folder in which the model was stored in the last exercise.
mlflow_tracking_uri = "./mlflow_trained_models"

# Arguments: the prediction job, the input data whose load is cleared for the
# period to forecast, and the location of the stored model.
forecast = create_forecast_pipeline(pj, to_forecast_data, mlflow_tracking_uri)

[2m2025-04-16 15:17:22[0m [[32m[1minfo     [0m] [1mModel successfully loaded with MLflow[0m
[2m2025-04-16 15:17:22[0m [[32m[1minfo     [0m] [1mFound 214 values of constant load (repeated values), converted to NaN value.[0m [36mcleansing_step[0m=[35mrepeated_values[0m [36mfrac_values[0m=[35m0.0061071316457863645[0m [36mnum_values[0m=[35m214[0m [36mpj_id[0m=[35m288[0m
[2m2025-04-16 15:17:23[0m [[32m[1minfo     [0m] [1mPostproces in preparation of storing[0m



overflow encountered in exp



# Inspect the results
Now that the forecast has been made, the results can be analysed. 

Exercise: answer the following questions 
- Look at the results, when is the model accurate and when is it less accurate? Why?
- Look at the two weather features plotted, do you see correlation? 

In [44]:
# Show the first rows of the forecast. As the last expression of the cell it is
# rendered with the rich DataFrame display, without relying on the `display`
# name that only IPython injects.
forecast.head()

Unnamed: 0,forecast,tAhead,stdev,quantile_P10,quantile_P30,quantile_P50,quantile_P70,quantile_P90,pid,customer,description,type,algtype
2023-12-30 00:15:00+00:00,2.489165,-11365.0,0.070266,2.399115,2.452318,2.489165,2.526013,2.579215,288,workshop_exercise_2,,demand,/Users/marnix.van.lieshout/Code/STEF/OpenSTEF/...
2023-12-30 00:30:00+00:00,2.122453,-11364.75,0.070266,2.032403,2.085605,2.122453,2.159301,2.212503,288,workshop_exercise_2,,demand,/Users/marnix.van.lieshout/Code/STEF/OpenSTEF/...
2023-12-30 00:45:00+00:00,2.12613,-11364.5,0.070266,2.03608,2.089282,2.12613,2.162977,2.21618,288,workshop_exercise_2,,demand,/Users/marnix.van.lieshout/Code/STEF/OpenSTEF/...
2023-12-30 01:00:00+00:00,2.037521,-11364.25,0.067613,1.950871,2.002064,2.037521,2.072977,2.12417,288,workshop_exercise_2,,demand,/Users/marnix.van.lieshout/Code/STEF/OpenSTEF/...
2023-12-30 01:15:00+00:00,2.037521,-11364.0,0.067613,1.950871,2.002064,2.037521,2.072977,2.12417,288,workshop_exercise_2,,demand,/Users/marnix.van.lieshout/Code/STEF/OpenSTEF/...


In [45]:
# Plot the forecast next to the realised load of the held-out period.
fig_forecast_realised = pd.concat([forecast["forecast"], realised], axis=1).plot()
fig_forecast_realised.update_layout(
    xaxis_title="Timestamp",
    yaxis_title="Load [MW]",
)
# .show() renders the figure and returns None; wrapping it in display() only
# adds a stray "None" to the cell output.
fig_forecast_realised.show()

None

In [46]:
# Look at the normalized plots of both the radiation and forecast, do you recognize any patterns?

# Each series is scaled by its own maximum so both share one axis. Series.max()
# skips NaN values, whereas the builtin max() can return NaN depending on order.
radiation_normalized = test_data["radiation"] / test_data["radiation"].max()
forecast_normalized = forecast["forecast"] / forecast["forecast"].max()

fig_forecast_radiation = pd.concat(
    [radiation_normalized, forecast_normalized], axis=1
).plot()
fig_forecast_radiation.update_layout(
    xaxis_title="Timestamp",
    yaxis_title="Normalized values",
)
# .show() renders the figure and returns None; wrapping it in display() only
# adds a stray "None" to the cell output.
fig_forecast_radiation.show()

None

In [47]:
# Compare the normalized windspeed with the normalized forecast. Each series is
# scaled by its own maximum so both share one axis. Series.max() skips NaN
# values, whereas the builtin max() can return NaN depending on order.
windspeed_normalized = test_data["windspeed"] / test_data["windspeed"].max()
forecast_normalized = forecast["forecast"] / forecast["forecast"].max()

fig_forecast_windspeed = pd.concat(
    [windspeed_normalized, forecast_normalized], axis=1
).plot()
fig_forecast_windspeed.update_layout(
    xaxis_title="Timestamp",
    yaxis_title="Normalized values",
)
fig_forecast_windspeed.show()

## Alter the input data 
In the code below, the radiation input data is divided by ten, after which a forecast is made with this new input data. The prediction job and trained model stay the same. Thus, the same model is used with one tenth of the sunshine as input.

Exercise: answer the following question: 
- What happens to the forecast when the radiation is divided by ten? Why? 


In [48]:
# Divide the radiation data by ten (scale it by 0.1). A copy is used so the
# original forecast input stays untouched.
to_forecast_data_rad = to_forecast_data.copy()
to_forecast_data_rad["radiation"] = 0.1 * to_forecast_data["radiation"]

# Make a forecast with this new input data, using the same prediction job and
# the same stored model location as before.
mlflow_tracking_uri = "./mlflow_trained_models"

forecast_rad = create_forecast_pipeline(
    pj,
    to_forecast_data_rad,
    mlflow_tracking_uri,
)

[2m2025-04-16 15:17:23[0m [[32m[1minfo     [0m] [1mModel successfully loaded with MLflow[0m
[2m2025-04-16 15:17:23[0m [[32m[1minfo     [0m] [1mFound 214 values of constant load (repeated values), converted to NaN value.[0m [36mcleansing_step[0m=[35mrepeated_values[0m [36mfrac_values[0m=[35m0.0061071316457863645[0m [36mnum_values[0m=[35m214[0m [36mpj_id[0m=[35m288[0m
[2m2025-04-16 15:17:25[0m [[32m[1minfo     [0m] [1mPostproces in preparation of storing[0m



overflow encountered in exp



In [49]:
# Inspect the results: compare the normalized radiation with the normalized
# forecasts made from the full radiation and from the radiation scaled by 0.1.
# Each series is scaled by its own maximum. Series.max() skips NaN values,
# whereas the builtin max() can return NaN depending on order.
radiation_forecast_comparison = pd.DataFrame(
    test_data["radiation"] / test_data["radiation"].max()
)

radiation_forecast_comparison["forecast_with_full_radiation"] = (
    forecast["forecast"] / forecast["forecast"].max()
)
# forecast_rad was made with one tenth of the radiation, so the label says
# "tenth" rather than "half".
radiation_forecast_comparison["forecast_with_tenth_radiation"] = (
    forecast_rad["forecast"] / forecast_rad["forecast"].max()
)

fig_radiation_forecast_comparison = radiation_forecast_comparison.plot()
fig_radiation_forecast_comparison.update_layout(
    xaxis_title="Timestamp",
    yaxis_title="Normalized values",
)
# .show() renders the figure and returns None; wrapping it in display() only
# adds a stray "None" to the cell output.
fig_radiation_forecast_comparison.show()

None

## Bonus: Dashboard
Did you know that OpenSTEF has an elaborate dashboard which shows you everything you want to know about your forecast? Check out the dashboard documentation [here](https://raw.githack.com/OpenSTEF/.github/main/profile/html/openstef_dashboard_doc.html).

Which different in- and output components do you see in this dashboard? 
