# Train a model and make a forecast
In this example, a model is trained (the same as in example notebook 1) and thereafter a forecast is made with the trained model. Finally, the forecast is evaluated using visualisation and the built-in metrics of OpenSTEF.

In [None]:
import pandas as pd
from IPython.display import IFrame
import numpy as np
from openstef.pipeline.train_model import train_model_pipeline
from openstef.pipeline.create_forecast import create_forecast_pipeline
from openstef.metrics import metrics
from openstef.data_classes.prediction_job import PredictionJobDataClass

# Plotting configuration: DataFrame/Series .plot() calls are drawn with plotly
import plotly.io as pio

pd.set_option("plotting.backend", "plotly")
# Renderer(s) plotly uses by default when a figure is displayed
pio.renderers.default = "plotly_mimetype+notebook"

## Prepare for training & forecast
Before a model can be trained, the specifications and data need to be defined. The specifications of the model are defined in the prediction job (pj), where for example the machine learning model, latitude, longitude and forecast horizon are specified. Furthermore, the data has to be retrieved from the csv file containing load, weather and energy market data.

In [None]:
# A 'prediction job' bundles the settings used for both training and forecasting.
prediction_job_settings = {
    "id": 287,
    "model": 'xgb',
    "quantiles": [0.05, 0.1, 0.3, 0.5, 0.7, 0.9, 0.95],
    "name": "backtest",
    "forecast_type": "demand",
    "lat": 52.0,
    "lon": 5.0,
    "horizon_minutes": 47 * 60,
    "description": "description",
    "resolution_minutes": 15,
    "hyper_params": {},
    "feature_names": None,
}
pj = PredictionJobDataClass(**prediction_job_settings)

# Read the model input; the 'index' column becomes the (date-parsed) index.
raw_input = pd.read_csv(
    'data/get_model_input_pid_287.csv',
    index_col='index',
    parse_dates=True,
)

# The final 200 rows of the file are empty, so they are cut off.
empty_tail_rows = 200
input_data = raw_input.iloc[:-empty_tail_rows]

# Hold out the last 200 remaining rows for forecasting and train on everything before them.
# NOTE(review): 200 rows is roughly two days if the data is at the 15-minute
# resolution configured above - confirm against the csv.
holdout_rows = 200
test_indices = input_data.iloc[-holdout_rows:].index
train_data = input_data.iloc[:-holdout_rows]

In [None]:
# Show the first rows of the training set as a quick sanity check
training_preview = train_data.head()
display(training_preview)

## Train a model
Train a model using the high-level pipeline. Store the model and reports on the training process in ./mlflow_trained_models.

In [None]:
# Options for the training run. The model store and the report artifacts both
# live in folders relative to this notebook.
# NOTE(review): check_old_model_age=False presumably forces training even when a
# recent model already exists - confirm in the OpenSTEF documentation.
training_options = {
    "check_old_model_age": False,
    "mlflow_tracking_uri": "./mlflow_trained_models",
    "artifact_folder": "./mlflow_artifacts",
}

models = train_model_pipeline(pj, train_data, **training_options)

You can find the trained model in ./mlflow_trained_models, along with reports on the training process.

In [None]:
# Inspect the training reports that were written to ./mlflow_artifacts/<pj id>/.
# A notebook cell only renders its LAST expression automatically, so every report
# is passed to display() explicitly; without it only the weight plot would show up.
display(IFrame('./mlflow_artifacts/{}/Predictor0.25.html'.format(pj['id']), width=900, height=400))
display(IFrame('./mlflow_artifacts/{}/Predictor47.0.html'.format(pj['id']), width=800, height=400))
display(IFrame('./mlflow_artifacts/{}/weight_plot.html'.format(pj['id']), width=800, height=400))


## Visual Studio Code has difficulties with displaying HTML files. If you are working with VSC and are not able to inspect the plots, uncomment the code below
## to open the plots in your browser.
# import webbrowser
# webbrowser.open(r'.\mlflow_artifacts\{}\Predictor0.25.html'.format(pj['id']))
# webbrowser.open(r'.\mlflow_artifacts\{}\Predictor47.0.html'.format(pj['id']))
# webbrowser.open(r'.\mlflow_artifacts\{}\weight_plot.html'.format(pj['id']))

## Make a forecast
The ``create_forecast_pipeline`` is used to make a forecast using the model trained above.

In [None]:
# Work on a private copy of the input so the original frame stays untouched.
to_forecast_data = input_data.copy()

# Keep the measured load of the hold-out period for the evaluation further down ...
realised = input_data.loc[test_indices, 'load'].copy()
# ... and blank it out in the copy that is handed to the forecast pipeline.
to_forecast_data.loc[test_indices, 'load'] = np.nan

# Run the forecast with the model stored in the local MLflow folder.
forecast_options = {"mlflow_tracking_uri": "./mlflow_trained_models"}
forecast = create_forecast_pipeline(pj, to_forecast_data, **forecast_options)

# Put the realised load next to the forecast (values are aligned on the index).
forecast['load'] = realised

## Evaluate the results 
Below, the results of the forecast can be evaluated by means of a visualisation and of the built-in metrics of the OpenSTEF package.

In [None]:
# Draw the forecast and the realised load in a single figure
forecast_vs_realised = forecast[['forecast', 'load']]
forecast_vs_realised.plot()

In [None]:
# Score the forecast with the metrics that ship with OpenSTEF.
realised_load = forecast["load"]
predicted_load = forecast["forecast"]
predicted_stdev = forecast["stdev"]

# Metrics that are all called as metric(realised, forecast), listed in output order.
point_metrics = {
    "RMSE": metrics.rmse,
    "bias": metrics.bias,
    "NSME": metrics.nsme,
    "MAE": metrics.mae,
    "rMAE": metrics.r_mae,
    "rMAE_highest": metrics.r_mae_highest,
    "rMNE_highest": metrics.r_mne_highest,
    "rMPE_highest": metrics.r_mpe_highest,
    "rMAE_lowest": metrics.r_mae_lowest,
}
default_metrics = {
    label: metric(realised_load, predicted_load)
    for label, metric in point_metrics.items()
}

# Span of the realised load, followed by the two metrics that also take the
# forecast's standard deviation (once as-is, once doubled).
default_metrics["load_range"] = realised_load.max() - realised_load.min()
default_metrics["frac_in_1sdev"] = metrics.frac_in_stdev(
    realised_load, predicted_load, predicted_stdev
)
default_metrics["frac_in_2sdev"] = metrics.frac_in_stdev(
    realised_load, predicted_load, 2 * predicted_stdev
)

# One row per metric; left as the last expression so the table is rendered.
pd.DataFrame.from_dict(default_metrics, orient="index")