# Example to train a model
Using the openstef pipelines

In [None]:
import pandas as pd
import IPython
from openstef.pipeline.train_model import train_model_pipeline
from openstef.pipeline.create_forecast import create_forecast_pipeline
from openstef.metrics import metrics
from openstef.data_classes.prediction_job import PredictionJobDataClass

In [None]:
# Define the properties of the training/prediction run. We call this a
# 'prediction_job'.
pj = PredictionJobDataClass(
    id=287,
    model="xgb",
    quantiles=[0.05, 0.1, 0.3, 0.5, 0.7, 0.9, 0.95],
    name="backtest",
    forecast_type="demand",
    lat=52.0,
    lon=5.0,
    horizon_minutes=47 * 60,
    description="description",
    resolution_minutes=15,
    hyper_params={},  # Note, this should become optional
    feature_names=None,  # Note, this should become optional
)

# Load input data
input_data = pd.read_csv(
    "data/get_model_input_pid_287.csv", index_col="index", parse_dates=True
)

# Split in training and forecasting data. The two slices must be
# complementary: the model is trained on everything except the tail and is
# then asked to forecast exactly that held-out tail.
n_forecast_rows = 200  # ~ 48 hours
train_data = input_data.iloc[:-n_forecast_rows, :]  # everything except last 200 rows
# Take an independent copy: the 'load' column of this frame is overwritten
# before forecasting, and assigning into a slice of `input_data` would
# trigger pandas' chained-assignment warning.
to_forecast_data = input_data.iloc[-n_forecast_rows:, :].copy()  # last 200 rows

In [None]:
# Preview the first rows of the training set.
train_data.head()

In [None]:
# Preview the first rows of the data that is passed to the forecast pipeline.
to_forecast_data.head()

# Train a model
Train a model using the high-level pipeline. Store the model and the reports on the training process in `./trained_models`.

In [None]:
# Train a model for the prediction job `pj` on the training slice.
# Both the MLflow tracking URI and the artifact folder point at
# ./trained_models, so the model and the training reports end up there.
# NOTE(review): check_old_model_age=False presumably forces training even if a
# recent model already exists — confirm against the openstef documentation.
models = train_model_pipeline(
    pj,
    train_data,
    check_old_model_age=False,
    mlflow_tracking_uri="./trained_models",
    artifact_folder="./trained_models",
    )

You can find the trained model in ./trained_models, along with reports on the training process

In [None]:
## Inspect local files
# Embed the HTML reports found under ./trained_models/<prediction job id>/
# as iframes, one after the other.
report_files = ("Predictor0.25.html", "Predictor47.0.html", "weight_plot.html")
IPython.display.HTML(
    "".join(
        f"<iframe src=./trained_models/{pj['id']}/{report_file} width=800 height=400></iframe>"
        for report_file in report_files
    )
)

In [None]:
import numpy as np

# Keep the measured load aside so it can be compared with the forecast later,
# then blank it out in the frame that is passed to the forecast pipeline.
realised = to_forecast_data["load"].copy()
to_forecast_data["load"] = np.nan

# Make a forecast, using the model tracked under ./trained_models
forecast = create_forecast_pipeline(
    pj,
    to_forecast_data,
    mlflow_tracking_uri="./trained_models",
)

In [None]:
import cufflinks

# Switch cufflinks to offline mode before plotting.
cufflinks.go_offline()

# Put the realised load back next to the forecast and plot both series.
forecast["load"] = realised
load_vs_forecast = forecast[["load", "forecast"]]
load_vs_forecast.iplot()

In [None]:
# Display the full forecast frame, including the realised 'load' column that
# the plotting cell adds.
forecast

In [None]:
# Series used by every metric below.
actual_load = forecast["load"]
predicted_load = forecast["forecast"]
predicted_stdev = forecast["stdev"]

# Metrics that only compare the realised load with the point forecast.
point_metrics = {
    "RMSE": metrics.rmse,
    "bias": metrics.bias,
    "NSME": metrics.nsme,
    "MAE": metrics.mae,
    "rMAE": metrics.r_mae,
    "rMAE_highest": metrics.r_mae_highest,
    "rMNE_highest": metrics.r_mne_highest,
    "rMPE_highest": metrics.r_mpe_highest,
    "rMAE_lowest": metrics.r_mae_lowest,
}
default_metrics = {
    label: metric(actual_load, predicted_load)
    for label, metric in point_metrics.items()
}

# Spread of the realised load, followed by the two stdev-based metrics
# (computed with one and with two times the forecast stdev).
default_metrics["load_range"] = actual_load.max() - actual_load.min()
default_metrics["frac_in_1sdev"] = metrics.frac_in_stdev(
    actual_load, predicted_load, predicted_stdev
)
default_metrics["frac_in_2sdev"] = metrics.frac_in_stdev(
    actual_load, predicted_load, 2 * predicted_stdev
)

# One row per metric.
pd.DataFrame.from_dict(default_metrics, orient="index")