# Forecasting With Workflow Presets

In [None]:
import logging

# Configure root logging at INFO level with a "[timestamp][level] message" layout,
# then create the module-level logger used by the cells below.
logging.basicConfig(level=logging.INFO, format="[%(asctime)s][%(levelname)s] %(message)s")
logger = logging.getLogger(__name__)

In [None]:

from openstef_beam.analysis.plots import ForecastTimeSeriesPlotter
from openstef_core.datasets import ForecastDataset
from openstef_core.testing import create_synthetic_forecasting_dataset
from openstef_core.types import LeadTime, Q
from openstef_models.integrations.mlflow import MLFlowStorage
from openstef_models.presets import ForecastingWorkflowConfig, create_forecasting_workflow

In [None]:
from huggingface_hub import hf_hub_download

# Hugging Face dataset repository to download from.
repo_id = "OpenSTEF/liander2024-energy-forecasting-benchmark"

# Local directory the files are saved into. Kept as a plain string because the
# loading cell builds file paths from it with string concatenation.
local_dir = "./liander_dataset"

# Repository-relative paths of the files to download.
files_to_download = [
    "load_measurements/mv_feeder/OS Gorredijk.parquet",
    "weather_forecasts_versioned/mv_feeder/OS Gorredijk.parquet",
    "EPEX.parquet",
    "profiles.parquet",
]

# Download each file into `local_dir`, reporting progress per file.
for filename in files_to_download:
    print(f"Downloading {filename}...")
    hf_hub_download(
        repo_id=repo_id,
        filename=filename,
        repo_type="dataset",
        local_dir=local_dir,
        # NOTE(review): newer huggingface_hub releases reportedly deprecate this
        # argument; confirm against the installed version before removing it.
        local_dir_use_symlinks=False,
    )
    # The check mark was previously mis-encoded (UTF-8 bytes read as cp1252).
    print(f"✓ {filename} downloaded")

print("\nAll files downloaded successfully!")


In [None]:
from openstef_core.datasets import TimeSeriesDataset, VersionedTimeSeriesDataset


# Read the four downloaded parquet files from `local_dir` as versioned datasets.
load_dataset = VersionedTimeSeriesDataset.read_parquet(f"{local_dir}/load_measurements/mv_feeder/OS Gorredijk.parquet")
weather_dataset = VersionedTimeSeriesDataset.read_parquet(
    f"{local_dir}/weather_forecasts_versioned/mv_feeder/OS Gorredijk.parquet"
)
epex_dataset = VersionedTimeSeriesDataset.read_parquet(f"{local_dir}/EPEX.parquet")
profiles_dataset = VersionedTimeSeriesDataset.read_parquet(f"{local_dir}/profiles.parquet")

# Combine the sources with mode="left" (load measurements listed first), then
# call select_version() on the combined dataset.
dataset = VersionedTimeSeriesDataset.concat(
    [
        load_dataset,
        weather_dataset,
        epex_dataset,
        profiles_dataset,
    ],
    mode="left",
).select_version()

# Last expression of the cell: the notebook displays the first rows.
dataset.data.head()

In [None]:
# TODO(review): the original statement ended in a dangling attribute access
# (`dataset.`), which is a SyntaxError. The intended selection/split is not
# visible, so the full dataset is used as a placeholder. The later cells shown
# here fit and predict on `dataset` directly and do not read `train_data`.
train_data = dataset

In [None]:
# Describe the forecasting setup first, then build the workflow from it.
workflow_config = ForecastingWorkflowConfig(
    model_id="gblinear_forecaster_v1",
    model="gblinear",
    # Single horizon parsed from the duration string "PT36H".
    horizons=[LeadTime.from_string("PT36H")],
    quantiles=[Q(0.5), Q(0.1), Q(0.9)],
    verbosity=1,
    # Column names the workflow should look up in the dataset.
    target_column="load",
    temperature_column="temperature_2m",
    relative_humidity_column="relative_humidity_2m",
    # No MLflow storage is passed for this example.
    mlflow_storage=None,
)
workflow = create_forecasting_workflow(config=workflow_config)

In [None]:
# Fit the workflow on the dataset and log whatever evaluation metrics come back.
logger.info("Starting model training")
result = workflow.fit(dataset)

# fit() may return None; only log metrics when a result is available.
if result is not None:
    logger.info("Full eval result:\n%s", result.metrics_full.to_dataframe())

# Test metrics are optional on the result, so they are guarded separately.
if result is not None and result.metrics_test is not None:
    logger.info("Test result:\n%s", result.metrics_test.to_dataframe())

In [None]:
# Run the fitted workflow on the dataset to produce a forecast.
logger.info("Starting forecasting")
forecast: ForecastDataset = workflow.predict(dataset)

# Print the last rows of the forecast data as a quick sanity check.
print(forecast.data.tail())

In [None]:
# Plot the measured load together with the forecast median and its quantiles.
# The figure is only displayed via fig.show(); this cell writes nothing to disk,
# so the log message no longer claims a forecast_plot.html file is stored.
logger.info("Showing forecast plot")
fig = (
    ForecastTimeSeriesPlotter()
    .add_measurements(measurements=dataset.select_version().data["load"])
    .add_model(model_name="gblinear", forecast=forecast.median_series, quantiles=forecast.quantiles_data)
    .plot()
)

fig.show()
