# Model Comparison

In [25]:
import mlflow
from mlflow.tracking import MlflowClient
import pandas as pd


## First: Start the MLflow server for logging the experiments

Run the following command: 

PowerShell:

```powershell
mlflow server `
    --backend-store-uri sqlite:///mlflow.db `
    --default-artifact-root ./mlartifacts/
```


Bash / Git Bash / WSL / Linux / macOS:

```bash
mlflow server \
  --backend-store-uri sqlite:///mlflow.db \
  --default-artifact-root ./mlartifacts
```


In [14]:
# Point MLflow at the local tracking server; the `mlflow server` command
# shown above must already be running at this address.
mlflow.set_tracking_uri("http://127.0.0.1:5000")

## Load the best models (LSTM, Bayesian LSTM, Prophet) and performance metrics

In [56]:
# Summary table with one row per model. The loading cells assign rows
# positionally, so the column order here must match the order of the values
# they append. Horizon columns follow the pattern RMSE_h<horizon>.
perfomance_df = pd.DataFrame(
    columns=[
        "Model",
        "RMSE_Overall",
        "MAE_Overall",
        "RMSE_h1",
        "RMSE_h4",
        "RMSE_h12",
        "RMSE_h24",
        "RMSE_h48",
        "RMSE_h96",
    ]
)

In [57]:
# MLflow run IDs whose logged models and metrics are loaded in the cells
# below, listed in the order those cells use them.
BAYESIAN_LSTM_RUN_ID = "2ccd3a12be4d4ec484be5af5fc8ab4e3"
LSTM_RUN_ID = "9de69e1bb0844ca29f3267bca6f7674e"
PROPHET_RUN_ID = "73f7dd056c9c4b38907b476f7bf47615"

In [59]:
mlflow.set_experiment("load_forecasting_bayesian_lstm")

# Logged models of this run, ordered by ascending `val_nll` so the first
# entry is the one with the lowest value.
ranked_checkpoints = mlflow.search_logged_models(
    filter_string=f"source_run_id='{BAYESIAN_LSTM_RUN_ID}'",
    order_by=[{"field_name": "metrics.val_nll", "ascending": True}],
    output_format="list",
)
if not ranked_checkpoints:
    # Fail with an actionable message instead of a bare IndexError on [0].
    raise LookupError(
        f"No logged models found for run {BAYESIAN_LSTM_RUN_ID!r}; "
        "check the run ID and that the MLflow tracking server is reachable."
    )

best_checkpoint = ranked_checkpoints[0]

baysian_lstm = mlflow.pytorch.load_model(best_checkpoint.model_uri) # pyright: ignore

# Fetch the run once and read every metric from the same dict.
run = mlflow.get_run(run_id=BAYESIAN_LSTM_RUN_ID)
metrics = run.data.metrics

rmse = metrics["test_rmse_unscaled"]
mae = metrics["test_mae_unscaled"]
rmse_h1 = metrics["rmse_h1"]
rmse_h4 = metrics["rmse_h4"]
rmse_h12 = metrics["rmse_h12"]
rmse_h24 = metrics["rmse_h24"]
rmse_h48 = metrics["rmse_h48"]
rmse_h96 = metrics["rmse_h96"]

# Drop any row left by a previous execution of this cell so that re-running
# it replaces the entry instead of appending a duplicate. The index is reset
# so that len(perfomance_df) is always an unused row label.
perfomance_df = perfomance_df[perfomance_df["Model"] != "bayesian_lstm"].reset_index(drop=True)
perfomance_df.loc[len(perfomance_df)] = [
    "bayesian_lstm", rmse, mae, rmse_h1, rmse_h4, rmse_h12, rmse_h24, rmse_h48, rmse_h96,
]

Downloading artifacts:   0%|          | 0/9 [00:00<?, ?it/s]

In [61]:
mlflow.set_experiment("load_forecasting_vanilla_lstm")

# Logged models of this run, ordered by ascending `val_mse` so the first
# entry is the one with the lowest value.
ranked_checkpoints = mlflow.search_logged_models(
    filter_string=f"source_run_id='{LSTM_RUN_ID}'",
    order_by=[{"field_name": "metrics.val_mse", "ascending": True}],
    output_format="list",
)
if not ranked_checkpoints:
    # Fail with an actionable message instead of a bare IndexError on [0].
    raise LookupError(
        f"No logged models found for run {LSTM_RUN_ID!r}; "
        "check the run ID and that the MLflow tracking server is reachable."
    )

best_checkpoint = ranked_checkpoints[0]

lstm = mlflow.pytorch.load_model(best_checkpoint.model_uri) # pyright: ignore

# Fetch the run once and read every metric from the same dict.
run = mlflow.get_run(run_id=LSTM_RUN_ID)
metrics = run.data.metrics

rmse = metrics["test_rmse_unscaled"]
mae = metrics["test_mae_unscaled"]
rmse_h1 = metrics["rmse_h1"]
rmse_h4 = metrics["rmse_h4"]
rmse_h12 = metrics["rmse_h12"]
rmse_h24 = metrics["rmse_h24"]
rmse_h48 = metrics["rmse_h48"]
rmse_h96 = metrics["rmse_h96"]

# Drop any row left by a previous execution of this cell so that re-running
# it replaces the entry instead of appending a duplicate. The index is reset
# so that len(perfomance_df) is always an unused row label.
perfomance_df = perfomance_df[perfomance_df["Model"] != "lstm"].reset_index(drop=True)
perfomance_df.loc[len(perfomance_df)] = [
    "lstm", rmse, mae, rmse_h1, rmse_h4, rmse_h12, rmse_h24, rmse_h48, rmse_h96,
]


Downloading artifacts:   0%|          | 0/9 [00:00<?, ?it/s]

In [62]:
mlflow.set_experiment("load_forecasting_prophet")

# Logged models of this run, ordered by ascending `val_mse`.
# NOTE(review): assumes the Prophet logged models carry a `val_mse` metric;
# confirm against the training notebook.
ranked_checkpoints = mlflow.search_logged_models(
    filter_string=f"source_run_id='{PROPHET_RUN_ID}'",
    order_by=[{"field_name": "metrics.val_mse", "ascending": True}],
    output_format="list",
)
if not ranked_checkpoints:
    # Fail with an actionable message instead of a bare IndexError on [0].
    raise LookupError(
        f"No logged models found for run {PROPHET_RUN_ID!r}; "
        "check the run ID and that the MLflow tracking server is reachable."
    )

best_checkpoint = ranked_checkpoints[0]

prophet_model = mlflow.prophet.load_model(best_checkpoint.model_uri) # pyright: ignore

# Fetch the run once (the earlier duplicate call discarded its result) and
# read every metric from the same dict.
run = mlflow.get_run(run_id=PROPHET_RUN_ID)
metrics = run.data.metrics

# The Prophet run logs its overall errors under "rmse" / "mae" rather than
# the "test_*_unscaled" keys read for the LSTM runs.
rmse = metrics["rmse"]
mae = metrics["mae"]
rmse_h1 = metrics["rmse_h1"]
rmse_h4 = metrics["rmse_h4"]
rmse_h12 = metrics["rmse_h12"]
rmse_h24 = metrics["rmse_h24"]
rmse_h48 = metrics["rmse_h48"]
rmse_h96 = metrics["rmse_h96"]

# Drop any row left by a previous execution of this cell so that re-running
# it replaces the entry instead of appending a duplicate. The index is reset
# so that len(perfomance_df) is always an unused row label.
perfomance_df = perfomance_df[perfomance_df["Model"] != "prophet"].reset_index(drop=True)
perfomance_df.loc[len(perfomance_df)] = [
    "prophet", rmse, mae, rmse_h1, rmse_h4, rmse_h12, rmse_h24, rmse_h48, rmse_h96,
]


In [63]:
# Display the per-model metrics table collected by the loading cells.
perfomance_df

Unnamed: 0,Model,RSME_Overall,MAE_Overall,RSME_h1,RSME_h4,RSME_h12,RSME_h24,RSME_48,RSME_h96
0,bayesian_lstm,353.585541,250.20549,70.742035,151.687988,288.078064,363.448212,379.72995,378.285156
1,lstm,341.0896,218.389709,117.053764,175.866043,276.798492,325.508362,368.340179,377.863098
2,prophet,521.325481,418.771196,521.288097,521.292154,521.277565,521.352052,521.396901,521.368254


## Load the run error data

## Comparison Plots

### RMSE, MAE and Horizon-Error-Profile

## Sample predictions

In [None]:
# TODO: load the test data for a normal weekday, a holiday and a weekend day
# 