# Packages

In [63]:
# Data wrangling
import pandas as pd
import polars as pl
import polars.selectors as cs
import numpy as np

# Visualisation
import plotnine as pn
import matplotlib.pyplot as plt
from mizani.formatters import comma_format, custom_format, currency_format, percent_format
from IPython.display import clear_output, display
import matplotlib.font_manager as fm
import matplotlib as mpl
from matplotlib import rc
import plotly.express as px

# Utils
import os
from tqdm.notebook import tqdm
import itertools
import yaml
import warnings
import time
import holidays
import pickle
import datetime

# Modelling
from sklearn.linear_model import Lasso
from sklearn.model_selection import TimeSeriesSplit
from sklearn.preprocessing import (
    StandardScaler,
    OneHotEncoder,
    FunctionTransformer,
)
from sklearn.metrics import (
    r2_score,
    mean_absolute_error,
    mean_absolute_percentage_error,
    root_mean_squared_error,
)
from sklearn.pipeline import FeatureUnion, make_pipeline, Pipeline
from sklearn.compose import ColumnTransformer, make_column_selector
from sklearn.impute import SimpleImputer
from sklearn.experimental import enable_iterative_imputer
from sklearn.impute import IterativeImputer, KNNImputer
from sklearn.feature_selection import VarianceThreshold

import ray
from ray import train, tune
from ray.tune.search.optuna import OptunaSearch
from ray.tune.schedulers import ASHAScheduler


import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Input, Dense, Dropout
from tensorflow.keras.callbacks import TensorBoard, EarlyStopping
from tensorflow.keras.metrics import RootMeanSquaredError
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.optimizers.schedules import ExponentialDecay
from keras.callbacks import ReduceLROnPlateau
from keras.layers import ReLU, LeakyReLU

tf.keras.utils.set_random_seed(1)

rc('text', usetex=False)

jama_colour = [
    "#374e55",
    "#df8f44",
    "#00a1d5",
    "#b24745",
    "#79af97",
    "#6a6599",
    "#80796b",
]

pd.set_option("display.max.columns", 500)
pd.set_option("display.max.columns", 500)


theme_academic = pn.theme(
    text=pn.element_text(family="Latin Modern Roman"),
    plot_title=pn.element_text(weight="bold", size=14, ha="center"),
    legend_text=pn.element_text(size=9),  # Smaller font for legend items
    panel_background=pn.element_rect(fill="white"),  # Clean white background
    panel_border=pn.element_rect(color="grey", size=0.5),
    axis_ticks=pn.element_line(color="grey"),
    panel_grid_major=pn.element_line(color="grey", size=0.1, alpha=0.3),
    panel_grid_minor=pn.element_line(color="grey", size=0.1, alpha=0.3),
    legend_background=pn.element_rect(fill="white", color=None),
    legend_key=pn.element_rect(fill="white", color=None),
    plot_margin=0.02,
    figure_size=(6, 4),  # Set default figure size (width, height in inches)
)

%matplotlib inline

# Loading the data

In [64]:
df = pl.read_csv(
    "../0_data/preprocessed/df_final_reduced.csv", try_parse_dates=True
).filter(pl.col("datetime") >= pd.Timestamp("2021-09-01 00:00"))

df.head()

datetime,kWh,Zurich_shortwave_radiation,Zurich_soil_temperature_7_to_28cm
datetime[μs],f64,f64,f64
2021-09-01 00:00:00,5163300.0,0.0,16.042
2021-09-01 01:00:00,5077700.0,0.0,15.892
2021-09-01 02:00:00,4931500.0,0.0,16.042
2021-09-01 03:00:00,4787800.0,0.0,15.942
2021-09-01 04:00:00,4703800.0,0.0,15.792001


In [65]:
df.shape

(26304, 4)

# Missing values

Just forward fill for now.

In [66]:
df = df.fill_null(strategy="forward")

# Ensure 1 hour gaps

- and availability of full days (to walk in 24 hour steps)

In [67]:
(df["datetime"] - df["datetime"].shift(1)).value_counts()

datetime,count
duration[μs],u32
,1
1h,26303


In [68]:
df["datetime"].min()

datetime.datetime(2021, 9, 1, 0, 0)

In [69]:
df["datetime"].max()

datetime.datetime(2024, 8, 31, 23, 0)

# Calendar Features

In [70]:
df = df.with_columns(
    day_of_month=pl.col("datetime").dt.day(),
    day_of_year=pl.col("datetime").dt.ordinal_day(),
    day_of_week=pl.col("datetime").dt.weekday(),
    month=pl.col("datetime").dt.month(),
    hour=pl.col("datetime").dt.hour(),
    year=pl.col("datetime").dt.year(),
)

# Holidays

In [71]:
# Define the region (Canton of Berne) and the country (Switzerland)
country = "CH"
prov = "ZH"

# Create a list of the regional holidays for the canton of Berne
regional_holidays = holidays.CH(
    years=df["datetime"].dt.year().unique().to_list(), prov=prov
)

holiday_df = pl.DataFrame(
    {
        "holiday_name": list(regional_holidays.values()),
        "holiday_date": list(regional_holidays.keys()),
    }
).sort("holiday_date")

In [72]:
import polars as pl

# Define holiday names
holiday_names = [
    # Osterferienzeit (Easter Holiday Season)
    "Osterferienzeit_1",
    "Osterferienzeit_2",
    "Osterferienzeit_3",
    "Osterferienzeit_4",
    "Osterferienzeit_5",
    # Auffahrtferienzeit (Ascension Holiday Season)
    "Auffahrtferienzeit_1",
    "Auffahrtferienzeit_2",
    "Auffahrtferienzeit_3",
    "Auffahrtferienzeit_4",
    "Auffahrtferienzeit_5",
]

# Repeat holiday names for each year
holiday_names_full = holiday_names * 4

# Define holiday dates
holiday_dates = [
    # 2021
    "2021-03-31",
    "2021-04-01",
    "2021-04-02",
    "2021-04-03",
    "2021-04-04",
    "2021-05-12",
    "2021-05-13",
    "2021-05-14",
    "2021-05-15",
    "2021-05-16",
    # 2022
    "2022-04-13",
    "2022-04-14",
    "2022-04-15",
    "2022-04-16",
    "2022-04-17",
    "2022-05-25",
    "2022-05-26",
    "2022-05-27",
    "2022-05-28",
    "2022-05-29",
    # 2023
    "2023-04-05",
    "2023-04-06",
    "2023-04-07",
    "2023-04-08",
    "2023-04-09",
    "2023-05-17",
    "2023-05-18",
    "2023-05-19",
    "2023-05-20",
    "2023-05-21",
    # 2024
    "2024-03-27",
    "2024-03-28",
    "2024-03-29",
    "2024-03-30",
    "2024-03-31",
    "2024-05-08",
    "2024-05-09",
    "2024-05-10",
    "2024-05-11",
    "2024-05-12",
]

# Create the DataFrame
holiday_manual_df = pl.DataFrame(
    {
        "holiday_name": holiday_names_full,
        "holiday_date": holiday_dates,
    }
).with_columns(pl.col("holiday_date").str.to_date())

In [73]:
holiday_df = (
    pl.concat(
        [
            holiday_df,
            holiday_manual_df,
        ],
        how="vertical",
    )
    .sort("holiday_date")
    .unique("holiday_date")
)

In [74]:
holiday_df

holiday_name,holiday_date
str,date
"""Neujahrestag""",2021-01-01
"""Berchtoldstag""",2021-01-02
"""Osterferienzeit_1""",2021-03-31
"""Osterferienzeit_2""",2021-04-01
"""Karfreitag""",2021-04-02
…,…
"""Auffahrtferienzeit_5""",2024-05-12
"""Pfingstmontag""",2024-05-20
"""Nationalfeiertag""",2024-08-01
"""Weihnachten""",2024-12-25


In [75]:
df = (
    df.with_columns(date=pl.col("datetime").dt.date())
    .join(holiday_df, how="left", left_on="date", right_on="holiday_date")
    .drop("date")
    .with_columns(holiday_name=pl.col("holiday_name").fill_null("no_holiday"))
)

df.head()

datetime,kWh,Zurich_shortwave_radiation,Zurich_soil_temperature_7_to_28cm,day_of_month,day_of_year,day_of_week,month,hour,year,holiday_name
datetime[μs],f64,f64,f64,i8,i16,i8,i8,i8,i32,str
2021-09-01 00:00:00,5163300.0,0.0,16.042,1,244,3,9,0,2021,"""no_holiday"""
2021-09-01 01:00:00,5077700.0,0.0,15.892,1,244,3,9,1,2021,"""no_holiday"""
2021-09-01 02:00:00,4931500.0,0.0,16.042,1,244,3,9,2,2021,"""no_holiday"""
2021-09-01 03:00:00,4787800.0,0.0,15.942,1,244,3,9,3,2021,"""no_holiday"""
2021-09-01 04:00:00,4703800.0,0.0,15.792001,1,244,3,9,4,2021,"""no_holiday"""


# Cyclical Encoding

In [76]:
def sin_transformer(period):
    return FunctionTransformer(lambda x: np.sin(x / period * 2 * np.pi))


def cos_transformer(period):
    return FunctionTransformer(lambda x: np.cos(x / period * 2 * np.pi))


def encode_cyclically(column_name, periodicity, table):
    # Create sin and cos encoding
    table = table.with_columns(
        sin_transformer(periodicity)
        .fit_transform(table[column_name])
        .alias(f"{column_name}_sin")
    )

    table = table.with_columns(
        cos_transformer(periodicity)
        .fit_transform(table[column_name])
        .alias(f"{column_name}_cos")
    )
    # Drop the old column
    table = table.drop(column_name)

    return table

In [77]:
# Dictionary with column name and calendar periodicity
calendar_features = {
    "day_of_month": 31,
    "day_of_year": 365,
    # "day_of_week": 7,
    # "month": 12,
    # "hour": 24,
}

for column_name, periodicity in calendar_features.items():
    df = encode_cyclically(column_name, periodicity, df)

# sklearn Pipeline

In [78]:
cat_cols = ["holiday_name", "day_of_week", "month", "hour"]

num_cols = df.select(
    cs.contains(
        "soil_temperature_10_to_28cm",
        "shortwave_radiation",
    )
).columns + ["year"]

manual_cols = df.select(pl.selectors.contains("_cos", "_sin", "is_")).columns

In [79]:
df.drop(manual_cols + cat_cols + num_cols)

datetime,kWh,Zurich_soil_temperature_7_to_28cm
datetime[μs],f64,f64
2021-09-01 00:00:00,5.1633e6,16.042
2021-09-01 01:00:00,5.0777e6,15.892
2021-09-01 02:00:00,4.9315e6,16.042
2021-09-01 03:00:00,4.7878e6,15.942
2021-09-01 04:00:00,4.7038e6,15.792001
…,…,…
2024-08-31 19:00:00,5.3614e6,22.942
2024-08-31 20:00:00,5.3775e6,23.042
2024-08-31 21:00:00,5.2130e6,23.092
2024-08-31 22:00:00,5.0767e6,23.092


In [80]:
numeric_transformer = Pipeline(
    steps=[("imputer", SimpleImputer(strategy="mean")), ("scaler", StandardScaler())]
)

In [81]:
categorical_transformer = Pipeline(
    steps=[
        (
            "encoder",
            OneHotEncoder(sparse_output=False, handle_unknown="ignore"),
        ),
    ]
)

In [82]:
column_transformer = ColumnTransformer(
    transformers=[
        ("numeric", numeric_transformer, num_cols),
        ("categorical", categorical_transformer, cat_cols),
    ],
    remainder="passthrough",
)

In [83]:
preprocessor = Pipeline(
    steps=[
        ("column_transformer", column_transformer),
        (
            "variance_threshold",
            VarianceThreshold(threshold=0.0),
        ),  # Drops constant columns after transformations
    ]
)

# Wide Data Format

## Splits

Start by preprocessing the data in hourly frequency:

In [84]:
df_train = df.filter(
    (pl.col("datetime") >= pl.datetime(2021, 9, 1, 0))
    & (pl.col("datetime") <= pl.datetime(2022, 8, 31, 23))
).to_pandas()

df_val = df.filter(
    (pl.col("datetime") >= pl.datetime(2022, 9, 1, 0))
    & (pl.col("datetime") <= pl.datetime(2023, 8, 31, 23))
).to_pandas()

In [85]:
X_train = df_train.drop(columns=["datetime", "kWh"])
X_val = df_val.drop(columns=["datetime", "kWh"])

y_train = df_train["kWh"]
y_val = df_val["kWh"]

In [86]:
fitted_preprocessor = preprocessor.fit(X_train)

X_train_preprocessed = pd.DataFrame(
    fitted_preprocessor.transform(X_train),
    columns=fitted_preprocessor.get_feature_names_out(),
)

X_val_preprocessed = pd.DataFrame(
    fitted_preprocessor.transform(X_val),
    columns=fitted_preprocessor.get_feature_names_out(),
)

In [87]:
df_train_preprocessed = pd.concat(
    [df_train.filter(["datetime", "kWh"]), X_train_preprocessed], axis=1
)

df_val_preprocessed = pd.concat(
    [df_val.filter(["datetime", "kWh"]), X_val_preprocessed], axis=1
)

In [88]:
df_trainval = pd.concat(
    [df_train_preprocessed, df_val_preprocessed], axis=0
).reset_index(drop=True)


df_trainval.shape

(17520, 71)

In [89]:
(df_trainval["datetime"] - df_trainval["datetime"].shift(1)).value_counts()

datetime
0 days 01:00:00    17519
Name: count, dtype: int64

## Exogenous Variables

Define parameters:
- h: prediction horizon, here 24 (how far should be predicted into the future at once)
- l: lookback parameter (how many past observations should be considered), here 168 (one week)
    - Y lags
    - X lags
- step_size: How many hours to skip before the next transformation window (for daily format step_size = 24)

Start with exogenous variables due to edge case:
- at the start of the dataframe, there are no past observations to consider (need to be dropped)
- simplifying assumption: l can only be a multiple of h, so there doesn't need to be rounding applied to come back to the proper setup of predicting at midnight for the next day

In [90]:
def aggregate_df(df, train_size, val_size, l=168, h=24, step_size=24, verbose=False):
    assert (
        l % h == 0
    ), f"Warning: 'l' ({l}) is not a multiple of 'h' ({h}). The aggregation may not work as expected."

    # Remove observations from the top of the training dataframe:
    # 1) Adjust train size for edge case of no historical data at start of data frame
    # 1) Adjust for edge case of not mismatch in horizon and step size in training data
    # Return the train size minus l and the waste cut off at the top to give back the
    # transformed arrays in the right sizes (don't need that for validation set)
    train_origins = np.arange(l, train_size - h, step_size)
    train_tail_waste = train_size - (train_origins[-1] + h)
    if train_tail_waste % h == 0:
        train_tail_waste = 0
    df = df.iloc[train_tail_waste:]
    train_size = train_size - train_tail_waste

    # Remove observations from the bottom of the validation dataframe
    # 1) Adjust for edge case of no mismatch in horizon and step size in validation data
    val_origins = np.arange(0, val_size - h, step_size)
    val_tail_waste = val_size - (val_origins[-1] + h)
    if val_tail_waste % h == 0:
        val_tail_waste = 0
    else:
        df = df.iloc[:-val_tail_waste]

    # Get number of exogenous variables
    d = len(df.drop(columns=["datetime", "kWh"]).columns)

    # Get the origins on which the data will be transformed
    # These points start at l and move forward at step_size
    slice_points = np.arange(l, df.shape[0] - h + step_size, step_size)

    y_list = []
    X_list = []

    pbar = tqdm(slice_points, desc="Processing slices") if verbose else slice_points
    for i, origin in enumerate(pbar):
        # Create splits based on origin
        past_range = df.iloc[origin - l : origin]
        future_range = df.iloc[origin : origin + h]

        # Extract chunks of information
        y_future = future_range["kWh"].to_numpy()
        y_lags = past_range["kWh"].to_numpy()

        X_future = future_range.drop(columns=["datetime", "kWh"]).to_numpy().flatten()
        X_lags = past_range.drop(columns=["datetime", "kWh"]).to_numpy().flatten()

        # Append the information to the existing arrays
        # OMIT the historic X values
        y_list.append(y_future)
        X_list.append(
            np.hstack(
                [
                    # X_lags,
                    X_future,
                    y_lags,
                ]
            )
        )

    y = np.vstack(y_list)
    X = np.vstack(X_list)

    # Recreate splits
    if train_size > 0:
        X_train = X[: int((train_size - l) / step_size)]
        y_train = y[: int((train_size - l) / step_size)]
    else:
        X_train = None
        y_train = None

    # Handle validation size being zero
    if val_size > 0:
        X_val = X[int((train_size - l) / step_size) :]
        y_val = y[int((train_size - l) / step_size) :]
    else:
        X_val = None
        y_val = None

    return (X_train, X_val, y_train, y_val)

In [91]:
X_train, X_val, y_train, y_val = aggregate_df(
    # df=df_trainval,
    # train_size=df_train_preprocessed.shape[0],
    # val_size=df_val_preprocessed.shape[0],
    df=df_trainval,
    train_size=df_train_preprocessed.shape[0],
    val_size=df_val_preprocessed.shape[0],
    l=168,
    h=24,
    step_size=1,
    verbose=True,
)

Processing slices:   0%|          | 0/17327 [00:00<?, ?it/s]

# Hyperparameter Tuning

## Manual test

In [30]:
def rmse(y_true, y_pred):
    return tf.sqrt(tf.reduce_mean(tf.square(y_true - y_pred)))

In [136]:
# Define the learning rate schedule
initial_learning_rate = 0.001
# lr_schedule = ExponentialDecay(
#     initial_learning_rate=initial_learning_rate,
#     decay_steps=100,
#     decay_rate=0.98,
#     staircase=True,
# )
lr_schedule = ReduceLROnPlateau(monitor="val_loss", factor=0.8, patience=10, verbose=1)


# Define the model
model = Sequential(
    [
        Input(shape=(X_train.shape[1],)),
        Dense(32, activation="relu"),
        Dense(32, activation="relu"),
        Dense(32, activation="relu"),
        Dense(32, activation="relu"),
        Dense(32, activation="relu"),
        Dense(24),
    ]
)

# Compile the model
model.compile(
    optimizer=Adam(learning_rate=initial_learning_rate),
    loss=rmse,
    metrics=[RootMeanSquaredError()],
)

# Set up Tensorboard
log_dir = "logs/" + datetime.datetime.now().strftime("%Y%m%d-%H%M%S")

# Set up callbacks
tensorboard_callback = TensorBoard(log_dir=log_dir, histogram_freq=1)
early_stopping = EarlyStopping(
    monitor="val_loss", patience=100, restore_best_weights=True
)

# Train the model
history = model.fit(
    X_train,
    y_train,
    validation_data=(X_val, y_val),
    epochs=100,
    batch_size=32,
    callbacks=[tensorboard_callback, early_stopping, lr_schedule],
    verbose=1,
)

Epoch 1/100
[1m269/269[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 4ms/step - loss: 2762593.2500 - root_mean_squared_error: 3258909.2500 - val_loss: 756387.1875 - val_root_mean_squared_error: 776163.2500 - learning_rate: 0.0010
Epoch 2/100
[1m269/269[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - loss: 801086.1875 - root_mean_squared_error: 802124.3750 - val_loss: 684420.6875 - val_root_mean_squared_error: 703572.4375 - learning_rate: 0.0010
Epoch 3/100
[1m269/269[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - loss: 708752.0625 - root_mean_squared_error: 710414.0000 - val_loss: 584830.5625 - val_root_mean_squared_error: 602688.1875 - learning_rate: 0.0010
Epoch 4/100
[1m269/269[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - loss: 611704.4375 - root_mean_squared_error: 613513.6250 - val_loss: 521462.4375 - val_root_mean_squared_error: 541090.6250 - learning_rate: 0.0010
Epoch 5/100
[1m269/269[0m [32m━━━━━━━━━━━━━━━━━

Due to step_size = 1 for training procedure but step_size = 24 for inference (want to make predictions in the day-ahead), now need to redo the preprocessing outside of training again:

In [None]:
(
    X_train_inference_format,
    X_val_inference_format,
    y_train_inference_format,
    y_val_inference_format,
) = aggregate_df(
    df=df_trainval,
    train_size=df_train_preprocessed.shape[0],
    val_size=df_val_preprocessed.shape[0],
    l=168,
    h=24,
    step_size=24,
    verbose=True,
)

Processing slices:   0%|          | 0/723 [00:00<?, ?it/s]

In [142]:
val_preds = pd.DataFrame(
    {
        "datetime": df_val_preprocessed["datetime"],
        "pred": model.predict(X_val_inference_format).flatten(),
        "kWh": y_val_inference_format.flatten(),
    }
)

[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step 


In [143]:
loss = root_mean_squared_error(y_pred=val_preds["pred"], y_true=val_preds["kWh"])
loss

324290.50401064643

In [144]:
# Create the figure with step lines for both actual and predicted values
fig = px.line(
    val_preds,
    x="datetime",
    y=["kWh", "pred"],
    labels={
        "datetime": "Date",
        "value": "Energy Consumption (kWh)",
        "variable": "Series",
    },
    title="Validation Fit",
    line_shape="hv",  # Set line shape to horizontal-vertical for step chart
)

# Customize the layout
fig.update_layout(
    template="plotly_white",
    legend=dict(title=""),
    xaxis_title="Date",
    yaxis_title="Energy Consumption (kWh)",
)

# Show the figure
fig.show()

In [155]:
# Need to account for the edge case of missing historical data at the start:
# Remove the first l observations from the datetime col that haven't been used
train_preds = pd.DataFrame(
    {
        "datetime": df_train_preprocessed["datetime"].iloc[
            df_train_preprocessed.shape[0] - X_train_inference_format.shape[0] * 24 :
        ],
        "pred": model.predict(X_train_inference_format).flatten(),
        "kWh": y_train_inference_format.flatten(),
    }
)


# Create the figure with step lines for both actual and predicted values
fig = px.line(
    train_preds,
    x="datetime",
    y=["kWh", "pred"],
    labels={
        "datetime": "Date",
        "value": "Energy Consumption (kWh)",
        "variable": "Series",
    },
    title="Training Fit",
    line_shape="hv",  # Set line shape to horizontal-vertical for step chart
)

# Customize the layout
fig.update_layout(
    template="plotly_white",
    legend=dict(title=""),
    xaxis_title="Date",
    yaxis_title="Energy Consumption (kWh)",
)

# Show the figure
fig.show()

[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step 


## Optuna Tuning Loop

In [31]:
def rmse(y_true, y_pred):
    return tf.sqrt(tf.reduce_mean(tf.square(y_true - y_pred)))

In [32]:
import optuna

In [33]:
def objective(trial):
    # Sample hyperparameters from Optuna
    initial_learning_rate = trial.suggest_float(
        "initial_learning_rate", 1e-6, 1e-2, log=True
    )
    decay_patience = trial.suggest_int("decay_patience", 10, 100)
    decay_rate = trial.suggest_float("decay_rate", 0.5, 0.99)
    num_layers = trial.suggest_int("num_layers", 1, 6)
    units_per_layer = trial.suggest_categorical(
        "units_per_layer", [16, 32, 64, 128, 256, 512]
    )
    activation = "ReLU"
    batch_size = trial.suggest_categorical("batch_size", [8, 16, 32, 64, 128, 512])

    # Define the model
    model = Sequential()
    model.add(Input(shape=(X_train.shape[1],)))

    if activation == "ReLU":
        for _ in range(num_layers):
            model.add(Dense(units_per_layer))
            model.add(ReLU())
    elif activation == "LeakyReLU":
        for _ in range(num_layers):
            model.add(Dense(units_per_layer))
            model.add(LeakyReLU())

    model.add(Dense(24))  # Assuming `h=24`

    # Compile the model
    model.compile(
        optimizer=Adam(learning_rate=initial_learning_rate),
        loss=rmse,  # Replace with your RMSE function
        metrics=[RootMeanSquaredError()],
    )

    # Early stopping
    early_stopping = EarlyStopping(
        monitor="val_loss",
        patience=200,
        restore_best_weights=True,
    )

    # Learning rate scheduler
    lr_schedule = ReduceLROnPlateau(
        monitor="val_loss",
        factor=decay_rate,
        patience=decay_patience,
        verbose=0,
    )

    # Train the model
    history = model.fit(
        X_train,
        y_train,
        validation_data=(X_val, y_val),
        epochs=2000,
        batch_size=batch_size,
        callbacks=[early_stopping, lr_schedule],
        verbose=0,
    )

    # Evaluate the model and return the loss
    val_loss = sum(history.history["val_loss"][-200:]) / len(
        history.history["val_loss"][-200:]
    )
    return val_loss

In [34]:
study = optuna.create_study(direction="minimize", study_name="DNN Optimization")
study.optimize(objective, timeout=60 * 60 * 9)

print("Best parameters:", study.best_params)
print("Best validation loss:", study.best_value)

[I 2024-12-08 00:14:29,882] A new study created in memory with name: DNN Optimization
[I 2024-12-08 00:58:04,331] Trial 0 finished with value: 240809.039296875 and parameters: {'initial_learning_rate': 2.584741175122496e-06, 'decay_patience': 74, 'decay_rate': 0.5829588526583609, 'num_layers': 4, 'units_per_layer': 128, 'batch_size': 16}. Best is trial 0 with value: 240809.039296875.
[I 2024-12-08 01:03:32,021] Trial 1 finished with value: 346122.85328125 and parameters: {'initial_learning_rate': 0.0021572592532991667, 'decay_patience': 80, 'decay_rate': 0.6723866423025997, 'num_layers': 1, 'units_per_layer': 16, 'batch_size': 128}. Best is trial 0 with value: 240809.039296875.
[I 2024-12-08 01:26:24,281] Trial 2 finished with value: 244547.05734375 and parameters: {'initial_learning_rate': 0.0007420357381853224, 'decay_patience': 67, 'decay_rate': 0.6640021990050368, 'num_layers': 5, 'units_per_layer': 128, 'batch_size': 8}. Best is trial 0 with value: 240809.039296875.
[I 2024-12-08 

Best parameters: {'initial_learning_rate': 0.0005584628412102691, 'decay_patience': 40, 'decay_rate': 0.5432043003870151, 'num_layers': 4, 'units_per_layer': 128, 'batch_size': 32}
Best validation loss: 223016.325859375


In [None]:
study.trials_dataframe().to_csv("10_ANN_trials.csv", index=False)

# Validation Performance of the Best Model

In [36]:
trials_df = pl.read_csv("10_ANN_trials.csv")
trials_df.shape

(30, 12)

In [None]:
selected_trial = trials_df.sort("value").head(1)
best_hyperparams_df = (
    selected_trial.select(pl.selectors.contains("params_"))
    .unpivot()
    .with_columns(pl.col("variable").str.replace("params_", ""))
)
# Make dictionary (all strings)
best_hyperparams = dict(
    zip(best_hyperparams_df["variable"], best_hyperparams_df["value"])
)


# Convert strings to correct data types


converted_params = {
    "initial_learning_rate": float(best_hyperparams["initial_learning_rate"]),
    "decay_rate": float(best_hyperparams["decay_rate"]),
    "decay_patience": int(best_hyperparams["decay_patience"]),
    "num_layers": int(best_hyperparams["num_layers"]),
    "units_per_layer": int(best_hyperparams["units_per_layer"]),
    "activation": "ReLU",
    "batch_size": int(best_hyperparams["batch_size"]),
    "epochs": 2000,
    "h": 24,
    "patience": 200,
}



converted_params

{'initial_learning_rate': 0.0005584628412102691,
 'decay_rate': 0.5432043003870151,
 'decay_patience': 40,
 'num_layers': 4,
 'units_per_layer': 128,
 'activation': 'ReLU',
 'batch_size': 32,
 'epochs': 2000,
 'h': 24,
 'patience': 200}

In [48]:
selected_trial["value"]

value
f64
223016.325859


In [49]:
# Build the model dynamically based on num_layers and units_per_layer
model = Sequential()
model.add(Input(shape=(X_train.shape[1],)))

if converted_params["activation"] == "ReLU":
    for _ in range(converted_params["num_layers"]):
        model.add(Dense(converted_params["units_per_layer"]))
        model.add(ReLU())
elif converted_params["activation"] == "LeakyReLU":
    for _ in range(converted_params["num_layers"]):
        model.add(Dense(converted_params["units_per_layer"]))
        model.add(LeakyReLU())

model.add(Dense(converted_params["h"]))

# Compile the model
model.compile(
    optimizer=Adam(learning_rate=converted_params["initial_learning_rate"]),
    # loss=rmse,
    loss=rmse,
    metrics=[RootMeanSquaredError()],
)

# Early stopping
early_stopping = EarlyStopping(
    monitor="val_loss",
    patience=converted_params["patience"],
    restore_best_weights=True,
)

# Learning Rate scheduler
lr_schedule = ReduceLROnPlateau(
    monitor="val_loss",
    factor=converted_params["decay_rate"],
    patience=converted_params["decay_patience"],
    verbose=0,
)

# Set up Tensorboard
log_dir = "logs/" + datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
tensorboard_callback = TensorBoard(log_dir=log_dir, histogram_freq=1)

# Train the model
history = model.fit(
    X_train,
    y_train,
    validation_data=(X_val, y_val),
    epochs=converted_params["epochs"],
    batch_size=converted_params["batch_size"],
    callbacks=[early_stopping, tensorboard_callback, lr_schedule],
    verbose=0,
)

Due to step_size = 1 for training procedure but step_size = 24 for inference (want to make predictions in the day-ahead), now need to redo the preprocessing outside of training again:

In [50]:
(
    X_train_inference_format,
    X_val_inference_format,
    y_train_inference_format,
    y_val_inference_format,
) = aggregate_df(
    df=df_trainval,
    train_size=df_train_preprocessed.shape[0],
    val_size=df_val_preprocessed.shape[0],
    l=168,
    h=24,
    step_size=24,
    verbose=True,
)

Processing slices:   0%|          | 0/723 [00:00<?, ?it/s]

In [51]:
val_preds = pd.DataFrame(
    {
        "datetime": df_val_preprocessed["datetime"],
        "pred": model.predict(X_val_inference_format).flatten(),
        "kWh": y_val_inference_format.flatten(),
    }
)

[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step 


In [52]:
loss = root_mean_squared_error(y_pred=val_preds["pred"], y_true=val_preds["kWh"])
loss

265040.9776693537

In [53]:
# Create the figure with step lines for both actual and predicted values
fig = px.line(
    val_preds,
    x="datetime",
    y=["kWh", "pred"],
    labels={
        "datetime": "Date",
        "value": "Energy Consumption (kWh)",
        "variable": "Series",
    },
    title="Validation Fit",
    line_shape="hv",  # Set line shape to horizontal-vertical for step chart
)

# Customize the layout
fig.update_layout(
    template="plotly_white",
    legend=dict(title=""),
    xaxis_title="Date",
    yaxis_title="Energy Consumption (kWh)",
)

# Show the figure
fig.show()

In [54]:
# Need to account for the edge case of missing historical data at the start:
# Remove the first l observations from the datetime col that haven't been used
train_preds = pd.DataFrame(
    {
        "datetime": df_train_preprocessed["datetime"].iloc[
            df_train_preprocessed.shape[0] - X_train_inference_format.shape[0] * 24 :
        ],
        "pred": model.predict(X_train_inference_format).flatten(),
        "kWh": y_train_inference_format.flatten(),
    }
)


# Create the figure with step lines for both actual and predicted values
fig = px.line(
    train_preds,
    x="datetime",
    y=["kWh", "pred"],
    labels={
        "datetime": "Date",
        "value": "Energy Consumption (kWh)",
        "variable": "Series",
    },
    title="Training Fit",
    line_shape="hv",  # Set line shape to horizontal-vertical for step chart
)

# Customize the layout
fig.update_layout(
    template="plotly_white",
    legend=dict(title=""),
    xaxis_title="Date",
    yaxis_title="Energy Consumption (kWh)",
)

# Show the figure
fig.show()

[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step 


# Model Evaluation without Retraining

Evaluate on the holdout period by retraining with the  best_hyperparams chosen from tuning on validation set.

- Train once: on the entire training + validation period, but WITHOUT early stopping, pick the exact number of epochs to train for and train on the full training data set
    - Restore the best weights on the training set? Or just go with it

## Splits

Start by preprocessing the data in hourly frequency:

In [92]:
df_train = df.filter(
    (pl.col("datetime") >= pl.datetime(2021, 9, 1, 0))
    & (pl.col("datetime") <= pl.datetime(2023, 8, 31, 23))
).to_pandas()

df_test = df.filter(pl.col("datetime") >= pl.datetime(2023, 9, 1, 0)).to_pandas()

In [93]:
X_train = df_train.drop(columns=["datetime", "kWh"])
X_test = df_test.drop(columns=["datetime", "kWh"])

y_train = df_train["kWh"]
y_test = df_test["kWh"]

In [94]:
fitted_preprocessor = preprocessor.fit(X_train)

X_train_preprocessed = pd.DataFrame(
    fitted_preprocessor.transform(X_train),
    columns=fitted_preprocessor.get_feature_names_out(),
)

X_test_preprocessed = pd.DataFrame(
    fitted_preprocessor.transform(X_test),
    columns=fitted_preprocessor.get_feature_names_out(),
)

In [95]:
df_train_preprocessed = pd.concat(
    [df_train.filter(["datetime", "kWh"]), X_train_preprocessed], axis=1
)

df_test_preprocessed = pd.concat(
    [df_test.filter(["datetime", "kWh"]), X_test_preprocessed], axis=1
)

In [96]:
df_traintest = pd.concat(
    [df_train_preprocessed, df_test_preprocessed], axis=0
).reset_index(drop=True)

df_traintest.shape

(26304, 71)

In [97]:
(df_traintest["datetime"] - df_traintest["datetime"].shift(1)).value_counts()

datetime
0 days 01:00:00    26303
Name: count, dtype: int64

## Aggregate

In [98]:
X_train, X_test, y_train, y_test = aggregate_df(
    df=df_traintest,
    train_size=df_train_preprocessed.shape[0],
    val_size=df_test_preprocessed.shape[0],
    l=168,
    h=24,
    step_size=1,
    verbose=True,
)

Processing slices:   0%|          | 0/26111 [00:00<?, ?it/s]

## Fit

Here, I can't use early stopping as it would be considered data leakage. Need to blindly train on the training set (train + val) for the number of epochs that seems appropriate from the validation fit.

max_epochs: Check the shape of the validation fit (train/val curves) and decide number of epochs to train for based on overfit.

In [99]:
max_epochs = 200
# max_epochs = int(len(history.history["val_loss"]) - converted_params["patience"] * 2)
max_epochs

200

In [100]:
# Build the model dynamically based on num_layers and units_per_layer
model = Sequential()
model.add(Input(shape=(X_train.shape[1],)))

if converted_params["activation"] == "ReLU":
    for _ in range(converted_params["num_layers"]):
        model.add(Dense(converted_params["units_per_layer"]))
        model.add(ReLU())
elif converted_params["activation"] == "LeakyReLU":
    for _ in range(converted_params["num_layers"]):
        model.add(Dense(converted_params["units_per_layer"]))
        model.add(LeakyReLU())

model.add(Dense(converted_params["h"]))

# Compile the model
model.compile(
    optimizer=Adam(learning_rate=converted_params["initial_learning_rate"]),
    # loss=rmse,
    loss=rmse,
    metrics=[RootMeanSquaredError()],
)

# Learning Rate scheduler
lr_schedule = ReduceLROnPlateau(
    monitor="loss",
    factor=converted_params["decay_rate"],
    patience=converted_params["decay_patience"],
    verbose=0,
)

# Set up Tensorboard
log_dir = "logs/" + datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
tensorboard_callback = TensorBoard(log_dir=log_dir, histogram_freq=1)

# Train the model
history = model.fit(
    X_train,
    y_train,
    # validation_data=(None, None),
    epochs=max_epochs,
    batch_size=converted_params["batch_size"],
    callbacks=[tensorboard_callback, lr_schedule],
    verbose=0,
)

Due to step_size = 1 for training procedure but step_size = 24 for inference (want to make predictions in the day-ahead), now need to redo the preprocessing outside of training again:

In [101]:
(
    X_train_inference_format,
    X_test_inference_format,
    y_train_inference_format,
    y_test_inference_format,
) = aggregate_df(
    df=df_traintest,
    train_size=df_train_preprocessed.shape[0],
    val_size=df_test_preprocessed.shape[0],
    l=168,
    h=24,
    step_size=24,
    verbose=True,
)

Processing slices:   0%|          | 0/1089 [00:00<?, ?it/s]

In [102]:
holdout_preds = pd.DataFrame(
    {
        "datetime": df_test_preprocessed["datetime"],
        "pred": model.predict(X_test_inference_format).flatten(),
        "kWh": y_test_inference_format.flatten(),
    }
)

[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step 


In [103]:
loss = root_mean_squared_error(
    y_pred=holdout_preds["pred"], y_true=holdout_preds["kWh"]
)
loss

325299.8391794384

In [104]:
# Create the figure with step lines for both actual and predicted values
fig = px.line(
    holdout_preds,
    x="datetime",
    y=["kWh", "pred"],
    labels={
        "datetime": "Date",
        "value": "Energy Consumption (kWh)",
        "variable": "Series",
    },
    title="Holdout Fit without Retraining",
    line_shape="hv",  # Set line shape to horizontal-vertical for step chart
)

# Customize the layout
fig.update_layout(
    template="plotly_white",
    legend=dict(title=""),
    xaxis_title="Date",
    yaxis_title="Energy Consumption (kWh)",
)

# Show the figure
fig.show()

In [105]:
holdout_preds.to_csv("10_ANN_holdout_predictions_no_retraining.csv", index=False)

# Model Evaluation with Retraining

Evaluate on the holdout period by retraining with the best_hyperparams chosen from tuning on validation set.

- Retrain weekly: on the entire training + validation period, but WITHOUT early stopping, pick the exact number of epochs to train for and train on the full training data set
    - Restore the best weights on the training set? Or just go with it
- Add the new week of observations in each step as training data (sliding window) and remove the oldest week

In [106]:
X_traintest = np.vstack([X_train, X_test])
y_traintest = np.vstack([y_train, y_test])

In [107]:
# Daily frequency data: Retrain each week while retaining the old data
max_history = 365 * 2
retrain_freq = 7

retrain_points = np.arange(
    X_train.shape[0],
    X_traintest.shape[0],
    retrain_freq,
)

retrain_points

array([17351, 17358, 17365, ..., 26094, 26101, 26108])

In [108]:
# The year is not a perfect 52 weeks, 2 days are extra, need to
# be careful with the slicers build in a safety
X_traintest.shape[0] - retrain_points[-1]

3

In [48]:
holdout_pred_dfs = []

# Split point denotes where df is broken into history (2 years max)
# and test period (1 week, then move one week ahead and retrain)
for split_point in tqdm(retrain_points):

    # Get the training and test period for the current "viewpoint week"
    X_train_sliced = X_traintest[np.amax([0, split_point - max_history]) : split_point]
    X_test_sliced = X_traintest[
        split_point : np.amin([split_point + retrain_freq, X_traintest.shape[0]])
    ]

    y_train_sliced = y_traintest[np.amax([0, split_point - max_history]) : split_point]
    y_test_sliced = y_traintest[
        split_point : np.amin([split_point + retrain_freq, X_traintest.shape[0]])
    ]

    # Build the model dynamically based on num_layers and units_per_layer
    model = Sequential()
    model.add(Input(shape=(X_train.shape[1],)))

    if converted_params["activation"] == "ReLU":
        for _ in range(converted_params["num_layers"]):
            model.add(Dense(converted_params["units_per_layer"]))
            model.add(ReLU())
    elif converted_params["activation"] == "LeakyReLU":
        for _ in range(converted_params["num_layers"]):
            model.add(Dense(converted_params["units_per_layer"]))
            model.add(LeakyReLU())

    model.add(Dense(converted_params["h"]))

    # Compile the model
    model.compile(
        optimizer=Adam(learning_rate=converted_params["initial_learning_rate"]),
        # loss=rmse,
        loss=rmse,
        metrics=[RootMeanSquaredError()],
    )

    # Learning Rate scheduler
    lr_schedule = ReduceLROnPlateau(
        monitor="loss",
        factor=converted_params["decay_rate"],
        patience=converted_params["decay_patience"],
        verbose=0,
    )

    # Train the model
    history = model.fit(
        X_train_sliced,
        y_train_sliced,
        # validation_data=(None, None),
        epochs=max_epochs,
        batch_size=converted_params["batch_size"],
        callbacks=[lr_schedule],
        verbose=0,
    )

    # Append results
    y_preds_holdout = pd.DataFrame(
        {
            "pred": model.predict(X_test_sliced).flatten(),
            "kWh": y_test_sliced.flatten(),
        }
    )
    holdout_pred_dfs.append(y_preds_holdout)

  0%|          | 0/53 [00:00<?, ?it/s]

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 64ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 33ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 50ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 43ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 65ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 44ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 64ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 45ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 73ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 48ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 42ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 41ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 40ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 47

In [49]:
y_preds_holdout = (
    pd.concat(holdout_pred_dfs, axis=0)
    .reset_index(drop=True)
    .assign(datetime=lambda x: df_test["datetime"])
)

y_preds_holdout.head()

Unnamed: 0,pred,kWh,datetime
0,4948940.5,4952079.0,2023-09-01 00:00:00
1,4820018.5,4789383.0,2023-09-01 01:00:00
2,4756715.5,4689283.0,2023-09-01 02:00:00
3,4591802.0,4490485.0,2023-09-01 03:00:00
4,4422430.0,4345052.0,2023-09-01 04:00:00


In [None]:
y_preds_holdout.to_csv("10_ANN_holdout_predictions_with_retraining.csv", index=False)

In [51]:
loss = root_mean_squared_error(
    y_pred=y_preds_holdout["pred"], y_true=y_preds_holdout["kWh"]
)
loss

313062.02554887487

In [52]:
fig = px.line(
    y_preds_holdout,
    x="datetime",
    y=["pred", "kWh"],
    labels={
        "datetime": "Date",
        "value": "Energy Consumption (kWh)",
        "variable": "Series",
    },
    title="Holdout Fit",
    line_shape="hv",  # Set line shape to horizontal-vertical for step chart
)

fig.update_layout(
    template="plotly_white",
    legend=dict(title=""),
    xaxis_title="Date",
    yaxis_title="Energy Consumption (kWh)",
)

fig.show()