# Imports

In [2]:
!pip install optuna
!pip install darts

Collecting optuna
  Downloading optuna-4.1.0-py3-none-any.whl.metadata (16 kB)
Collecting alembic>=1.5.0 (from optuna)
  Downloading alembic-1.14.0-py3-none-any.whl.metadata (7.4 kB)
Collecting colorlog (from optuna)
  Downloading colorlog-6.9.0-py3-none-any.whl.metadata (10 kB)
Collecting Mako (from alembic>=1.5.0->optuna)
  Downloading Mako-1.3.8-py3-none-any.whl.metadata (2.9 kB)
Downloading optuna-4.1.0-py3-none-any.whl (364 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m364.4/364.4 kB[0m [31m8.7 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading alembic-1.14.0-py3-none-any.whl (233 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m233.5/233.5 kB[0m [31m22.9 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading colorlog-6.9.0-py3-none-any.whl (11 kB)
Downloading Mako-1.3.8-py3-none-any.whl (78 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m78.6/78.6 kB[0m [31m7.3 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: Ma

In [3]:
import numpy as numpy
import pandas as pd

from darts import TimeSeries
from darts.models import TransformerModel
from darts.metrics import mae, mape

#from powderalert.ml_logic.data import fetch_weather_data

import optuna
import torch
from pytorch_lightning.callbacks.early_stopping import EarlyStopping

Dask dataframe query planning is disabled because dask-expr is not installed.

You can install it with `pip install dask[dataframe]` or `conda install dask`.
This will raise in a future version.



# Improving Model Performance | Target = Snowfall

## Getting Started | Import basic dataset & preprocess

In [None]:
# for future reference - get data directly from api?
# fetch_weather_data()

In [5]:
# in the meantime use preprocessed csv (refer to notebook_Anita-Gei_get_preprocessed_data.ipynb for more info)

df = pd.read_csv('/content/historical_data_preprocessed.csv')
df['date'] = pd.to_datetime(df['date'])
df = df.drop(columns='Unnamed: 0')
df = df.set_index('date')
df.tail(2)

Unnamed: 0_level_0,snowfall,weather_code_encoded,temperature_2m,relative_humidity_2m,dew_point_2m,precipitation,rain,snow_depth,pressure_msl,surface_pressure,...,wind_direction_10m,wind_direction_100m,wind_gusts_10m,sunshine_duration,hour_sin,hour_cos,day_of_week_sin,day_of_week_cos,month_sin,month_cos
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2024-01-01 22:00:00,0.0,3.0,-1.911309,0.072451,-1.882762,-0.389763,-0.272938,1.403548,-0.129623,-1.404447,...,-0.41365,-0.063053,-0.09727,-0.565147,-0.5,0.866025,0.0,1.0,0.0,1.0
2024-01-01 23:00:00,0.0,1.0,-1.875245,-0.040464,-1.882762,-0.389763,-0.272938,1.403548,-0.280686,-1.495166,...,-0.427446,-0.142219,-0.044957,-0.565147,-0.258819,0.965926,0.0,1.0,0.0,1.0


In [6]:
print(df.shape)

(131496, 28)


## Feature Engineering

In [7]:
# Create lag features for snowfall and precipitation
lags = [1, 3, 6, 12]
for lag in lags:
    df[f'snowfall_lag_{lag}'] = df['snowfall'].shift(lag)
    df[f'precipitation_lag_{lag}'] = df['precipitation'].shift(lag)

df.tail(2)

Unnamed: 0_level_0,snowfall,weather_code_encoded,temperature_2m,relative_humidity_2m,dew_point_2m,precipitation,rain,snow_depth,pressure_msl,surface_pressure,...,month_sin,month_cos,snowfall_lag_1,precipitation_lag_1,snowfall_lag_3,precipitation_lag_3,snowfall_lag_6,precipitation_lag_6,snowfall_lag_12,precipitation_lag_12
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2024-01-01 22:00:00,0.0,3.0,-1.911309,0.072451,-1.882762,-0.389763,-0.272938,1.403548,-0.129623,-1.404447,...,0.0,1.0,0.0,-0.389763,0.0,-0.389763,0.0,-0.389763,0.0,-0.389763
2024-01-01 23:00:00,0.0,1.0,-1.875245,-0.040464,-1.882762,-0.389763,-0.272938,1.403548,-0.280686,-1.495166,...,0.0,1.0,0.0,-0.389763,0.0,-0.389763,0.0,-0.389763,0.0,-0.389763


In [8]:
# Add rolling statistics
df['cloud_cover_rolling_mean_3h'] = df['cloud_cover'].rolling(window=3).mean()
df['precipitation_rolling_std_6h'] = df['precipitation'].rolling(window=6).std()
df.tail(2)

Unnamed: 0_level_0,snowfall,weather_code_encoded,temperature_2m,relative_humidity_2m,dew_point_2m,precipitation,rain,snow_depth,pressure_msl,surface_pressure,...,snowfall_lag_1,precipitation_lag_1,snowfall_lag_3,precipitation_lag_3,snowfall_lag_6,precipitation_lag_6,snowfall_lag_12,precipitation_lag_12,cloud_cover_rolling_mean_3h,precipitation_rolling_std_6h
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2024-01-01 22:00:00,0.0,3.0,-1.911309,0.072451,-1.882762,-0.389763,-0.272938,1.403548,-0.129623,-1.404447,...,0.0,-0.389763,0.0,-0.389763,0.0,-0.389763,0.0,-0.389763,0.182911,0.0
2024-01-01 23:00:00,0.0,1.0,-1.875245,-0.040464,-1.882762,-0.389763,-0.272938,1.403548,-0.280686,-1.495166,...,0.0,-0.389763,0.0,-0.389763,0.0,-0.389763,0.0,-0.389763,0.3093,0.0


In [9]:
# Drop rows with NaN values (introduced by lags and rolling)
df = df.dropna()
print(df.shape)

(131484, 38)


## Use Subset of Data while finding the "best" Model!

In [10]:
train_df = df[-30000:]
print(train_df.shape)
train_df.tail(2)

(30000, 38)


Unnamed: 0_level_0,snowfall,weather_code_encoded,temperature_2m,relative_humidity_2m,dew_point_2m,precipitation,rain,snow_depth,pressure_msl,surface_pressure,...,snowfall_lag_1,precipitation_lag_1,snowfall_lag_3,precipitation_lag_3,snowfall_lag_6,precipitation_lag_6,snowfall_lag_12,precipitation_lag_12,cloud_cover_rolling_mean_3h,precipitation_rolling_std_6h
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2024-01-01 22:00:00,0.0,3.0,-1.911309,0.072451,-1.882762,-0.389763,-0.272938,1.403548,-0.129623,-1.404447,...,0.0,-0.389763,0.0,-0.389763,0.0,-0.389763,0.0,-0.389763,0.182911,0.0
2024-01-01 23:00:00,0.0,1.0,-1.875245,-0.040464,-1.882762,-0.389763,-0.272938,1.403548,-0.280686,-1.495166,...,0.0,-0.389763,0.0,-0.389763,0.0,-0.389763,0.0,-0.389763,0.3093,0.0


## Create Time Series

In [11]:
feature_columns = train_df.drop(columns=['snowfall']).columns.tolist()

snowfall_series = TimeSeries.from_dataframe(train_df, value_cols='snowfall')
feature_series = TimeSeries.from_dataframe(train_df, value_cols=feature_columns)

## Split into Train, Validation and Test Sets

In [12]:
y_train_val, y_test = snowfall_series.split_before(0.8)
y_train, y_val = y_train_val.split_before(0.8)

X_train_val, X_test = feature_series.split_before(0.8)
X_train, X_val = X_train_val.split_before(0.8)

## Optimize Hyperparamters with Optuna

In [13]:
device = "cuda" if torch.cuda.is_available() else "cpu"

In [23]:
def objective(trial):
    d_model = trial.suggest_categorical("d_model", [128, 256, 512])
    nhead = trial.suggest_categorical("nhead", [4, 8])
    dropout = trial.suggest_categorical("dropout", [0.1, 0.2, 0.3])
    batch_size = trial.suggest_categorical("batch_size", [16, 32, 64])
    num_encoder_layers = trial.suggest_categorical("num_encoder_layers", [2, 3, 6])
    num_decoder_layers = trial.suggest_categorical("num_decoder_layers", [2, 3, 6])
    dim_feedforward = trial.suggest_categorical("dim_feedforward", [256, 512, 1024])
    lr = trial.suggest_float("lr", 1e-4, 1e-2, log=True)

    es = EarlyStopping(
      monitor="val_loss",
      patience=5,
      min_delta=0.05,
      mode='min',
    )

    model = TransformerModel(
        input_chunk_length=48,
        output_chunk_length=48,
        batch_size=batch_size,
        n_epochs=100,
        model_name="transformer_snowfall_optuna",
        d_model=d_model,
        nhead=nhead,
        dim_feedforward=dim_feedforward,
        num_encoder_layers=num_encoder_layers,
        num_decoder_layers=num_decoder_layers,
        dropout=dropout,
        optimizer_kwargs={"lr": lr},
        pl_trainer_kwargs={
            "accelerator": "gpu",
            "devices": -1,
            "callbacks": [es]},
        random_state=42
    )

    for step in range(100):
        model.fit(
          series=y_train,
          past_covariates=X_train,
          val_series=y_val,
          val_past_covariates=X_val,
          verbose=False
        )

        intermediate_value = mae(y_test[:48], model.predict(n=48, past_covariates=X_train))

        trial.report(intermediate_value, step)

        if trial.should_prune():
            raise optuna.TrialPruned()


    predictions = model.predict(n=48, past_covariates=X_train)
    return mae(y_test[:48], predictions)

In [None]:
# Run Optuna optimization
pruner = optuna.pruners.MedianPruner()

study = optuna.create_study(direction="minimize", pruner=pruner)
study.optimize(objective, n_trials=5)

[I 2024-12-17 10:49:16,420] A new study created in memory with name: no-name-3a6413d6-540b-4dca-93eb-f798d43c7f6c
INFO:pytorch_lightning.utilities.rank_zero:GPU available: True (cuda), used: True
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.utilities.rank_zero:HPU available: False, using: 0 HPUs
INFO:pytorch_lightning.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
INFO:pytorch_lightning.utilities.rank_zero:GPU available: True (cuda), used: True
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.utilities.rank_zero:HPU available: False, using: 0 HPUs
INFO:pytorch_lightning.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Predicting: |          | 0/? [00:00<?, ?it/s]

  return np.nanmean(
  vals = component_reduction(vals, axis=COMP_AX)
INFO:pytorch_lightning.utilities.rank_zero:GPU available: True (cuda), used: True
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.utilities.rank_zero:HPU available: False, using: 0 HPUs
INFO:pytorch_lightning.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


In [None]:
# Print best parameters
print(study.best_value)
print(f"Best trial: {study.best_trial.params}")

### Alternatively Grid Search?

In [26]:
es = EarlyStopping(
  monitor="val_loss",
  patience=5,
  min_delta=0.05,
  mode='min',
)

trf_model = TransformerModel(
        input_chunk_length=48,
        output_chunk_length=48,
        batch_size=32,
        n_epochs=100,
        model_name="transformer_snowfall_grid",
        d_model=64,
        nhead=3,
        dim_feedforward=2,
        num_encoder_layers=2,
        num_decoder_layers=2,
        dropout=0.01,
        pl_trainer_kwargs={
            "accelerator": "gpu",
            "devices": -1,
            "callbacks": [es]},
        random_state=42
    )

param_grid = {
    "d_model": [128, 256, 512],
    "nhead": [4, 8],
    "num_encoder_layers": [2, 4, 6],
    "num_decoder_layers": [2, 4, 6],
    "dim_feedforward": [256, 512, 1024],
    "dropout": [0.1, 0.2],
    "activation": ["relu", "gelu"]
}

from itertools import product
import numpy as np

param_combinations = list(product(*param_grid.values()))

best_mae = float('inf')
best_params = {}

for params in param_combinations:
    d_model, nhead, num_encoder, num_decoder, dim_ff, dropout, activation = params
    model = TransformerModel(
        input_chunk_length=48,
        output_chunk_length=48,
        d_model=d_model,
        nhead=nhead,
        num_encoder_layers=num_encoder,
        num_decoder_layers=num_decoder,
        dim_feedforward=dim_ff,
        dropout=dropout,
        activation=activation
    )

    trf_model.fit(
          series=y_train,
          past_covariates=X_train,
          val_series=y_val,
          val_past_covariates=X_val,
          verbose=False
        )

    forecast = trf_model.historical_forecasts(
        X_val,
        forecast_horizon=48,
        stride=1,
        retrain=False
    )
    current_mae = mae(y_val, forecast).mean().item()

    if current_mae < best_mae:
        best_mae = mae
        best_params = {
            "d_model": d_model,
            "nhead": nhead,
            "num_encoder_layers": num_encoder,
            "num_decoder_layers": num_decoder,
            "dim_feedforward": dim_ff,
            "dropout": dropout,
            "activation": activation
        }

print(f"Best Hyperparameters: {best_params}")
print(f"Best MAE: {best_mae}")

AssertionError: embed_dim must be divisible by num_heads

## Initialize Model with optimized Parameters

In [None]:
#fill after completing optuna study!
model = TransformerModel(
    input_chunk_length=48,
    output_chunk_length=48,
    batch_size=32,
    n_epochs=20,
    model_name="snowfall_model",
    nr_epochs_val_period=1,
    d_model=64,
    nhead=4,
    num_encoder_layers=2,
    num_decoder_layers=2,
    dropout=0.1,
    random_state=42,
    likelihood=None  # Regression task
)


## Fit Model

In [None]:
model.fit(
    series=y_train,
    past_covariates=X_train,
    val_series=y_val,
    val_past_covariates=X_val,
    verbose=True
)

## Make Predictions

In [None]:
forecast = model.predict(n=48, past_covariates=X_test)