In [3]:
%reload_ext autoreload
%autoreload 2   

import sys
# Extend the import search path with the sibling `artifactory` directory
# (presumably where artifact, data, detector and utilities live - confirm).
# The path is relative, so it resolves against the notebook's working directory.
sys.path.append('../artifactory/')

In [11]:
import torch
import pickle
import warnings
import numpy as np
from lightning.pytorch import Trainer
from lightning.pytorch.loggers import WandbLogger
from lightning.pytorch.callbacks import ModelCheckpoint, LearningRateMonitor
from torch.utils.data import DataLoader
from pathlib import Path
from itertools import repeat
from artifact import Saw
from data import ArtifactDataset, CachedArtifactDataset
from detector import WindowTransformerDetector
from utilities import parameters_k
import wandb
from datetime import datetime
import pytz
from azureml.core import ScriptRunConfig, Workspace, Experiment, Environment

# Select the "high" float32 matmul precision mode in torch.
torch.set_float32_matmul_precision("high")
# Silence warnings whose message matches "does not have many workers".
warnings.filterwarnings("ignore", ".*does not have many workers.*")

In [5]:
import azureml
# Record which Azure ML SDK version this notebook was run with.
print("SDK version:", azureml.core.VERSION)

SDK version: 1.54.0


In [8]:
# Obtain the Azure ML workspace handle via Workspace.from_config().
# In the recorded run this triggered an interactive device-code login
# (see the cell output).
ws = Workspace.from_config()

Performing interactive authentication. Please follow the instructions on the terminal.


To sign in, use a web browser to open the page https://microsoft.com/devicelogin and enter the code LEFBX4ACQ to authenticate.
The following tenants don't contain accessible subscriptions. Use 'az login --allow-no-subscriptions' to have tenant level access.
20d9a90b-518d-4726-8ece-bb30a12e0f7a 'ELIA GROUP'


Interactive authentication successfully completed.


In [10]:
# Show which workspace was resolved: name, resource group, location and
# subscription id, one per line.
# NOTE(review): the subscription id ends up in the saved cell output; consider
# clearing the output before sharing the notebook.
print(ws.name, ws.resource_group, ws.location, ws.subscription_id, sep = '\n')

m3-mlops-mlw-dev
m3-mlops-dev
westeurope
8de3e85d-b97f-48c1-a25b-5bddf9dc484c


In [None]:
# Look up the environment named "AzureML-Minimal" in the workspace.
myenv = Environment.get(workspace=ws, name="AzureML-Minimal")

# Experiment handle named "Artifactory_train" in the same workspace.
# NOTE(review): `experiment` is not used by any other visible cell.
experiment_name = "Artifactory_train"
experiment = Experiment(workspace=ws, name=experiment_name)

In [12]:
import mlflow
# Point MLflow at the tracking URI reported by the Azure ML workspace.
mlflow.set_tracking_uri(ws.get_mlflow_tracking_uri())

In [None]:
# Run configuration for train.py with the `myenv` environment and the "local"
# compute target.
# NOTE(review): `src` is only constructed here; no visible cell submits it.
src = ScriptRunConfig(source_directory="",
                      script='train.py',
                      compute_target="local",
                      environment=myenv)

# # Set compute target
# # Skip this if you are running on your local computer
# src.run_config.target = my_compute_target

In [3]:
# Pick CUDA when torch reports it as available, otherwise fall back to the CPU.
# NOTE(review): `device` is only displayed here; no visible cell passes it on.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
device

device(type='cpu')

In [4]:
# All hyperparameters are kept in one plain dictionary so that the very same
# object can be handed to W&B as the run configuration.
CONFIG = {
    # width of window
    "width": 512,

    # convolution_* settings
    "convolution_features": [256, 128, 64, 32],
    "convolution_width": [5, 9, 17, 33],
    "convolution_dropout": 0.0,

    # transformer_* settings
    "transformer_heads": 2,
    "transformer_feedforward": 128,
    "transformer_layers": 2,
    "transformer_dropout": 0,

    # loss settings
    "loss": "mask",
    "loss_boost_fp": 0,

    # artifact used when building the datasets
    "artifact": Saw(min_width=4, max_width=32),

    # (no optimizer or learning-rate scheduler settings are defined yet)

    # parameters for study
    "batch_size": 32,  # 'values': [32, 64, 128]

    # W&B bookkeeping
    "wandb_group_name": "test_setup",
    "wandb_project_name": "artifactory",
}

All settings.

In [5]:
# model
# Apart from `window` (stored under CONFIG["width"]), every constructor argument
# has the same name as its CONFIG key, so those are forwarded in one go.
model = WindowTransformerDetector(
    window=CONFIG["width"],
    **{
        key: CONFIG[key]
        for key in (
            "convolution_features",
            "convolution_width",
            "convolution_dropout",
            "transformer_heads",
            "transformer_feedforward",
            "transformer_layers",
            "transformer_dropout",
            "loss",
            "loss_boost_fp",
        )
    },
)
# Alternative detector, kept for reference:
# model = ConvolutionDetector(convolution_features=[128, 64, 32],
#                             convolution_width=[5, 9, 33],
#                             convolution_dilation=[1, 1, 1],
#                             convolution_dropout=0.0,
#                             convolution_activation="sigmoid")

# Run name: <class name>_<parameters_k(model)>_<Europe/Amsterdam timestamp>.
size_tag = parameters_k(model)
timestamp = datetime.now(pytz.timezone('Europe/Amsterdam')).strftime('%d-%m-%Y_%H:%M:%S')
model_name = f"{model.__class__.__name__}_{size_tag}_{timestamp}"
# Stored in CONFIG as well so the W&B run can be given the same name.
CONFIG['wandb_run_name'] = model_name

# Location of the cached validation set.
# NOTE(review): the file name encodes only the window width, not the dataset
# selection or the artifact - an existing cache may not match the lists below.
val_file = Path(f"../data/validation{CONFIG['width']}.all.pkl")
# Datasets used for validation; commented-out entries are disabled.
val_datasets = [
    #"australian_electricity_demand_dataset",
    #"electricity_hourly_dataset",
    #"electricity_load_diagrams",
    #"HouseholdPowerConsumption1",
    #"HouseholdPowerConsumption2",
    #"london_smart_meters_dataset_without_missing_values",
    "solar_10_minutes_dataset",
    #"wind_farms_minutely_dataset_without_missing_values",
]
# Datasets used for training; commented-out entries are disabled.
train_datasets = [
    #"australian_electricity_demand_dataset",
    #"electricity_hourly_dataset",
    #"electricity_load_diagrams",
    #"HouseholdPowerConsumption1",
    #"HouseholdPowerConsumption2",
    #"london_smart_meters_dataset_without_missing_values",
    "solar_10_minutes_dataset",
    #"wind_farms_minutely_dataset_without_missing_values",
]
# Echo the generated run name.
print(model_name)

WindowTransformerDetector_528.96K_19-12-2023_14:49:17


Loading data.

In [6]:
def load_series(names: list[str], split: str, width=None, data_dir: str = "../data/processed"):
    """Load the pickled series of several datasets together with per-series weights.

    For every dataset ``<data_dir>/<name>_<split>.pickle`` is read and only the
    series strictly longer than ``width`` are kept. Within one dataset every
    kept series gets the weight ``1 / (number of kept series)``, so each dataset
    contributes the same total weight regardless of how many series it holds.
    The weights are normalised to sum to 1 before being returned.

    Args:
        names: Dataset names (file name stems without the split suffix).
        split: Split suffix used in the file name, e.g. ``"TRAIN"`` or ``"TEST"``.
        width: Minimum length a series has to exceed. Defaults to
            ``CONFIG["width"]`` when omitted.
        data_dir: Directory holding the pickle files.

    Returns:
        A tuple ``(series, weights)``: a list of float32 arrays and an array of
        weights of the same length that sums to 1.

    Raises:
        ValueError: If no dataset contains a series longer than ``width``.
    """
    if width is None:
        width = CONFIG["width"]

    series = []
    weights = []
    for name in names:
        # NOTE: pickle.load can execute arbitrary code - only use trusted files.
        with open(f"{data_dir}/{name}_{split}.pickle", "rb") as f:
            raw = [a for a in pickle.load(f) if len(a) > width]
        if not raw:
            # Nothing usable in this dataset: skipping it avoids the division
            # by zero below and keeps the remaining datasets loadable.
            warnings.warn(
                f"dataset {name!r} ({split}) has no series longer than {width}; skipping it",
                stacklevel=2,
            )
            continue
        series.extend(np.array(a).astype(np.float32) for a in raw)
        weights.extend(repeat(1 / len(raw), len(raw)))

    if not series:
        raise ValueError(
            f"no series longer than {width} found in datasets {list(names)!r} ({split})"
        )

    weights = np.array(weights)
    return series, weights / weights.sum()

In [7]:
# train
# Series and their weights come from the TRAIN split of the selected datasets.
train_data, train_weights = load_series(train_datasets, "TRAIN")

# Dataset built from those series with the configured window width and artifact.
train_dataset = ArtifactDataset(
    train_data,
    width=CONFIG["width"],
    padding=64,
    artifact=CONFIG["artifact"],
    weight=train_weights,
)

train_loader = DataLoader(
    train_dataset,
    batch_size=CONFIG["batch_size"],
)

In [8]:
# validation
if val_file.exists():
    # A cached validation set is present: load it as-is.
    val = CachedArtifactDataset(file=val_file)
else:
    # No cache yet: build a generator over the TEST split and store 2048
    # samples from it in `val_file`.
    val_data, val_weights = load_series(val_datasets, "TEST")
    val_gen = ArtifactDataset(
        val_data,
        width=CONFIG["width"],
        padding=64,
        artifact=CONFIG["artifact"],
        weight=val_weights,
    )
    val = CachedArtifactDataset.generate(val_gen, n=2048, to=val_file)

val_loader = DataLoader(val, batch_size=CONFIG["batch_size"])

Sanity check.

In [9]:
# Pull a single batch from the training loader and display its "data" entry
# to eyeball what the model will be fed.
batch = next(iter(train_loader))
batch["data"]

tensor([[0.8020, 0.8006, 0.7920,  ..., 0.0000, 0.0000, 0.0000],
        [0.0000, 0.0000, 0.0000,  ..., 0.6971, 0.7012, 0.7033],
        [0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0000],
        ...,
        [0.0000, 0.0000, 0.0000,  ..., 0.2615, 0.2385, 0.3421],
        [0.0000, 0.0019, 0.0076,  ..., 0.0000, 0.0000, 0.0000],
        [0.6250, 0.6420, 0.6534,  ..., 0.0000, 0.0000, 0.0000]])

Training!

In [13]:
# Initialize W&B run
# Project, group and run name all come from CONFIG; CONFIG itself is logged as
# the run configuration.
run = wandb.init(
    entity="hvonhue",
    project=CONFIG["wandb_project_name"],
    group=CONFIG["wandb_group_name"],
    name=CONFIG["wandb_run_name"],
    job_type='train',
    config=CONFIG,
)

# Extra config entry set on the active run.
wandb.config.type = 'baseline'

Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33mhvonhue[0m. Use [1m`wandb login --relogin`[0m to force relogin


In [11]:
# initialize callbacks
# Keep only the single best checkpoint, judged by the logged metric named
# "validation" (lower is better).
checkpointcallback = ModelCheckpoint(monitor="validation",
                                     mode="min",
                                     save_top_k=1)
# Log the learning rate at every step.
lr_monitor = LearningRateMonitor(logging_interval='step')

# initialize logger
# Project and run name are read from CONFIG - the same keys the wandb.init
# cell uses - so the logger and the run cannot drift apart when CONFIG changes.
logger = WandbLogger(project=CONFIG["wandb_project_name"],
                     name=CONFIG["wandb_run_name"],
                     log_model="all")

# initialize trainer
# Training stops after 50000 steps; validation runs every 1000 training batches.
trainer = Trainer(logger=logger,
                  max_steps=50000,
                  val_check_interval=1000,
                  callbacks=[checkpointcallback,
                             lr_monitor])

# train
trainer.fit(model,
            train_dataloaders=train_loader,
            val_dataloaders=val_loader)

GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33mhvonhue[0m. Use [1m`wandb login --relogin`[0m to force relogin



  | Name         | Type                        | Params
-------------------------------------------------------------
0 | convolutions | Sequential                  | 503 K 
1 | position     | SinusoidalPositionEmbedding | 0     
2 | dropout      | Dropout                     | 0     
3 | transformer  | TransformerEncoder          | 25.4 K
4 | linear       | Linear                      | 33    
-------------------------------------------------------------
528 K     Trainable params
0         Non-trainable params
528 K     Total params
2.116     Total estimated model params size (MB)


Epoch 0: |          | 7000/? [1:21:12<00:00,  1.44it/s, v_num=i6nb]        

In [None]:
# End Wandb run
# Closes the run object returned by wandb.init in the earlier cell.
run.finish()