In [1]:
import torch
import torch.nn as nn
from torch.utils.data import DataLoader

from buildings_bench import load_torch_dataset
from buildings_bench.models import model_factory

import tomli
from pathlib import Path
from os import environ
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
import numpy as np

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# === Data Handler ===
class DataHandler:
    """Loads BuildingsBench datasets and wraps them in PyTorch data loaders.

    Attributes:
        batch_size: Number of samples per batch produced by
            `create_dataloader`.
    """

    def __init__(self, batch_size=32):
        self.batch_size = batch_size

    def load_dataset(self, dataset_name, scaler_transform):
        """Materialise every item yielded by `load_torch_dataset` into a list.

        Args:
            dataset_name: Name forwarded to `load_torch_dataset`.
            scaler_transform: Forwarded as `apply_scaler_transform`.

        Returns:
            A list holding whatever `load_torch_dataset` yields. Callers in
            this notebook unpack each item as `(building_id, dataset)`.

        Raises:
            KeyError: If the `TRANSFORM_PATH` environment variable is unset.
        """
        from buildings_bench import load_torch_dataset

        # The fitted scaler parameters are looked up under TRANSFORM_PATH.
        transform_dir = Path(environ["TRANSFORM_PATH"])
        buildings = load_torch_dataset(
            dataset_name,
            apply_scaler_transform=scaler_transform,
            scaler_transform_path=transform_dir,
        )
        return list(buildings)

    def create_dataloader(self, dataset):
        """Return an in-order (non-shuffled) `DataLoader` over `dataset`."""
        loader = DataLoader(
            dataset,
            batch_size=self.batch_size,
            shuffle=False,
        )
        return loader

In [3]:
class TimeSeriesSinusoidalPeriodicEmbedding(nn.Module):
    """Embeds a periodic scalar feature through its sine/cosine encoding.

    The input is mapped to the pair `(sin(pi * x), cos(pi * x))` and then
    projected to `embedding_dim` features by a learned linear layer.
    """

    def __init__(self, embedding_dim: int):
        super().__init__()
        # Two input features: the sine and the cosine component.
        self.linear = nn.Linear(2, embedding_dim)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """`x` is expected to be [batch_size, seqlen, 1] in [-1, +1] range.

        Returns:
            The linear projection of the concatenated sin/cos features; for
            the expected input this is [batch_size, seqlen, embedding_dim].
        """
        angle = torch.pi * x
        sin_part = torch.sin(angle)
        cos_part = torch.cos(angle)
        periodic = torch.cat((sin_part, cos_part), dim=2)
        return self.linear(periodic)

class Model(nn.Module):
    """Shared base for the forecasters: feature embeddings plus an activation.

    Subclasses add the actual network and a `forward`. This base class only
    owns the per-feature embedding layers and the helper that turns a batch
    dict into one concatenated feature tensor.

    Attributes:
        context_len: Number of time steps used as model input (168).
        pred_len: Number of time steps to forecast (24).
        activation: Activation module selected by name.
        embeddings: `nn.ModuleDict` with one embedding layer per feature.
    """

    DEFAULT_CONTEXT_LEN = 168
    DEFAULT_PRED_LEN = 24

    def __init__(self, activation):
        super().__init__()
        self.context_len = self.DEFAULT_CONTEXT_LEN
        self.pred_len = self.DEFAULT_PRED_LEN
        self.activation = self._get_activation(activation)
        self.embeddings = self._create_embeddings()

    def _create_embeddings(self):
        """Build the per-feature embedding layers.

        The output widths add up to 256 per time step: 64 for the load and
        32 for each of the six remaining features.
        """
        table = nn.ModuleDict()
        table['power'] = nn.Linear(1, 64)
        table['building'] = nn.Embedding(2, 32)
        table['lat'] = nn.Linear(1, 32)
        table['lon'] = nn.Linear(1, 32)
        for calendar_feature in ('day_of_year', 'day_of_week', 'hour_of_day'):
            table[calendar_feature] = TimeSeriesSinusoidalPeriodicEmbedding(32)
        return table

    def _get_activation(self, name):
        """Return the activation module for `name` (case-insensitive).

        Names other than relu/tanh/gelu/leaky_relu fall back to ReLU.
        """
        factories = {
            "relu": nn.ReLU,
            "tanh": nn.Tanh,
            "gelu": nn.GELU,
            "leaky_relu": nn.LeakyReLU,
        }
        factory = factories.get(name.lower(), nn.ReLU)
        return factory()

    def _data_pre_process(self, x):
        """Embed every feature of batch dict `x` and concatenate along dim 2.

        Reads the keys `latitude`, `longitude`, `building_type`, `load`,
        `day_of_year`, `day_of_week` and `hour_of_day`. Tensors are assumed
        to be [batch, seq, 1] -- TODO confirm against the dataset.

        Returns:
            Concatenation in the order lat, lon, building type, day of year,
            day of week, hour of day, load.
        """
        emb = self.embeddings
        pieces = [
            emb['lat'](x['latitude']),
            emb['lon'](x['longitude']),
            # The trailing singleton axis is dropped before the index lookup.
            emb['building'](x['building_type'].squeeze(-1)),
        ]
        load_features = emb['power'](x['load'])
        for calendar_feature in ('day_of_year', 'day_of_week', 'hour_of_day'):
            pieces.append(emb[calendar_feature](x[calendar_feature]))
        pieces.append(load_features)
        return torch.cat(pieces, dim=2)


class NN(Model):
    """Feed-forward forecaster over the flattened context-window embeddings."""

    def __init__(self, activation):
        super().__init__(activation)
        self.model = self._build_model()

    def _build_model(self):
        """Build the MLP: three hidden layers (512, 256, 128), then the output.

        The input width is `context_len * 256`, where 256 is the per-step
        feature width produced by `_data_pre_process`. The same activation
        module instance follows every hidden layer.
        """
        widths = [self.context_len * 256, 512, 256, 128]
        layers = []
        for fan_in, fan_out in zip(widths[:-1], widths[1:]):
            layers.append(nn.Linear(fan_in, fan_out))
            layers.append(self.activation)
        layers.append(nn.Linear(widths[-1], self.pred_len))
        return nn.Sequential(*layers)

    def forward(self, x):
        """Forecast `pred_len` values from the first `context_len` steps.

        Args:
            x: Batch dict consumed by `_data_pre_process`.

        Returns:
            Tensor with a trailing singleton axis, i.e. [batch, pred_len, 1].
        """
        embedded = self._data_pre_process(x)
        n_samples = x['load'].shape[0]
        # Only the context window is used; later steps hold the targets.
        context = embedded[:, :self.context_len, :]
        flattened = context.reshape(n_samples, -1)
        forecast = self.model(flattened)
        return forecast.unsqueeze(-1)

class RNN(Model):
    """Two stacked vanilla RNN layers followed by a linear forecasting head."""

    def __init__(self, activation="relu"):
        super().__init__(activation)
        self.rnn1, self.rnn2, self.output_layer = self._build_model()

    def _build_model(self):
        """Create the two recurrent layers and the output projection.

        The first layer consumes the 256-wide per-step features produced by
        `_data_pre_process`; both layers use a hidden size of 128.
        """
        rnn1 = nn.RNN(256, 128, batch_first=True)
        rnn2 = nn.RNN(128, 128, batch_first=True)
        output_layer = nn.Linear(128, self.pred_len)
        return rnn1, rnn2, output_layer

    def forward(self, x):
        """Forecast `pred_len` values from the first `context_len` steps.

        Args:
            x: Batch dict consumed by `_data_pre_process`.

        Returns:
            Tensor with a trailing singleton axis, i.e. [batch, pred_len, 1].
        """
        ts_embed = self._data_pre_process(x)
        # Restrict the input to the context window. The training targets are
        # taken from x['load'][:, context_len:], so running the recurrence
        # over the full sequence would let the last hidden state see the very
        # values it is asked to predict (target leakage).
        context = ts_embed[:, :self.context_len, :]
        out1, _ = self.rnn1(context)
        out2, _ = self.rnn2(out1)
        last_hidden = self.activation(out2[:, -1, :])
        return self.output_layer(last_hidden).unsqueeze(-1)

class LSTM(Model):
    """Two stacked LSTM layers followed by a linear forecasting head."""

    def __init__(self, activation="relu"):
        super().__init__(activation)
        self.lstm1, self.lstm2, self.output_layer = self._build_model()

    def _build_model(self):
        """Create the two LSTM layers and the output projection.

        The first layer consumes the 256-wide per-step features produced by
        `_data_pre_process`; both layers use a hidden size of 128.
        """
        lstm1 = nn.LSTM(256, 128, batch_first=True)
        lstm2 = nn.LSTM(128, 128, batch_first=True)
        output_layer = nn.Linear(128, self.pred_len)
        return lstm1, lstm2, output_layer

    def forward(self, x):
        """Forecast `pred_len` values from the first `context_len` steps.

        Args:
            x: Batch dict consumed by `_data_pre_process`.

        Returns:
            Tensor with a trailing singleton axis, i.e. [batch, pred_len, 1].
        """
        ts_embed = self._data_pre_process(x)
        # Restrict the input to the context window. The training targets are
        # taken from x['load'][:, context_len:], so running the recurrence
        # over the full sequence would let the last hidden state see the very
        # values it is asked to predict (target leakage).
        context = ts_embed[:, :self.context_len, :]
        out1, _ = self.lstm1(context)
        out2, _ = self.lstm2(out1)
        last_hidden = self.activation(out2[:, -1, :])
        return self.output_layer(last_hidden).unsqueeze(-1)

class GRU(Model):
    """Two stacked GRU layers followed by a linear forecasting head."""

    def __init__(self, activation="relu"):
        super().__init__(activation)
        self.gru1, self.gru2, self.output_layer = self._build_model()

    def _build_model(self):
        """Create the two GRU layers and the output projection.

        The first layer consumes the 256-wide per-step features produced by
        `_data_pre_process`; both layers use a hidden size of 128.
        """
        gru1 = nn.GRU(256, 128, batch_first=True)
        gru2 = nn.GRU(128, 128, batch_first=True)
        output_layer = nn.Linear(128, self.pred_len)
        return gru1, gru2, output_layer

    def forward(self, x):
        """Forecast `pred_len` values from the first `context_len` steps.

        Args:
            x: Batch dict consumed by `_data_pre_process`.

        Returns:
            Tensor with a trailing singleton axis, i.e. [batch, pred_len, 1].
        """
        ts_embed = self._data_pre_process(x)
        # Restrict the input to the context window. The training targets are
        # taken from x['load'][:, context_len:], so running the recurrence
        # over the full sequence would let the last hidden state see the very
        # values it is asked to predict (target leakage).
        context = ts_embed[:, :self.context_len, :]
        out1, _ = self.gru1(context)
        out2, _ = self.gru2(out1)
        last_hidden = self.activation(out2[:, -1, :])
        return self.output_layer(last_hidden).unsqueeze(-1)

# class Transformer(Model):
#     def __init__(self, model_name, activation="relu"):
#         super().__init__(activation)

#         # Set config and checkpoint paths
#         ckpt_map = {
#             "TransformerWithGaussian-S": "Transformer_Gaussian_S.pt",
#             "TransformerWithGaussian-M": "Transformer_Gaussian_M.pt",
#             "TransformerWithGaussian-L": "Transformer_Gaussian_L.pt"
#         }
#         environ["CONFIG_PATH"] = f"{environ['REPO_PATH']}/buildings_bench/configs/{model_name}.toml"
#         environ["CHECKPOINT_PATH"] = f"{Path(environ['BUILDINGS_BENCH']).parent}/checkpoints/{ckpt_map[model_name]}"  # do not read environ['PATH']: that is the OS executable search path

#         # Load model and prediction function
#         with open(environ["CONFIG_PATH"], "rb") as f:
#             tomli_args = tomli.load(f)
#         model_args = tomli_args['model']

#         raw_model, _, predict_fn = model_factory(model_name, model_args)
#         self.transformer_model = raw_model.load_from_checkpoint(Path(environ["CHECKPOINT_PATH"]))
#         self.predict_fn = predict_fn

#     def forward(self, x):
#         return self.predict_fn(x)[0]

In [4]:
class Trainer:
    """Builds, trains and evaluates one forecasting model.

    Args:
        model_name: One of 'NN', 'RNN', 'LSTM' or 'GRU'.
        device: Torch device (or device string) the model and batches are
            moved to.
        scaler_transform: Name of the load scaler; stored for reference only.
        activation: Activation name forwarded to the model constructor.
        optimizer_name: 'adam', 'sgd' or 'adamw' (case-insensitive); any other
            value falls back to Adam.
        lr: Learning rate passed to the optimizer.
        batch_size: Batch size of the internally created `DataHandler`.

    Raises:
        ValueError: If `model_name` is not a supported model.
    """

    def __init__(self, model_name, device, scaler_transform, activation='relu',
                 optimizer_name='adam', lr=1e-3, batch_size=32):
        self.model_name = model_name
        self.device = device
        self.scaler_transform = scaler_transform
        self.activation = activation
        self.optimizer_name = optimizer_name
        self.lr = lr
        self.batch_size = batch_size

        self.model = self._load_model()
        self.optimizer = self._get_optimizer()
        self.loss_fn = nn.MSELoss()
        self.handler = DataHandler(batch_size=batch_size)

    def _load_model(self):
        """Instantiate the model named by `self.model_name` on `self.device`.

        Raises:
            ValueError: For an unknown name. Failing here gives a clear
                message instead of a later AttributeError on a `None` model.
        """
        model_map = {
            'NN': NN,
            'RNN': RNN,
            'LSTM': LSTM,
            'GRU': GRU
        }
        if self.model_name not in model_map:
            raise ValueError(
                f"Unknown model_name {self.model_name!r}; "
                f"expected one of {sorted(model_map)}"
            )
        return model_map[self.model_name](activation=self.activation).to(self.device)

    def _get_optimizer(self):
        """Create the optimizer over the model parameters (Adam if unknown)."""
        opt_map = {
            'adam': torch.optim.Adam,
            'sgd': torch.optim.SGD,
            'adamw': torch.optim.AdamW
        }
        optimizer_cls = opt_map.get(self.optimizer_name.lower(), torch.optim.Adam)
        return optimizer_cls(self.model.parameters(), lr=self.lr)

    def _to_device(self, batch):
        """Return a copy of the batch dict with every value moved to the device."""
        return {key: value.to(self.device) for key, value in batch.items()}

    def train(self, train_buildings, epochs=5):
        """Train the model with MSE loss on every building, for `epochs` passes.

        Args:
            train_buildings: Iterable of `(building_id, dataset)` pairs. It is
                iterated once per epoch, so it must be re-iterable (a list).
            epochs: Number of passes over all buildings.

        Returns:
            The trained model, switched to eval mode.
        """
        self.model.train()
        for epoch in range(epochs):
            # Sum (not mean) of the per-batch losses over the whole epoch.
            total_loss = 0.0
            for _building_id, building_dataset in train_buildings:
                dataloader = self.handler.create_dataloader(building_dataset)
                for batch in dataloader:
                    batch = self._to_device(batch)
                    self.optimizer.zero_grad()
                    predictions = self.model(batch)
                    # Targets are the load values after the context window.
                    targets = batch['load'][:, self.model.context_len:, 0]
                    loss = self.loss_fn(predictions[:, :, 0], targets)
                    loss.backward()
                    self.optimizer.step()
                    total_loss += loss.item()
            print(f"[{self.model_name}] Epoch {epoch + 1}: Loss = {total_loss:.4f}")
        self.model.eval()
        return self.model

    def evaluate(self, test_buildings):
        """Compute per-building MAE, RMSE and R2 and average them.

        Predictions and targets are passed through each dataset's
        `datasets[0].load_transform.undo_transform` before the metrics are
        computed.

        Args:
            test_buildings: Iterable of `(building_id, dataset)` pairs.

        Returns:
            `(results, mae, rmse, r2)`, where `results` maps each building id
            to its `(predictions, targets)` CPU tensors and the metrics are
            unweighted means over buildings. Buildings that yield no batches
            are skipped; if no building contributes, the metrics are NaN.
        """
        self.model.eval()
        results = {}
        mae_total = 0.0
        rmse_total = 0.0
        r2_total = 0.0
        count = 0
        for building_id, building_dataset in test_buildings:
            inverse_transform = building_dataset.datasets[0].load_transform.undo_transform
            dataloader = self.handler.create_dataloader(building_dataset)
            target_list = []
            prediction_list = []
            with torch.no_grad():
                for batch in dataloader:
                    batch = self._to_device(batch)
                    predictions = self.model(batch)
                    targets = batch['load'][:, self.model.context_len:]
                    targets = inverse_transform(targets)
                    predictions = inverse_transform(predictions)
                    prediction_list.append(predictions.detach().cpu())
                    target_list.append(targets.detach().cpu())
            if not prediction_list:
                # torch.cat rejects an empty list; nothing to score here.
                continue
            predictions_all = torch.cat(prediction_list)
            targets_all = torch.cat(target_list)
            errors = predictions_all - targets_all
            squared_errors = errors ** 2
            mae = errors.abs().mean().item()
            rmse = torch.sqrt(squared_errors.mean()).item()
            total_variation = ((targets_all - targets_all.mean()) ** 2).sum()
            r2 = 1 - (squared_errors.sum() / total_variation).item()
            mae_total += mae
            rmse_total += rmse
            r2_total += r2
            count += 1
            results[building_id] = (predictions_all, targets_all)
        if count == 0:
            # No building was scored: report NaN rather than dividing by zero.
            nan = float('nan')
            return results, nan, nan, nan
        return results, mae_total / count, rmse_total / count, r2_total / count

In [None]:
# === Main Execution ===

# TBD: ['buildings-900k-test', 'sceaux', 'borealis', 'ideal', 'bdg-2', 'bdg-2:panther', 'bdg-2:fox', 'bdg-2:rat', 'bdg-2:bear', 'electricity', 'smart', 'lcl']

# Configuration dictionary
# Options:
# dataset_name: ideal, electricity, lcl, sceaux
# epochs: 10, 20, 30
# activation: relu, tanh, leaky_relu, gelu
# optimizer: adam, sgd, adamw

config = {
    # Root directory holding the repository checkout and the dataset. Set the
    # BUILDINGS_BENCH_ROOT environment variable to run outside this account.
    "PATH": environ.get("BUILDINGS_BENCH_ROOT", "/pscratch/sd/n/nrushad"),
    "dataset_name": "ideal",
    "scaler_transform": "boxcox",
    "batch_size": 32,
    "epochs": 5,
    "activation": "gelu",
    "optimizer_name": "adamw",
    "lr": 1e-3,
    "seed": 42
}

# Set environment variables.
# NOTE: the process-wide PATH variable is deliberately left alone. It is the
# operating system's executable search path, and overwriting it breaks every
# later shell or subprocess call made from this kernel.
environ["REPO_PATH"] = f"{config['PATH']}/BuildingsBenchTutorial/BuildingsBench"
environ["BUILDINGS_BENCH"] = f"{config['PATH']}/Dataset"
environ["TRANSFORM_PATH"] = f"{config['PATH']}/Dataset/metadata/transforms"

# Set device
device = "cuda:0" if torch.cuda.is_available() else "cpu"

# Load and split dataset (first 80% of the buildings train, the rest test)
handler = DataHandler(batch_size=config["batch_size"])
all_buildings = handler.load_dataset(config["dataset_name"], config["scaler_transform"])
split_index = int(0.8 * len(all_buildings))
train_buildings = all_buildings[:split_index]
test_buildings = all_buildings[split_index:]
if not train_buildings or not test_buildings:
    raise ValueError(
        f"Dataset {config['dataset_name']!r} has {len(all_buildings)} building(s); "
        "at least 2 are needed for an 80/20 train/test split."
    )

model_names = [
    'NN',
    'RNN',
    'LSTM',
    'GRU',
    # 'TransformerWithGaussian-S',
    # 'TransformerWithGaussian-M',
    # 'TransformerWithGaussian-L'
]

for model_name in model_names:
    print(f"\n--- Training {model_name} ---")

    # Re-seed per model so each run is reproducible and does not depend on
    # which models were trained before it.
    torch.manual_seed(config["seed"])

    trainer = Trainer(
        model_name=model_name,
        device=device,
        scaler_transform=config["scaler_transform"],
        activation=config["activation"],
        optimizer_name=config["optimizer_name"],
        lr=config["lr"]
    )
    # The trainer creates its own DataHandler; reuse the configured one so
    # that config["batch_size"] is actually honoured.
    trainer.handler = handler

    trainer.train(train_buildings, epochs=config["epochs"])

    # Evaluate after training
    results, mae, rmse, r2 = trainer.evaluate(test_buildings)

    print(f"[{model_name}] Evaluation Metrics:")
    print(f"  MAE  = {mae:.4f}")
    print(f"  RMSE = {rmse:.4f}")
    print(f"  R²   = {r2:.4f}")


--- Training NN ---
[NN] Epoch 1: Loss = 481.2123
[NN] Epoch 2: Loss = 366.1863


In [None]:
# BuildingsBench model config (.toml) directory: /pscratch/sd/n/nrushad/BuildingsBenchTutorial/BuildingsBench/buildings_bench/configs