In [1]:
# import sys

# !git clone https://github.com/TimeBinFM/binconvfm.git
# %cd binconvfm
# !{sys.executable} -m pip install .

In [2]:
from binconvfm.utils.download.quick import get_file_names_per_dataset, get_target_dataset, dataset_to_window_tensors
from binconvfm.utils.download.gift_eval import list_arrow_files

## Base dataset preparation

In [3]:
import datasets
import torch

def get_target_dataset(dataset_name: str, file_names_to_process: list[str], batch_size=8192, window_size=32, 
                       prediction_depth=1, step=1, pre_batch_ts_count=100):
    """Stream the 'target' column of a dataset as batches of sliding windows.

    The dataset is opened with ``datasets.load_dataset(..., streaming=True)``,
    reduced to its 'target' column, grouped ``pre_batch_ts_count`` series at a
    time, and every group is converted into window tensors.

    NOTE(review): this definition rebinds the ``get_target_dataset`` name that
    is also imported from ``binconvfm.utils.download.quick`` earlier in the
    notebook; the local version wins from this cell onwards.

    Args:
        dataset_name: Name passed to ``datasets.load_dataset``.
        file_names_to_process: Passed as ``data_files`` to ``load_dataset``.
        batch_size: Maximum number of windows per emitted tensor.
        window_size: Number of input steps in each window.
        prediction_depth: Number of trailing steps appended to each window.
        step: Stride between the starts of consecutive windows.
        pre_batch_ts_count: Number of series grouped together before windowing.

    Returns:
        The streaming dataset produced by the load/select/batch/map chain. Each
        element maps 'target' to a tuple of 2-D tensors with
        ``window_size + prediction_depth`` columns and at most ``batch_size``
        rows. The tuple is empty when no series in the group is long enough
        to produce a window.
    """
    window_length = window_size + prediction_depth

    def item_to_window_tensor(target):
        """Return a (num_windows, window_length) tensor, or None if too short."""
        series = torch.tensor(target)

        # `unfold` raises when the unfolded axis is shorter than the window,
        # so a series that cannot produce a single window is skipped instead
        # of aborting the whole stream.
        if series.dim() == 0 or series.shape[-1] < window_length:
            return None

        windows = series.unfold(dimension=series.dim() - 1, size=window_length, step=step)

        # Collapse every leading axis so that results from series with a
        # different number of axes can be concatenated along dim 0.
        return windows.reshape(-1, window_length)

    def to_window_tensors(batch):
        """Window every series of the group and re-chunk into batch_size rows."""
        windows = [
            tensor
            for tensor in map(item_to_window_tensor, batch['target'])
            if tensor is not None
        ]
        if not windows:
            # torch.cat rejects an empty list; emit no batches for this group.
            return {'target': ()}
        return {
            'target': torch.split(torch.cat(windows), batch_size)
        }

    return (
        datasets.load_dataset(
            dataset_name,
            split='train',
            data_files=file_names_to_process,
            streaming=True,
        )
            .select_columns(['target'])
            .batch(pre_batch_ts_count)
            .map(to_window_tensors)
    )

In [4]:
# Repository that is streamed, and how many files to take per sub-dataset.
dataset_name = "Salesforce/GiftEvalPretrain"
files_per_ds = 4

# NOTE(review): `ds_name` is not read by any of the visible cells; `get_ds`
# takes the sub-dataset name as an argument instead.
ds_name = "buildings_900k"

# Lookup indexed by sub-dataset name in `get_ds`.
file_names_per_dataset = get_file_names_per_dataset(dataset_name)

def get_ds(ds_name):
    """Build the windowed streaming dataset for one sub-dataset.

    Takes the first `files_per_ds` entries of
    `file_names_per_dataset[ds_name]` and hands them, together with the global
    `dataset_name`, to `get_target_dataset` using its default window settings.
    """
    selected_files = file_names_per_dataset[ds_name][:files_per_ds]
    return get_target_dataset(dataset_name, selected_files)

# Alternative sub-dataset, left here for quick switching:
# ds1 = get_ds('buildings_900k')
# Streaming window dataset consumed by the cells below.
ds1 = get_ds('borg_cluster_data_2011')

In [5]:
# Fetch the first element of the stream (presumably a smoke test of the
# pipeline; `tmp` is not referenced again in the visible cells).
tmp = next(iter(ds1))

## Model preparation

In [6]:
import torch
from torch.utils.data import Dataset, DataLoader
import torch.nn.functional as F
import pytorch_lightning as pl
from torch import nn

class LinearRegressionModel(pl.LightningModule):
    """Single linear layer that maps a window of inputs to its prediction.

    Args:
        window_size: Number of input features per sample (default 32).
        prediction_depth: Number of predicted values per sample (default 1).

    The defaults reproduce the original fixed ``nn.Linear(32, 1)`` layer.
    """

    def __init__(self, window_size=32, prediction_depth=1):
        super().__init__()
        self.linear = nn.Linear(window_size, prediction_depth)

    def forward(self, x):
        return self.linear(x)  # Output shape: [batch_size, prediction_depth]

    def training_step(self, batch, batch_idx):
        """Compute and log the MSE between the prediction and the target."""
        x, y = batch
        x = x.float()
        y = y.float()
        y_hat = self(x)
        # A target of shape [batch] against a prediction of shape [batch, 1]
        # would be broadcast by mse_loss to a [batch, batch] comparison.
        # When only the shape differs (same element count), align it.
        if y.shape != y_hat.shape and y.numel() == y_hat.numel():
            y = y.reshape(y_hat.shape)
        loss = F.mse_loss(y_hat, y)
        self.log("train_loss", loss)
        return loss

    def configure_optimizers(self):
        # Fused AdamW implementation.
        # NOTE(review): fused=True may not be supported for CPU parameters on
        # every PyTorch version - confirm before running without a GPU.
        return torch.optim.AdamW(self.parameters(), lr=1e-3, fused=True)

## Preprocess data

In [7]:
import torch
from torch.utils.data import Dataset

class TensorDataset(Dataset):
    """Map-style dataset over a tensor whose first axis enumerates samples.

    For each index, everything but the last entry along the second axis is
    returned as the input and the last entry is returned as the target.
    """

    def __init__(self, tensor: torch.Tensor):
        self.tensor = tensor

    def __len__(self):
        # Number of samples equals the length of the first axis.
        return self.tensor.shape[0]

    def __getitem__(self, idx):
        features = self.tensor[idx, :-1]
        label = self.tensor[idx, -1]
        return (features, label)

In [8]:
import torch
from torch.utils.data import IterableDataset

class WrapperDataset(IterableDataset):
    """Iterable dataset that turns batches of window tensors into (X, y) pairs.

    ``base_dataset`` is iterated as-is; every element must provide a 'target'
    entry that is an iterable of 2-D tensors. Each of those tensors is split
    column-wise: the last ``prediction_depth`` columns become ``y`` and the
    remaining leading columns become ``X``.

    Args:
        base_dataset: Iterable of mappings with a 'target' entry.
        prediction_depth: Number of trailing columns used as the target.
            Defaults to 1, which matches the original fixed split.

    Raises:
        ValueError: If ``prediction_depth`` is smaller than 1.
    """

    def __init__(self, base_dataset, prediction_depth=1):
        super().__init__()
        if prediction_depth < 1:
            # A depth of 0 would make the `-depth` slices below select the
            # wrong columns (`[:-0]` is empty, `[-0:]` is everything).
            raise ValueError(
                f"prediction_depth must be >= 1, got {prediction_depth}"
            )
        self.base_dataset = base_dataset
        self.prediction_depth = prediction_depth

    def __iter__(self):
        depth = self.prediction_depth
        for batch in self.base_dataset:
            for item in batch['target']:
                X = item[:, :-depth]
                y = item[:, -depth:]
                yield X, y

In [9]:
# Wrap the streaming dataset so that iterating it yields (X, y) tuples; this is
# the object handed to the DataLoader in the training cell.
window_tensor_dataset = WrapperDataset(ds1)

## Train a model

In [10]:
import torch
import pytorch_lightning as pl
from pytorch_lightning.profilers import PyTorchProfiler
from torch.utils.data import DataLoader

# ----------------------------
# Dataset and DataLoader
# ----------------------------
# window_tensor_dataset is an IterableDataset that already yields complete
# (X, y) batches, so automatic batching is switched off with batch_size=None.
dataloader = DataLoader(
    window_tensor_dataset,
    shuffle=False,
    batch_size=None,
    num_workers=1,        # CPU parallelism for data loading
    pin_memory=True,       # speeds up CPU->GPU transfer
    persistent_workers=True,  # keep workers alive between epochs
    # NOTE(review): up to 1000 pre-built batches are queued per worker, which
    # can hold a lot of host memory - confirm this value is intended.
    prefetch_factor=1000,     # prefetch batches per worker
)

# # ----------------------------
# # PyTorch Lightning Profiler
# # ----------------------------
# profiler = PyTorchProfiler(
#     schedule=torch.profiler.schedule(wait=50, warmup=50, active=5, repeat=1),
#     on_trace_ready=torch.profiler.tensorboard_trace_handler("/workspace"),
#     record_shapes=True,
#     profile_memory=True,
#     with_stack=True,
# )

# ----------------------------
# Trainer
# ----------------------------
trainer = pl.Trainer(
    max_epochs=1,
    accelerator="gpu",         # force GPU
    devices=1,                 # single GPU
    # "16-mixed" is the spelling Lightning asks for; plain `precision=16`
    # only works for historical reasons and triggers a warning at startup.
    precision="16-mixed",      # automatic mixed precision for speed
    # profiler=profiler,
    gradient_clip_val=0.0,     # avoid gradient clipping overhead
)

# ----------------------------
# Model to GPU
# ----------------------------
model = LinearRegressionModel().to("cuda")

# ----------------------------
# Training
# ----------------------------
trainer.fit(model, dataloader)

/venv/main/lib/python3.12/site-packages/lightning_fabric/connector.py:571: `precision=16` is supported for historical reasons but its usage is discouraged. Please set your precision to 16-mixed instead!
Using 16bit Automatic Mixed Precision (AMP)
💡 Tip: For seamless cloud uploads and versioning, try installing [litmodels](https://pypi.org/project/litmodels/) to enable LitModelCheckpoint, which syncs automatically with the Lightning model registry.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
/venv/main/lib/python3.12/site-packages/pytorch_lightning/trainer/connectors/logger_connector/logger_connector.py:76: Starting from v1.9.0, `tensorboardX` has been removed as a dependency of the `pytorch_lightning` package, due to potential conflicts with other packages in the ML ecosystem. For this reason, `logger=True` will use `CSVLogger` as the default logger, unless the `tensorboard` or `tensorboardX` packages are found. Pl

Training: |          | 0/? [00:00<?, ?it/s]

`Trainer.fit` stopped: `max_epochs=1` reached.
