In [1157]:
import numpy as np

from probts.data.data_utils.data_scaler import Scaler, StandardScaler, IdentityScaler, TemporalScaler

from probts.model.forecaster import LinearForecaster, NaiveForecaster
from probts.model.forecast_module import ProbTSForecastModule
from probts.data import ProbTSDataModule, DataManager, ProbTSBatchData
from probts.utils import find_best_epoch
from lightning import Trainer
from pytorch_lightning.loggers import CSVLogger
import torch
import matplotlib.pyplot as plt
import torch.nn as nn

In [1158]:
class BinaryQuantizer(Scaler):
    def __init__(self, num_bins=200, min_val=-10.0, max_val=10.0):
        super().__init__()
        self.num_bins = num_bins
        self.min_val = min_val
        self.max_val = max_val
        self.bin_values_ = torch.linspace(self.min_val, self.max_val, self.num_bins)

    def fit(self, values):
        self.min_val = values.min()
        self.max_val = values.max()
        self.bin_values_ = torch.linspace(self.min_val, self.max_val, self.num_bins)

    def fit_transform(self, values):
        self.fit(values)
        return self.transform(values)

    def transform(self, values):
        bin_thresholds = self.bin_values_.reshape(1, 1, -1)
        return (values >= bin_thresholds).float()

    def inverse_transform(self, values):
        reversed_bin = torch.flip(values, dims=(-1,))
        idx_first_one_reversed = reversed_bin.argmax(axis=-1)[..., None]
        idx_last_one = self.num_bins - 1 - idx_first_one_reversed
        reconstructed = self.bin_values_[idx_last_one]
        return reconstructed

In [1159]:
class StandardBinScaler(Scaler):
    def __init__(self, standard: StandardScaler, bin: BinaryQuantizer):
        super().__init__()
        self.standard = standard
        self.bin = bin

    def fit(self, X):
        Z = self.standard.fit_transform(X)
        self.bin.fit(Z)
        print('the scaler was fitted')

    def transform(self, X):
        Z = self.standard.transform(X)
        return self.bin.transform(Z)

    def fit_transform(self, X):
        self.fit(X)
        return self.transform(X)

    def inverse_transform(self, X):
        Z = self.bin.inverse_transform(X)
        return self.standard.inverse_transform(Z)

In [1160]:
# data_manager = DataManager(
#     dataset='tourism_monthly',
#     path='../datasets',
#     context_length=12,
#     prediction_length=12,
# )
# data_manager.context_length

In [1161]:
class CustomDataManager(DataManager):
    def _configure_scaler(self, scaler_type: str):
        """Configure the scaler."""
        if scaler_type == "standard":
            return StandardScaler(var_specific=self.var_specific_norm)
        elif scaler_type == "temporal":
            return TemporalScaler()
        elif scaler_type == "binary":
            return BinaryQuantizer()
        elif scaler_type == "standard_binary":
            # return StandardBinScaler(StandardScaler(var_specific=self.var_specific_norm), BinaryQuantizer())
            return StandardBinScaler(TemporalScaler(), BinaryQuantizer())
        return IdentityScaler()

In [1162]:
# data_module = ProbTSDataModule(
#     data_manager=data_manager,
#     batch_size=32,
#     test_batch_size=32,
#     num_workers=8,
# )
# test_dataloader = data_module.test_dataloader()
# train_dataloader = data_module.train_dataloader()
# val_dataloader = data_module.val_dataloader()

In [1163]:
# for test_batch in test_dataloader:
#     break

In [1164]:
# batch_data = ProbTSBatchData(test_batch, 'cpu')
# batch_data.past_target_cdf.shape

In [1165]:
# plt.figure(figsize=(10,3))
# plt.plot(batch_data.past_target_cdf[13, :, 0].t())
# plt.show()

In [1166]:
# scaler = StandardBinScaler(StandardScaler(), BinaryQuantizer())
# scaler.fit(batch_data.past_target_cdf)
# transformed = scaler.transform(batch_data.past_target_cdf)
# transformed.shape

In [1167]:
# plt.figure(figsize=(10,3))
# plt.imshow(transformed[13].T, aspect='auto', interpolation='none', cmap='Reds')
# plt.show()

In [1168]:
# reconstructed = scaler.inverse_transform(transformed)
# reconstructed.shape

In [1169]:
# plt.figure(figsize=(10,3))
# plt.plot(reconstructed[13, :, 0].t())
# plt.show()


In [1170]:
data_manager = CustomDataManager(
    dataset='tourism_monthly',
    path='../datasets',
    context_length=72,
    prediction_length=24,
    # scaler="standard_binary",
    scaler="identity",
)

# data_manager = DataManager(
#     dataset='m4_daily',
#     # dataset='etth1',
#     path='./datasets',
#     context_length=12,
#     prediction_length=12,
#     scaler="standard_binary",
# )

Loading Short-term Dataset: tourism_monthly



Download tourism_monthly_dataset.zip:: 200kB [00:00, 491kB/s]

creating json files: 100%|██████████| 366/366 [00:00<00:00, 289535.13it/s]

No validation set is used.





In [1171]:
# data_manager.dataset_raw.training_dataset

In [1172]:
data_manager.context_length

72

In [1173]:
data_module = ProbTSDataModule(
    data_manager=data_manager,
    batch_size=1,
    test_batch_size=1,
    num_workers=8,
)
test_dataloader = data_module.test_dataloader()
train_dataloader = data_module.train_dataloader()
val_dataloader = data_module.val_dataloader()

In [1174]:
for test_batch in test_dataloader:
    break

In [1175]:
test_batch['past_target_cdf'].shape

torch.Size([1, 84])

In [1176]:
test_batch['past_target_cdf'].reshape(-1, 1).shape

torch.Size([84, 1])

In [1177]:
# data_manager.scaler.standard.mean

In [1178]:
data_manager.scaler.transform(test_batch['past_target_cdf'].reshape(-1, 1))

tensor([[5466.7803],
        [3235.1677],
        [2157.9800],
        [1379.7252],
        [1728.0400],
        [1350.1099],
        [1216.0149],
        [1751.3252],
        [1805.3201],
        [2570.0249],
        [3204.2402],
        [5395.7202],
        [6078.8286],
        [3587.0984],
        [2285.1951],
        [1582.1899],
        [1787.4298],
        [1554.8701],
        [1409.8649],
        [1612.1250],
        [2286.2400],
        [2913.7551],
        [3645.9084],
        [5956.7085],
        [6326.9751],
        [3914.6602],
        [2617.6750],
        [1675.1650],
        [2139.2200],
        [1715.4899],
        [1663.5800],
        [2053.7000],
        [2354.9299],
        [3038.5918],
        [3470.6094],
        [6606.1836],
        [6587.6367],
        [4133.7827],
        [2960.0244],
        [1762.5850],
        [2125.6401],
        [1815.9150],
        [1632.3149],
        [2210.3950],
        [2210.2151],
        [3099.2693],
        [3468.7778],
        [6482

In [1179]:
# for train_batch in train_dataloader:
#     break

In [1180]:
# batch_data = ProbTSBatchData(test_batch, 'cpu')
# batch_data.past_target_cdf.shape

In [1181]:
data_manager.context_length

72

In [1182]:
def sliding_window_batch(x, L, H):
    """
    x: Tensor of shape (B, L+H, C)
    Returns: Tensor of shape (B, H, L, C)
    """
    B, total_len, C = x.shape
    assert total_len >= L + H, "Not enough sequence length for given L and H"

    windows = [x[:, h:h + L, :].unsqueeze(1) for h in range(H)]  # list of (B, 1, L, C)
    return torch.cat(windows, dim=1)  # (B, H, L, C)

In [None]:
### h = 2, L =5
### total: 7
### target indexes: 5,6
## h=0 - 0:5 [0,1,2,3,4]
## h=1 - 1:6 [1,2,3,4,5]

In [1183]:
def most_probable_monotonic_sequence(p: torch.Tensor):
    """
    p: Tensor of shape (B, D) where each row is a probability vector
    Returns:
        best_sequences: Tensor of shape (B, D) with most probable [1...1, 0...0] sequence per batch
        best_probs: Tensor of shape (B,) with normalized probability of each best sequence
    """
    B, D = p.shape

    # Compute cumulative product of p and (1 - p)
    left_cumprod = torch.cumprod(p, dim=1)  # shape (B, D)
    right_cumprod = torch.cumprod((1 - p).flip(dims=[1]), dim=1).flip(dims=[1])  # shape (B, D)

    # Pad left with 1 at the beginning (per batch)
    ones = torch.ones((B, 1), dtype=p.dtype, device=p.device)
    left = torch.cat([ones, left_cumprod[:, :-1]], dim=1)  # shape (B, D)
    right = right_cumprod  # shape (B, D)

    # Element-wise multiply
    probs = left * right  # shape (B, D)

    # Normalize
    probs_sum = probs.sum(dim=1, keepdim=True)  # shape (B, 1)
    probs_normalized = probs / probs_sum

    # Find best cut index per batch
    best_k = torch.argmax(probs_normalized, dim=1)  # shape (B,)

    # Construct best sequences
    arange = torch.arange(D, device=p.device).unsqueeze(0)  # shape (1, D)
    best_k_expanded = best_k.unsqueeze(1)  # shape (B, 1)
    best_sequences = (arange < best_k_expanded).to(p.dtype)  # shape (B, D)

    # Get best normalized probabilities
    best_probs = torch.gather(probs_normalized, dim=1, index=best_k.unsqueeze(1)).squeeze(1)  # shape (B,)

    return best_sequences, best_probs

In [1184]:
class DynamicTanh(nn.Module):
    def __init__(self, normalized_shape, channels_last, alpha_init_value=0.5):
        super().__init__()
        self.normalized_shape = normalized_shape
        self.alpha_init_value = alpha_init_value
        self.channels_last = channels_last

        self.alpha = nn.Parameter(torch.ones(1) * alpha_init_value)
        self.weight = nn.Parameter(torch.ones(normalized_shape))
        self.bias = nn.Parameter(torch.zeros(normalized_shape))

    def forward(self, x):
        x = torch.tanh(self.alpha * x)
        if self.channels_last:
            x = x * self.weight + self.bias
        else:
            # x = x * self.weight[:, None, None] + self.bias[:, None, None]
            x = x * self.weight[:, None] + self.bias[:, None]
        return x

    def extra_repr(self):
        return f"normalized_shape={self.normalized_shape}, alpha_init_value={self.alpha_init_value}, channels_last={self.channels_last}"


In [1185]:
from probts.model.forecaster import Forecaster
from torch import nn
import torch.nn.functional as F


class BinConv(Forecaster):
    def __init__(self, context_length: int, num_bins: int, kernel_size_across_bins_2d: int = 3,
                 kernel_size_across_bins_1d: int = 3, num_filters_2d: int = 8,
                 num_filters_1d: int = 32, is_cum_sum: bool = False, num_1d_layers: int = 2, num_blocks: int = 3,
                 kernel_size_ffn: int = 51,
                 **kwargs) -> None:
        """
        Initialize the model with parameters.
        """
        super().__init__(context_length=context_length, **kwargs)
        # Initialize model parameters here
        self.context_length = context_length
        self.num_bins = num_bins
        self.num_filters_2d = num_filters_2d
        self.num_filters_1d = num_filters_1d
        self.kernel_size_across_bins_2d = kernel_size_across_bins_2d
        self.kernel_size_across_bins_1d = kernel_size_across_bins_1d
        self.is_cum_sum = is_cum_sum
        self.scaler = StandardBinScaler(StandardScaler(var_specific=True), BinaryQuantizer())
        self.num_1d_layers = num_1d_layers
        self.num_blocks = num_blocks
        self.kernel_size_ffn = kernel_size_ffn
        self.dropout = nn.Dropout(0.15)

        # Conv2d over (context_length, num_bins)

        self.conv2d = nn.ModuleList([nn.Conv2d(
            in_channels=1,
            out_channels=self.num_filters_2d,
            # kernel_size=(context_length if i == 0 else kernel_size_across_bins_2d, kernel_size_across_bins_2d),
            kernel_size=(context_length, kernel_size_across_bins_2d),
            bias=True
        ) for _ in range(num_blocks)
        ])
        self.conv1d = nn.ModuleList([
            nn.ModuleList([
                nn.Conv1d(in_channels=num_filters_2d if i == 0 else num_filters_1d,
                          out_channels=context_length if i == num_1d_layers - 1 else num_filters_1d,
                          kernel_size=kernel_size_across_bins_1d, bias=True,
                          groups=num_filters_1d)
                for i in range(num_1d_layers)
            ]) for _ in range(num_blocks)
        ])
        self.conv_ffn = nn.Conv1d(
            # in_channels=self.num_filters_1d,
            in_channels=context_length,
            out_channels=1,
            kernel_size=kernel_size_ffn,  # large kernel size?
            groups=1,
            bias=True
        )
        print('conv 1d:')
        print(self.conv1d)
        assert num_filters_2d == num_filters_1d, "todo: change the self.act shape if not"
        self.act = nn.ModuleList([
            nn.ModuleList([
                # DynamicTanh(normalized_shape=num_filters_2d if i == 0 else num_filters_1d, channels_last=False)
                DynamicTanh(normalized_shape=num_filters_2d if i < self.num_1d_layers else context_length,
                            channels_last=False)
                for i in range(self.num_1d_layers + 1)  # applied after conv2d, and all conv1d including the last one
            ]) for _ in range(self.num_blocks)
        ])
        # self.conv_ffn = nn.Sequential(
        #     nn.Conv1d(1, num_bins, kernel_size=1),
        #     nn.ReLU(),
        #     nn.Conv1d(num_bins, num_bins, kernel_size=1),
        #     # nn.Dropout(dropout)
        # )

        #
        # self.conv1d_1 = nn.Conv1d(
        #     in_channels=self.num_filters_2d,
        #     out_channels=self.num_filters_1d,
        #     kernel_size=kernel_size_across_bins_1d,
        #     # groups=self.num_filters_1d,
        #     bias=True
        # )
        #
        # self.conv1d_2 = nn.Conv1d(
        #     in_channels=self.num_filters_1d,
        #     out_channels=1,
        #     kernel_size=kernel_size_across_bins_1d,
        #     bias=True
        # )
        #
        # self.dyt_2d = DynamicTanh(normalized_shape=num_filters_2d, channels_last=False)
        # self.dyt_1d = DynamicTanh(normalized_shape=num_filters_1d, channels_last=False)

    def _pad_channels(self, tensor: torch.Tensor, pad_size: int, pad_val_left=1.0, pad_val_right=0.0):
        if pad_size == 0:
            return tensor
        left = torch.full((*tensor.shape[:-1], pad_size), pad_val_left, device=tensor.device)
        right = torch.full((*tensor.shape[:-1], pad_size), pad_val_right, device=tensor.device)
        return torch.cat([left, tensor, right], dim=-1)

    def conv_layer(self, x: torch.Tensor, conv_func, act_func, kernel_size: int, is_2d: bool, ):
        # kernel_size = self.kernel_size_across_bins_2d if is_2d else self.kernel_size_across_bins_1d
        pad = kernel_size // 2 if kernel_size > 1 else 0
        x_padded = self._pad_channels(x, pad)
        if is_2d:
            x_padded = x_padded.unsqueeze(1)
        conv_out = conv_func(x_padded)  # (batch_size, num_filters_2d, num_bins)
        # print(f'conv out shape:{conv_out.shape}')
        if is_2d:
            conv_out = conv_out.squeeze(2)
        if act_func is not None:
            conv_out = act_func(conv_out)
        return conv_out

    def forward(self, x):

        x = x.float()
        # x: (batch_size, context_length, num_bins)
        batch_size, context_length, num_bins = x.shape
        assert context_length == self.context_length, "Mismatch in context length"

        for j in range(self.num_blocks):

            residual = x
            x = self.conv_layer(x, self.conv2d[j], self.act[j][0], self.kernel_size_across_bins_2d, True)
            for i in range(self.num_1d_layers):
                # x = self.conv_layer(x, self.conv1d[j][i], self.act[j][i + 1], False)
                x = self.conv_layer(x, self.conv1d[j][i], F.relu,
                                    self.kernel_size_across_bins_1d, False)
            x = self.dropout(x)
            x = x + residual

            # x = self.conv_layer(x, self.conv2d, self.dyt_2d, True)  # (batch_size, num_filters_2d, num_bins)
            # x = self.conv_layer(x, self.conv1d_1, self.dyt_1d, False)
            # x = self.conv_layer(x, self.conv1d_2, None, False) # (batch_size, num_bins)
            # print('x shape')
            # print(x.squeeze(1).shape)

        out = self.conv_layer(x, self.conv_ffn, None, self.kernel_size_ffn, False).squeeze(1)
        # out = self.conv_ffn(x.squeeze(1))

        if self.is_cum_sum:
            assert False
            out = torch.flip(torch.cumsum(torch.flip(out, dims=[1]), dim=1), dims=[1])
        return out

    def loss(self, batch_data):
        """
        Compute the loss for the given batch data.

        Parameters:
        batch_data [dict]: Dictionary containing input data and possibly target data.

        Returns:
        Tensor: Computed loss.
        """
        # Extract inputs and targets from batch_data
        inputs = self.get_inputs(batch_data, 'all')
        orig_target = inputs[:, -self.prediction_length:, :]
        # print(f'inputs shape:{inputs.shape}')
        # print(f'scale shape:{inputs.reshape(-1)[:-self.prediction_length].shape}')
        self.scaler.fit(inputs.reshape(-1)[:-self.prediction_length])
        inputs = self.scaler.transform(inputs)
        print('inputs transformed')
        print(inputs.shape)
        print(inputs.sum(dim=-1))
        target = inputs[:, -self.prediction_length:, :]
        # print(f'bool:{torch.allclose(inputs[:, -self.prediction_length:, :], batch_data.future_target_cdf.float())}')
        inputs = sliding_window_batch(inputs, self.context_length, self.prediction_length).float()
        outputs = self(inputs.view(-1, *inputs.shape[2:]))
        print(f'outputs transformed:')
        print(outputs.shape)
        with torch.no_grad():
            pred = F.sigmoid(outputs)
            print('seq')
            print(pred.sum(dim=-1))
            # pred, _ = most_probable_monotonic_sequence(pred)
            pred = (pred >= 0.5).int()
            print('out:')
            pred = self.scaler.inverse_transform(pred).reshape(-1)
            target_inv = self.scaler.inverse_transform(target).reshape(-1)
            print(pred)
            print(target_inv)
            print(orig_target.reshape(-1))
            # plt.plot(pred, color='red')
            # plt.plot(target_inv)
            plt.show()
        # outputs = outputs[:, -self.prediction_length-1:-1, ...]
        # target = batch_data.future_target_cdf.float()

        loss = F.binary_cross_entropy_with_logits(input=outputs, target=target.view(-1, *target.shape[2:]), )
        print(loss)
        return loss

    def forecast(self, batch_data, num_samples=None):
        inputs = self.get_inputs(batch_data, 'encode')

        self.scaler.fit(inputs.reshape(-1))
        inputs = self.scaler.transform(inputs)
        print('inputs:')
        print(inputs.shape)
        print(inputs.sum(dim=-1))
        current_context = inputs.clone()
        forecasts = []
        for _ in range(self.prediction_length):
            pred = F.sigmoid(self(current_context))  # (B, D)
            # pred = (pred >= 0.5).int()
            pred, _ = most_probable_monotonic_sequence(pred)
            pred = pred.int()
            forecasts.append(pred.unsqueeze(1))  # (B, 1, D)
            next_input = pred.unsqueeze(1)
            current_context = torch.cat([current_context[:, 1:], next_input], dim=1)

        forecasts = torch.cat(forecasts, dim=1)
        print(forecasts.shape)
        print(f'forecast : {forecasts.sum(dim=-1)}')
        forecasts = self.scaler.inverse_transform(forecasts)
        # print(f'post forecast shape:{forecasts.shape}')
        return forecasts  # (B, T, D)

    # def forecast(self, batch_data, num_samples=None):
    #     """
    #     Generate forecasts for the given batch data.
    #
    #     Parameters:
    #     batch_data [dict]: Dictionary containing input data.
    #     num_samples [int, optional]: Number of samples per distribution during evaluation. Defaults to None.
    #
    #     Returns:
    #     Tensor: Forecasted outputs.
    #     """
    #     # Perform the forward pass to get the outputs
    #     outputs = self(batch_data.past_target_cdf[:, -self.context_length:, :])
    #     print(f'num samples:{num_samples}')
    #     if num_samples is not None:
    #         # If num_samples is specified, use it to sample from the distribution
    #         outputs = self.sample_from_distribution(outputs, num_samples)
    #     else:
    #         # If perform point estimation, the num_samples is equal to 1
    #         outputs = outputs.unsqueeze(1)
    #     return outputs  # [batch_size, num_samples, prediction_length, var_num]

In [1186]:
data_manager.context_length

72

In [1187]:
forecaster = BinConv(
    num_bins=200,
    kernel_size_across_bins_2d=3,
    kernel_size_across_bins_1d=3,
    num_filters_2d=72 // 2,
    num_filters_1d=72 // 2,
    kernel_size_ffn= 200//4 + 1,
    num_1d_layers=2,
    num_blocks=3,
    use_lags=False,
    use_feat_idx_emb=False,
    use_time_feat=False,
    target_dim=data_manager.target_dim,
    context_length=data_manager.context_length,
    prediction_length=data_manager.prediction_length,
    freq=data_manager.freq,
    lags_list=data_manager.lags_list,
    time_feat_dim=data_manager.time_feat_dim,
    dataset=data_manager.dataset,
)
model = ProbTSForecastModule(
    forecaster=forecaster,
    scaler=data_manager.scaler,
    learning_rate=0.001,
    quantiles_num=20,
    num_samples=None
)

conv 1d:
ModuleList(
  (0-2): 3 x ModuleList(
    (0): Conv1d(36, 36, kernel_size=(3,), stride=(1,), groups=36)
    (1): Conv1d(36, 72, kernel_size=(3,), stride=(1,), groups=36)
  )
)
sampling_weight_scheme: none


/Users/andreichernov/miniforge3/envs/probts/lib/python3.10/site-packages/lightning/pytorch/utilities/parsing.py:209: Attribute 'forecaster' is an instance of `nn.Module` and is already saved during checkpointing. It is recommended to ignore them using `self.save_hyperparameters(ignore=['forecaster'])`.


In [1188]:
trainer = Trainer(
    accelerator="cpu",
    devices=1,
    strategy="auto",
    max_epochs=50,
    use_distributed_sampler=False,
    limit_train_batches=100,
    log_every_n_steps=1,
    accumulate_grad_batches=8,
    default_root_dir='./results',
    logger=CSVLogger('./logs'),
)

You are using the plain ModelCheckpoint callback. Consider using LitModelCheckpoint which with seamless uploading to Model registry.
GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
/Users/andreichernov/miniforge3/envs/probts/lib/python3.10/site-packages/lightning/pytorch/trainer/setup.py:177: GPU available but not used. You can set it by doing `Trainer(accelerator='gpu')`.


In [1189]:
target = []
for data in list(data_manager.dataset_raw.train):
    target.append(data['target'])

In [1190]:
# import numpy as np
# model.scaler.fit(torch.tensor(np.concatenate(target)).reshape(-1))

In [1191]:
# model.scaler.standard.mean

In [None]:
trainer.fit(model, train_dataloaders=train_dataloader, val_dataloaders=val_dataloader)


  | Name       | Type    | Params | Mode 
-----------------------------------------------
0 | forecaster | BinConv | 29.3 K | train
-----------------------------------------------
29.3 K    Trainable params
0         Non-trainable params
29.3 K    Total params
0.117     Total estimated model params size (MB)
30        Modules in train mode
0         Modules in eval mode


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

/Users/andreichernov/miniforge3/envs/probts/lib/python3.10/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:425: The 'val_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=9` in the `DataLoader` to improve performance.
/Users/andreichernov/miniforge3/envs/probts/lib/python3.10/site-packages/lightning/pytorch/utilities/data.py:106: Total length of `DataLoader` across ranks is zero. Please make sure this was your intention.


Training: |          | 0/? [00:00<?, ?it/s]

the scaler was fitted
inputs transformed
torch.Size([1, 96, 200])
tensor([[ 74.,  32.,   9.,  21.,  11.,   1.,   8.,  17.,  28.,  59.,  91., 129.,
          97.,  63.,  37.,   7.,  14.,   9.,  12.,  21.,  65.,  76., 121., 157.,
         105., 100.,  52.,  18.,  30.,  13.,  19.,  37.,  56.,  66., 135., 151.,
         110., 103.,  32.,  20.,  38.,  25.,  27.,  69.,  59.,  92., 154., 152.,
         151., 115.,  56.,  40.,  47.,  28.,  46.,  52.,  87., 112., 180., 175.,
         139., 101.,  47.,  35.,  47.,  53.,  51.,  68.,  78., 104., 143., 200.,
         147., 105.,  58.,  32.,  52.,  37.,  50.,  53.,  85.,  96., 126., 172.,
         116., 112.,  17.,  20.,  59.,  32.,  24.,  48.,  62.,  54., 116., 136.]])
outputs transformed:
torch.Size([24, 200])
seq
tensor([138.4145, 142.3618, 141.8635, 140.6204, 136.6376, 129.7843, 127.5699,
        128.5370, 131.3834, 137.6572, 140.7756, 139.2022, 141.6254, 142.6649,
        142.7240, 143.2991, 135.7859, 130.7348, 126.3705, 125.3555, 133.5496,
   

In [None]:
import pandas as pd

df = pd.read_csv('logs/lightning_logs/version_210/metrics.csv')
df.groupby('epoch').agg({'train_loss': 'mean'})


In [None]:
### TODO: what does individual mean

In [None]:
i = 0
for test_batch in test_dataloader:
    if i > 0:
        break
    print(i)
    i += 1

In [None]:
test_batch['past_target_cdf'].shape


In [None]:
batch_data = ProbTSBatchData(test_batch, model.device)
past_target_cdf = model.scaler.transform(batch_data.past_target_cdf)
future_target_cdf = model.scaler.transform(batch_data.future_target_cdf)
batch_data.past_target_cdf = past_target_cdf

batch_idx = 0
with torch.no_grad():
    prediction = model.forecaster.forecast(batch_data)



In [None]:
prediction  #.shape

In [None]:
batch_data

In [None]:
batch_data.past_target_cdf[0][0]

In [None]:
model.scaler.inverse_transform(prediction)

In [None]:
model.scaler.inverse_transform(prediction)

In [None]:
model.scaler.inverse_transform(future_target_cdf)

In [None]:
batch_data.future_target_cdf.reshape(-1)

In [None]:
plt.plot(model.scaler.inverse_transform(prediction).reshape(-1), c='r')
plt.plot(batch_data.future_target_cdf.reshape(-1), c='b')
plt.plot(model.scaler.inverse_transform(future_target_cdf).reshape(-1))

In [None]:
batch_data.past_target_cdf.shape

In [None]:
# context_length = data_manager.context_length
# prediction_length = data_manager.prediction_length
# past_range = range(0, context_length)
# future_range = range(context_length, context_length + prediction_length)
# full_range = range(0, context_length + prediction_length)
#
# for i in range(min(10, forecaster.target_dim)):
#     target = torch.cat([past_target_cdf[batch_idx, -context_length:, i], future_target_cdf[batch_idx, :, i]])
#     plt.figure(figsize=(10, 2))
#     plt.plot(full_range, target)
#     plt.plot(future_range, prediction[:, i])

In [None]:
trainer.test(model=model, datamodule=data_module);

In [None]:
future_target_cdf.shape

In [None]:
# trainer.test(model=model, dataloaders=train_dataloader);