# RevIN

In [10]:
"""
Reversible Instance Normalization from 
https://github.com/ts-kim/RevIN
"""

import torch
import torch.nn as nn


class MovingAvg(nn.Module):
    """Moving-average smoother along dim 1 with edge replication.

    The input is padded by repeating its first and last entries along dim 1
    so that, with ``stride=1``, the output keeps the input length for both
    odd and even ``kernel_size``.

    :param kernel_size: width of the averaging window
    :param stride: stride forwarded to ``nn.AvgPool1d``
    """

    def __init__(self, kernel_size, stride):
        super().__init__()
        self.kernel_size = kernel_size
        self.avg = nn.AvgPool1d(kernel_size=kernel_size, stride=stride, padding=0)

    def forward(self, x):
        """Return the moving average of ``x`` (indexed as [batch, time, channel])."""
        # The total padding must be kernel_size - 1 for a length-preserving
        # average. Splitting it as (k-1)//2 on both sides loses one step when
        # k is even, so the remainder goes to the end of the series.
        front_len = (self.kernel_size - 1) // 2
        end_len = self.kernel_size - 1 - front_len
        front = x[:, 0:1, :].repeat(1, front_len, 1)
        end = x[:, -1:, :].repeat(1, end_len, 1)
        x = torch.cat([front, x, end], dim=1)
        # AvgPool1d pools over the last dim, so move time there and back.
        x = self.avg(x.permute(0, 2, 1))
        x = x.permute(0, 2, 1)
        return x


class SeriesDecomposition(nn.Module):
    """Split a series into a moving-average part and the remainder.

    :param kernel_size: window width of the stride-1 moving average
    """

    def __init__(self, kernel_size):
        super().__init__()
        self.moving_avg = MovingAvg(kernel_size, stride=1)

    def forward(self, x):
        """Return ``(x - moving_average(x), moving_average(x))``."""
        trend = self.moving_avg(x)
        return x - trend, trend


class RevIN(nn.Module):
    """Reversible instance normalization for 3-D inputs.

    ``mode="norm"`` standardizes the input with statistics reduced over dim 1
    (optionally followed by a learnable per-feature affine map) and
    ``mode="denorm"`` inverts that transform using the stored statistics.
    """

    def __init__(self, num_features: int, eps=1e-5, affine=True):
        """
        :param num_features: the number of features or channels
        :param eps: a value added for numerical stability
        :param affine: if True, RevIN has learnable affine parameters
        """
        super().__init__()
        self.num_features, self.eps, self.affine = num_features, eps, affine
        if affine:
            self._init_params()

    def forward(self, x, mode: str, update_stats=True):
        """Normalize or de-normalize ``x``.

        :param x: 3-D tensor
        :param mode: ``"norm"`` or ``"denorm"``
        :param update_stats: in ``"norm"`` mode, recompute the stored
            mean/stdev from ``x`` before normalizing
        :raises NotImplementedError: for any other ``mode``
        """
        assert x.ndim == 3
        if mode == "denorm":
            return self._denormalize(x)
        if mode != "norm":
            raise NotImplementedError
        if update_stats:
            self._get_statistics(x)
        return self._normalize(x)

    def _init_params(self):
        # One learnable scale and shift per feature: shape (C,)
        self.affine_weight = nn.Parameter(torch.ones(self.num_features))
        self.affine_bias = nn.Parameter(torch.zeros(self.num_features))

    def _get_statistics(self, x):
        # Reduce over every dim except the first and last.
        reduce_dims = tuple(range(1, x.ndim - 1))
        # Statistics are detached so no gradient flows through them.
        self.mean = x.mean(dim=reduce_dims, keepdim=True).detach()
        variance = x.var(dim=reduce_dims, keepdim=True, unbiased=False)
        self.stdev = (variance + self.eps).sqrt().detach()

    def _normalize(self, x):
        x = (x - self.mean) / self.stdev
        if self.affine:
            x = x * self.affine_weight + self.affine_bias
        return x

    def _denormalize(self, x):
        if self.affine:
            # eps**2 keeps the division finite when a learned weight is zero.
            x = (x - self.affine_bias) / (self.affine_weight + self.eps * self.eps)
        return x * self.stdev + self.mean

# Time2Vec

In [11]:
import torch
from torch import nn


class Time2Vec(nn.Module):
    """Learned time embedding applied independently to each input feature.

    Every one of the ``input_dim`` features is mapped to
    ``embed_dim // input_dim`` values by its own affine map; the first value
    stays linear and the remaining ones go through ``act_function``.
    With ``embed_dim == 0`` the layer is disabled and acts as the identity.
    """

    def __init__(self, input_dim=6, embed_dim=512, act_function=torch.sin):
        assert embed_dim % input_dim == 0
        super().__init__()
        self.enabled = embed_dim > 0
        if not self.enabled:
            return
        per_feature_dim = embed_dim // input_dim
        self.embed_dim = per_feature_dim
        self.input_dim = input_dim
        self.embed_weight = nn.Parameter(torch.randn(input_dim, per_feature_dim))
        self.embed_bias = nn.Parameter(torch.randn(input_dim, per_feature_dim))
        self.act_function = act_function

    def forward(self, x):
        if not self.enabled:
            return x
        # diag_embed turns the last dim into a diagonal matrix, so the matmul
        # scales row i of embed_weight by feature i:
        # (..., input_dim, input_dim) @ (input_dim, embed_dim)
        projected = torch.matmul(torch.diag_embed(x), self.embed_weight) + self.embed_bias
        linear_part = projected[..., :1]
        activated_part = self.act_function(projected[..., 1:])
        stacked = torch.cat([linear_part, activated_part], dim=-1)
        # Merge the per-feature embeddings: (dim0, dim1, input_dim * embed_dim)
        return stacked.view(stacked.size(0), stacked.size(1), -1)

# Eval Stats

In [12]:
import numpy as np

EPSILON = 1e-7


def r_squared(actual: np.ndarray, predicted: np.ndarray):
    """Coefficient of determination computed along axis 1, then averaged."""
    residual_ss = (_error(actual, predicted) ** 2).sum(1)
    centered = _error(actual, actual.mean(1, keepdims=True))
    total_ss = (centered ** 2).sum(1)
    # EPSILON guards against a zero total sum of squares.
    scores = 1.0 - residual_ss / (total_ss + EPSILON)
    return scores.mean()


def _error(actual: np.ndarray, predicted: np.ndarray):
    """Simple error"""
    return actual - predicted


def _percentage_error(actual: np.ndarray, predicted: np.ndarray):
    """
    Error relative to the actual value.

    Note: the result is a fraction, NOT multiplied by 100.
    """
    # EPSILON keeps the division defined when an actual value is exactly zero.
    denominator = actual + EPSILON
    return _error(actual, predicted) / denominator


def mse(actual: np.ndarray, predicted: np.ndarray):
    """Mean of the squared errors over all elements."""
    squared = np.square(_error(actual, predicted))
    return np.mean(squared)


def mae(actual: np.ndarray, predicted: np.ndarray):
    """Mean of the absolute errors over all elements."""
    magnitudes = np.abs(_error(actual, predicted))
    return np.mean(magnitudes)


def mape(actual: np.ndarray, predicted: np.ndarray):
    """Mean of the absolute percentage errors (as a fraction, not x100)."""
    relative = np.abs(_percentage_error(actual, predicted))
    return np.mean(relative)


def smape(actual: np.ndarray, predicted: np.ndarray):
    """Symmetric mean absolute percentage error (as a fraction, not x100)."""
    abs_diff = np.abs(actual - predicted)
    # EPSILON avoids 0/0 where both values are zero.
    scale = (np.abs(actual) + np.abs(predicted)) + EPSILON
    return np.mean(2.0 * abs_diff / scale)

# LinearAR

In [6]:
import math
import torch
from torch import nn
from torch.optim import Adam
from einops import rearrange
import pandas as pd
import numpy as np

In [2]:
class LinearModel(nn.Module):
    """Autoregressive linear forecaster over a fixed-length window.

    Each prediction is a weighted sum of the last ``context_points`` steps
    plus a bias. Weights are either per target variable (``d_yt`` columns)
    or a single column shared by all variables.
    """

    def __init__(self, context_points: int, shared_weights: bool = False, d_yt: int = 7):
        super().__init__()

        if shared_weights:
            layer_count = 1
        else:
            assert d_yt is not None
            layer_count = d_yt

        self.weights = nn.Parameter(torch.ones((context_points, layer_count)), requires_grad=True)
        self.bias = nn.Parameter(torch.ones((layer_count)), requires_grad=True)

        # Uniform init in [-1/sqrt(window), 1/sqrt(window)]
        bound = math.sqrt(1.0 / context_points)
        with torch.no_grad():
            self.weights.uniform_(-bound, bound)
            self.bias.uniform_(-bound, bound)

        self.window = context_points
        self.shared_weights = shared_weights
        self.d_yt = d_yt

    def forward(self, y_c: torch.Tensor, pred_len: int, d_yt: int = None):
        """Roll the linear map forward ``pred_len`` steps.

        :param y_c: context tensor; only its first ``d_yt`` features are used
        :param pred_len: number of steps to predict
        :param d_yt: number of target features (defaults to ``self.d_yt``)
        :return: tensor of shape (batch, pred_len, d_yt)
        """
        batch, _, _ = y_c.shape
        d_yt = d_yt or self.d_yt

        output = torch.zeros(batch, pred_len, d_yt, device=y_c.device)
        for step in range(pred_len):
            # Drop the oldest `step` context rows and append the predictions
            # made so far, so every step sees a same-length history.
            history = torch.cat((y_c[:, step:, :d_yt], output[:, :step]), dim=1)
            output[:, step, :] = self._inner_forward(history)
        return output

    def _inner_forward(self, inp):
        batch = inp.shape[0]
        if not self.shared_weights:
            return (self.weights * inp[:, -self.window :, :]).sum(1) + self.bias

        # Shared weights: fold the variables into the batch dim so the single
        # weight column is applied to each variable independently.
        folded = rearrange(inp, "batch length dy -> (batch dy) length 1")
        baseline = (self.weights * folded[:, -self.window :, :]).sum(1) + self.bias
        return rearrange(baseline, "(batch dy) 1 -> batch dy", batch=batch)

In [3]:
def train(model, optimizer, criterion, y_c, target, epochs=1000):
    """Fit ``model`` on a single batch for ``epochs`` optimizer steps.

    The model is called as ``model(y_c, pred_len)`` with ``pred_len`` taken
    from ``target.shape[1]``; the loss is printed every 100 epochs.
    """
    model.train()
    for epoch in range(epochs):
        optimizer.zero_grad()
        prediction = model(y_c, target.shape[1])
        loss = criterion(prediction, target)
        loss.backward()
        optimizer.step()
        if epoch % 100:
            continue
        print(f"Epoch {epoch}, Loss: {loss.item():.4f}")

In [8]:
# ETTm1
import numpy as np
import pandas as pd

def create_sliding_windows(data: np.ndarray, context_points: int, pred_len: int):
    """Cut a series into overlapping (context, target) window pairs.

    data: numpy array of shape (T, d_yt)
    returns: y_context (batch, context_points, d_yt) and
             y_target (batch, pred_len, d_yt)

    When the series is shorter than ``context_points + pred_len`` the batch
    dimension is 0 but the remaining dimensions are still well-formed.
    """
    window = context_points + pred_len
    n_windows = max(data.shape[0] - window + 1, 0)

    if n_windows == 0:
        # np.array([]) would collapse to shape (0,); keep the window and
        # feature dimensions so downstream shape handling still works.
        feature_shape = data.shape[1:]
        empty_context = np.empty((0, context_points) + feature_shape, dtype=data.dtype)
        empty_target = np.empty((0, pred_len) + feature_shape, dtype=data.dtype)
        return empty_context, empty_target

    contexts = [data[start : start + context_points] for start in range(n_windows)]
    targets = [
        data[start + context_points : start + window] for start in range(n_windows)
    ]
    return np.array(contexts), np.array(targets)

In [9]:
# Load the dataset from a hard-coded local path; the "date" column is parsed
# as datetimes so the rows can be ordered chronologically.
csv_path = "S:\\spatiotemporal-analysis\\ETTm1_modified.csv"
df = pd.read_csv(csv_path, parse_dates=["date"])

# Sort by date in place (a no-op if the file is already ordered)
df.sort_values("date", inplace=True)

# Use the numeric columns as features (d_yt equals the number of columns: 7)
feature_columns = ["HUFL", "HULL", "MUFL", "MULL", "LUFL", "LULL", "OT"]
data = df[feature_columns].values  # shape (T, 7)

# Context (input) and prediction (output) window lengths, in rows
context_points = 10
pred_len = 5

# Cut the series into overlapping (context, target) window pairs
y_context_np, y_target_np = create_sliding_windows(data, context_points, pred_len)

# Convert to float32 torch tensors
y_context = torch.tensor(y_context_np, dtype=torch.float32)
y_target = torch.tensor(y_target_np, dtype=torch.float32)

print("y_context shape:", y_context.shape)  # (batch, context_points, 7)
print("y_target shape:", y_target.shape)    # (batch, pred_len, 7)

# Create the model (one weight column per feature), optimizer, and loss
d_yt = len(feature_columns)
model = LinearModel(context_points=context_points, shared_weights=False, d_yt=d_yt)
optimizer = Adam(model.parameters(), lr=1e-3)
criterion = nn.MSELoss()

# Train the model using all windows as one batch (no mini-batching).
train(model, optimizer, criterion, y_context, y_target, epochs=1000)

y_context shape: torch.Size([69666, 10, 7])
y_target shape: torch.Size([69666, 5, 7])
Epoch 0, Loss: 70.6213
Epoch 100, Loss: 7.1133
Epoch 200, Loss: 4.4918
Epoch 300, Loss: 4.1479
Epoch 400, Loss: 3.7757
Epoch 500, Loss: 3.4020


KeyboardInterrupt: 

# Forecaster - The parent class model

In [14]:
from abc import ABC, abstractmethod
from typing import Tuple

import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np

class Forecaster(nn.Module, ABC):
    """Abstract base class for sequence forecasters.

    Wraps a subclass-provided ``forward_model_pass`` with optional reversible
    instance normalization (RevIN), optional series decomposition and an
    optional linear autoregressive baseline. Also provides masked losses,
    prediction with (inverse) scaling, and per-step statistics/logging hooks.

    Subclasses must implement ``forward_model_pass`` and the two
    ``*_step_forward_kwargs`` properties.
    """

    def __init__(
        self,
        d_x: int,
        d_yc: int,
        d_yt: int,
        learning_rate: float = 1e-3,
        l2_coeff: float = 0,
        loss: str = "mse",
        linear_window: int = 0,
        linear_shared_weights: bool = False,
        use_revin: bool = False,
        use_seasonal_decomp: bool = False,
        verbose: bool = True,
    ):
        """
        :param d_x: number of features in the x (time/exogenous) inputs
        :param d_yc: number of context variables
        :param d_yt: number of target variables
        :param learning_rate: learning rate used by ``configure_optimizers``
        :param l2_coeff: weight decay used by ``configure_optimizers``
        :param loss: one of ``"mse"``, ``"mae"``, ``"smape"``
        :param linear_window: window of the linear baseline; 0 disables it
        :param linear_shared_weights: share baseline weights across variables
        :param use_revin: enable RevIN (requires ``d_yc == d_yt``)
        :param use_seasonal_decomp: enable series decomposition
        :param verbose: print the configuration on construction
        """
        super().__init__()

        def qprint(msg):
            if verbose:
                print(msg)

        qprint("Forecaster")
        qprint(f"\tL2: {l2_coeff}")
        qprint(f"\tLinear Window: {linear_window}")
        qprint(f"\tLinear Shared Weights: {linear_shared_weights}")
        qprint(f"\tRevIN: {use_revin}")
        qprint(f"\tDecomposition: {use_seasonal_decomp}")

        # Identity scalers until set_scaler / set_inv_scaler are called, so
        # predict() works on a model that has no scaler configured.
        self._scaler = lambda x: x
        self._inv_scaler = lambda x: x
        self.l2_coeff = l2_coeff
        self.learning_rate = learning_rate
        self.time_masked_idx = None
        self.null_value = None
        self.loss = loss

        if linear_window:
            self.linear_model = LinearModel(
                linear_window, shared_weights=linear_shared_weights, d_yt=d_yt
            )
        else:
            # Disabled baseline contributes nothing to the output.
            self.linear_model = lambda x, *args, **kwargs: 0.0

        self.use_revin = use_revin
        if use_revin:
            assert d_yc == d_yt, "TODO: figure out exo case for revin"
            self.revin = RevIN(num_features=d_yc)
        else:
            self.revin = lambda x, **kwargs: x

        self.use_seasonal_decomp = use_seasonal_decomp
        if use_seasonal_decomp:
            self.seasonal_decomp = SeriesDecomposition(kernel_size=25)
        else:
            # Without decomposition both "parts" are the full series.
            self.seasonal_decomp = lambda x: (x, x.clone())

        self.d_x = d_x
        self.d_yc = d_yc
        self.d_yt = d_yt

    def set_null_value(self, val: float) -> None:
        """Set the target value that marks missing entries (excluded from the loss)."""
        self.null_value = val

    def set_inv_scaler(self, scaler) -> None:
        """Set the callable applied to numpy predictions to undo input scaling."""
        self._inv_scaler = scaler

    def set_scaler(self, scaler) -> None:
        """Set the callable applied to numpy context values in ``predict``."""
        self._scaler = scaler

    @property
    @abstractmethod
    def train_step_forward_kwargs(self):
        """Extra keyword arguments passed to ``forward`` during training steps."""
        return {}

    @property
    @abstractmethod
    def eval_step_forward_kwargs(self):
        """Extra keyword arguments passed to ``forward`` during eval steps."""
        return {}

    def loss_fn(self, true: torch.Tensor, preds: torch.Tensor, mask: torch.Tensor) -> torch.Tensor:
        """Masked loss averaged over the unmasked entries.

        :raises ValueError: if ``self.loss`` is not a recognized loss name
        """
        true = torch.nan_to_num(true)
        # At least 1 so a fully masked batch gives a zero loss instead of NaN.
        denom = mask.sum().clamp(min=1)
        if self.loss == "mse":
            loss = (mask * (true - preds)).square().sum() / denom
        elif self.loss == "mae":
            loss = torch.abs(mask * (true - preds)).sum() / denom
        elif self.loss == "smape":
            num = 2.0 * torch.abs(preds - true)
            # preds is detached in the denominator, so gradients flow only
            # through the numerator.
            den = torch.abs(preds.detach()) + torch.abs(true) + 1e-5
            loss = 100.0 * (mask * (num / den)).sum() / denom
        else:
            raise ValueError(f"Unrecognized Loss Function: {self.loss}")
        return loss

    def forecasting_loss(self, outputs: torch.Tensor, y_t: torch.Tensor, time_mask: int) -> Tuple[torch.Tensor, ...]:
        """Compute the loss while ignoring null, NaN and time-masked targets.

        :param time_mask: if not None, targets from this index on along dim 1
            are excluded
        :return: ``(loss, full_mask)``
        """
        if self.null_value is not None:
            null_mask_mat = y_t != self.null_value
        else:
            null_mask_mat = torch.ones_like(y_t)
        null_mask_mat *= ~torch.isnan(y_t)

        time_mask_mat = torch.ones_like(y_t)
        if time_mask is not None:
            time_mask_mat[:, time_mask:] = False

        full_mask = time_mask_mat * null_mask_mat
        forecasting_loss = self.loss_fn(y_t, outputs, full_mask)
        return forecasting_loss, full_mask

    def compute_loss(self, batch: Tuple[torch.Tensor], time_mask: int = None, forward_kwargs: dict = None) -> Tuple[torch.Tensor, ...]:
        """Run ``forward`` on a ``(x_c, y_c, x_t, y_t)`` batch and score it.

        :return: ``(loss, outputs, mask)``
        """
        # None instead of a shared mutable {} default.
        if forward_kwargs is None:
            forward_kwargs = {}
        x_c, y_c, x_t, y_t = batch
        outputs, *_ = self.forward(x_c, y_c, x_t, y_t, **forward_kwargs)
        loss, mask = self.forecasting_loss(outputs=outputs, y_t=y_t, time_mask=time_mask)
        return loss, outputs, mask

    def predict(self, x_c: torch.Tensor, y_c: torch.Tensor, x_t: torch.Tensor, sample_preds: bool = False) -> torch.Tensor:
        """Forecast targets for ``x_t`` from unscaled context values.

        ``y_c`` is passed through the scaler, the model runs without
        gradients on a zero placeholder target, and the result is passed
        through the inverse scaler and returned on ``y_c``'s original device.
        ``sample_preds`` is accepted but not used here.
        """
        og_device = y_c.device
        # Ensure tensors are on the same device as the model.
        model_device = next(self.parameters()).device
        x_c = x_c.to(model_device).float()
        x_t = x_t.to(model_device).float()
        y_c = torch.from_numpy(self._scaler(y_c.cpu().numpy())).to(model_device).float()
        # Placeholder target: only its shape is needed by forward().
        y_t = torch.zeros((x_t.shape[0], x_t.shape[1], self.d_yt), device=model_device).float()

        with torch.no_grad():
            normalized_preds, *_ = self.forward(x_c, y_c, x_t, y_t, **self.eval_step_forward_kwargs)
        preds = torch.from_numpy(self._inv_scaler(normalized_preds.cpu().numpy())).to(og_device).float()
        return preds

    @abstractmethod
    def forward_model_pass(
        self,
        x_c: torch.Tensor,
        y_c: torch.Tensor,
        x_t: torch.Tensor,
        y_t: torch.Tensor,
        **forward_kwargs,
    ) -> Tuple[torch.Tensor]:
        """Model-specific pass; must return a tuple whose first item is the prediction."""
        return NotImplemented

    def nan_to_num(self, *inps):
        """Return the inputs with NaNs replaced via ``torch.nan_to_num``."""
        # A tuple (not a one-shot generator) so the result can be reused.
        return tuple(torch.nan_to_num(i) for i in inps)

    def forward(self, x_c: torch.Tensor, y_c: torch.Tensor, x_t: torch.Tensor, y_t: torch.Tensor, **forward_kwargs) -> Tuple[torch.Tensor, ...]:
        """Full forecasting pass.

        Normalizes ``y_c``, splits it into two parts, feeds the first to the
        subclass model and the second to the linear baseline, sums both and
        de-normalizes. Any extra outputs of the model pass are appended to
        the returned tuple.
        """
        x_c, y_c, x_t, y_t = self.nan_to_num(x_c, y_c, x_t, y_t)
        _, pred_len, d_yt = y_t.shape

        y_c = self.revin(y_c, mode="norm")
        seasonal_yc, trend_yc = self.seasonal_decomp(y_c)
        preds, *extra = self.forward_model_pass(x_c, seasonal_yc, x_t, y_t, **forward_kwargs)
        baseline = self.linear_model(trend_yc, pred_len=pred_len, d_yt=d_yt)
        output = self.revin(preds + baseline, mode="denorm")

        if extra:
            return (output,) + tuple(extra)
        return (output,)

    def _compute_stats(self, pred: torch.Tensor, true: torch.Tensor, mask: torch.Tensor):
        """Compute masked evaluation metrics on scaled and unscaled values."""
        pred = pred * mask
        true = torch.nan_to_num(true) * mask

        # Masked entries are zeroed above but still counted by the metric
        # means, so each metric is divided by the mask's mean.
        adj = mask.mean().cpu().numpy() + 1e-5
        pred = pred.detach().cpu().numpy()
        true = true.detach().cpu().numpy()
        scaled_pred = self._inv_scaler(pred)
        scaled_true = self._inv_scaler(true)
        stats = {
            "mape": mape(scaled_true, scaled_pred) / adj,
            "mae": mae(scaled_true, scaled_pred) / adj,
            "mse": mse(scaled_true, scaled_pred) / adj,
            "smape": smape(scaled_true, scaled_pred) / adj,
            "norm_mae": mae(true, pred) / adj,
            "norm_mse": mse(true, pred) / adj,
        }
        return stats

    def step(self, batch: Tuple[torch.Tensor], train: bool = False):
        """Compute the loss and metrics for one batch; returns a stats dict."""
        kwargs = self.train_step_forward_kwargs if train else self.eval_step_forward_kwargs
        # Time masking is applied during training only.
        time_mask = self.time_masked_idx if train else None
        loss, output, mask = self.compute_loss(batch=batch, time_mask=time_mask, forward_kwargs=kwargs)
        *_, y_t = batch
        stats = self._compute_stats(output, y_t, mask)
        stats["loss"] = loss
        return stats

    def training_step(self, batch, batch_idx):
        return self.step(batch, train=True)

    def validation_step(self, batch, batch_idx):
        stats = self.step(batch, train=False)
        self.current_val_stats = stats
        return stats

    def test_step(self, batch, batch_idx):
        return self.step(batch, train=False)

    def _log_stats(self, section, outs):
        """Log the mean of every stat under ``"{section}/{key}"``."""
        # NOTE(review): `self.log` is not defined by this class or by
        # nn.Module in the visible code; presumably it is expected from a
        # training framework's module base class - confirm before relying on
        # the *_step_end hooks.
        for key in outs.keys():
            stat = outs[key]
            if isinstance(stat, np.ndarray) or isinstance(stat, torch.Tensor):
                stat = stat.mean()
            self.log(f"{section}/{key}", stat, sync_dist=True)

    def training_step_end(self, outs):
        self._log_stats("train", outs)
        return {"loss": outs["loss"].mean()}

    def validation_step_end(self, outs):
        self._log_stats("val", outs)
        return outs

    def test_step_end(self, outs):
        self._log_stats("test", outs)
        return {"loss": outs["loss"].mean()}

    def predict_step(self, batch, batch_idx):
        return self(*batch, **self.eval_step_forward_kwargs)

    def configure_optimizers(self):
        """Return Adam plus a ReduceLROnPlateau scheduler monitoring ``val/loss``."""
        optimizer = torch.optim.Adam(
            self.parameters(), lr=self.learning_rate, weight_decay=self.l2_coeff
        )
        scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
            optimizer,
            patience=3,
            factor=0.2,
        )
        return {
            "optimizer": optimizer,
            "lr_scheduler": {
                "scheduler": scheduler,
                "monitor": "val/loss",
            },
        }

# LinearAR Forecaster

In [15]:
import torch
from torch import nn
import torch.nn.functional as F

class Linear_Forecaster(Forecaster):
    """Forecaster whose model pass is a single autoregressive ``LinearModel``.

    ``context_points`` sets the window of the main linear model, while
    ``linear_window`` (forwarded to the parent) separately controls the
    parent's optional linear baseline.
    """

    def __init__(
        self,
        d_x: int,
        d_yc: int,
        d_yt: int,
        context_points: int,
        learning_rate: float = 1e-3,
        l2_coeff: float = 0,
        loss: str = "mse",
        linear_window: int = 0,
        linear_shared_weights: bool = False,
        use_revin: bool = False,
        use_seasonal_decomp: bool = False,
    ):
        super().__init__(
            d_x=d_x,
            d_yc=d_yc,
            d_yt=d_yt,
            l2_coeff=l2_coeff,
            learning_rate=learning_rate,
            loss=loss,
            linear_window=linear_window,
            linear_shared_weights=linear_shared_weights,
            use_revin=use_revin,
            use_seasonal_decomp=use_seasonal_decomp,
        )

        self.model = LinearModel(
            context_points, shared_weights=linear_shared_weights, d_yt=d_yt
        )

    @property
    def eval_step_forward_kwargs(self):
        """No extra forward arguments are needed at evaluation time."""
        return {}

    @property
    def train_step_forward_kwargs(self):
        """No extra forward arguments are needed at training time."""
        return {}

    def forward_model_pass(self, x_c, y_c, x_t, y_t, **forward_kwargs):
        """Predict from ``y_c`` only; ``y_t`` supplies just the output shape.

        ``x_c``, ``x_t`` and any ``forward_kwargs`` are accepted to match the
        parent's abstract signature but are not used.
        """
        _, pred_len, d_yt = y_t.shape
        output = self.model(y_c, pred_len=pred_len, d_yt=d_yt)
        return (output,)

    @classmethod
    def add_cli(cls, parser):
        """Register CLI arguments; this class adds none of its own."""
        # The parent visible in this file defines no add_cli, so an
        # unconditional super().add_cli(parser) raises AttributeError.
        # Delegate only when a parent implementation actually exists.
        parent_add_cli = getattr(super(), "add_cli", None)
        if callable(parent_add_cli):
            parent_add_cli(parser)


# Running the Linear Model

In [None]:
# ETTm1
import numpy as np
import pandas as pd

def create_sliding_windows(data: np.ndarray, context_points: int, pred_len: int):
    """Cut a series into overlapping (context, target) window pairs.

    data: numpy array of shape (T, d_yt)
    returns: y_context (batch, context_points, d_yt) and
             y_target (batch, pred_len, d_yt)

    When the series is shorter than ``context_points + pred_len`` the batch
    dimension is 0 but the remaining dimensions are still well-formed.
    """
    window = context_points + pred_len
    n_windows = max(data.shape[0] - window + 1, 0)

    if n_windows == 0:
        # np.array([]) would collapse to shape (0,); keep the window and
        # feature dimensions so downstream shape handling still works.
        feature_shape = data.shape[1:]
        empty_context = np.empty((0, context_points) + feature_shape, dtype=data.dtype)
        empty_target = np.empty((0, pred_len) + feature_shape, dtype=data.dtype)
        return empty_context, empty_target

    contexts = [data[start : start + context_points] for start in range(n_windows)]
    targets = [
        data[start + context_points : start + window] for start in range(n_windows)
    ]
    return np.array(contexts), np.array(targets)

In [None]:
# Load the dataset from a hard-coded local path; the "date" column is parsed
# as datetimes so the rows can be ordered chronologically.
csv_path = "S:\\spatiotemporal-analysis\\ETTm1_modified.csv"
df = pd.read_csv(csv_path, parse_dates=["date"])

# Sort by date in place (a no-op if the file is already ordered)
df.sort_values("date", inplace=True)

# Use the numeric columns as features (d_yt equals the number of columns: 7)
feature_columns = ["HUFL", "HULL", "MUFL", "MULL", "LUFL", "LULL", "OT"]
data = df[feature_columns].values  # shape (T, 7)

# Context (input) and prediction (output) window lengths, in rows
context_points = 10
pred_len = 5

# Cut the series into overlapping (context, target) window pairs
y_context_np, y_target_np = create_sliding_windows(data, context_points, pred_len)

# Convert to float32 torch tensors
y_context = torch.tensor(y_context_np, dtype=torch.float32)
y_target = torch.tensor(y_target_np, dtype=torch.float32)

print("y_context shape:", y_context.shape)  # (batch, context_points, 7)
print("y_target shape:", y_target.shape)    # (batch, pred_len, 7)

# Create the model (one weight column per feature), optimizer, and loss
d_yt = len(feature_columns)
model = LinearModel(context_points=context_points, shared_weights=False, d_yt=d_yt)
optimizer = Adam(model.parameters(), lr=1e-3)
criterion = nn.MSELoss()

# Train the model using all windows as one batch (no mini-batching).
train(model, optimizer, criterion, y_context, y_target, epochs=1000)

In [19]:
import pandas as pd
import torch
import torch.nn as nn
from torch.optim import Adam

# Read the CSV from a hard-coded local path, parsing "date" as datetimes
csv_path = "S:\\spatiotemporal-analysis\\ETTm1_modified.csv"
df = pd.read_csv(csv_path, parse_dates=["date"])

# Sort by date in place (a no-op if the file is already ordered)
df.sort_values("date", inplace=True)

# Use the numeric columns as features (d_yt equals the number of columns: 7)
feature_columns = ["HUFL", "HULL", "MUFL", "MULL", "LUFL", "LULL", "OT"]
data = df[feature_columns].values  # shape (T, 7)

# Context (input) and prediction (output) window lengths, in rows
context_points = 10
pred_len = 5

# Cut the series into overlapping (context, target) window pairs
y_context_np, y_target_np = create_sliding_windows(data, context_points, pred_len)

# Convert to float32 torch tensors
y_context = torch.tensor(y_context_np, dtype=torch.float32)  # shape: (batch, context_points, 7)
y_target = torch.tensor(y_target_np, dtype=torch.float32)    # shape: (batch, pred_len, 7)

print("y_context shape:", y_context.shape)
print("y_target shape:", y_target.shape)

# Placeholder inputs for x_c and x_t: zero-width feature dimension, because
# this example has no exogenous features. Replace these with real tensors
# if exogenous features are available.
batch = y_context.shape[0]
x_context = torch.empty(batch, context_points, 0)  # no features
x_target = torch.empty(batch, pred_len, 0)           # no features

# d_yt is the number of features (7 in this example)
d_yt = len(feature_columns)

# Instantiate the forecaster.
# RevIN is ENABLED here (use_revin=True); Forecaster asserts d_yc == d_yt in
# that case, which holds because both are set to d_yt.
# The parent's linear baseline is disabled (linear_window=0), as is the
# series decomposition.
model = Linear_Forecaster(
    d_x=0,            # No exogenous input in this example
    d_yc=d_yt,
    d_yt=d_yt,
    context_points=context_points,
    use_revin=True,  # requires d_yc == d_yt; set False to skip normalization
    use_seasonal_decomp=False,
    linear_window=0
)

# Adam over all forecaster parameters; plain (unmasked) MSE as the criterion
optimizer = Adam(model.parameters(), lr=1e-3)
criterion = nn.MSELoss()

# A simple training loop
def train(model, optimizer, criterion, x_c, y_c, x_t, y_t, epochs=1000):
    """Fit a forecaster on a single batch for ``epochs`` optimizer steps.

    The model is called as ``model(x_c, y_c, x_t, y_t)``; when it returns a
    tuple only the first element is scored against ``y_t``. The loss is
    printed after every 100th epoch.
    """
    model.train()
    for epoch in range(epochs):
        optimizer.zero_grad()
        result = model(x_c, y_c, x_t, y_t)
        # Forecasters return a tuple; the prediction is its first element.
        preds = result[0] if isinstance(result, tuple) else result
        loss = criterion(preds, y_t)
        loss.backward()
        optimizer.step()
        if (epoch + 1) % 100:
            continue
        print(f"Epoch {epoch+1}, Loss: {loss.item():.6f}")

# Train the forecaster on all windows at once (a single full batch per epoch).
train(model, optimizer, criterion, x_context, y_context, x_target, y_target, epochs=1000)

y_context shape: torch.Size([69666, 10, 7])
y_target shape: torch.Size([69666, 5, 7])
Forecaster
	L2: 0
	Linear Window: 0
	Linear Shared Weights: False
	RevIN: True
	Decomposition: False
Epoch 100, Loss: 4.463812


KeyboardInterrupt: 