<a href="https://colab.research.google.com/github/Showmick119/Samay/blob/main/chronos_trial.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# **Install Dependencies**

In [None]:
# Install Samay from a local wheel file; the filename's tags target CPython 3.11 on
# Linux x86_64, and the file must already exist in the current working directory.
!pip install samay-0.1.0-cp311-cp311-linux_x86_64.whl

In [None]:
!pip install --upgrade -U numpy --force

# **Importing Requirements**

In [None]:
import os
import sys

import numpy as np
import torch

# Resolve the local "src" directory against the current working directory and put it
# at the front of the import path (only once, so re-running the cell is harmless).
src_path = os.path.abspath("src")
if src_path not in sys.path:
    sys.path.insert(0, src_path)

print(sys.path)

In [None]:
from samay.dataset import ChronosDataset
from samay.model import ChronosModel
from samay.visualization import ForecastVisualization

# **Loading Dataset**

In [None]:
# Download ETTh1.csv from the Samay repository into the current working directory.
!wget https://raw.githubusercontent.com/kage08/Samay/refs/heads/main/data/data/ETTh1.csv

In [None]:
# Path of the ETTh1 CSV fetched by the `!wget` cell above, which saves it into the
# current working directory. (The previous value was an absolute path on one
# developer's machine and does not exist on Colab or any other checkout.)
ETT_CSV_PATH = "ETTh1.csv"

# Arguments shared by the train and test splits, so the two datasets cannot drift apart.
# To try another CSV, change `path`, `datetime_col`, `freq` and the window lengths here.
dataset_kwargs = dict(
    name="ett",
    path=ETT_CSV_PATH,
    datetime_col="date",
    freq="h",
    context_len=128,
    horizon_len=64,
)

train_dataset = ChronosDataset(mode="train", **dataset_kwargs)
test_dataset = ChronosDataset(mode="test", **dataset_kwargs)

# Quick sanity check on the size of the test split's underlying data.
print(len(test_dataset.dataset))

# **Loading the Chronos Model**

In [None]:
# Model identifier handed to ChronosModel via `repo`
# (presumably a Hugging Face Hub id — confirm).
repo = "amazon/chronos-t5-small"
ch = ChronosModel(config=None, repo=repo)
# Called without arguments here; a later cell calls it again with a checkpoint
# directory and a model type.
ch.load_model()

In [None]:
# Show the device reported by the wrapped model object.
print(ch.model.model.device)

In [None]:
# Evaluate the loaded model on the test split, requesting the MSE and MASE metrics.
# `evaluate` returns four values: the metric results plus the objects that the next
# cells pass to ForecastVisualization as (trues, preds, histories).
eval_results, trues, preds, histories = ch.evaluate(
    test_dataset, batch_size=8, metrics=["MSE", "MASE"]
)
print(eval_results)
# visualization = ForecastVisualization(trues, preds[:,:,1,:], histories)
# visualization.plot()

In [None]:
# Plot the forecast for the first channel and first time index.
# NOTE(review): this call passes `time_idx=0`, while the post-finetuning cell passes
# `time_index=0`; only one can match ForecastVisualization.plot's signature — confirm
# the keyword name and align both calls.
visualization = ForecastVisualization(trues, preds, histories)
visualization.plot(channel_idx=0, time_idx=0)

In [None]:
# Inspect the shapes of the three objects returned by `ch.evaluate`.
print(trues.shape)
print(preds.shape)
print(histories.shape)

In [None]:
# Fine-tune the model on the training split; the next cell reads the resulting run
# directory via `ch.get_latest_run_dir()`.
ch.finetune(train_dataset)

In [None]:
# Load the "checkpoint-final" directory of the most recent run back into `ch`.
latest_run_dir = ch.get_latest_run_dir()
model_dir = os.path.join(latest_run_dir, "checkpoint-final")
model_type = "seq2seq"
# NOTE(review): `model` is not used by any later cell, which keeps going through `ch`;
# whether `load_model` returns anything is not visible here — confirm.
model = ch.load_model(model_dir, model_type)

In [None]:
# Re-run the same evaluation after loading the fine-tuned checkpoint. This overwrites
# eval_results / trues / preds / histories from the pre-finetuning evaluation.
eval_results, trues, preds, histories = ch.evaluate(
    test_dataset, batch_size=8, metrics=["MSE", "MASE"]
)
print(eval_results)

In [None]:
# Plot the fine-tuned forecast for the first channel and first time index.
# NOTE(review): this call passes `time_index=0`, while the pre-finetuning cell passes
# `time_idx=0`; only one can match ForecastVisualization.plot's signature — confirm
# the keyword name and align both calls.
visualization = ForecastVisualization(trues, preds, histories)
visualization.plot(channel_idx=0, time_index=0)

In [None]:
# Convert the test split's underlying data to a NumPy array and transpose it.
# Assumes the data is 2-D (time x series), so iterating `data` afterwards yields one
# series per step — TODO confirm.
data = test_dataset.dataset
data = np.array(data).transpose()

print(data.shape)

In [None]:
# Use the first 1000 values of every row of `data` as that row's context.
# Named `contexts` rather than `input` so the Python builtin is not shadowed; the unused
# enumerate index was dropped.
contexts = [torch.tensor(series[:1000]) for series in data]
print(contexts[0].shape)
# Sample 10 forecast paths of 64 steps for each context.
predictions = ch.model.predict(context=contexts, prediction_length=64, num_samples=10)

In [None]:
from gluonts.dataset.pandas import PandasDataset
from gluonts.dataset.split import split

In [None]:
# Re-wrap the test data as a GluonTS PandasDataset; `dict(dataset)` turns the frame-like
# object into a mapping that PandasDataset accepts.
dataset = test_dataset.dataset
dataset = PandasDataset(dict(dataset))
# NOTE(review): the offset expression evaluates to 1152 (= -128 + 20 * 64), a positive
# value — confirm this is the intended split point for 20 windows of length 64.
train, test_template = split(dataset, offset=-128 + 20 * 64)
print(test_template)
print(len(dataset))
# Request 20 test windows, each with a 64-step label, spaced 64 steps apart.
test_data = test_template.generate_instances(
    prediction_length=64, windows=20, distance=64
)

In [None]:
# Show the object returned by `generate_instances`.
print(test_data)

In [None]:
# Peek at the first (input, label) pair. Previously `inp` and `label` were read while
# their assignments were commented out, so this cell raised NameError on a fresh kernel.
# Throwaway iterators are used for the peek so that `input_it` / `label_it` below still
# start at the first window when the next cell loops over them.
inp = next(iter(test_data.input))
label = next(iter(test_data.label))
print(inp)
print(label["target"].shape)

# Fresh iterators, consumed in full by the loop in the following cell.
input_it = iter(test_data.input)
label_it = iter(test_data.label)

In [None]:
# Walk the input and label iterators in lockstep, printing each pair's item ids and the
# label's target shape. This consumes `input_it` and `label_it`.
for inp, label in zip(input_it, label_it):
    print(inp["item_id"], label["item_id"], label["target"].shape)

In [None]:
# Print the test split's underlying data in full (can be a large output).
print(test_dataset.dataset)

In [None]:
import torch
from torch.utils.data import DataLoader, Dataset


class ChronosWindowDataset(Dataset):
    """
    A PyTorch Dataset for sliding window extraction from time series data.

    Every item is a ``(context, actual)`` pair cut from ``context_len + horizon_len``
    consecutive rows of ``data``. Both tensors are transposed, so their first
    dimension runs over the columns of ``data`` and their second over time steps.
    """

    def __init__(self, data, context_len, horizon_len, stride=-1):
        """
        Initialize the dataset with sliding window logic.

        Args:
            data (pd.DataFrame): The input time series data, one row per time step.
            context_len (int): Length of the context window.
            horizon_len (int): Length of the forecast horizon.
            stride (int): Step size for sliding the window. The default ``-1`` is a
                sentinel meaning "use ``horizon_len``".

        Raises:
            ValueError: If the resolved stride is not positive. Previously a negative
                stride other than ``-1`` silently produced an empty dataset.
        """
        self.data = data
        self.context_len = context_len
        self.horizon_len = horizon_len
        self.total_len = context_len + horizon_len
        self.stride = horizon_len if stride == -1 else stride

        if self.stride <= 0:
            raise ValueError(
                f"stride must be a positive integer (or -1 to use horizon_len), "
                f"got {self.stride}"
            )

        # Start row of every full window; empty when data is shorter than one window.
        self.indices = list(range(0, len(data) - self.total_len + 1, self.stride))

    def __len__(self):
        """Return the number of complete windows that fit in the data."""
        return len(self.indices)

    def __getitem__(self, idx):
        """
        Return the ``(context, actual)`` float32 tensors of window ``idx``.

        ``context`` holds the first ``context_len`` rows of the window and ``actual``
        the remaining ``horizon_len`` rows, each transposed to (columns, time).
        """
        start = self.indices[idx]
        split_at = start + self.context_len
        stop = start + self.total_len

        # Transpose so each row of the tensor is one column (series) of the frame.
        context = torch.tensor(
            self.data.iloc[start:split_at].to_numpy().transpose(), dtype=torch.float32
        )
        actual = torch.tensor(
            self.data.iloc[split_at:stop].to_numpy().transpose(), dtype=torch.float32
        )

        return context, actual

In [None]:
# Rebind `test_data` to the test split's underlying data. Note this replaces the GluonTS
# instances object that an earlier cell stored under the same name.
test_data = test_dataset.dataset
print(test_data)

In [None]:
import torch
from torch.utils.data import Dataset

# Wrap the raw data in sliding (context, actual) windows before batching. The cells
# below unpack every batch as `(context, actual)` and reshape to 128-step contexts and
# 64-step horizons, which only the windowed dataset provides; handing the raw data
# straight to DataLoader does not yield such pairs.
window_dataset = ChronosWindowDataset(data=test_data, context_len=128, horizon_len=64)
data_loader = DataLoader(window_dataset, batch_size=8, shuffle=False)

In [None]:
# Iterate over every batch of the loader and print the context and target shapes.
# Gradient tracking is disabled for the duration of the loop.
with torch.no_grad():
    for i, (context, actual) in enumerate(data_loader):
        print(context.shape)
        print(actual.shape)

In [None]:
# Pull only the first batch from the loader for a manual prediction check.
# NOTE(review): `input` shadows the Python builtin; the following cell reads this name,
# so any rename has to be made in both cells together.
input, actual = next(iter(data_loader))
# squeeze() removes any size-1 dimensions.
input = input.squeeze()
actual = actual.squeeze()
print(input.shape)
print(actual.shape)

In [None]:
# Flatten all leading dimensions so each row is one 128-step context.
input_stack = input.reshape(-1, 128)
print(input_stack.shape)

In [None]:
# Flatten all leading dimensions so each row is one 64-step horizon, matching the row
# layout of `input_stack`.
actual = actual.reshape(-1, 64)
print(actual.shape)

In [None]:
# Request 20 sampled forecasts of 64 steps for every row of `input_stack`.
predictions = ch.model.predict(
    context=input_stack, prediction_length=64, num_samples=20
)

In [None]:
# Inspect the shape of the sampled forecasts.
print(predictions.shape)

In [None]:
# Collapse axis 1 of the predictions into point summaries.
# Assumes predictions is (series, num_samples, horizon), i.e. axis 1 indexes the sampled
# paths — TODO confirm.
print(predictions.shape)
pred_median = np.median(predictions, axis=1)
print(pred_median.shape)
# Nine deciles from 0.1 to 0.9; np.quantile puts the quantile axis first, so index 4
# of the result is the 0.5 quantile.
pred_quantiles = np.quantile(
    predictions, [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9], axis=1
)
print(pred_quantiles.shape)

In [None]:
# Repeats the earlier flattening of `actual` to rows of 64 steps; this leaves the shape
# unchanged if that cell has already run.
actual = actual.reshape(-1, 64)
print(actual.shape)

In [None]:
# Mean squared error of the point forecast against the actual values, computed twice:
# first with the 0.5 quantile (index 4 of pred_quantiles), then with np.median's result.
# `mse1` is overwritten by the second computation.
mse1 = np.mean((actual.numpy() - pred_quantiles[4]) ** 2)
print(mse1)
mse1 = np.mean((actual.numpy() - pred_median) ** 2)
print(mse1)

In [None]:
# Show the device reported by the wrapped model object.
print(ch.model.model.device)

In [None]:
# Regroup the flattened rows as (8, 7, 64).
# NOTE(review): 8 matches the DataLoader batch_size and 64 the horizon length; 7 is
# presumably the number of series per window — confirm. The hard-coded sizes break if
# any of those change.
actual = actual.reshape(8, 7, 64)
pred_median = pred_median.reshape(8, 7, 64)
print(actual.shape)

In [None]:
# Mean squared error on the regrouped arrays. `actual` is converted to NumPy explicitly,
# as in the earlier MSE cell, so the arithmetic and np.mean operate on plain ndarrays
# instead of relying on implicit torch/NumPy interop.
mse1 = np.mean((actual.numpy() - pred_median) ** 2)
print(mse1)

In [None]:
# Sanity check: number of rows in the first-ten-row slice of the test data.
# Rebinds `data`, which an earlier cell had set to a transposed NumPy array.
data = test_dataset.dataset
print(len(data.iloc[:10]))