In [None]:
import math
import os
import tempfile

import numpy as np
import pandas as pd
from torch.optim import AdamW
from torch.optim.lr_scheduler import OneCycleLR
from transformers import EarlyStoppingCallback, Trainer, TrainingArguments, set_seed
from transformers.integrations import INTEGRATION_TO_CALLBACK

from tsfm_public import TimeSeriesPreprocessor, TrackingCallback, count_parameters, get_datasets
from tsfm_public.toolkit.get_model import get_model
from tsfm_public.toolkit.lr_finder import optimal_lr_finder
from tsfm_public.toolkit.visualization import plot_predictions
from tsfm_public.models.tinytimemixer import TinyTimeMixerForPrediction

In [46]:
# Setup Pandas
pd.set_option('display.max_rows', None)
pd.set_option('display.max_columns', None)

# Dataset
timestamp_column = "date"
id_columns = []  # mention the ids that uniquely identify a time-series.

# Understanding the split config -- slides
split_config = {
    "train": [0, 24],
    "valid": [24, 32],
    "test": [
        32,
        40,
    ],
}


def generate_time_series_data(start_time: str = "2021-01-01 00:00:00", 
                              freq: str = "H", 
                              periods: int = 40, 
                              n_variables: int = 10, 
                              univariate: bool = False) -> pd.DataFrame:
    """
    Generate a univariate or multivariate time series DataFrame.
    
    Args:
        start_time (str): Start datetime.
        freq (str): Pandas frequency string (e.g., 'H' for hourly).
        periods (int): Number of time steps.
        n_variables (int): Number of variables (ignored if univariate=True).
        univariate (bool): Whether to generate univariate (single-column) data.

    Returns:
        pd.DataFrame: Time series data with datetime index.
    """
    index = pd.date_range(start=start_time, periods=periods, freq=freq)

    if univariate:
        data = np.random.randn(periods)
        df = pd.DataFrame(data, index=index, columns=["A"])
    else:
        columns = list("ABCDEFGHIJ")[:n_variables]
        data = np.random.randn(periods, n_variables)
        df = pd.DataFrame(data, index=index, columns=columns)
    
    return df

df_multivariate = generate_time_series_data()
df_multivariate.index.rename("date", inplace=True)
df_multivariate.reset_index(inplace=True)
#print(df_multivariate.head())
df_univariate = generate_time_series_data(periods=800, univariate=True)
df_univariate.index.rename("date", inplace=True)
df_univariate.reset_index(inplace=True)


context_length = 512
forecast_length = 32

#df_univariate = df_univariate.iloc[-context_length+1:]
print(df_univariate.head())

                 date         A
0 2021-01-01 00:00:00  1.247846
1 2021-01-01 01:00:00  0.103213
2 2021-01-01 02:00:00 -0.950573
3 2021-01-01 03:00:00  0.761228
4 2021-01-01 04:00:00 -0.878311


  index = pd.date_range(start=start_time, periods=periods, freq=freq)
  index = pd.date_range(start=start_time, periods=periods, freq=freq)


In [None]:
zeroshot_model = TinyTimeMixerForPrediction.from_pretrained(
  "ibm-granite/granite-timeseries-ttm-r2", 
  revision="main", 
  prediction_filter_length=forecast_length)
from tsfm_public.toolkit.time_series_forecasting_pipeline import TimeSeriesForecastingPipeline

timestamp_column = "date"
target_columns = ["A"]

column_specifiers = {
    "timestamp_column": timestamp_column,
    "id_columns": id_columns,
    "target_columns": ["A"],
    "control_columns": [],
}



tsp = TimeSeriesPreprocessor(
        **column_specifiers,
        context_length=context_length,
        prediction_length=forecast_length,
        scaling=True,
        encode_categorical=False,
        scaler_type="standard",
    )

df_uni_standardized = tsp._standardize_dataframe(df_univariate)
trained_tsp = tsp.train(df_uni_standardized)

zs_forecast_pipeline = TimeSeriesForecastingPipeline(
    model=zeroshot_model,
    device="cpu",
    timestamp_column=timestamp_column,
    id_columns=[],
    target_columns=target_columns,
    freq="H",
    context_length=context_length,
    prediction_length=forecast_length
)

x = trained_tsp.preprocess(df_univariate)
print("Preprocessed shape:", x.shape)
print("Preprocessed index:", x.index)
#print(x.head())
x.index = df_univariate["date"][:len(x)]
assert not x[target_columns].isnull().any().any(), "NaNs in target column after preprocessing"

safe_cutoff = len(df_univariate) - forecast_length
forecast = zs_forecast_pipeline(x.iloc[:safe_cutoff])
print(x[target_columns])
print("???",forecast.tail())

Device set to use cpu


Preprocessed shape: (800, 2)
Preprocessed index: Index([  0,   1,   2,   3,   4,   5,   6,   7,   8,   9,
       ...
       790, 791, 792, 793, 794, 795, 796, 797, 798, 799],
      dtype='int64', length=800)
                            A
date                         
2021-01-01 00:00:00  1.224824
2021-01-01 01:00:00  0.097927
2021-01-01 02:00:00 -0.939532
2021-01-01 03:00:00  0.745746
2021-01-01 04:00:00 -0.868389
2021-01-01 05:00:00  0.282265
2021-01-01 06:00:00 -0.234598
2021-01-01 07:00:00 -0.443523
2021-01-01 08:00:00  1.936119
2021-01-01 09:00:00 -0.418987
2021-01-01 10:00:00  0.960555
2021-01-01 11:00:00 -1.667566
2021-01-01 12:00:00  0.698810
2021-01-01 13:00:00 -0.271703
2021-01-01 14:00:00  0.682691
2021-01-01 15:00:00 -0.431116
2021-01-01 16:00:00 -0.183971
2021-01-01 17:00:00 -1.366587
2021-01-01 18:00:00  0.265022
2021-01-01 19:00:00 -0.705252
2021-01-01 20:00:00  1.632024
2021-01-01 21:00:00 -1.051010
2021-01-01 22:00:00  1.033820
2021-01-01 23:00:00 -0.646125
2021-01-02 0

In [4]:
# Multivariate Tiny Time Mixer
column_specifiers = {
    "timestamp_column": timestamp_column,
    "id_columns": id_columns,
    "target_columns": ["A", "B", "C", "D", "E", "F", "G", "H", "I", "J"],
    "control_columns": [],
}