# Introduction
This notebook shows how to use TimesFM with finetuning. 

In order to perform finetuning, you need to create the Pytorch Dataset in a proper format. The example of the Dataset is provided below.
The finetuning code can be found in timesfm.finetuning_torch.py. This notebook just imports the methods from finetuning

### Dataset Creation

In [None]:
# Function to convert a date string into a pandas Timestamp
def convert_date(date_string):
    """
    Convert a date string into a pandas Timestamp.

    Parameters:
    - date_string: str, date in 'YYYYMM' format

    Returns:
    - pd.Timestamp object representing the date
    """
    year_month = date_string.strip()
    year = int(year_month[:4])
    month = int(year_month[4:])
    return pd.Timestamp(year=year, month=month, day=1)

In [30]:
from os import path
from typing import Optional, Tuple

import numpy as np
import pandas as pd
import torch
import torch.multiprocessing as mp
import yfinance as yf
from finetuning_torch import FinetuningConfig, TimesFMFinetuner
from huggingface_hub import snapshot_download
from torch.utils.data import Dataset

from timesfm import TimesFm, TimesFmCheckpoint, TimesFmHparams
from timesfm.pytorch_patched_decoder import PatchedTimeSeriesDecoder
import os


class TimeSeriesDataset(Dataset):
  """Dataset for time series data compatible with TimesFM."""

  def __init__(self,
               series: np.ndarray,
               context_length: int,
               horizon_length: int,
               freq_type: int = 0):
    """
        Initialize dataset.

        Args:
            series: Time series data
            context_length: Number of past timesteps to use as input
            horizon_length: Number of future timesteps to predict
            freq_type: Frequency type (0, 1, or 2)
        """
    if freq_type not in [0, 1, 2]:
      raise ValueError("freq_type must be 0, 1, or 2")

    self.series = series
    self.context_length = context_length
    self.horizon_length = horizon_length
    self.freq_type = freq_type
    self._prepare_samples()

  def _prepare_samples(self) -> None:
    """Prepare sliding window samples from the time series."""
    self.samples = []
    total_length = self.context_length + self.horizon_length

    for start_idx in range(0, len(self.series) - total_length + 1):
      end_idx = start_idx + self.context_length
      x_context = self.series[start_idx:end_idx]
      x_future = self.series[end_idx:end_idx + self.horizon_length]
      self.samples.append((x_context, x_future))

  def __len__(self) -> int:
    return len(self.samples)

  def __getitem__(
      self, index: int
  ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]:
    x_context, x_future = self.samples[index]

    x_context = torch.tensor(x_context, dtype=torch.float32)
    x_future = torch.tensor(x_future, dtype=torch.float32)

    input_padding = torch.zeros_like(x_context)
    freq = torch.tensor([self.freq_type], dtype=torch.long)

    return x_context, input_padding, freq, x_future

  def get_samples(self) -> list:
        """Return the prepared time series samples."""
        return self.samples

def prepare_datasets(series: np.ndarray,
                     context_length: int,
                     horizon_length: int,
                     freq_type: int = 0,) -> Tuple[Dataset, Dataset]:
  """
    Prepare training and validation datasets from time series data.

    Args:
        series: Input time series data
        context_length: Number of past timesteps to use
        horizon_length: Number of future timesteps to predict
        freq_type: Frequency type (0, 1, or 2)

    Returns:
        Tuple of (train_dataset, val_dataset)
    """
  train_data = series[:-12]
  val_data = series[-12:]
  
  # Create datasets with specified frequency type
  train_dataset = TimeSeriesDataset(train_data,
                                    context_length=context_length,
                                    horizon_length=horizon_length,
                                    freq_type=freq_type)

  val_dataset = TimeSeriesDataset(val_data,
                                  context_length=context_length,
                                  horizon_length=horizon_length,
                                  freq_type=freq_type)

  return train_dataset, val_dataset


### Model Creation

In [31]:
def get_model(load_weights: bool = False):
  device = "cuda" if torch.cuda.is_available() else "cpu"
  repo_id = "google/timesfm-2.0-500m-pytorch"
  hparams = TimesFmHparams(
      backend=device,
      per_core_batch_size=32,
      horizon_len=12,
      num_layers=50,
      use_positional_embedding=False,
      context_len=
      192,  # Context length can be anything up to 2048 in multiples of 32
  )
  tfm = TimesFm(hparams=hparams,
                checkpoint=TimesFmCheckpoint(huggingface_repo_id=repo_id))

  model = PatchedTimeSeriesDecoder(tfm._model_config)
  if load_weights:
    checkpoint_path = path.join(snapshot_download(repo_id), "torch_model.ckpt")
    loaded_checkpoint = torch.load(checkpoint_path, weights_only=True)
    model.load_state_dict(loaded_checkpoint)
  return model, hparams, tfm._model_config


In [32]:
def plot_predictions(
    model: TimesFm,
    val_dataset: Dataset,
    save_path: Optional[str] = "predictions.png",
) -> None:
  """
    Plot model predictions against ground truth for a batch of validation data.

    Args:
      model: Trained TimesFM model
      val_dataset: Validation dataset
      save_path: Path to save the plot
    """
  import matplotlib.pyplot as plt

  model.eval()

  x_context, x_padding, freq, x_future = val_dataset[0]
  x_context = x_context.unsqueeze(0)  # Add batch dimension
  x_padding = x_padding.unsqueeze(0)
  freq = freq.unsqueeze(0)
  x_future = x_future.unsqueeze(0)

  device = next(model.parameters()).device
  x_context = x_context.to(device)
  x_padding = x_padding.to(device)
  freq = freq.to(device)
  x_future = x_future.to(device)

  with torch.no_grad():
    predictions = model(x_context, x_padding.float(), freq)
    predictions_mean = predictions[..., 0]  # [B, N, horizon_len]
    last_patch_pred = predictions_mean[:, -1, :]  # [B, horizon_len]

  context_vals = x_context[0].cpu().numpy()
  future_vals = x_future[0].cpu().numpy()
  pred_vals = last_patch_pred[0].cpu().numpy()

  context_len = len(context_vals)
  horizon_len = len(future_vals)

  plt.figure(figsize=(12, 6))

  plt.plot(range(context_len),
           context_vals,
           label="Historical Data",
           color="blue",
           linewidth=2)

  plt.plot(
      range(context_len, context_len + horizon_len),
      future_vals,
      label="Ground Truth",
      color="green",
      linestyle="--",
      linewidth=2,
  )

  plt.plot(range(context_len, context_len + horizon_len),
           pred_vals,
           label="Prediction",
           color="red",
           linewidth=2)

  plt.xlabel("Time Step")
  plt.ylabel("Value")
  plt.title("TimesFM Predictions vs Ground Truth")
  plt.legend()
  plt.grid(True)

  if save_path:
    plt.savefig(save_path)
    print(f"Plot saved to {save_path}")

  plt.close()



In [35]:
def get_data(context_len: int,
             horizon_len: int,
             freq_type: int = 0) -> Tuple[Dataset, Dataset]:
  state = "sp"
  derivative = "gasolinac"

  # Loading and preparing data
  df = pd.read_csv(f"../../database/venda_process/mensal/uf/{derivative}/mensal_{state}_{derivative}.csv", sep=";", parse_dates=['timestamp'], date_parser=convert_date)
  time_series = np.array(df["m3"][:-12])
  time_series = time_series.reshape(-1, 1)

  print(time_series)

  train_dataset, val_dataset = prepare_datasets(
      series=time_series,
      context_length=context_len,
      horizon_length=horizon_len,
      freq_type=freq_type,
  )

  print(f"Created datasets:")
  print(f"- Training samples: {len(train_dataset)}")
  print(f"- Validation samples: {len(val_dataset)}")
  print(f"- Using frequency type: {freq_type}")
  return train_dataset, val_dataset

def single_gpu_example():
  """Basic example of finetuning TimesFM on stock data."""
  model, hparams, tfm_config = get_model(load_weights=True)
  config = FinetuningConfig(batch_size=256,
                            num_epochs=5,
                            learning_rate=1e-4,
                            use_wandb=False,
                            freq_type=1,
                            log_every_n_steps=10,
                            val_check_interval=0.5,
                            use_quantile_loss=False)


  train_dataset, val_dataset = get_data(128,
                                        horizon_len=12,
                                        freq_type=1)
  finetuner = TimesFMFinetuner(model, config)

  print(train_dataset.get_samples())
  print(val_dataset.get_samples())

  print("\nStarting finetuning...")
  results = finetuner.finetune(train_dataset=train_dataset,
                               val_dataset=val_dataset)

  print("\nFinetuning completed!")
  print(f"Training history: {len(results['history']['train_loss'])} epochs")

  plot_predictions(
      model=model,
      val_dataset=val_dataset,
      save_path="timesfm_predictions.png",
  )


In [36]:
single_gpu_example()

Fetching 3 files: 100%|██████████| 3/3 [00:00<00:00, 86184.33it/s]
Fetching 3 files: 100%|██████████| 3/3 [00:00<00:00, 74455.10it/s]
  df = pd.read_csv(f"../../database/venda_process/mensal/uf/{derivative}/mensal_{state}_{derivative}.csv", sep=";", parse_dates=['timestamp'], date_parser=convert_date)


[[ 415988.636 ]
 [ 343407.589 ]
 [ 297410.78  ]
 [ 393471.726 ]
 [ 360946.728 ]
 [ 313361.825 ]
 [ 332742.377 ]
 [ 319142.905 ]
 [ 359599.813 ]
 [ 346063.151 ]
 [ 451630.597 ]
 [ 409350.113 ]
 [ 385358.311 ]
 [ 296607.2   ]
 [ 318181.013 ]
 [ 453017.498 ]
 [ 454882.326 ]
 [ 350214.723 ]
 [ 420528.255 ]
 [ 470291.906 ]
 [ 368692.253 ]
 [ 431147.307 ]
 [ 336725.887 ]
 [ 427183.85  ]
 [ 357482.642 ]
 [ 349174.077 ]
 [ 343258.247 ]
 [ 383437.433 ]
 [ 352297.228 ]
 [ 384838.387 ]
 [ 391955.762 ]
 [ 350419.13  ]
 [ 319026.795 ]
 [ 340394.101 ]
 [ 379961.252 ]
 [ 424405.734 ]
 [ 361739.699 ]
 [ 323606.055 ]
 [ 436803.552 ]
 [ 380023.019 ]
 [ 381145.177 ]
 [ 416616.828 ]
 [ 373806.722 ]
 [ 401582.313 ]
 [ 392672.654 ]
 [ 401994.961 ]
 [ 409096.266 ]
 [ 476449.454 ]
 [ 385763.492 ]
 [ 389126.92  ]
 [ 446277.229 ]
 [ 400609.93  ]
 [ 439388.476 ]
 [ 418505.255 ]
 [ 393614.058 ]
 [ 463014.579 ]
 [ 477525.765 ]
 [ 448624.795 ]
 [ 482021.626 ]
 [ 511591.199 ]
 [ 477697.677 ]
 [ 440959.523 ]
 [ 52039

RuntimeError: The size of tensor a (128) must match the size of tensor b (12) at non-singleton dimension 1

In [None]:
import timesfm
from metrics_times import rrmse, pbe, pocid 
from sklearn.metrics import mean_absolute_percentage_error as mape

model_tfm = timesfm.TimesFm(
            hparams=timesfm.TimesFmHparams(
                backend="gpu",
                per_core_batch_size=16,                
                horizon_len=12,
                num_layers=50,
                use_positional_embedding=False,
                context_len=2048,
            ),
            checkpoint=timesfm.TimesFmCheckpoint(
                huggingface_repo_id="google/timesfm-2.0-500m-pytorch"),
        )


state = "sp"
derivative = "gasolinac"

# Loading and preparing data
df = pd.read_csv(f"../../database/venda_process/mensal/uf/{derivative}/mensal_{state}_{derivative}.csv", sep=";", parse_dates=['timestamp'], date_parser=convert_date)

df['timestamp'] = pd.to_datetime(df['timestamp'], errors='coerce')

df['unique_id'] = 1
df.rename(columns={'timestamp': 'ds'}, inplace=True)
df = df[['unique_id', 'ds', 'm3']] 

train_data = df.iloc[:-12]  
test_data = df.iloc[-12:]  

# forecast
forecast_df = model_tfm.forecast_on_df(
        inputs=train_data,
        freq="M",  # monthly
        value_name="m3",
        num_jobs=-1,
    )

y_pred = forecast_df["timesfm"].tolist()

y_pred = [round(value, 3) for value in y_pred]

# Display results
print("\nMean forecast for the last 12 months:")
print(y_pred)

print("\nActual values for the last 12 months:")
print(test_data["m3"].tolist())

y_test = df["m3"][-12:].values

# Calculating evaluation metrics
y_baseline = df["m3"][-12*2:-12].values
rrmse_result = rrmse(y_test, y_pred, df["m3"][:-12].mean())
mape_result = mape(y_test, y_pred)
pbe_result = pbe(y_test, y_pred)
pocid_result = pocid(y_test, y_pred)
mase_result = np.mean(np.abs(y_test - y_pred)) / np.mean(np.abs(y_test - y_baseline))

print(f"\nResultados TimesFM modelo: 200M \n")
print(f'RRMSE: {rrmse_result}')
print(f'MAPE: {mape_result}')
print(f'PBE: {pbe_result}')
print(f'POCID: {pocid_result}')
print(f'MASE: {mase_result}')

Fetching 3 files: 100%|██████████| 3/3 [00:00<00:00, 61984.79it/s]


Processing dataframe with multiple processes.


  df = pd.read_csv(f"../../database/venda_process/mensal/uf/{derivative}/mensal_{state}_{derivative}.csv", sep=";", parse_dates=['timestamp'], date_parser=convert_date)


Finished preprocessing dataframe.
Finished forecasting.

Mean forecast for the last 12 months:
[893001.938, 864328.75, 890328.0, 852707.938, 880568.25, 907226.375, 887686.062, 914125.688, 916511.625, 1046955.25, 899398.312, 879539.188]

Actual values for the last 12 months:
[904767.982, 864674.562, 1003722.825, 891229.559, 857554.969, 878948.027, 837788.733, 827009.557, 799432.996, 868003.888, 760665.314, 683361.59]

Resultados TimesFM modelo: 200M 

RRMSE: 0.16313059964402865
MAPE: 0.10129875087401145
PBE: -6.438116074339378
POCID: 72.72727272727273
MASE: 0.8085053856755883
