### Installing required libraries

In [None]:
pip install timesfm==1.1.0 numpy==1.26.4  scikit-learn==1.3.2 matplotlib==3.7.1 pandas==2.1.4



### Importing libraries

In [None]:
import numpy as np
import pandas as pd
import timesfm
import matplotlib.pyplot as plt
from sklearn.metrics import mean_squared_error, mean_absolute_percentage_error, r2_score
from collections import defaultdict

2024-09-26 21:06:15.571288: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /usr/local/nvidia/lib:/usr/local/nvidia/lib64
Dask dataframe query planning is disabled because dask-expr is not installed.

You can install it with `pip install dask[dataframe]` or `conda install dask`.
This will raise in a future version.



In [None]:
# Configure the JAX/XLA runtime through environment variables.
# NOTE(review): `timesfm` was imported in an earlier cell and presumably pulls
# in JAX; confirm these variables are still read when set this late, or move
# this cell above the imports.
import os
# Presumably stops XLA from preallocating accelerator memory up front -- TODO confirm.
os.environ['XLA_PYTHON_CLIENT_PREALLOCATE'] = 'false'
# Presumably disables TensorStore use for pmap checkpoint handling -- TODO confirm.
os.environ['JAX_PMAP_USE_TENSORSTORE'] = 'false'

Define a helper function, `calculate_error_metrics`, that computes error metrics for assessing the quality of the model's forecasts.

In [None]:

def calculate_error_metrics(actual, predicted, num_predictors=1):
    """Print and return regression error metrics for a forecast.

    Argument order matters: MAPE and R-squared are not symmetric, so the
    ground truth must be passed as ``actual`` and the forecast as
    ``predicted``.

    Args:
        actual: Ground-truth values (array-like).
        predicted: Forecast values (array-like) with the same shape as
            ``actual``.
        num_predictors: Number of predictors used in the adjusted R-squared
            correction.

    Returns:
        dict: The metrics that are printed, keyed by ``'MSE'``, ``'RMSE'``,
        ``'MAPE'``, ``'r2'`` and ``'adjusted_r2'``. ``'adjusted_r2'`` is
        ``nan`` when there are too few observations for the correction
        (``n - num_predictors - 1 <= 0``).
    """
    # Convert the inputs to numpy arrays
    actual = np.asarray(actual)
    predicted = np.asarray(predicted)
    # Number of observations (length of the first axis)
    n = len(actual)

    mse = mean_squared_error(actual, predicted)
    rmse = np.sqrt(mse)
    mape = mean_absolute_percentage_error(actual, predicted)
    r2 = r2_score(actual, predicted)

    # Adjusted R-squared divides by the residual degrees of freedom; without
    # this guard a zero denominator raises ZeroDivisionError and a negative
    # one silently produces a meaningless value.
    degrees_of_freedom = n - num_predictors - 1
    if degrees_of_freedom > 0:
        adjusted_r2 = 1 - ((1 - r2) * (n - 1) / degrees_of_freedom)
    else:
        adjusted_r2 = float('nan')

    metrics = {
        'MSE': mse,
        'RMSE': rmse,
        'MAPE': mape,
        'r2': r2,
        'adjusted_r2': adjusted_r2,
    }
    for name, value in metrics.items():
        print(f'{name} : {value}')
    return metrics

In [None]:
# Load the local CSV file -- change the path to wherever the file is stored.
df = pd.read_csv('EPF_FR_BE.csv')
# Cell output: only the rows whose unique_id is 'FR'.
df.loc[df['unique_id'] == 'FR']

Unnamed: 0,unique_id,ds,y,gen_forecast,system_load,week_day
0,FR,2015-01-01 00:00:00,53.48,76905.0,74812.0,3
1,FR,2015-01-01 01:00:00,51.93,75492.0,71469.0,3
2,FR,2015-01-01 02:00:00,48.76,74394.0,69642.0,3
3,FR,2015-01-01 03:00:00,42.27,72639.0,66704.0,3
4,FR,2015-01-01 04:00:00,38.41,69347.0,65051.0,3
...,...,...,...,...,...,...
16075,FR,2016-10-31 19:00:00,63.89,55041.0,59537.0,0
16076,FR,2016-10-31 20:00:00,61.99,53535.0,53823.0,0
16077,FR,2016-10-31 21:00:00,52.70,49893.0,50622.0,0
16078,FR,2016-10-31 22:00:00,50.60,49037.0,49194.0,0


In [None]:
# Data pipelining
def get_batched_data_fn(
    batch_size: int = 128,
    context_len: int = 120,
    horizon_len: int = 24,
    data: "pd.DataFrame | None" = None,
    countries=("FR", "BE"),
):
  """Build a generator function that yields batches of forecasting windows.

  Each series (one per entry of ``countries``) is cut into windows of
  ``context_len + horizon_len`` consecutive rows, stepping forward by
  ``horizon_len`` rows between windows.

  Args:
    batch_size: Maximum number of windows per yielded batch.
    context_len: Number of rows of ``y`` given to the model as history.
    horizon_len: Number of rows of ``y`` to forecast.
    data: Frame with columns ``unique_id``, ``y``, ``gen_forecast`` and
      ``week_day``. When ``None``, the notebook-level ``df`` is used.
    countries: ``unique_id`` values to build windows for, in order.

  Returns:
    A zero-argument function. Calling it returns a generator of dicts with
    the keys ``country``, ``inputs``, ``gen_forecast``, ``week_day`` and
    ``outputs``. Each value is a list with one entry per window. ``inputs``
    holds the context rows of ``y``, ``outputs`` holds the horizon rows of
    ``y``, and the two covariates span context plus horizon.
  """
  if data is None:
    data = df  # fall back to the frame loaded earlier in the notebook

  examples = defaultdict(list)
  window_len = context_len + horizon_len

  num_examples = 0
  for country in countries:
    sub_df = data[data["unique_id"] == country]
    # Convert each column once, then slice the plain lists per window.
    target = sub_df["y"].tolist()
    gen_forecast = sub_df["gen_forecast"].tolist()
    week_day = sub_df["week_day"].tolist()
    # The "+ 1" keeps the window that ends exactly on the last row; without
    # it the final complete window is dropped, and a series of exactly
    # window_len rows yields no example at all.
    for start in range(0, len(sub_df) - window_len + 1, horizon_len):
      context_end = start + context_len
      window_end = context_end + horizon_len
      num_examples += 1
      examples["country"].append(country)
      examples["inputs"].append(target[start:context_end])
      examples["gen_forecast"].append(gen_forecast[start:window_end])
      examples["week_day"].append(week_day[start:window_end])
      examples["outputs"].append(target[context_end:window_end])

  def data_fn():
    # Ceil-divide so a final, smaller batch is still yielded.
    for i in range(1 + (num_examples - 1) // batch_size):
      yield {k: v[(i * batch_size) : ((i + 1) * batch_size)] for k, v in examples.items()}

  return data_fn

In [None]:
timesfm_backend = "cpu"  # @param

# Select the JAX platform that matches the chosen backend, using the public
# `jax.config` API instead of the private `jax._src` module.
# An unknown backend name raises KeyError from the lookup below.
# NOTE(review): the empty string for "tpu" presumably leaves platform
# selection to JAX -- confirm.
import jax

jax.config.update(
    "jax_platforms", {"cpu": "cpu", "gpu": "cuda", "tpu": ""}[timesfm_backend]
)

In [None]:
# Constructor settings for the TimesFM model, gathered in one place.
timesfm_hparams = dict(
    context_len=512,
    horizon_len=128,
    input_patch_len=32,
    output_patch_len=128,
    num_layers=20,
    model_dims=1280,
    backend=timesfm_backend,
)
model = timesfm.TimesFm(**timesfm_hparams)
# Load the weights published under this Hugging Face repo id.
model.load_from_checkpoint(repo_id="google/timesfm-1.0-200m")

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


Fetching 5 files:   0%|          | 0/5 [00:00<?, ?it/s]

(…)nts/checkpoint_1100000/metadata/metadata:   0%|          | 0.00/43.9k [00:00<?, ?B/s]

(…)oint_1100000/descriptor/descriptor.pbtxt:   0%|          | 0.00/499 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/5.73k [00:00<?, ?B/s]

.gitattributes:   0%|          | 0.00/1.60k [00:00<?, ?B/s]

checkpoint:   0%|          | 0.00/814M [00:00<?, ?B/s]

Constructing model weights.




Constructed model weights in 4.46 seconds.
Restoring checkpoint from /root/.cache/huggingface/hub/models--google--timesfm-1.0-200m/snapshots/8775f7531211ac864b739fe776b0b255c277e2be/checkpoints.


ERROR:absl:For checkpoint version > 1.0, we require users to provide
          `train_state_unpadded_shape_dtype_struct` during checkpoint
          saving/restoring, to avoid potential silent bugs when loading
          checkpoints to incompatible unpadded shapes of TrainState.


Restored checkpoint in 1.87 seconds.
Jitting decoding.
Jitted decoding in 40.41 seconds.


In [None]:
# Benchmark: forecast every batch with plain TimesFM and with covariates.
import time

batch_size = 128
context_len = 120
horizon_len = 24
# Pass the settings above explicitly so the windows always match the
# `horizon_len` used when slicing forecasts (previously the call relied on
# the function's defaults happening to be the same values).
input_data = get_batched_data_fn(
    batch_size=batch_size, context_len=context_len, horizon_len=horizon_len
)
# Collects targets and forecasts of every batch, keyed by forecast type.
metrics = defaultdict(list)

for i, example in enumerate(input_data()):
  freq = [0] * len(example["inputs"])
  raw_forecast, _ = model.forecast(inputs=example["inputs"], freq=freq)
  # Only the covariate forecast below is timed.
  start_time = time.perf_counter()
  # Forecast with covariates
  # Output: new forecast, forecast by the xreg
  cov_forecast, ols_forecast = model.forecast_with_covariates(
      inputs=example["inputs"],
      dynamic_numerical_covariates={
          "gen_forecast": example["gen_forecast"],
      },
      dynamic_categorical_covariates={
          "week_day": example["week_day"],
      },
      static_numerical_covariates={},
      static_categorical_covariates={
          "country": example["country"]
      },
      freq=freq,
      xreg_mode="xreg + timesfm",              # default
      ridge=0.0,
      force_on_cpu=False,
      normalize_xreg_target_per_input=True,    # default
  )
  print(
      f"\rFinished batch {i} linear in {time.perf_counter() - start_time} seconds",
      end="",
  )
  # Keep the results of every batch; the loop variables themselves only
  # hold the final batch once the loop ends.
  # Assumes each forecast object iterates one row per input series -- TODO confirm.
  metrics["actual"].extend(example["outputs"])
  metrics["timesfm"].extend(raw_forecast[:, :horizon_len])
  metrics["xreg_timesfm"].extend(cov_forecast)
  metrics["xreg"].extend(ols_forecast)


Finished batch 10 linear in 7.066233396530151 seconds


In [None]:
print("Without covariates: \n")
# Ground truth goes first and the forecast second: MAPE and R-squared are not
# symmetric, so swapping the arguments reports wrong values.
# NOTE: `example` and `raw_forecast` are left over from the benchmark loop,
# so this evaluates the final batch only.
# The trailing ';' keeps the cell from echoing any return value.
calculate_error_metrics(example["outputs"], raw_forecast[:, :horizon_len]);

Without covariates: 

MSE : 404.6088033992598
RMSE : 20.114890091652498
MAPE : 0.18969476358665338
r2 : -0.04918699445529159
adjusted_r2 : -0.07199540737823273


In [None]:
# This cell scores the covariate-aware forecast, so label it accordingly.
print('With covariates: \n')
# Ground truth goes first and the forecast second: MAPE and R-squared are not
# symmetric, so swapping the arguments reports wrong values.
# NOTE: `example` and `cov_forecast` are left over from the benchmark loop,
# so this evaluates the final batch only.
# The trailing ';' keeps the cell from echoing any return value.
calculate_error_metrics(example["outputs"], cov_forecast);

Without covariates: 

MSE : 259.9858694585536
RMSE : 16.124077321154026
MAPE : 0.15689309439567115
r2 : 0.16291603857224796
adjusted_r2 : 0.14471856114990556


In [None]:

print('ols forecast: \n')
# Ground truth goes first and the forecast second: MAPE and R-squared are not
# symmetric, so swapping the arguments reports wrong values.
# NOTE: `example` and `ols_forecast` are left over from the benchmark loop,
# so this evaluates the final batch only.
# The trailing ';' keeps the cell from echoing any return value.
calculate_error_metrics(example["outputs"], ols_forecast);

ols forecast: 

MSE : 3222.492537935656
RMSE : 56.767002192608835
MAPE : 477.1218213465076
r2 : -9241.742381331851
adjusted_r2 : -9442.671563534717
