In [1]:
# Mount Google Drive in the Colab runtime; the data and fine-tuned checkpoints used
# further down are read from paths under /content/drive.
from google.colab import drive
drive.mount('/content/drive', force_remount = True)

Mounted at /content/drive


In [2]:
# Fetch the Lag-Llama source tree; the `lag_llama` and `utils` packages imported below come from it.
!git clone https://github.com/time-series-foundation-models/lag-llama/

Cloning into 'lag-llama'...
remote: Enumerating objects: 319, done.[K
remote: Counting objects: 100% (157/157), done.[K
remote: Compressing objects: 100% (71/71), done.[K
remote: Total 319 (delta 111), reused 105 (delta 84), pack-reused 162[K
Receiving objects: 100% (319/319), 232.35 KiB | 7.04 MiB/s, done.
Resolving deltas: 100% (152/152), done.


In [3]:
# Work from the repository root so its packages are importable and relative paths resolve.
cd lag-llama

/content/lag-llama


In [4]:
# Install the repository's requirements, then download the pretrained checkpoint into ./lag-llama/.
# NOTE(review): nothing is version-pinned here, so a later run may resolve different packages.
!pip3 install -r requirements.txt --quiet
!huggingface-cli download time-series-foundation-models/Lag-Llama lag-llama.ckpt --local-dir lag-llama

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.5/1.5 MB[0m [31m11.7 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m17.1/17.1 MB[0m [31m43.1 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m6.9/6.9 MB[0m [31m68.4 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m12.3/12.3 MB[0m [31m53.8 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.0/2.0 MB[0m [31m54.1 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m778.1/778.1 kB[0m [31m45.9 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m21.3/21.3 MB[0m [31m69.3 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m207.3/207.3 kB[0m [31m30.1 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━

In [5]:
from itertools import islice
from tqdm.autonotebook import tqdm

import torch

from gluonts.evaluation import make_evaluation_predictions, Evaluator
from gluonts.dataset.common import ListDataset

import pandas as pd
import numpy as np

from lag_llama.gluon.estimator import LagLlamaEstimator

import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import plotly.graph_objects as go
import plotly.io as pio
import plotly.offline as poff
import seaborn as sns

from sklearn.metrics import mean_absolute_error

from utils.utils import set_seed
from torch import manual_seed

  from tqdm.autonotebook import tqdm


In [6]:
# Fix the random seed once for the session via the repository's helper.
set_seed(42)

## Definitions

In [7]:
def create_gluonts_dataset(df, freq, target_column):
    """Wrap one column of an indexed DataFrame as a single-series GluonTS dataset.

    Parameters
    ----------
    df : pandas.DataFrame
        Source frame. Its first index entry is used as the series start.
    freq : str
        Frequency string forwarded to ``ListDataset``.
    target_column : str
        Column whose values become the series target.

    Returns
    -------
    The object produced by ``ListDataset`` for a one-element list holding a
    ``{"start", "target"}`` entry.
    """
    first_timestamp = df.index[0]
    observed_values = df[target_column].values
    return ListDataset(
        [dict(start=first_timestamp, target=observed_values)],
        freq=freq,
    )

In [8]:
def get_lag_llama_predictions(dataset,
                              model_ckpt,
                              prediction_length = 24,
                              context_length=None,
                              num_samples=100,
                              device="cuda",
                              batch_size=64,
                              nonnegative_pred_samples=True,
                              ):
    """Build a Lag-Llama predictor from a checkpoint and forecast every series in ``dataset``.

    Parameters
    ----------
    dataset
        GluonTS dataset handed to ``make_evaluation_predictions``; must support ``len()``.
    model_ckpt : str
        Checkpoint path. It is read with ``torch.load`` to recover
        ``hyper_parameters -> model_kwargs`` and also passed on as ``ckpt_path``.
    prediction_length : int
        Forecast horizon given to the estimator.
    context_length : int or None
        Context window given to the estimator; ``None`` uses the checkpoint's own value.
    num_samples : int
        Used both as ``num_parallel_samples`` of the estimator and as ``num_samples``
        of ``make_evaluation_predictions``.
    device : str or torch.device
        Device for checkpoint loading and for the lightning module.
    batch_size : int
        Forwarded to the estimator.
    nonnegative_pred_samples : bool
        Forwarded to the estimator.

    Returns
    -------
    tuple(list, list)
        ``(forecasts, tss)``: the two iterators returned by
        ``make_evaluation_predictions``, materialised as lists.

    Notes
    -----
    ``torch.manual_seed(42)`` is applied on every call.
    """
    manual_seed(42)
    _device = torch.device(device)
    _ckpt = torch.load(model_ckpt, map_location=_device)
    estimator_args = _ckpt["hyper_parameters"]["model_kwargs"]
    if context_length is None:
        context_length = estimator_args["context_length"]

    # Linear RoPE scaling: stretch positions only when the requested window
    # (context + horizon) is longer than the context stored in the checkpoint.
    rope_factor = max(1.0, (context_length + prediction_length) / estimator_args["context_length"])

    estimator = LagLlamaEstimator(
        ckpt_path=model_ckpt,
        context_length=context_length,
        prediction_length=prediction_length,
        device=_device,

        # architecture settings must match the checkpoint
        input_size=estimator_args["input_size"],
        n_layer=estimator_args["n_layer"],
        n_embd_per_head=estimator_args["n_embd_per_head"],
        n_head=estimator_args["n_head"],
        scaling=estimator_args["scaling"],
        time_feat=estimator_args["time_feat"],

        nonnegative_pred_samples=nonnegative_pred_samples,

        rope_scaling={
            "type": "linear",
            "factor": rope_factor,
        },

        batch_size=batch_size,
        num_parallel_samples=num_samples,
    )

    lightning_module = estimator.create_lightning_module().to(_device)
    transformation = estimator.create_transformation()
    predictor = estimator.create_predictor(transformation, lightning_module)

    forecast_it, ts_it = make_evaluation_predictions(
        dataset=dataset,
        predictor=predictor,
        num_samples=num_samples
    )

    # Both iterators yield one item per series, so the progress total is the number
    # of series, not the number of observations in the first series.
    n_series = len(dataset)
    forecasts = list(tqdm(forecast_it, total=n_series, desc="Forecasting batches"))
    tss = list(tqdm(ts_it, total=n_series, desc="Ground truth"))

    return forecasts, tss

In [9]:
def recursive_forecast(
    model_ckpt,
    context_df,
    test_df,
    prediction_length=7,
    context_length=32,
    device='cuda',
    num_samples=100,
    freq='D',
    target_column='wave_height'):
    """Backtest a checkpoint over ``test_df`` in consecutive blocks of ``prediction_length`` rows.

    For each block the model forecasts ``prediction_length`` steps from the current
    context, then the block's *observed* rows from ``test_df`` are appended to the
    context before the next block is forecast (predictions are never fed back).

    Parameters
    ----------
    model_ckpt : str
        Checkpoint path passed to ``get_lag_llama_predictions``.
    context_df : pandas.DataFrame
        Initial history. The caller's frame is not modified; a new frame is built
        with ``pd.concat`` at every step.
    test_df : pandas.DataFrame
        Rows to forecast; must contain ``target_column``.
    prediction_length : int
        Horizon of each forecast and number of rows added to the context per step.
    context_length : int or None
        Context window for the model; ``None`` reads it from the checkpoint.
    device : str or torch.device
        Device used for the model.
    num_samples : int
        Number of sample paths requested per forecast.
    freq : str
        Frequency string used when building the GluonTS dataset.
    target_column : str
        Column holding the series to forecast.

    Returns
    -------
    dict
        Keys ``'prediction'`` (forecast mean), ``'p10'`` and ``'p90'``; each is a
        list with ``len(test_df)`` entries.

    Raises
    ------
    ValueError
        If ``prediction_length`` is smaller than 1.
    """
    if prediction_length < 1:
        raise ValueError("prediction_length must be a positive integer")

    _device = torch.device(device)
    if context_length is None:
        # Only touch the checkpoint here when its stored context length is needed.
        _ckpt = torch.load(model_ckpt, map_location=_device)
        context_length = _ckpt["hyper_parameters"]["model_kwargs"]["context_length"]

    total_length = len(test_df)
    all_point_forecasts = []
    q10_forecasts = []
    q90_forecasts = []

    for block_start in range(0, total_length, prediction_length):
        # The final block may be shorter than the horizon.
        block_end = min(block_start + prediction_length, total_length)
        n_keep = block_end - block_start

        context_gdf = create_gluonts_dataset(context_df, freq=freq, target_column=target_column)
        forecasts, _ = get_lag_llama_predictions(
            model_ckpt=model_ckpt,
            dataset=context_gdf,
            prediction_length=prediction_length,
            num_samples=num_samples,
            context_length=context_length,
            device=_device
        )

        # The dataset holds a single series, so only the first forecast is relevant.
        forecast = forecasts[0]
        all_point_forecasts.extend(forecast.mean[:n_keep])
        q10_forecasts.extend(forecast['p10'][:n_keep])
        q90_forecasts.extend(forecast['p90'][:n_keep])

        # Reveal the observed values of this block to the next forecast.
        context_df = pd.concat(
            [context_df, test_df.iloc[block_start:block_end]],
            ignore_index=False,
        )

    return {
        'prediction': all_point_forecasts,
        'p10': q10_forecasts,
        'p90': q90_forecasts,
    }


In [10]:
def plot_preds(forecasts, tss, prediction_length=None):
    """Plot up to nine forecasts next to the tail of their observed series on a 3x3 grid.

    Parameters
    ----------
    forecasts : iterable
        Forecast objects exposing ``plot(color=...)``, ``item_id`` and ``prediction_length``.
    tss : iterable
        Observed series aligned with ``forecasts``; each must support slicing and
        ``to_timestamp()``.
    prediction_length : int or None
        Horizon used to size the history window (four horizons are shown). ``None``
        takes the horizon from each forecast, so the function no longer depends on a
        notebook-level ``prediction_length`` variable being defined first.

    Notes
    -----
    Updates ``plt.rcParams['font.size']`` globally and shows the figure.
    """
    plt.figure(figsize=(20, 15))
    date_formatter = mdates.DateFormatter('%b, %d')
    plt.rcParams.update({'font.size': 15})

    # Only the first nine series fit on the grid.
    for idx, (forecast, ts) in islice(enumerate(zip(forecasts, tss)), 9):
        ax = plt.subplot(3, 3, idx + 1)

        horizon = forecast.prediction_length if prediction_length is None else prediction_length
        history = ts[-4 * horizon:].to_timestamp()
        plt.plot(history, label="target")
        forecast.plot(color='g')
        plt.xticks(rotation=60)
        ax.xaxis.set_major_formatter(date_formatter)
        ax.set_title(forecast.item_id)

    plt.gcf().tight_layout()
    plt.legend()
    plt.show()

In [11]:
def plot_backtest_preds(df_actual_pred, actual_col, pred_col):
    """Show observed and predicted values as two line traces in one Plotly figure.

    Parameters
    ----------
    df_actual_pred : pandas.DataFrame
        Frame whose index is used as the x axis.
    actual_col : str
        Column plotted under the legend entry "actual".
    pred_col : str
        Column plotted under the legend entry "prediction".
    """
    fig = go.Figure()
    for column, legend_name in ((actual_col, "actual"), (pred_col, "prediction")):
        fig.add_trace(
            go.Scatter(
                x=df_actual_pred.index,
                y=df_actual_pred[column],
                name=legend_name,
                mode="lines",
            )
        )

    # Horizontal legend placed just above the top-left corner of the plot area.
    legend_style = dict(orientation="h", yanchor="top", y=1.1, xanchor="left", x=0.001)
    frame_margin = dict(l=20, r=20, t=35, b=20)
    fig.update_layout(
        title="Actual value vs predicted in test data",
        xaxis_title="Date time",
        yaxis_title="Wave height (meters)",
        width=900,
        height=400,
        margin=frame_margin,
        legend=legend_style,
    )

    fig.show()

In [12]:
def plot_prob_forecasts(df_forecasts):
    """
    Plot observed values, the point forecast and the shaded p10-p90 band.

    Parameters:
    df_forecasts (pd.DataFrame): Frame indexed by the x-axis values, with columns
                                 ['wave_height', 'prediction', 'p10', 'p90'].
    """
    dates = df_forecasts.index

    def _band_edge(name, column, **extra):
        # Invisible line (zero width, no legend entry) marking one edge of the band.
        return go.Scatter(
            name=name, x=dates, y=df_forecasts[column],
            mode='lines', marker=dict(color="#444"), line=dict(width=0),
            showlegend=False, **extra
        )

    observed = go.Scatter(name='Real value', x=dates, y=df_forecasts['wave_height'], mode='lines')
    point = go.Scatter(name='Point forecast', x=dates, y=df_forecasts['prediction'], mode='lines')
    upper = _band_edge('Upper Bound', 'p90')
    # 'tonexty' fills towards the previous trace, so the lower edge must follow the upper one.
    lower = _band_edge('Lower Bound', 'p10', fillcolor='rgba(68, 68, 68, 0.3)', fill='tonexty')

    fig = go.Figure([observed, point, upper, lower])

    legend_style = dict(orientation="h", yanchor="top", y=1.1, xanchor="left", x=0.001)
    fig.update_layout(
        title="Real value vs predicted in test data",
        xaxis_title="Date",
        yaxis_title="Wave height (Meters)",
        width=900,
        height=400,
        margin=dict(l=20, r=20, t=35, b=20),
        hovermode="x",
        legend=legend_style,
    )
    fig.show()

In [13]:
def empirical_coverage(y, lower_bound, upper_bound):
    """
    Fraction of observations that fall inside the interval, both bounds inclusive.
    """
    at_or_above_lower = y >= lower_bound
    at_or_below_upper = y <= upper_bound
    inside_interval = np.logical_and(at_or_above_lower, at_or_below_upper)
    return np.mean(inside_interval)

## Read data

In [14]:
# Read data: load the CSV, index it by timestamp and put it on a regular 60-minute grid
# ==============================================================================
data_dir = '/content/drive/MyDrive/Python Scripts/Lag Llama experiments/Data'

df = (
    pd.read_csv(data_dir + '/spain_clean.csv')
    .assign(datetime=lambda raw: pd.to_datetime(raw['datetime']))
    .set_index(keys='datetime')
    .asfreq('60min')
    .drop(columns=['period'])
)

# Aggregate to daily frequency, keeping the highest wave of each day.
# Bins are closed on the left and carry their right-edge label.
# ==============================================================================
df = df.resample(rule="D", closed="left", label="right").agg({"wave_height": "max"})

# Train-test split, counted back from the end of the series:
# the last 150 rows are the test set, the 150 before them the validation set.
# ==============================================================================
one_month = -30   # One month
two_months = -60  # Two months

end_val = 2 * two_months + one_month
end_train = end_val + 2 * two_months + one_month

df_train = df.iloc[:end_train].copy()
df_val = df.iloc[end_train:end_val].copy()
df_test = df.iloc[end_val:].copy()

print(f"Train dates      : {df_train.index.min()} --- {df_train.index.max()}  (n={len(df_train)})")
print(f"Validation dates : {df_val.index.min()} --- {df_val.index.max()}  (n={len(df_val)})")
print(f"Test dates       : {df_test.index.min()} --- {df_test.index.max()}  (n={len(df_test)})")

# Create the GluonTS datasets, one per split
# ==============================================================================
train = create_gluonts_dataset(df_train, freq='D', target_column='wave_height')
val = create_gluonts_dataset(df_val, freq='D', target_column='wave_height')
test = create_gluonts_dataset(df_test, freq='D', target_column='wave_height')

print('Train dataset:', train)
print('Validation dataset:',val)
print('Test dataset:',test)

Train dates      : 2020-06-19 00:00:00 --- 2023-08-24 00:00:00  (n=1162)
Validation dates : 2023-08-25 00:00:00 --- 2024-01-21 00:00:00  (n=150)
Test dates       : 2024-01-22 00:00:00 --- 2024-06-19 00:00:00  (n=150)
Train dataset: [{'start': Period('2020-06-19', 'D'), 'target': array([0.87, 0.9 , 0.7 , ..., 1.09, 1.08, 0.95], dtype=float32)}]
Validation dataset: [{'start': Period('2023-08-25', 'D'), 'target': array([1.26     , 0.97     , 1.22     , 1.6      , 1.52     , 1.24     ,
       1.15     , 0.97     , 0.91     , 1.23     , 2.49     , 1.95     ,
       1.73     , 1.47     , 0.92     , 0.52     , 0.61     , 0.67     ,
       1.17     , 1.07     , 1.06     , 1.15     , 0.85     , 0.93     ,
       1.09     , 1.61     , 1.98     , 1.99     , 2.82     , 2.76     ,
       1.64     , 1.35     , 1.72     , 1.72     , 1.45     , 1.92     ,
       1.24     , 1.08     , 0.93     , 1.07     , 1.46     , 1.61     ,
       1.14     , 0.91     , 1.07     , 0.72     , 0.5      , 0.37     ,
  

## Zero-shot forecasting

In [15]:
# Forecast parameters
# NOTE(review): the zero-shot cells below pass prediction_length=7 as a literal and do not
# pass `num_samples` or `device`, so the last two values set here do not reach
# recursive_forecast (its defaults are num_samples=100, device='cuda').
checkpoint = 'lag-llama/lag-llama.ckpt'  # relative to the repo root; written by the huggingface-cli download step
prediction_length = 7  # forecast horizon, in steps of the daily series
num_samples = 150  # intended number of sample paths per forecast
device = torch.device('cuda')

### Context length: 32

In [16]:
# Zero-shot backtest over the test split, seeded with the last 48 training rows.
# NOTE(review): `num_samples` and `device` from the parameters cell are not passed, so
# recursive_forecast runs with its own defaults (num_samples=100, device='cuda').
context_length = 32

forecasts_dict = recursive_forecast(model_ckpt = checkpoint, context_df=df_train[-48:], test_df = df_test, prediction_length=7, context_length = context_length)

# Sanity check: one point forecast and one p10/p90 pair per test row.
assert len(forecasts_dict['prediction']) == len(df_test) == len(forecasts_dict['p10']) == len(forecasts_dict['p90'])

Forecasting batches:   0%|          | 0/48 [00:00<?, ?it/s]

Ground truth:   0%|          | 0/48 [00:00<?, ?it/s]

Forecasting batches:   0%|          | 0/55 [00:00<?, ?it/s]

Ground truth:   0%|          | 0/55 [00:00<?, ?it/s]

Forecasting batches:   0%|          | 0/62 [00:00<?, ?it/s]

Ground truth:   0%|          | 0/62 [00:00<?, ?it/s]

Forecasting batches:   0%|          | 0/69 [00:00<?, ?it/s]

Ground truth:   0%|          | 0/69 [00:00<?, ?it/s]

Forecasting batches:   0%|          | 0/76 [00:00<?, ?it/s]

Ground truth:   0%|          | 0/76 [00:00<?, ?it/s]

Forecasting batches:   0%|          | 0/83 [00:00<?, ?it/s]

Ground truth:   0%|          | 0/83 [00:00<?, ?it/s]

Forecasting batches:   0%|          | 0/90 [00:00<?, ?it/s]

Ground truth:   0%|          | 0/90 [00:00<?, ?it/s]

Forecasting batches:   0%|          | 0/97 [00:00<?, ?it/s]

Ground truth:   0%|          | 0/97 [00:00<?, ?it/s]

Forecasting batches:   0%|          | 0/104 [00:00<?, ?it/s]

Ground truth:   0%|          | 0/104 [00:00<?, ?it/s]

Forecasting batches:   0%|          | 0/111 [00:00<?, ?it/s]

Ground truth:   0%|          | 0/111 [00:00<?, ?it/s]

Forecasting batches:   0%|          | 0/118 [00:00<?, ?it/s]

Ground truth:   0%|          | 0/118 [00:00<?, ?it/s]

Forecasting batches:   0%|          | 0/125 [00:00<?, ?it/s]

Ground truth:   0%|          | 0/125 [00:00<?, ?it/s]

Forecasting batches:   0%|          | 0/132 [00:00<?, ?it/s]

Ground truth:   0%|          | 0/132 [00:00<?, ?it/s]

Forecasting batches:   0%|          | 0/139 [00:00<?, ?it/s]

Ground truth:   0%|          | 0/139 [00:00<?, ?it/s]

Forecasting batches:   0%|          | 0/146 [00:00<?, ?it/s]

Ground truth:   0%|          | 0/146 [00:00<?, ?it/s]

Forecasting batches:   0%|          | 0/153 [00:00<?, ?it/s]

Ground truth:   0%|          | 0/153 [00:00<?, ?it/s]

Forecasting batches:   0%|          | 0/160 [00:00<?, ?it/s]

Ground truth:   0%|          | 0/160 [00:00<?, ?it/s]

Forecasting batches:   0%|          | 0/167 [00:00<?, ?it/s]

Ground truth:   0%|          | 0/167 [00:00<?, ?it/s]

Forecasting batches:   0%|          | 0/174 [00:00<?, ?it/s]

Ground truth:   0%|          | 0/174 [00:00<?, ?it/s]

Forecasting batches:   0%|          | 0/181 [00:00<?, ?it/s]

Ground truth:   0%|          | 0/181 [00:00<?, ?it/s]

Forecasting batches:   0%|          | 0/188 [00:00<?, ?it/s]

Ground truth:   0%|          | 0/188 [00:00<?, ?it/s]

Forecasting batches:   0%|          | 0/195 [00:00<?, ?it/s]

Ground truth:   0%|          | 0/195 [00:00<?, ?it/s]

In [17]:
# Put the point forecast and the p10/p90 bounds next to the observed test values.
df_forecasts = df_test.assign(
    prediction=forecasts_dict['prediction'],
    p10=forecasts_dict['p10'],
    p90=forecasts_dict['p90'],
)

In [18]:
# Observed values vs point forecast with the p10-p90 band (context length 32).
plot_prob_forecasts(df_forecasts)


In [19]:
# MAE
# ==============================================================================
metric = mean_absolute_error(df_forecasts['wave_height'], df_forecasts['prediction'])
print(f"Backtest error (MAE): {metric}")

# Predicted interval coverage (on test data)
# ==============================================================================
coverage = empirical_coverage(
    y = df_forecasts['wave_height'],
    lower_bound = df_forecasts['p10'],
    upper_bound = df_forecasts['p90']
)
print(f"Predicted interval coverage: {round(100*coverage, 2)} %")

# Area of the interval: summed width of the p10-p90 band over the test rows
# ==============================================================================
area = (df_forecasts['p90'] - df_forecasts['p10']).sum()
# Format at print time: round() on a NumPy scalar (presumably float32 here) keeps the
# NumPy type, and the f-string then prints its full double expansion instead of 2 decimals.
print(f"Area of the interval: {area:.2f}")

Backtest error (MAE): 0.7807830358028411
Predicted interval coverage: 70.67 %
Area of the interval: 292.1700134277344


### Context length: 64

In [20]:
# Zero-shot backtest with a 64-step context window.
# NOTE(review): the seed context is df_train[-48:], i.e. 48 rows, which is shorter than
# context_length; confirm whether a longer slice was intended for this comparison.
# NOTE(review): `num_samples` and `device` from the parameters cell are not passed, so
# recursive_forecast runs with its own defaults (num_samples=100, device='cuda').
context_length = 64

forecasts_dict = recursive_forecast(model_ckpt = checkpoint, context_df=df_train[-48:], test_df = df_test, prediction_length=7, context_length = context_length)

# Sanity check: one point forecast and one p10/p90 pair per test row.
assert len(forecasts_dict['prediction']) == len(df_test) == len(forecasts_dict['p10']) == len(forecasts_dict['p90'])

Forecasting batches:   0%|          | 0/48 [00:00<?, ?it/s]

Ground truth:   0%|          | 0/48 [00:00<?, ?it/s]

Forecasting batches:   0%|          | 0/55 [00:00<?, ?it/s]

Ground truth:   0%|          | 0/55 [00:00<?, ?it/s]

Forecasting batches:   0%|          | 0/62 [00:00<?, ?it/s]

Ground truth:   0%|          | 0/62 [00:00<?, ?it/s]

Forecasting batches:   0%|          | 0/69 [00:00<?, ?it/s]

Ground truth:   0%|          | 0/69 [00:00<?, ?it/s]

Forecasting batches:   0%|          | 0/76 [00:00<?, ?it/s]

Ground truth:   0%|          | 0/76 [00:00<?, ?it/s]

Forecasting batches:   0%|          | 0/83 [00:00<?, ?it/s]

Ground truth:   0%|          | 0/83 [00:00<?, ?it/s]

Forecasting batches:   0%|          | 0/90 [00:00<?, ?it/s]

Ground truth:   0%|          | 0/90 [00:00<?, ?it/s]

Forecasting batches:   0%|          | 0/97 [00:00<?, ?it/s]

Ground truth:   0%|          | 0/97 [00:00<?, ?it/s]

Forecasting batches:   0%|          | 0/104 [00:00<?, ?it/s]

Ground truth:   0%|          | 0/104 [00:00<?, ?it/s]

Forecasting batches:   0%|          | 0/111 [00:00<?, ?it/s]

Ground truth:   0%|          | 0/111 [00:00<?, ?it/s]

Forecasting batches:   0%|          | 0/118 [00:00<?, ?it/s]

Ground truth:   0%|          | 0/118 [00:00<?, ?it/s]

Forecasting batches:   0%|          | 0/125 [00:00<?, ?it/s]

Ground truth:   0%|          | 0/125 [00:00<?, ?it/s]

Forecasting batches:   0%|          | 0/132 [00:00<?, ?it/s]

Ground truth:   0%|          | 0/132 [00:00<?, ?it/s]

Forecasting batches:   0%|          | 0/139 [00:00<?, ?it/s]

Ground truth:   0%|          | 0/139 [00:00<?, ?it/s]

Forecasting batches:   0%|          | 0/146 [00:00<?, ?it/s]

Ground truth:   0%|          | 0/146 [00:00<?, ?it/s]

Forecasting batches:   0%|          | 0/153 [00:00<?, ?it/s]

Ground truth:   0%|          | 0/153 [00:00<?, ?it/s]

Forecasting batches:   0%|          | 0/160 [00:00<?, ?it/s]

Ground truth:   0%|          | 0/160 [00:00<?, ?it/s]

Forecasting batches:   0%|          | 0/167 [00:00<?, ?it/s]

Ground truth:   0%|          | 0/167 [00:00<?, ?it/s]

Forecasting batches:   0%|          | 0/174 [00:00<?, ?it/s]

Ground truth:   0%|          | 0/174 [00:00<?, ?it/s]

Forecasting batches:   0%|          | 0/181 [00:00<?, ?it/s]

Ground truth:   0%|          | 0/181 [00:00<?, ?it/s]

Forecasting batches:   0%|          | 0/188 [00:00<?, ?it/s]

Ground truth:   0%|          | 0/188 [00:00<?, ?it/s]

Forecasting batches:   0%|          | 0/195 [00:00<?, ?it/s]

Ground truth:   0%|          | 0/195 [00:00<?, ?it/s]

In [21]:
# Put the point forecast and the p10/p90 bounds next to the observed test values.
df_forecasts = df_test.assign(
    prediction=forecasts_dict['prediction'],
    p10=forecasts_dict['p10'],
    p90=forecasts_dict['p90'],
)

In [22]:
# Observed values vs point forecast with the p10-p90 band (context length 64).
plot_prob_forecasts(df_forecasts)


In [23]:
# MAE
# ==============================================================================
metric = mean_absolute_error(df_forecasts['wave_height'], df_forecasts['prediction'])
print(f"Backtest error (MAE): {metric}")

# Predicted interval coverage (on test data)
# ==============================================================================
coverage = empirical_coverage(
    y = df_forecasts['wave_height'],
    lower_bound = df_forecasts['p10'],
    upper_bound = df_forecasts['p90']
)
print(f"Predicted interval coverage: {round(100*coverage, 2)} %")

# Area of the interval: summed width of the p10-p90 band over the test rows
# ==============================================================================
area = (df_forecasts['p90'] - df_forecasts['p10']).sum()
# Format at print time: round() on a NumPy scalar (presumably float32 here) keeps the
# NumPy type, and the f-string then prints its full double expansion instead of 2 decimals.
print(f"Area of the interval: {area:.2f}")

Backtest error (MAE): 0.8054317169030507
Predicted interval coverage: 80.67 %
Area of the interval: 361.760009765625


### Context length: 128

In [24]:
# Zero-shot backtest with a 128-step context window.
# NOTE(review): the seed context is df_train[-48:], i.e. 48 rows, which is shorter than
# context_length; confirm whether a longer slice was intended for this comparison.
# NOTE(review): `num_samples` and `device` from the parameters cell are not passed, so
# recursive_forecast runs with its own defaults (num_samples=100, device='cuda').
context_length = 128

forecasts_dict = recursive_forecast(model_ckpt = checkpoint, context_df=df_train[-48:], test_df = df_test, prediction_length=7, context_length = context_length)

# Sanity check: one point forecast and one p10/p90 pair per test row.
assert len(forecasts_dict['prediction']) == len(df_test) == len(forecasts_dict['p10']) == len(forecasts_dict['p90'])

Forecasting batches:   0%|          | 0/48 [00:00<?, ?it/s]

Ground truth:   0%|          | 0/48 [00:00<?, ?it/s]

Forecasting batches:   0%|          | 0/55 [00:00<?, ?it/s]

Ground truth:   0%|          | 0/55 [00:00<?, ?it/s]

Forecasting batches:   0%|          | 0/62 [00:00<?, ?it/s]

Ground truth:   0%|          | 0/62 [00:00<?, ?it/s]

Forecasting batches:   0%|          | 0/69 [00:00<?, ?it/s]

Ground truth:   0%|          | 0/69 [00:00<?, ?it/s]

Forecasting batches:   0%|          | 0/76 [00:00<?, ?it/s]

Ground truth:   0%|          | 0/76 [00:00<?, ?it/s]

Forecasting batches:   0%|          | 0/83 [00:00<?, ?it/s]

Ground truth:   0%|          | 0/83 [00:00<?, ?it/s]

Forecasting batches:   0%|          | 0/90 [00:00<?, ?it/s]

Ground truth:   0%|          | 0/90 [00:00<?, ?it/s]

Forecasting batches:   0%|          | 0/97 [00:00<?, ?it/s]

Ground truth:   0%|          | 0/97 [00:00<?, ?it/s]

Forecasting batches:   0%|          | 0/104 [00:00<?, ?it/s]

Ground truth:   0%|          | 0/104 [00:00<?, ?it/s]

Forecasting batches:   0%|          | 0/111 [00:00<?, ?it/s]

Ground truth:   0%|          | 0/111 [00:00<?, ?it/s]

Forecasting batches:   0%|          | 0/118 [00:00<?, ?it/s]

Ground truth:   0%|          | 0/118 [00:00<?, ?it/s]

Forecasting batches:   0%|          | 0/125 [00:00<?, ?it/s]

Ground truth:   0%|          | 0/125 [00:00<?, ?it/s]

Forecasting batches:   0%|          | 0/132 [00:00<?, ?it/s]

Ground truth:   0%|          | 0/132 [00:00<?, ?it/s]

Forecasting batches:   0%|          | 0/139 [00:00<?, ?it/s]

Ground truth:   0%|          | 0/139 [00:00<?, ?it/s]

Forecasting batches:   0%|          | 0/146 [00:00<?, ?it/s]

Ground truth:   0%|          | 0/146 [00:00<?, ?it/s]

Forecasting batches:   0%|          | 0/153 [00:00<?, ?it/s]

Ground truth:   0%|          | 0/153 [00:00<?, ?it/s]

Forecasting batches:   0%|          | 0/160 [00:00<?, ?it/s]

Ground truth:   0%|          | 0/160 [00:00<?, ?it/s]

Forecasting batches:   0%|          | 0/167 [00:00<?, ?it/s]

Ground truth:   0%|          | 0/167 [00:00<?, ?it/s]

Forecasting batches:   0%|          | 0/174 [00:00<?, ?it/s]

Ground truth:   0%|          | 0/174 [00:00<?, ?it/s]

Forecasting batches:   0%|          | 0/181 [00:00<?, ?it/s]

Ground truth:   0%|          | 0/181 [00:00<?, ?it/s]

Forecasting batches:   0%|          | 0/188 [00:00<?, ?it/s]

Ground truth:   0%|          | 0/188 [00:00<?, ?it/s]

Forecasting batches:   0%|          | 0/195 [00:00<?, ?it/s]

Ground truth:   0%|          | 0/195 [00:00<?, ?it/s]

In [25]:
# Put the point forecast and the p10/p90 bounds next to the observed test values.
df_forecasts = df_test.assign(
    prediction=forecasts_dict['prediction'],
    p10=forecasts_dict['p10'],
    p90=forecasts_dict['p90'],
)

In [26]:
# Observed values vs point forecast with the p10-p90 band (context length 128).
plot_prob_forecasts(df_forecasts)


In [27]:
# MAE
# ==============================================================================
metric = mean_absolute_error(df_forecasts['wave_height'], df_forecasts['prediction'])
print(f"Backtest error (MAE): {metric}")

# Predicted interval coverage (on test data)
# ==============================================================================
coverage = empirical_coverage(
    y = df_forecasts['wave_height'],
    lower_bound = df_forecasts['p10'],
    upper_bound = df_forecasts['p90']
)
print(f"Predicted interval coverage: {round(100*coverage, 2)} %")

# Area of the interval: summed width of the p10-p90 band over the test rows
# ==============================================================================
area = (df_forecasts['p90'] - df_forecasts['p10']).sum()
# Format at print time: round() on a NumPy scalar (presumably float32 here) keeps the
# NumPy type, and the f-string then prints its full double expansion instead of 2 decimals.
print(f"Area of the interval: {area:.2f}")

Backtest error (MAE): 0.8524319249471028
Predicted interval coverage: 88.67 %
Area of the interval: 477.04998779296875


# Forecasting with fine-tuned models - Daily data

In [None]:
# Locations of the fine-tuned checkpoints on Drive, one per lightning_logs version.
# NOTE(review): the directory is named 'Weekly - No RoPE' although this section is about
# daily data; confirm these are the intended runs.
ckpt_dir = '/content/drive/MyDrive/Python Scripts/Lag Llama experiments/lightning_logs/Weekly - No RoPE'

version_2 = ckpt_dir + '/version_2/checkpoints/epoch=0-step=50.ckpt'
version_6 = ckpt_dir + '/version_6/checkpoints/epoch=0-step=50.ckpt'
version_5 = ckpt_dir + '/version_5/checkpoints/epoch=0-step=50.ckpt'

### Version 2

In [None]:
# Produce forecasts with the version_2 checkpoint, seeded with the last 48 training rows,
# and line them up with the observed test values.
forecasts_v2 = recursive_forecast(
    model_ckpt=version_2,
    context_df=df_train[-48:],
    test_df=df_test,
)
df_forecasts_v2 = df_test.assign(
    prediction=forecasts_v2['prediction'],
    p10=forecasts_v2['p10'],
    p90=forecasts_v2['p90'],
)

Forecasting batches:   0%|          | 0/48 [00:00<?, ?it/s]

Ground truth:   0%|          | 0/48 [00:00<?, ?it/s]

Forecasting batches:   0%|          | 0/55 [00:00<?, ?it/s]

Ground truth:   0%|          | 0/55 [00:00<?, ?it/s]

Forecasting batches:   0%|          | 0/62 [00:00<?, ?it/s]

Ground truth:   0%|          | 0/62 [00:00<?, ?it/s]

Forecasting batches:   0%|          | 0/69 [00:00<?, ?it/s]

Ground truth:   0%|          | 0/69 [00:00<?, ?it/s]

Forecasting batches:   0%|          | 0/76 [00:00<?, ?it/s]

Ground truth:   0%|          | 0/76 [00:00<?, ?it/s]

Forecasting batches:   0%|          | 0/83 [00:00<?, ?it/s]

Ground truth:   0%|          | 0/83 [00:00<?, ?it/s]

Forecasting batches:   0%|          | 0/90 [00:00<?, ?it/s]

Ground truth:   0%|          | 0/90 [00:00<?, ?it/s]

Forecasting batches:   0%|          | 0/97 [00:00<?, ?it/s]

Ground truth:   0%|          | 0/97 [00:00<?, ?it/s]

Forecasting batches:   0%|          | 0/104 [00:00<?, ?it/s]

Ground truth:   0%|          | 0/104 [00:00<?, ?it/s]

Forecasting batches:   0%|          | 0/111 [00:00<?, ?it/s]

Ground truth:   0%|          | 0/111 [00:00<?, ?it/s]

Forecasting batches:   0%|          | 0/118 [00:00<?, ?it/s]

Ground truth:   0%|          | 0/118 [00:00<?, ?it/s]

Forecasting batches:   0%|          | 0/125 [00:00<?, ?it/s]

Ground truth:   0%|          | 0/125 [00:00<?, ?it/s]

Forecasting batches:   0%|          | 0/132 [00:00<?, ?it/s]

Ground truth:   0%|          | 0/132 [00:00<?, ?it/s]

Forecasting batches:   0%|          | 0/139 [00:00<?, ?it/s]

Ground truth:   0%|          | 0/139 [00:00<?, ?it/s]

Forecasting batches:   0%|          | 0/146 [00:00<?, ?it/s]

Ground truth:   0%|          | 0/146 [00:00<?, ?it/s]

Forecasting batches:   0%|          | 0/153 [00:00<?, ?it/s]

Ground truth:   0%|          | 0/153 [00:00<?, ?it/s]

Forecasting batches:   0%|          | 0/160 [00:00<?, ?it/s]

Ground truth:   0%|          | 0/160 [00:00<?, ?it/s]

Forecasting batches:   0%|          | 0/167 [00:00<?, ?it/s]

Ground truth:   0%|          | 0/167 [00:00<?, ?it/s]

Forecasting batches:   0%|          | 0/174 [00:00<?, ?it/s]

Ground truth:   0%|          | 0/174 [00:00<?, ?it/s]

Forecasting batches:   0%|          | 0/181 [00:00<?, ?it/s]

Ground truth:   0%|          | 0/181 [00:00<?, ?it/s]

Forecasting batches:   0%|          | 0/188 [00:00<?, ?it/s]

Ground truth:   0%|          | 0/188 [00:00<?, ?it/s]

Forecasting batches:   0%|          | 0/195 [00:00<?, ?it/s]

Ground truth:   0%|          | 0/195 [00:00<?, ?it/s]

In [None]:
# Observed values vs point forecast with the p10-p90 band (fine-tuned version_2).
plot_prob_forecasts(df_forecasts_v2)

In [None]:
# Point forecast metric - MAE
# ==============================================================================
metric = mean_absolute_error(df_forecasts_v2['wave_height'], df_forecasts_v2['prediction'])
print(f"Backtest error (MAE): {metric}")

# Predicted interval coverage
# ==============================================================================
coverage = empirical_coverage(
    y = df_forecasts_v2['wave_height'],
    lower_bound = df_forecasts_v2['p10'],
    upper_bound = df_forecasts_v2['p90']
)
print(f"Predicted interval coverage: {round(100*coverage, 2)} %")

# Area of the interval: summed width of the p10-p90 band over the test rows
# ==============================================================================
area = (df_forecasts_v2['p90'] - df_forecasts_v2['p10']).sum()
# Format at print time: round() on a NumPy scalar (presumably float32 here) keeps the
# NumPy type, and the f-string then prints its full double expansion instead of 2 decimals.
print(f"Area of the interval: {area:.2f}")

Backtest error (MAE): 0.7834628623167674
Predicted interval coverage: 62.67 %
Area of the interval: 230.10000610351562


### Version 6

In [None]:
# Produce forecasts with the version_6 checkpoint, seeded with the last 100 training rows,
# and line them up with the observed test values.
forecasts_v6 = recursive_forecast(
    model_ckpt=version_6,
    context_df=df_train[-100:],
    test_df=df_test,
)
df_forecasts_v6 = df_test.assign(
    prediction=forecasts_v6['prediction'],
    p10=forecasts_v6['p10'],
    p90=forecasts_v6['p90'],
)

Forecasting batches:   0%|          | 0/100 [00:00<?, ?it/s]

Ground truth:   0%|          | 0/100 [00:00<?, ?it/s]

Forecasting batches:   0%|          | 0/107 [00:00<?, ?it/s]

Ground truth:   0%|          | 0/107 [00:00<?, ?it/s]

Forecasting batches:   0%|          | 0/114 [00:00<?, ?it/s]

Ground truth:   0%|          | 0/114 [00:00<?, ?it/s]

Forecasting batches:   0%|          | 0/121 [00:00<?, ?it/s]

Ground truth:   0%|          | 0/121 [00:00<?, ?it/s]

Forecasting batches:   0%|          | 0/128 [00:00<?, ?it/s]

Ground truth:   0%|          | 0/128 [00:00<?, ?it/s]

Forecasting batches:   0%|          | 0/135 [00:00<?, ?it/s]

Ground truth:   0%|          | 0/135 [00:00<?, ?it/s]

Forecasting batches:   0%|          | 0/142 [00:00<?, ?it/s]

Ground truth:   0%|          | 0/142 [00:00<?, ?it/s]

Forecasting batches:   0%|          | 0/149 [00:00<?, ?it/s]

Ground truth:   0%|          | 0/149 [00:00<?, ?it/s]

Forecasting batches:   0%|          | 0/156 [00:00<?, ?it/s]

Ground truth:   0%|          | 0/156 [00:00<?, ?it/s]

Forecasting batches:   0%|          | 0/163 [00:00<?, ?it/s]

Ground truth:   0%|          | 0/163 [00:00<?, ?it/s]

Forecasting batches:   0%|          | 0/170 [00:00<?, ?it/s]

Ground truth:   0%|          | 0/170 [00:00<?, ?it/s]

Forecasting batches:   0%|          | 0/177 [00:00<?, ?it/s]

Ground truth:   0%|          | 0/177 [00:00<?, ?it/s]

Forecasting batches:   0%|          | 0/184 [00:00<?, ?it/s]

Ground truth:   0%|          | 0/184 [00:00<?, ?it/s]

Forecasting batches:   0%|          | 0/191 [00:00<?, ?it/s]

Ground truth:   0%|          | 0/191 [00:00<?, ?it/s]

Forecasting batches:   0%|          | 0/198 [00:00<?, ?it/s]

Ground truth:   0%|          | 0/198 [00:00<?, ?it/s]

Forecasting batches:   0%|          | 0/205 [00:00<?, ?it/s]

Ground truth:   0%|          | 0/205 [00:00<?, ?it/s]

Forecasting batches:   0%|          | 0/212 [00:00<?, ?it/s]

Ground truth:   0%|          | 0/212 [00:00<?, ?it/s]

Forecasting batches:   0%|          | 0/219 [00:00<?, ?it/s]

Ground truth:   0%|          | 0/219 [00:00<?, ?it/s]

Forecasting batches:   0%|          | 0/226 [00:00<?, ?it/s]

Ground truth:   0%|          | 0/226 [00:00<?, ?it/s]

Forecasting batches:   0%|          | 0/233 [00:00<?, ?it/s]

Ground truth:   0%|          | 0/233 [00:00<?, ?it/s]

Forecasting batches:   0%|          | 0/240 [00:00<?, ?it/s]

Ground truth:   0%|          | 0/240 [00:00<?, ?it/s]

Forecasting batches:   0%|          | 0/247 [00:00<?, ?it/s]

Ground truth:   0%|          | 0/247 [00:00<?, ?it/s]

In [None]:
# Plot the v6 backtest frame with the notebook's plot_prob_forecasts helper.
# NOTE(review): presumably draws 'wave_height' against 'prediction' together
# with the 'p10'-'p90' band held in df_forecasts_v6 - confirm against the
# helper's definition earlier in the notebook.
plot_prob_forecasts(df_forecasts_v6)

In [None]:
# Backtest metrics for the v6 forecasts. Every figure below compares the
# observed 'wave_height' column of df_forecasts_v6 with its 'prediction',
# 'p10' and 'p90' columns.

# Point forecast metric - MAE
# ==============================================================================
metric = mean_absolute_error(df_forecasts_v6['wave_height'], df_forecasts_v6['prediction'])
print(f"Backtest error (MAE): {metric}")

# Predicted interval coverage
# ==============================================================================
# NOTE(review): empirical_coverage is defined elsewhere; presumably it returns
# the fraction of observations lying between lower_bound and upper_bound.
coverage = empirical_coverage(
    y = df_forecasts_v6['wave_height'],
    lower_bound = df_forecasts_v6['p10'],
    upper_bound = df_forecasts_v6['p90']
)
print(f"Predicted interval coverage: {round(100*coverage, 2)} %")

# Area of the interval
# ==============================================================================
# Sum of the per-row band widths (p90 - p10).
area = (df_forecasts_v6['p90'] - df_forecasts_v6['p10']).sum()
# The sum appears to be a single-precision NumPy scalar: rounding it directly
# yields another single-precision value, which was printed with its full binary
# expansion (e.g. 213.7899932861328) instead of two decimals. Converting to a
# built-in float first makes the two-decimal rounding visible. `area` itself is
# left unchanged for any later cell that reads it.
print(f"Area of the interval: {round(float(area), 2)}")

Backtest error (MAE): 0.7057815072695415
Predicted interval coverage: 70.67 %
Area of the interval: 213.7899932861328


### Version 5

In [None]:
# Produce forecasts
# Run the recursive backtest with the version_5 checkpoint, using the last 200
# rows of the training frame as context and df_test as the evaluation window.
forecasts_v5 = recursive_forecast(
    model_ckpt=version_5,
    context_df=df_train[-200:],
    test_df=df_test,
)

# Build the results frame as a copy of df_test (assign never modifies its
# receiver) carrying the point forecast and the two interval bounds.
df_forecasts_v5 = df_test.assign(
    prediction=forecasts_v5['prediction'],
    p10=forecasts_v5['p10'],
    p90=forecasts_v5['p90'],
)

Forecasting batches:   0%|          | 0/200 [00:00<?, ?it/s]

Ground truth:   0%|          | 0/200 [00:00<?, ?it/s]

Forecasting batches:   0%|          | 0/207 [00:00<?, ?it/s]

Ground truth:   0%|          | 0/207 [00:00<?, ?it/s]

Forecasting batches:   0%|          | 0/214 [00:00<?, ?it/s]

Ground truth:   0%|          | 0/214 [00:00<?, ?it/s]

Forecasting batches:   0%|          | 0/221 [00:00<?, ?it/s]

Ground truth:   0%|          | 0/221 [00:00<?, ?it/s]

Forecasting batches:   0%|          | 0/228 [00:00<?, ?it/s]

Ground truth:   0%|          | 0/228 [00:00<?, ?it/s]

Forecasting batches:   0%|          | 0/235 [00:00<?, ?it/s]

Ground truth:   0%|          | 0/235 [00:00<?, ?it/s]

Forecasting batches:   0%|          | 0/242 [00:00<?, ?it/s]

Ground truth:   0%|          | 0/242 [00:00<?, ?it/s]

Forecasting batches:   0%|          | 0/249 [00:00<?, ?it/s]

Ground truth:   0%|          | 0/249 [00:00<?, ?it/s]

Forecasting batches:   0%|          | 0/256 [00:00<?, ?it/s]

Ground truth:   0%|          | 0/256 [00:00<?, ?it/s]

Forecasting batches:   0%|          | 0/263 [00:00<?, ?it/s]

Ground truth:   0%|          | 0/263 [00:00<?, ?it/s]

Forecasting batches:   0%|          | 0/270 [00:00<?, ?it/s]

Ground truth:   0%|          | 0/270 [00:00<?, ?it/s]

Forecasting batches:   0%|          | 0/277 [00:00<?, ?it/s]

Ground truth:   0%|          | 0/277 [00:00<?, ?it/s]

Forecasting batches:   0%|          | 0/284 [00:00<?, ?it/s]

Ground truth:   0%|          | 0/284 [00:00<?, ?it/s]

Forecasting batches:   0%|          | 0/291 [00:00<?, ?it/s]

Ground truth:   0%|          | 0/291 [00:00<?, ?it/s]

Forecasting batches:   0%|          | 0/298 [00:00<?, ?it/s]

Ground truth:   0%|          | 0/298 [00:00<?, ?it/s]

Forecasting batches:   0%|          | 0/305 [00:00<?, ?it/s]

Ground truth:   0%|          | 0/305 [00:00<?, ?it/s]

Forecasting batches:   0%|          | 0/312 [00:00<?, ?it/s]

Ground truth:   0%|          | 0/312 [00:00<?, ?it/s]

Forecasting batches:   0%|          | 0/319 [00:00<?, ?it/s]

Ground truth:   0%|          | 0/319 [00:00<?, ?it/s]

Forecasting batches:   0%|          | 0/326 [00:00<?, ?it/s]

Ground truth:   0%|          | 0/326 [00:00<?, ?it/s]

Forecasting batches:   0%|          | 0/333 [00:00<?, ?it/s]

Ground truth:   0%|          | 0/333 [00:00<?, ?it/s]

Forecasting batches:   0%|          | 0/340 [00:00<?, ?it/s]

Ground truth:   0%|          | 0/340 [00:00<?, ?it/s]

Forecasting batches:   0%|          | 0/347 [00:00<?, ?it/s]

Ground truth:   0%|          | 0/347 [00:00<?, ?it/s]

In [None]:
# Plot the v5 backtest frame with the notebook's plot_prob_forecasts helper.
# NOTE(review): presumably draws 'wave_height' against 'prediction' together
# with the 'p10'-'p90' band held in df_forecasts_v5 - confirm against the
# helper's definition earlier in the notebook.
plot_prob_forecasts(df_forecasts_v5)

In [None]:
# Backtest metrics for the v5 forecasts. Every figure below compares the
# observed 'wave_height' column of df_forecasts_v5 with its 'prediction',
# 'p10' and 'p90' columns.

# Point forecast metric - MAE
# ==============================================================================
metric = mean_absolute_error(df_forecasts_v5['wave_height'], df_forecasts_v5['prediction'])
print(f"Backtest error (MAE): {metric}")

# Predicted interval coverage
# ==============================================================================
# NOTE(review): empirical_coverage is defined elsewhere; presumably it returns
# the fraction of observations lying between lower_bound and upper_bound.
coverage = empirical_coverage(
    y = df_forecasts_v5['wave_height'],
    lower_bound = df_forecasts_v5['p10'],
    upper_bound = df_forecasts_v5['p90']
)
print(f"Predicted interval coverage: {round(100*coverage, 2)} %")

# Area of the interval
# ==============================================================================
# Sum of the per-row band widths (p90 - p10).
area = (df_forecasts_v5['p90'] - df_forecasts_v5['p10']).sum()
# The sum appears to be a single-precision NumPy scalar: rounding it directly
# yields another single-precision value, which was printed with its full binary
# expansion (e.g. 160.9600067138672) instead of two decimals. Converting to a
# built-in float first makes the two-decimal rounding visible. `area` itself is
# left unchanged for any later cell that reads it.
print(f"Area of the interval: {round(float(area), 2)}")

Backtest error (MAE): 0.6635458576361338
Predicted interval coverage: 63.33 %
Area of the interval: 160.9600067138672
