Import required libraries for data analysis, forecasting, and evaluation metrics.

In [None]:
import numpy as np
import pandas as pd
from nixtla import NixtlaClient
from utilsforecast.evaluation import evaluate
from utilsforecast.losses import mae

## Prerequisites

Initialize Nixtla client with API key from environment variables.

In [None]:
import os

# Read the API key from the environment so it never appears in the notebook;
# indexing os.environ raises KeyError immediately if the variable is not set.
NIXTLA_API_KEY = os.environ["NIXTLA_API_KEY"]
client = NixtlaClient(api_key=NIXTLA_API_KEY)

## Data Preparation

Load and preprocess the M5 sales dataset with exogenous variables.

In [None]:
# Download the small M5 sales sample and parse the "ds" column as datetimes.
sales_data = pd.read_csv(
    "https://raw.githubusercontent.com/Nixtla/transfer-learning-time-series/main/datasets/m5_sales_exog_small.csv"
).assign(ds=lambda frame: pd.to_datetime(frame["ds"]))
sales_data.head()

Visualize the sales data, limited to the most recent 365 observations of each series.

In [None]:
# Cap the plotted history at 365 observations per series to keep the figure readable.
sales_plot = client.plot(sales_data, max_insample_length=365)

sales_plot

## Bounded Forecasts

Apply a log(y + 1) transformation to handle the intermittent nature of the data; adding 1 keeps observations with zero sales well-defined under the logarithm.

In [None]:
# Work on a copy so the original-scale sales stay available in sales_data.
log_transformed_data = sales_data.copy()
# log1p(y) is log(1 + y): defined at y == 0 and computed without first forming
# 1 + y, which keeps precision for small values.
log_transformed_data["y"] = np.log1p(log_transformed_data["y"])
log_transformed_data.head()

Compare original and log-transformed data for a specific product.

In [None]:
import matplotlib.pyplot as plt

# One shared set of axes for both series
_, ax = plt.subplots(figsize=(10, 5))

# Draw the original series first, then the log-transformed one, with
# identical plot settings so the two lines are directly comparable.
for frame in (sales_data, log_transformed_data):
    client.plot(
        frame,
        max_insample_length=30,
        unique_ids=["FOODS_1_001"],
        engine="matplotlib",
        ax=ax,
    )

# Restyle the second line (the transformed series) so it stands out
lines = ax.get_lines()
if len(lines) > 1:
    transformed_line = lines[1]
    transformed_line.set_color("#006400")  # New color for transformed data
    transformed_line.set_linestyle("--")

# Reuse the existing legend handles but replace the labels with custom text
handles = ax.get_legend_handles_labels()[0]
labels = ["Original Sales", "Transformed Sales"]
ax.legend(handles, labels)

ax

Split data into training and test sets (last 28 days for testing).

In [None]:
# Hold out the last 28 observations of each unique_id as test data. They are
# taken from the untransformed frame so that "y" is on the original sales
# scale, the same scale the forecasts are on after the log transform has been
# reversed; otherwise the MAE would compare log-scale actuals with
# original-scale predictions.
test_data = sales_data.groupby("unique_id").tail(28)

# The training set stays on the log scale. log_transformed_data is a copy of
# sales_data, so both frames share the same row index and the test rows can be
# dropped by label.
train_data = log_transformed_data.drop(test_data.index).reset_index(drop=True)

## Generating Forecasts with TimeGPT

Generate forecasts using the base TimeGPT model with an 80% prediction interval.

In [None]:
# Forecast 28 steps ahead on the log scale, without fine-tuning, requesting
# the 80 level interval alongside the point forecast.
log_forecast = client.forecast(
    df=train_data,
    id_col="unique_id",
    time_col="ds",
    target_col="y",
    h=28,
    level=[80],
    model="timegpt-1-long-horizon",
)

## Reverse Transformation

After obtaining predictions, we reverse the log transformation to return to the original scale.

In [None]:
def reverse_log_transform(df):
    """Undo a ``log(y + 1)`` transform on the value columns of a frame.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame whose columns other than "ds" and "unique_id" hold
        log-scale values.

    Returns
    -------
    pandas.DataFrame
        A copy of ``df`` in which every column except "ds" and "unique_id"
        has been mapped through ``exp(x) - 1``. The input is not modified.
    """
    restored = df.copy()
    value_cols = [col for col in restored.columns if col not in ("ds", "unique_id")]
    # expm1 computes exp(x) - 1 directly, avoiding the cancellation error the
    # two-step form suffers for values close to zero.
    restored[value_cols] = np.expm1(restored[value_cols])
    return restored

# Map every column of the base forecast except "ds" and "unique_id" back from
# the log scale, then preview the result.
base_forecast = reverse_log_transform(log_forecast)
base_forecast.head()

## Evaluation

Define functions to merge forecasts with real data and calculate MAE.

In [None]:
def merge_forecast(real_data, forecast):
    """Attach forecast columns to the observed rows.

    Performs a left join on ("unique_id", "ds"), so every row of
    ``real_data`` is kept; rows without a matching forecast get NaN in the
    forecast columns.
    """
    return real_data.merge(forecast, how="left", on=["unique_id", "ds"])

def get_mean_mae(real_data, forecast):
    """Return the mean of the "TimeGPT" MAE scores produced by ``evaluate``.

    The observed rows are joined with the forecast, scored with the ``mae``
    metric against the "y" column, and the resulting "TimeGPT" scores are
    averaged within the "mae" metric group.
    """
    scores = evaluate(
        merge_forecast(real_data, forecast),
        metrics=[mae],
        models=["TimeGPT"],
        target_col="y",
        id_col="unique_id",
    )
    mean_by_metric = scores.groupby("metric")["TimeGPT"].mean()
    return mean_by_metric["mae"]

Calculate MAE for the base model forecasts.

In [None]:
# Score the back-transformed base forecast against the held-out rows.
base_mae = get_mean_mae(test_data, base_forecast)
print(base_mae)

## Finetuning the Model

Generate forecasts using a fine-tuned TimeGPT model with 10 finetuning steps.

In [None]:
# Same request as the base forecast, plus 10 fine-tuning steps that
# optimise MAE on the training data.
log_finetuned_forecast = client.forecast(
    df=train_data,
    id_col="unique_id",
    time_col="ds",
    target_col="y",
    h=28,
    level=[80],
    model="timegpt-1-long-horizon",
    finetune_steps=10,
    finetune_loss="mae",
)

Calculate MAE for the fine-tuned model forecasts.

In [None]:
# Map the fine-tuned forecast back from the log scale and score it.
finetuned_forecast = reverse_log_transform(log_finetuned_forecast)
# NOTE: the name is spelled "finedtune_mae" and the comparison-table cell
# references it under that exact spelling; rename both together if at all.
finedtune_mae = get_mean_mae(test_data, finetuned_forecast)
print(finedtune_mae)

## Incorporating Exogenous Variables

Prepare exogenous variables for forecasting by removing target and price columns.

In [None]:
# Columns that must not be passed as future exogenous features: the target
# itself and the price column.
non_exogenous_variables = ["y", "sell_price"]
futr_exog_data = test_data.drop(columns=non_exogenous_variables)
futr_exog_data.head()

Generate forecasts using TimeGPT with exogenous variables and fine-tuning.

In [None]:
# Fine-tuned request as before, now also supplying the future values of the
# exogenous features over the forecast horizon via X_df.
log_exogenous_forecast = client.forecast(
    df=train_data,
    X_df=futr_exog_data,
    id_col="unique_id",
    time_col="ds",
    target_col="y",
    h=28,
    level=[80],
    model="timegpt-1-long-horizon",
    finetune_steps=10,
    finetune_loss="mae",
)

Calculate MAE for the model with exogenous variables.

In [None]:
# Map the exogenous-variable forecast back from the log scale and score it.
exogenous_forecast = reverse_log_transform(log_exogenous_forecast)
exogenous_mae = get_mean_mae(test_data, exogenous_forecast)
print(exogenous_mae)

## Comparing MAE

Create a comparison table of MAE values for all three model variants.

In [None]:
# Pair each TimeGPT variant with its mean absolute error (MAE); insertion
# order of the dict fixes the row order of the table.
variant_scores = {
    "Base TimeGPT": base_mae,
    "Fine-Tuned TimeGPT": finedtune_mae,
    "TimeGPT with Exogenous": exogenous_mae,
}
mae_values = {
    "Model Variant": list(variant_scores),
    "MAE": list(variant_scores.values()),
}

mae_table = pd.DataFrame(mae_values)
mae_table
