# Use custom holidays in an ARIMA_PLUS time-series forecasting model


This tutorial shows you how to do the following tasks:

- Create an ARIMA_PLUS time-series forecasting model that uses only built-in holidays.
- Perform AI.FORECAST using TimesFM 2.0 without special holiday handling.
- Create an ARIMA_PLUS time-series forecasting model that uses custom holidays in addition to built-in holidays.
- Visualize the forecasted results from these models.
- Inspect a model to see which holidays it models.
- Evaluate the effects of the custom holidays on the forecasted results.
- Compare the performance of the model that uses only built-in holidays to the performance of the model that uses custom holidays in addition to built-in holidays.

This tutorial uses the bigquery-public-data.wikipedia.pageviews_* public tables.

# Initialize BigQuery Client

In [None]:
from colabtools import auth,bigquery
from google.cloud.bigquery import magics

# Request only the first OAuth scope listed by the bigquery module.
scopes = [
    bigquery.SCOPES[0]
]

credentials = auth.get_user_oauth2_credentials(scopes)
project = '' # @param {type:"string"}

# Configure the magics context reachable through the colabtools module.
bigquery.magics.context.project = project
bigquery.magics.context.credentials = credentials

# Configure the google.cloud.bigquery magics context as well. The original
# cell only evaluated `magics.context.credentials is credentials`, a
# comparison whose result was discarded, so nothing was actually set here.
# NOTE(review): if both modules expose the same context object these two
# assignments are redundant but harmless.
magics.context.project = project
magics.context.credentials = credentials

# Client used by the Python cells below to run queries.
bigquery_client = bigquery.Client(project=project, credentials=credentials)

# Prepare the time-series data

In [None]:
# Step 1: make sure the tutorial dataset exists before writing into it.
create_dataset_query = """
CREATE SCHEMA IF NOT EXISTS `bqml_tutorial`;
"""

query_job = bigquery_client.query(create_dataset_query)
query_job.result()  # Wait for the statement to finish before continuing.

# Plain string: there are no placeholders, so no f-string prefix is needed.
print("Dataset created or already exists. Preparing the time series data...")

# Step 2: aggregate the hourly page views of the 'Google_I/O' article into
# one row per day for 2017-01-01 (inclusive) through 2023-01-01 (exclusive).
table_query = """
CREATE OR REPLACE TABLE `bqml_tutorial.googleio_page_views`
AS
SELECT
  DATETIME_TRUNC(datehour, DAY) AS date,
  SUM(views) AS views
FROM
  `bigquery-public-data.wikipedia.pageviews_*`
WHERE
  datehour >= '2017-01-01'
  AND datehour < '2023-01-01'
  AND title = 'Google_I/O'
GROUP BY
  DATETIME_TRUNC(datehour, DAY)
"""

# to_dataframe() is called on the CREATE TABLE job; the resulting frame is
# kept under the same name as before but is not used elsewhere in this cell.
results_df = bigquery_client.query(table_query).to_dataframe()
print("Time series data created.")

# Create a time-series forecasting model that uses built-in holidays

In [None]:
# Baseline model: ARIMA_PLUS over the daily 'views' series with
# holiday_region = 'US' and no custom holidays. Only rows dated before
# 2022-01-01 are used for training; the forecast horizon is 365 points.
model_training_query = """
CREATE OR REPLACE MODEL `bqml_tutorial.forecast_googleio`
  OPTIONS (
    model_type = 'ARIMA_PLUS',
    holiday_region = 'US',
    time_series_timestamp_col = 'date',
    time_series_data_col = 'views',
    data_frequency = 'DAILY',
    horizon = 365)
AS
SELECT
  *
FROM
  `bqml_tutorial.googleio_page_views`
WHERE
  date < '2022-01-01';
"""

# Submit the CREATE MODEL statement and wait for the job to finish.
query_job = bigquery_client.query(model_training_query)
query_job.result()

# Create a visualization helper function

In [None]:
import matplotlib.pyplot as plt
import pandas as pd
from matplotlib import dates as mdates
import numpy as np
import matplotlib.ticker as ticker

def visualize_query_results(query, bigquery_client):
    """
    Executes a BigQuery query and plots original vs. forecasted views over time.

    The query result must contain the columns 'date', 'original_views' and
    'forecasted_views'. Dates missing between the first and last date of the
    result are inserted as gaps (NaN) so the lines break instead of bridging
    across missing days.

    Args:
        query (str): The BigQuery SQL query.
        bigquery_client (google.cloud.bigquery.Client): The BigQuery client object.

    Raises:
        KeyError: If the result lacks one of the required columns.
        ValueError: If the query returns no rows.
    """
    required_columns = ('date', 'original_views', 'forecasted_views')
    value_columns = ['original_views', 'forecasted_views']

    results_df = bigquery_client.query(query).to_dataframe()

    # Validate up front so failures are reported before any figure is created.
    missing_columns = [name for name in required_columns if name not in results_df.columns]
    if missing_columns:
        raise KeyError(f"Query result is missing required column(s): {missing_columns}")
    if results_df.empty:
        raise ValueError("Query returned no rows; there is nothing to plot.")

    # Convert 'date' column to datetime objects and index by it
    results_df['date'] = pd.to_datetime(results_df['date'])
    results_df = results_df.set_index('date')

    # --- Reindex to handle missing dates ---
    # Create a full daily range from min to max date in the data
    date_min = results_df.index.min()
    date_max = results_df.index.max()
    full_date_range = pd.date_range(start=date_min, end=date_max, freq='D')

    # Missing dates become NaN. Casting to float64 turns any nullable or
    # object-typed values into plain floats before they reach matplotlib.
    plot_df = results_df[value_columns].reindex(full_date_range).astype('float64')

    # --- Plotting ---
    fig, ax = plt.subplots(figsize=(18, 7))
    ax.plot(plot_df.index, plot_df['original_views'], label='original_views', color='blue', linewidth=1.0)
    ax.plot(plot_df.index, plot_df['forecasted_views'], label='forecasted_views', color='red', linewidth=1.0)

    ax.set_xlabel('Date', fontsize=12)
    ax.set_ylabel('Values', fontsize=12)

    # --- Y-axis customization ---
    # DataFrame.max() skips NaN, so a column that is entirely NaN cannot
    # poison the limit the way the built-in max() over two values could.
    peak = plot_df.max().max()
    if pd.notna(peak) and peak > 0:
        ax.set_ylim(0, peak * 1.1)  # 10% headroom above the tallest point
    else:
        ax.set_ylim(bottom=0)

    # Format y-axis ticks to show values in 'k'
    thousands_formatter = ticker.FuncFormatter(lambda x, pos: '{:,.0f}k'.format(x/1000))
    ax.yaxis.set_major_formatter(thousands_formatter)

    # Set approximately 10 major ticks on the y-axis
    ax.yaxis.set_major_locator(ticker.MaxNLocator(nbins=10))

    # --- X-axis customization ---
    # Add a little padding to the x-axis limits
    ax.set_xlim(date_min - pd.Timedelta(days=60), date_max + pd.Timedelta(days=60))

    # AutoDateLocator helps pick 'nice' tick locations
    ax.xaxis.set_major_locator(mdates.AutoDateLocator(minticks=12, maxticks=20))
    # Formatter to match the '%b %d, %Y' style
    ax.xaxis.set_major_formatter(mdates.DateFormatter('%b %d, %Y'))

    # Rotate and align tick labels for readability
    plt.setp(ax.get_xticklabels(), rotation=30, ha="right", fontsize=10)
    plt.setp(ax.get_yticklabels(), fontsize=10)

    # --- Grid ---
    ax.grid(True, linestyle='-', alpha=0.6, color='#d3d3d3')

    # --- Legend ---
    ax.legend(loc='upper left', fontsize=11)

    # Adjust layout to prevent labels from overlapping
    fig.tight_layout()
    plt.show()

# Visualize the forecasted results without custom holiday

In [None]:
# Plot the actual daily views against the time_series_adjusted_data column
# returned by ML.EXPLAIN_FORECAST for the baseline model. The INNER JOIN keeps
# only dates present in both the page-view table and the explain output.
visualize_query_results("""SELECT
  original.date,
  original.views AS original_views,
  explain_forecast.time_series_adjusted_data
    AS forecasted_views,
FROM
  `bqml_tutorial.googleio_page_views` original
INNER JOIN
  (
    SELECT
      *
    FROM
      ML.EXPLAIN_FORECAST(
        MODEL `bqml_tutorial.forecast_googleio`,
        STRUCT(365 AS horizon))
  ) explain_forecast
  ON
    TIMESTAMP(original.date)
    = explain_forecast.time_series_timestamp
ORDER BY
  original.date;""", bigquery_client)

# Visualize the AI.FORECAST results without custom holidays

In [None]:
# Plot the actual daily views against forecast_value from AI.FORECAST
# ('TimesFM 2.0', horizon 365), which is fed the rows dated before 2022-01-01.
# The FULL JOIN matches forecast rows to page-view rows by calendar date.
visualize_query_results("""SELECT
  DATE(original.date) AS date, original_views, forecast_value as forecasted_views
FROM
  AI.FORECAST( (
    SELECT
      *
    FROM
      `bqml_tutorial.googleio_page_views`
    WHERE
      date < '2022-01-01'),
    data_col => 'views',
    timestamp_col => 'date',
    MODEL => 'TimesFM 2.0',
    horizon => 365,
    confidence_level => .75 )
FULL JOIN
(
SELECT
  date,
  views AS original_views,
FROM
  `bqml_tutorial.googleio_page_views`
) original
  ON
    DATE(forecast_timestamp)
    = DATE(original.date)
ORDER BY
  original.date;""", bigquery_client)

# Create a time-series forecasting model that uses built-in holidays and custom holidays

In [None]:
# Same options and training window as the baseline model, but the AS clause
# supplies two named inputs: `training_data` (rows dated before 2022-01-01)
# and `custom_holiday`, which defines a 'GoogleIO' holiday for region 'US'
# on the listed dates with 1 pre-holiday day and 2 post-holiday days.
model_training_query = """
CREATE OR REPLACE MODEL `bqml_tutorial.forecast_googleio_with_custom_holiday`
  OPTIONS (
    model_type = 'ARIMA_PLUS',
    holiday_region = 'US',
    time_series_timestamp_col = 'date',
    time_series_data_col = 'views',
    data_frequency = 'DAILY',
    horizon = 365)
AS (
  training_data AS (
      SELECT
        *
      FROM
        `bqml_tutorial.googleio_page_views`
      WHERE
        date < '2022-01-01'
    ),
  custom_holiday AS (
      SELECT
        'US' AS region,
        'GoogleIO' AS holiday_name,
        primary_date,
        1 AS preholiday_days,
        2 AS postholiday_days
      FROM
        UNNEST(
          [
            DATE('2017-05-17'),
            DATE('2018-05-08'),
            DATE('2019-05-07'),
            -- cancelled in 2020 due to pandemic
            DATE('2021-05-18'),
            DATE('2022-05-11')])
          AS primary_date
    )
);
"""

# Submit the CREATE MODEL statement and wait for the job to finish.
query_job = bigquery_client.query(model_training_query)
query_job.result()

# Visualize the forecasted results adjusted with custom holiday

In [None]:
# Plot the actual daily views against the time_series_adjusted_data column
# returned by ML.EXPLAIN_FORECAST for the custom-holiday model. The INNER JOIN
# keeps only dates present in both the page-view table and the explain output.
visualize_query_results("""SELECT
  original.date,
  original.views AS original_views,
  explain_forecast.time_series_adjusted_data
    AS forecasted_views,
FROM
  `bqml_tutorial.googleio_page_views` original
INNER JOIN
  (
    SELECT
      *
    FROM
      ML.EXPLAIN_FORECAST(
        MODEL
          `bqml_tutorial.forecast_googleio_with_custom_holiday`,
        STRUCT(365 AS horizon))
  ) explain_forecast
  ON
    TIMESTAMP(original.date)
    = explain_forecast.time_series_timestamp
ORDER BY
  original.date;""", bigquery_client)

# Inspect holiday information

In [None]:
%%bigquery
-- Return every column of ML.HOLIDAY_INFO for the custom-holiday model, to
-- inspect which holidays the model includes.
SELECT *
FROM
  ML.HOLIDAY_INFO(
    MODEL `bqml_tutorial.forecast_googleio_with_custom_holiday`);

# Evaluate the effects of the custom holidays

In [None]:
%%bigquery
-- Show the GoogleIO, Juneteenth, Christmas and New Year effect columns from
-- ML.EXPLAIN_FORECAST, restricted to rows whose overall holiday_effect is
-- non-zero.
SELECT
  time_series_timestamp,
  holiday_effect_GoogleIO,
  holiday_effect_US_Juneteenth,
  holiday_effect_Christmas,
  holiday_effect_NewYear
FROM
  ML.EXPLAIN_FORECAST(
    model
      `bqml_tutorial.forecast_googleio_with_custom_holiday`,
    STRUCT(365 AS horizon))
WHERE holiday_effect != 0;

# Compare model performance

In [None]:
%%bigquery
-- Evaluate both models against the actual page views from 2022-05-08
-- (inclusive) to 2022-05-12 (exclusive), a window that contains the
-- 2022-05-11 custom holiday date. The two result sets are stacked with
-- UNION ALL and tagged by a model_type label.
SELECT
  "original" AS model_type,
  *
FROM
  ml.evaluate(
    MODEL `bqml_tutorial.forecast_googleio`,
    (
      SELECT
        *
      FROM
        `bqml_tutorial.googleio_page_views`
      WHERE
        date >= '2022-05-08'
        AND date < '2022-05-12'
    ),
    STRUCT(
      365 AS horizon,
      TRUE AS perform_aggregation))
UNION ALL
SELECT
  "with_custom_holiday" AS model_type,
  *
FROM
  ml.evaluate(
    MODEL
      `bqml_tutorial.forecast_googleio_with_custom_holiday`,
    (
      SELECT
        *
      FROM
        `bqml_tutorial.googleio_page_views`
      WHERE
        date >= '2022-05-08'
        AND date < '2022-05-12'
    ),
    STRUCT(
      365 AS horizon,
      TRUE AS perform_aggregation));