From ccc9f8dba6c1874f3ce76ea60404e6764d21f720 Mon Sep 17 00:00:00 2001
From: azul
Date: Mon, 20 May 2024 14:21:14 -0700
Subject: [PATCH] feat: add timegen docs (#3196)

* feat: add timegen nbs
* feat: add codeowner
* feat: add prerequisites to all tutorials
* feat: ci errors
* fix: rm outputs

---------

Co-authored-by: Kriti <53083330+fkriti@users.noreply.github.com>
---
 .github/CODEOWNERS                            |   1 +
 .../nixtla/01_quickstart_forecast.ipynb       | 112 ++++
 .../nixtla/02_finetuning.ipynb                | 181 ++++++
 .../nixtla/03_anomaly_detection.ipynb         | 253 ++++++++
 .../nixtla/04_exogenous_variables.ipynb       | 444 +++++++++++++
 .../nixtla/05_demand_forecasting.ipynb        | 605 ++++++++++++++++++
 6 files changed, 1596 insertions(+)
 create mode 100644 sdk/python/foundation-models/nixtla/01_quickstart_forecast.ipynb
 create mode 100644 sdk/python/foundation-models/nixtla/02_finetuning.ipynb
 create mode 100644 sdk/python/foundation-models/nixtla/03_anomaly_detection.ipynb
 create mode 100644 sdk/python/foundation-models/nixtla/04_exogenous_variables.ipynb
 create mode 100644 sdk/python/foundation-models/nixtla/05_demand_forecasting.ipynb

diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS
index e5c192716c..3cac4cb9d9 100644
--- a/.github/CODEOWNERS
+++ b/.github/CODEOWNERS
@@ -17,6 +17,7 @@
 /sdk/python/foundation-models/cohere/cohere-aisearch-langchain-rag.ipynb @stewart-co @kseniia-cohere
 sdk/python/foundation-models/cohere/command_faiss_langchain.ipynb @stewart-co @kseniia-cohere
 sdk/python/foundation-models/cohere/command_tools-langchain.ipynb @stewart-co @kseniia-cohere
+/sdk/python/foundation-models/nixtla/ @AzulGarza
 #### files referenced in docs (DO NOT EDIT, except for Docs team!!!) #############################################################################################
 /cli/assets/component/train.yml @sdgilley @msakande @Blackmist @ssalgadodev @lgayhardt @fbsolo-ms1
diff --git a/sdk/python/foundation-models/nixtla/01_quickstart_forecast.ipynb b/sdk/python/foundation-models/nixtla/01_quickstart_forecast.ipynb
new file mode 100644
index 0000000000..711531f5c2
--- /dev/null
+++ b/sdk/python/foundation-models/nixtla/01_quickstart_forecast.ipynb
@@ -0,0 +1,112 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Prerequisites\n",
+    "\n",
+    "Please make sure to follow these steps to start using TimeGEN:\n",
+    "\n",
+    "* Register for a valid Azure account with a subscription\n",
+    "* Make sure you have access to [Azure AI Studio](https://learn.microsoft.com/en-us/azure/ai-studio/what-is-ai-studio?tabs=home)\n",
+    "* Create a project and resource group\n",
+    "* Select `TimeGEN-1`\n",
+    "\n",
+    "  > Note that some models may not be available in all Azure AI and Azure Machine Learning regions. In those cases, you can create a workspace or project in a region where the models are available and then consume them with a connection from a different region. To learn more about using connections, see [Consume models with connections](https://learn.microsoft.com/en-us/azure/ai-studio/how-to/deployments-connections).\n",
+    "\n",
+    "* Deploy with \"Pay-as-you-go\"\n",
+    "\n",
+    "Once deployed successfully, you should be assigned an API endpoint and a security key for inference.\n",
+    "\n",
+    "To complete this tutorial, you will need to:\n",
+    "\n",
+    "* Install `nixtla` and `pandas`:\n",
+    "\n",
+    "  ```bash\n",
+    "  pip install nixtla pandas\n",
+    "  ```"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Quickstart\n",
+    "\n",
+    "To forecast with TimeGEN, call the `forecast` method, passing your DataFrame and specifying your time and target column names. You can then plot the predictions using the `plot` method."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import pandas as pd\n",
+    "from nixtla import NixtlaClient"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Instantiate the Nixtla client with your Azure AI endpoint and API key\n",
+    "nixtla_client = NixtlaClient(\n",
+    "    base_url=\"your azure ai endpoint\",\n",
+    "    api_key=\"your api_key\",\n",
+    ")"
+   ]
+  },
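+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Optionally, you can check that the client can reach your endpoint before forecasting. This is a minimal sketch that assumes your TimeGEN deployment exposes the standard Nixtla validation route; if the check fails, forecasting may still work."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Optional sanity check: returns True if the endpoint and key are accepted\n",
+    "nixtla_client.validate_api_key()"
+   ]
+  },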
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Read the data\n",
+    "df = pd.read_csv(\n",
+    "    \"https://raw.githubusercontent.com/Nixtla/transfer-learning-time-series/main/datasets/air_passengers.csv\"\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Forecast the next 12 periods\n",
+    "forecast_df = nixtla_client.forecast(\n",
+    "    df=df,\n",
+    "    h=12,\n",
+    "    time_col=\"timestamp\",\n",
+    "    target_col=\"value\",\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Plot the predictions\n",
+    "nixtla_client.plot(\n",
+    "    df=df, forecasts_df=forecast_df, time_col=\"timestamp\", target_col=\"value\"\n",
+    ")"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "python3",
+   "language": "python",
+   "name": "python3"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}
diff --git a/sdk/python/foundation-models/nixtla/02_finetuning.ipynb b/sdk/python/foundation-models/nixtla/02_finetuning.ipynb
new file mode 100644
index 0000000000..264a54591e
--- /dev/null
+++ b/sdk/python/foundation-models/nixtla/02_finetuning.ipynb
@@ -0,0 +1,181 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "id": "a3e70828-d972-4231-aa21-89e5ede59366",
+   "metadata": {},
+   "source": [
+    "# Prerequisites\n",
+    "\n",
+    "Please make sure to follow these steps to start using TimeGEN:\n",
+    "\n",
+    "* Register for a valid Azure account with a subscription\n",
+    "* Make sure you have access to [Azure AI Studio](https://learn.microsoft.com/en-us/azure/ai-studio/what-is-ai-studio?tabs=home)\n",
+    "* Create a project and resource group\n",
+    "* Select `TimeGEN-1`\n",
+    "\n",
+    "  > Note that some models may not be available in all Azure AI and Azure Machine Learning regions. In those cases, you can create a workspace or project in a region where the models are available and then consume them with a connection from a different region. To learn more about using connections, see [Consume models with connections](https://learn.microsoft.com/en-us/azure/ai-studio/how-to/deployments-connections).\n",
+    "\n",
+    "* Deploy with \"Pay-as-you-go\"\n",
+    "\n",
+    "Once deployed successfully, you should be assigned an API endpoint and a security key for inference.\n",
+    "\n",
+    "To complete this tutorial, you will need to:\n",
+    "\n",
+    "* Install `nixtla` and `pandas`:\n",
+    "\n",
+    "  ```bash\n",
+    "  pip install nixtla pandas\n",
+    "  ```"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "da753996-54f8-4244-a34e-7316b0c01827",
+   "metadata": {},
+   "source": [
+    "# Fine-tuning"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "75a62889-d81e-462e-b235-c1eba1096da9",
+   "metadata": {},
+   "source": [
+    "Fine-tuning is a powerful process for utilizing TimeGEN more effectively. Foundation models such as TimeGEN are pre-trained on vast amounts of data, capturing wide-ranging features and patterns. These models can then be specialized for specific contexts or domains. With fine-tuning, the model's parameters are refined on your data, allowing it to tailor its vast pre-existing knowledge to the requirements of the new task. Fine-tuning thus serves as a crucial bridge, linking TimeGEN's broad capabilities to the specificities of your tasks.\n",
+    "\n",
+    "Concretely, fine-tuning consists of performing a certain number of training iterations on your input data, minimizing the forecasting error. The forecasts are then produced with the updated model. To control the number of iterations, use the `finetune_steps` argument of the `forecast` method."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "10ec4f03",
+   "metadata": {},
+   "source": [
+    "## 1. Import packages\n",
+    "First, we import the required packages and initialize the Nixtla client."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "98942108-d427-42d6-81f8-fa0bb5859395",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import pandas as pd\n",
+    "from nixtla import NixtlaClient"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "64178d1c-957e-4a04-ab64-fde332b1840c",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "nixtla_client = NixtlaClient(\n",
+    "    base_url=\"your azure ai endpoint\",\n",
+    "    api_key=\"your api_key\",\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "8c2e5387",
+   "metadata": {},
+   "source": [
+    "## 2. Load data"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "b78cc83e-7d34-4c37-906d-8c7ed1a977fb",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "df = pd.read_csv(\n",
+    "    \"https://raw.githubusercontent.com/Nixtla/transfer-learning-time-series/main/datasets/air_passengers.csv\"\n",
+    ")\n",
+    "df.head()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "09be4766",
+   "metadata": {},
+   "source": [
+    "## 3. Fine-tuning"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "a683abc7-190c-40a6-a4e8-41a4c64bd773",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "timegpt_fcst_finetune_df = nixtla_client.forecast(\n",
+    "    df=df,\n",
+    "    h=12,\n",
+    "    finetune_steps=10,\n",
+    "    time_col=\"timestamp\",\n",
+    "    target_col=\"value\",\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "545ffdac-f166-417b-993f-78f51b0db6a1",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "nixtla_client.plot(\n",
+    "    df,\n",
+    "    timegpt_fcst_finetune_df,\n",
+    "    time_col=\"timestamp\",\n",
+    "    target_col=\"value\",\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "62fc9cba-7c6e-4aef-9c68-e05d4fe8f7ba",
+   "metadata": {},
+   "source": [
+    "In this code, `finetune_steps=10` means the model will go through 10 training iterations on your time series data.\n",
+    "\n",
+    "Keep in mind that fine-tuning can involve some trial and error. You might need to adjust the number of `finetune_steps` based on your specific needs and the complexity of your data. Monitor the model's performance during fine-tuning and adjust as needed. Be aware that more `finetune_steps` lead to longer training times and can cause overfitting if not managed properly.\n",
+    "\n",
+    "Remember, fine-tuning is a powerful feature, but it should be used thoughtfully and carefully. A simple way to check whether it pays off is to hold out the end of your series and compare the error with and without fine-tuning, as sketched below."
+   ]
+  },
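+  {
+   "cell_type": "markdown",
+   "id": "3f9b7c12",
+   "metadata": {},
+   "source": [
+    "The following cell is a minimal sketch of that check. It assumes the air passengers data loaded above (with `timestamp` and `value` columns) and holds out the last 12 observations as a validation window; it also assumes the forecast column returned by the client is named `TimeGPT`."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "3f9b7c13",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Hold out the last 12 observations as a validation window\n",
+    "train_df = df.iloc[:-12]\n",
+    "valid_df = df.iloc[-12:]\n",
+    "\n",
+    "# Zero-shot forecast (no fine-tuning)\n",
+    "fcst_zero_shot = nixtla_client.forecast(\n",
+    "    df=train_df, h=12, time_col=\"timestamp\", target_col=\"value\"\n",
+    ")\n",
+    "\n",
+    "# Fine-tuned forecast\n",
+    "fcst_finetuned = nixtla_client.forecast(\n",
+    "    df=train_df, h=12, finetune_steps=10, time_col=\"timestamp\", target_col=\"value\"\n",
+    ")\n",
+    "\n",
+    "# Compare mean absolute errors on the held-out window\n",
+    "mae_zero_shot = abs(valid_df[\"value\"].values - fcst_zero_shot[\"TimeGPT\"].values).mean()\n",
+    "mae_finetuned = abs(valid_df[\"value\"].values - fcst_finetuned[\"TimeGPT\"].values).mean()\n",
+    "print(f\"MAE zero-shot: {mae_zero_shot:.2f}, MAE fine-tuned: {mae_finetuned:.2f}\")"
+   ]
+  },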
+  {
+   "cell_type": "markdown",
+   "id": "8c546351",
+   "metadata": {},
+   "source": [
+    "For a detailed guide on using a specific loss function for fine-tuning, check out the [Fine-tuning with a specific loss function](https://docs.nixtla.io/docs/tutorials-fine_tuning_with_a_specific_loss_function) tutorial."
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "python3",
+   "language": "python",
+   "name": "python3"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/sdk/python/foundation-models/nixtla/03_anomaly_detection.ipynb b/sdk/python/foundation-models/nixtla/03_anomaly_detection.ipynb
new file mode 100644
index 0000000000..24c39c0062
--- /dev/null
+++ b/sdk/python/foundation-models/nixtla/03_anomaly_detection.ipynb
@@ -0,0 +1,253 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Prerequisites\n",
+    "\n",
+    "Please make sure to follow these steps to start using TimeGEN:\n",
+    "\n",
+    "* Register for a valid Azure account with a subscription\n",
+    "* Make sure you have access to [Azure AI Studio](https://learn.microsoft.com/en-us/azure/ai-studio/what-is-ai-studio?tabs=home)\n",
+    "* Create a project and resource group\n",
+    "* Select `TimeGEN-1`\n",
+    "\n",
+    "  > Note that some models may not be available in all Azure AI and Azure Machine Learning regions. In those cases, you can create a workspace or project in a region where the models are available and then consume them with a connection from a different region. To learn more about using connections, see [Consume models with connections](https://learn.microsoft.com/en-us/azure/ai-studio/how-to/deployments-connections).\n",
+    "\n",
+    "* Deploy with \"Pay-as-you-go\"\n",
+    "\n",
+    "Once deployed successfully, you should be assigned an API endpoint and a security key for inference.\n",
+    "\n",
+    "To complete this tutorial, you will need to:\n",
+    "\n",
+    "* Install `nixtla` and `pandas`:\n",
+    "\n",
+    "  ```bash\n",
+    "  pip install nixtla pandas\n",
+    "  ```"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Anomaly detection\n",
+    "\n",
+    "Anomaly detection is the task of detecting abnormal points, that is, points that deviate from the normal behaviour of the series. It is crucial in many applications, such as cybersecurity or equipment monitoring.\n",
+    "\n",
+    "In this tutorial, we explore in detail the anomaly detection capability of TimeGEN."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Import packages\n",
+    "\n",
+    "First, we import the required packages for this tutorial and create an instance of `NixtlaClient`."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import pandas as pd\n",
+    "from nixtla import NixtlaClient"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "nixtla_client = NixtlaClient(\n",
+    "    base_url=\"your azure ai endpoint\",\n",
+    "    api_key=\"your api_key\",\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Load dataset\n",
+    "\n",
+    "Now, let's load the dataset for this tutorial. We use the Peyton Manning dataset, which tracks visits to the Wikipedia page of Peyton Manning."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "df = pd.read_csv(\n",
+    "    \"https://raw.githubusercontent.com/Nixtla/transfer-learning-time-series/main/datasets/peyton_manning.csv\"\n",
+    ")\n",
+    "\n",
+    "df.head()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "nixtla_client.plot(\n",
+    "    df, time_col=\"timestamp\", target_col=\"value\", max_insample_length=365\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Anomaly detection\n",
+    "\n",
+    "We now perform anomaly detection. By default, TimeGEN uses a 99% confidence interval. If a point falls outside of that interval, it is considered an anomaly."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "anomalies_df = nixtla_client.detect_anomalies(\n",
+    "    df,\n",
+    "    time_col=\"timestamp\",\n",
+    "    target_col=\"value\",\n",
+    "    freq=\"D\",\n",
+    ")\n",
+    "\n",
+    "anomalies_df.head()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "As you can see, 0 is assigned to \"normal\" values, as they fall inside the confidence interval, while a label of 1 is assigned to abnormal points."
+   ]
+  },
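+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "As a quick sanity check, we can count and inspect the flagged points directly. This is a small sketch that assumes the label column in `anomalies_df` is named `anomaly`, as shown in the output above."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Count the flagged points and inspect a few of them\n",
+    "print(anomalies_df[\"anomaly\"].sum(), \"anomalies detected\")\n",
+    "anomalies_df[anomalies_df[\"anomaly\"] == 1].head()"
+   ]
+  },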
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "We can also plot the anomalies using `NixtlaClient`."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "nixtla_client.plot(df, anomalies_df, time_col=\"timestamp\", target_col=\"value\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Anomaly detection with exogenous features\n",
+    "\n",
+    "Previously, we performed anomaly detection without using any exogenous features. However, it is possible to create features specifically for this scenario to inform the model in its task of anomaly detection.\n",
+    "\n",
+    "Here, we create date features that can be used by the model."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "This is done using the `date_features` argument. We can set it to `True`, and it will generate all possible features from the given dates and frequency of the data. Alternatively, we can specify a list of the features that we want. In this case, we want only features at the *month* and *year* level (a sketch of the `True` variant follows the next cell)."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "anomalies_df_x = nixtla_client.detect_anomalies(\n",
+    "    df,\n",
+    "    time_col=\"timestamp\",\n",
+    "    target_col=\"value\",\n",
+    "    freq=\"D\",\n",
+    "    date_features=[\"month\", \"year\"],\n",
+    ")"
+   ]
+  },
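+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "For reference, the `True` variant mentioned above would look like the following sketch, letting the client derive every date feature it supports for a daily series."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Alternatively, let the client generate all date features automatically\n",
+    "anomalies_df_all = nixtla_client.detect_anomalies(\n",
+    "    df,\n",
+    "    time_col=\"timestamp\",\n",
+    "    target_col=\"value\",\n",
+    "    freq=\"D\",\n",
+    "    date_features=True,\n",
+    ")"
+   ]
+  },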
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Then, we can plot the weights of each feature to understand its impact on anomaly detection."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "nixtla_client.weights_x.plot.barh(x=\"features\", y=\"weights\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Modifying the confidence intervals\n",
+    "\n",
+    "We can tweak the confidence intervals using the `level` argument. It takes any value between 0 and 100, including decimal numbers.\n",
+    "\n",
+    "Reducing the confidence interval results in more anomalies being detected, while increasing it reduces the number of anomalies.\n",
+    "\n",
+    "Here, for example, we reduce the interval to 70%, and we will notice more anomalies being plotted (red dots)."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "anomalies_df = nixtla_client.detect_anomalies(\n",
+    "    df,\n",
+    "    time_col=\"timestamp\",\n",
+    "    target_col=\"value\",\n",
+    "    freq=\"D\",\n",
+    "    level=70,\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "nixtla_client.plot(df, anomalies_df, time_col=\"timestamp\", target_col=\"value\")"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "python3",
+   "language": "python",
+   "name": "python3"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}
diff --git a/sdk/python/foundation-models/nixtla/04_exogenous_variables.ipynb b/sdk/python/foundation-models/nixtla/04_exogenous_variables.ipynb
new file mode 100644
index 0000000000..4573cebeac
--- /dev/null
+++ b/sdk/python/foundation-models/nixtla/04_exogenous_variables.ipynb
@@ -0,0 +1,444 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "id": "745cabf8-eadb-4cd9-98c4-41e13d6b791f",
+   "metadata": {},
+   "source": [
+    "# Prerequisites\n",
+    "\n",
+    "Please make sure to follow these steps to start using TimeGEN:\n",
+    "\n",
+    "* Register for a valid Azure account with a subscription\n",
+    "* Make sure you have access to [Azure AI Studio](https://learn.microsoft.com/en-us/azure/ai-studio/what-is-ai-studio?tabs=home)\n",
+    "* Create a project and resource group\n",
+    "* Select `TimeGEN-1`\n",
+    "\n",
+    "  > Note that some models may not be available in all Azure AI and Azure Machine Learning regions. In those cases, you can create a workspace or project in a region where the models are available and then consume them with a connection from a different region. To learn more about using connections, see [Consume models with connections](https://learn.microsoft.com/en-us/azure/ai-studio/how-to/deployments-connections).\n",
+    "\n",
+    "* Deploy with \"Pay-as-you-go\"\n",
+    "\n",
+    "Once deployed successfully, you should be assigned an API endpoint and a security key for inference.\n",
+    "\n",
+    "To complete this tutorial, you will need to:\n",
+    "\n",
+    "* Install `nixtla` and `pandas`:\n",
+    "\n",
+    "  ```bash\n",
+    "  pip install nixtla pandas\n",
+    "  ```"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "24c899f2-78c1-43b2-8347-3164e3549c3f",
+   "metadata": {},
+   "source": [
+    "# Exogenous variables"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "a81fc39a-c6a0-485d-a3f3-c3a6298928a6",
+   "metadata": {},
+   "source": [
+    "Exogenous variables, or external factors, are crucial in time series forecasting as they provide additional information that might influence the prediction. These variables could include holiday markers, marketing spending, weather data, or any other external data that correlates with the time series you are forecasting.\n",
+    "\n",
+    "For example, if you're forecasting ice cream sales, temperature data could serve as a useful exogenous variable. On hotter days, ice cream sales may increase.\n",
+    "\n",
+    "To incorporate exogenous variables in TimeGEN, you'll need to pair each point in your time series data with the corresponding external data."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "bfa2ede9",
+   "metadata": {},
+   "source": [
+    "## 1. Import packages\n",
+    "First, we import the required packages and initialize the Nixtla client."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "a84a0f65-e084-4e65-a0fb-d27c184dde44",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import pandas as pd\n",
+    "from nixtla import NixtlaClient"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "469d474a-c427-427c-a127-d140aeba0354",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "nixtla_client = NixtlaClient(\n",
+    "    base_url=\"your azure ai endpoint\",\n",
+    "    api_key=\"your api_key\",\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "054c3cad",
+   "metadata": {},
+   "source": [
+    "## 2. Load data"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "bc2bb3db-00e6-44e6-8dc3-a2e0eba7e295",
+   "metadata": {},
+   "source": [
+    "Let's look at an example of predicting day-ahead electricity prices. The following dataset contains the hourly electricity price (`y` column) for five markets in Europe and the US, identified by the `unique_id` column. The columns from `Exogenous1` to `day_6` are exogenous variables that TimeGEN will use to predict the prices."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "2fec19dc-48dd-4337-8678-fe3753b5eb30",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "df = pd.read_csv(\n",
+    "    \"https://raw.githubusercontent.com/Nixtla/transfer-learning-time-series/main/datasets/electricity-short-with-ex-vars.csv\"\n",
+    ")\n",
+    "df.head()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "b8f00038",
+   "metadata": {},
+   "source": [
+    "## 3. Forecasting electricity prices using exogenous variables"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "eed41a2f-67ce-4812-8073-18b271b1592d",
+   "metadata": {},
+   "source": [
+    "To produce forecasts, we also have to add the future values of the exogenous variables. Let's read this dataset. In this case, we want to predict 24 steps ahead, so each `unique_id` will have 24 observations.\n",
+    "\n",
+    "> **Important:** If you want to use exogenous variables when forecasting with TimeGEN, you need to have the future values of those exogenous variables too."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "95c03577-25f3-479f-a76e-fd5e4632da96",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "future_ex_vars_df = pd.read_csv(\n",
+    "    \"https://raw.githubusercontent.com/Nixtla/transfer-learning-time-series/main/datasets/electricity-short-future-ex-vars.csv\"\n",
+    ")\n",
+    "future_ex_vars_df.head()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "99f1e41d-e5bf-4d01-aa68-1a7a7fbb579b",
+   "metadata": {},
+   "source": [
+    "Let's call the `forecast` method, adding this information:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "d252a0e0-f393-4957-8173-230972fc7a40",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "fcst_ex_vars_df = nixtla_client.forecast(\n",
+    "    df=df, X_df=future_ex_vars_df, h=24, level=[80, 90]\n",
+    ")\n",
+    "fcst_ex_vars_df.head()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "18f36e5c-f41f-4888-b279-97558b71c1bf",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "nixtla_client.plot(\n",
+    "    df[[\"unique_id\", \"ds\", \"y\"]],\n",
+    "    fcst_ex_vars_df,\n",
+    "    max_insample_length=365,\n",
+    "    level=[80, 90],\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "e945ad3c-63fd-4e51-9815-336306f60463",
+   "metadata": {},
+   "source": [
+    "We can also show the importance of the features."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "ef1c9df3-eff2-4984-a88f-00274b21b3cd",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "nixtla_client.weights_x.plot.barh(x=\"features\", y=\"weights\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "80c432bd",
+   "metadata": {},
+   "source": [
+    "This plot shows that `Exogenous1` and `Exogenous2` are the most important for this forecasting task, as they have the largest weights."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "38cd05d6",
+   "metadata": {},
+   "source": [
+    "## 4. How to generate future exogenous variables?"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "b4b6b0e4",
+   "metadata": {},
+   "source": [
+    "In the example above, we just loaded the future exogenous variables. Often, these are not available because their values are unknown. Hence, we need to forecast them too.\n",
+    "\n",
+    "> **Important:** If you were to include only historic exogenous variables in your model, you would be _implicitly_ making assumptions about the future of these exogenous variables in your forecast. That's why TimeGEN requires you to explicitly incorporate the future of these exogenous variables too, so that you make your assumptions about them _explicit_.\n",
+    "\n",
+    "Below, we show how you can forecast `Exogenous1` and `Exogenous2` separately, so that you can generate the future exogenous variables when they are not available."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "ce9a9bce",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# We read the data and create separate dataframes for the historic exogenous\n",
+    "# variables that we want to forecast separately.\n",
+    "df = pd.read_csv(\n",
+    "    \"https://raw.githubusercontent.com/Nixtla/transfer-learning-time-series/main/datasets/electricity-short-with-ex-vars.csv\"\n",
+    ")\n",
+    "df_exog1 = df[[\"unique_id\", \"ds\", \"Exogenous1\"]]\n",
+    "df_exog2 = df[[\"unique_id\", \"ds\", \"Exogenous2\"]]"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "2b5abf7f",
+   "metadata": {},
+   "source": [
+    "Next, we can use TimeGEN to forecast `Exogenous1` and `Exogenous2`. In this case, we assume these quantities can be forecast separately."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "91eaa3f6",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "fcst_ex1 = nixtla_client.forecast(df=df_exog1, h=24, target_col=\"Exogenous1\")\n",
+    "fcst_ex2 = nixtla_client.forecast(df=df_exog2, h=24, target_col=\"Exogenous2\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "44b4b4fd",
+   "metadata": {},
+   "source": [
+    "We can now start creating `X_df`, which contains the future exogenous variables."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "53cfe8e9",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "fcst_ex1 = fcst_ex1.rename(columns={\"TimeGPT\": \"Exogenous1\"})\n",
+    "fcst_ex2 = fcst_ex2.rename(columns={\"TimeGPT\": \"Exogenous2\"})"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "f62cec4a",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "X_df = fcst_ex1.merge(fcst_ex2)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "697fc8e4",
+   "metadata": {},
+   "source": [
+    "Next, we also need to add the `day_0` to `day_6` future exogenous variables. These are easy to create: they simply encode the weekday, which we can extract from the `ds` column."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "322e0197",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# We have 7 days; for each day, a separate column denoting 1/0\n",
+    "for i in range(7):\n",
+    "    X_df[f\"day_{i}\"] = 1 * (pd.to_datetime(X_df[\"ds\"]).dt.weekday == i)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "f8113866",
+   "metadata": {},
+   "source": [
+    "We have now created `X_df`; let's investigate it:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "38ef56b7",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "X_df.head(10)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "ac313e02",
+   "metadata": {},
+   "source": [
+    "Let's compare it to our pre-loaded version:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "f2a0f524",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "future_ex_vars_df.head(10)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "716d3d38",
+   "metadata": {},
+   "source": [
+    "As you can see, the values for `Exogenous1` and `Exogenous2` are slightly different, which makes sense because we've made a forecast of these values with TimeGEN."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "f78ab3ff",
+   "metadata": {},
+   "source": [
+    "Let's create a new forecast of our electricity prices with TimeGEN using our new `X_df`:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "a104659d",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "fcst_ex_vars_df_new = nixtla_client.forecast(df=df, X_df=X_df, h=24, level=[80, 90])\n",
+    "fcst_ex_vars_df_new.head()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "b2ba53a1",
+   "metadata": {},
+   "source": [
+    "Let's create a combined dataframe with the two forecasts and plot the values to compare them."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "62f20711",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "fcst_ex_vars_df = fcst_ex_vars_df.rename(\n",
+    "    columns={\"TimeGPT\": \"TimeGPT-provided_exogenous\"}\n",
+    ")\n",
+    "fcst_ex_vars_df_new = fcst_ex_vars_df_new.rename(\n",
+    "    columns={\"TimeGPT\": \"TimeGPT-forecasted_exogenous\"}\n",
+    ")\n",
+    "\n",
+    "forecasts = fcst_ex_vars_df[[\"unique_id\", \"ds\", \"TimeGPT-provided_exogenous\"]].merge(\n",
+    "    fcst_ex_vars_df_new[[\"unique_id\", \"ds\", \"TimeGPT-forecasted_exogenous\"]]\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "54fcf5cd",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "nixtla_client.plot(\n",
+    "    df[[\"unique_id\", \"ds\", \"y\"]],\n",
+    "    forecasts,\n",
+    "    max_insample_length=365,\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "019c6510",
+   "metadata": {},
+   "source": [
+    "As you can see, we obtain a slightly different forecast when we use our forecasted exogenous variables. The sketch below puts a rough number on that difference."
+   ]
+  },
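+  {
+   "cell_type": "markdown",
+   "id": "7a1c5e88",
+   "metadata": {},
+   "source": [
+    "This is a small sketch comparing the two forecast columns of the combined `forecasts` frame built above; it reports the mean absolute difference between them."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "7a1c5e89",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Mean absolute difference between the two forecasts\n",
+    "(\n",
+    "    forecasts[\"TimeGPT-provided_exogenous\"]\n",
+    "    - forecasts[\"TimeGPT-forecasted_exogenous\"]\n",
+    ").abs().mean()"
+   ]
+  }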
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "python3",
+   "language": "python",
+   "name": "python3"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/sdk/python/foundation-models/nixtla/05_demand_forecasting.ipynb b/sdk/python/foundation-models/nixtla/05_demand_forecasting.ipynb
new file mode 100644
index 0000000000..fad4738c6a
--- /dev/null
+++ b/sdk/python/foundation-models/nixtla/05_demand_forecasting.ipynb
@@ -0,0 +1,605 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Prerequisites\n",
+    "\n",
+    "Please make sure to follow these steps to start using TimeGEN:\n",
+    "\n",
+    "* Register for a valid Azure account with a subscription\n",
+    "* Make sure you have access to [Azure AI Studio](https://learn.microsoft.com/en-us/azure/ai-studio/what-is-ai-studio?tabs=home)\n",
+    "* Create a project and resource group\n",
+    "* Select `TimeGEN-1`\n",
+    "\n",
+    "  > Note that some models may not be available in all Azure AI and Azure Machine Learning regions. In those cases, you can create a workspace or project in a region where the models are available and then consume them with a connection from a different region. To learn more about using connections, see [Consume models with connections](https://learn.microsoft.com/en-us/azure/ai-studio/how-to/deployments-connections).\n",
+    "\n",
+    "* Deploy with \"Pay-as-you-go\"\n",
+    "\n",
+    "Once deployed successfully, you should be assigned an API endpoint and a security key for inference.\n",
+    "\n",
+    "To complete this tutorial, you will need to:\n",
+    "\n",
+    "* Install `nixtla`, `pandas`, `numpy`, `utilsforecast`, and `statsforecast`:\n",
+    "\n",
+    "  ```bash\n",
+    "  pip install nixtla pandas numpy utilsforecast statsforecast\n",
+    "  ```"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Forecasting Demand"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "In this tutorial, we show how to use TimeGEN on intermittent series, which contain many values at zero. Here, we use a subset of the M5 dataset that tracks the demand for food items in a Californian store. The dataset also includes exogenous variables like the sell price and the type of event occurring on a particular day.\n",
+    "\n",
+    "TimeGEN achieves the best performance at a MAE of 0.49, which represents a **14% improvement** over the best statistical model specifically built to handle intermittent time series data."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Initial setup\n",
+    "\n",
+    "We start off by importing the required packages for this tutorial and create an instance of `NixtlaClient`."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import time\n",
+    "import pandas as pd\n",
+    "import numpy as np\n",
+    "\n",
+    "from nixtla import NixtlaClient\n",
+    "\n",
+    "from utilsforecast.losses import mae\n",
+    "from utilsforecast.evaluation import evaluate"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "nixtla_client = NixtlaClient(\n",
+    "    base_url=\"your azure ai endpoint\",\n",
+    "    api_key=\"your api_key\",\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "We now read the dataset and plot it."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "df = pd.read_csv(\n",
+    "    \"https://raw.githubusercontent.com/Nixtla/transfer-learning-time-series/main/datasets/m5_sales_exog_small.csv\"\n",
+    ")\n",
+    "df[\"ds\"] = pd.to_datetime(df[\"ds\"])\n",
+    "\n",
+    "df.head()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "nixtla_client.plot(\n",
+    "    df,\n",
+    "    max_insample_length=365,\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "In the figure above, we can see the intermittent nature of this dataset, with many periods of zero demand.\n",
+    "\n",
+    "Now, let's use TimeGEN to forecast the demand of each product."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Bounded forecasts\n",
+    "\n",
+    "To avoid negative predictions coming from the model, we use a log transformation on the data. That way, the model will be forced to predict only positive values.\n",
+    "\n",
+    "Note that due to the presence of zeros in our dataset, we add one to all points before taking the log."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "df_transformed = df.copy()\n",
+    "\n",
+    "df_transformed[\"y\"] = np.log(df_transformed[\"y\"] + 1)\n",
+    "\n",
+    "df_transformed.head()"
+   ]
+  },
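+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "As a side note, NumPy ships `log1p` and `expm1`, which compute `log(x + 1)` and `exp(x) - 1` in a numerically stable way. The sketch below checks that `log1p` matches the transformation above; either form could be used in this tutorial."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Equivalent, numerically stable version of the log transformation:\n",
+    "#   forward: np.log1p(y), inverse: np.expm1(y_hat)\n",
+    "np.allclose(np.log1p(df[\"y\"]), np.log(df[\"y\"] + 1))"
+   ]
+  },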
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Now, let's keep the last 28 time steps for the test set and use the rest as input to the model."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "test_df = df_transformed.groupby(\"unique_id\").tail(28)\n",
+    "\n",
+    "input_df = df_transformed.drop(test_df.index).reset_index(drop=True)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Forecasting with TimeGEN"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "start = time.time()\n",
+    "\n",
+    "fcst_df = nixtla_client.forecast(\n",
+    "    df=input_df,\n",
+    "    h=28,\n",
+    "    level=[80],  # Generate an 80% confidence interval\n",
+    "    finetune_steps=10,  # Specify the number of steps for fine-tuning\n",
+    "    finetune_loss=\"mae\",  # Use the MAE as the loss function for fine-tuning\n",
+    "    time_col=\"ds\",\n",
+    "    target_col=\"y\",\n",
+    "    id_col=\"unique_id\",\n",
+    ")\n",
+    "\n",
+    "end = time.time()\n",
+    "\n",
+    "TimeGEN_duration = end - start\n",
+    "\n",
+    "print(f\"Time (TimeGEN): {TimeGEN_duration}\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Great! TimeGEN was done in **5.8 seconds** and we now have predictions. However, those predictions are transformed, so we need to invert the transformation to get back to the original scale. Therefore, we take the exponential and subtract one from each data point."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "cols = [col for col in fcst_df.columns if col not in [\"ds\", \"unique_id\"]]\n",
+    "\n",
+    "for col in cols:\n",
+    "    fcst_df[col] = np.exp(fcst_df[col]) - 1\n",
+    "\n",
+    "fcst_df.head()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Evaluation"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Before measuring the performance metric, let's plot the predictions against the actual values."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "nixtla_client.plot(\n",
+    "    test_df, fcst_df, models=[\"TimeGPT\"], level=[80], time_col=\"ds\", target_col=\"y\"\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Finally, we can measure the mean absolute error (MAE) of the model."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "fcst_df[\"ds\"] = pd.to_datetime(fcst_df[\"ds\"])\n",
+    "\n",
+    "test_df = pd.merge(test_df, fcst_df, \"left\", [\"unique_id\", \"ds\"])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "evaluation = evaluate(\n",
+    "    test_df, metrics=[mae], models=[\"TimeGPT\"], target_col=\"y\", id_col=\"unique_id\"\n",
+    ")\n",
+    "\n",
+    "average_metrics = evaluation.groupby(\"metric\")[\"TimeGPT\"].mean()\n",
+    "average_metrics"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Forecasting with statistical models"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "The library `statsforecast` by Nixtla provides a suite of statistical models specifically built for intermittent forecasting, such as Croston, IMAPA and TSB. Let's use these models and see how they perform against TimeGEN."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from statsforecast import StatsForecast\n",
+    "from statsforecast.models import CrostonClassic, CrostonOptimized, IMAPA, TSB"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Here, we use four models: two versions of Croston, IMAPA and TSB."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "models = [CrostonClassic(), CrostonOptimized(), IMAPA(), TSB(0.1, 0.1)]\n",
+    "\n",
+    "sf = StatsForecast(models=models, freq=\"D\", n_jobs=-1)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Then, we can fit the models on our data."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "start = time.time()\n",
+    "\n",
+    "sf.fit(df=input_df)\n",
+    "\n",
+    "sf_preds = sf.predict(h=28)\n",
+    "\n",
+    "end = time.time()\n",
+    "\n",
+    "sf_duration = end - start\n",
+    "\n",
+    "print(f\"Statistical models took: {sf_duration}s\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Here, fitting and predicting with four statistical models took 5.2 seconds, while TimeGEN took 5.8 seconds, so TimeGEN was only 0.6 seconds slower."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Again, we need to invert the transformation. Remember that the training data was previously transformed using the log function."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "cols = [col for col in sf_preds.columns if col not in [\"ds\", \"unique_id\"]]\n",
+    "\n",
+    "for col in cols:\n",
+    "    sf_preds[col] = np.exp(sf_preds[col]) - 1\n",
+    "\n",
+    "sf_preds.head()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Evaluation"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Now, let's combine the predictions from all methods and see which performs best."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "test_df = pd.merge(test_df, sf_preds, \"left\", [\"unique_id\", \"ds\"])\n",
+    "test_df.head()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "evaluation = evaluate(\n",
+    "    test_df,\n",
+    "    metrics=[mae],\n",
+    "    models=[\"TimeGPT\", \"CrostonClassic\", \"CrostonOptimized\", \"IMAPA\", \"TSB\"],\n",
+    "    target_col=\"y\",\n",
+    "    id_col=\"unique_id\",\n",
+    ")\n",
+    "\n",
+    "average_metrics = evaluation.groupby(\"metric\")[\n",
+    "    [\"TimeGPT\", \"CrostonClassic\", \"CrostonOptimized\", \"IMAPA\", \"TSB\"]\n",
+    "].mean()\n",
+    "average_metrics"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "In the table above, we can see that TimeGEN achieves the lowest MAE, a 12.8% improvement over the best-performing statistical model.\n",
+    "\n",
+    "Note that this was done without using any of the available exogenous features. While the statistical models do not support them, let's try including them in TimeGEN."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Forecasting with exogenous variables using TimeGEN"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "To forecast with exogenous variables, we need to specify their future values over the forecast horizon. Therefore, let's simply take the types of events, as those dates are known in advance."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "futr_exog_df = test_df.drop(\n",
+    "    [\n",
+    "        \"TimeGPT\",\n",
+    "        \"CrostonClassic\",\n",
+    "        \"CrostonOptimized\",\n",
+    "        \"IMAPA\",\n",
+    "        \"TSB\",\n",
+    "        \"y\",\n",
+    "        \"TimeGPT-lo-80\",\n",
+    "        \"TimeGPT-hi-80\",\n",
+    "        \"sell_price\",\n",
+    "    ],\n",
+    "    axis=1,\n",
+    ")\n",
+    "futr_exog_df.head()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Then, we simply call the `forecast` method and pass `futr_exog_df` through the `X_df` parameter."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "start = time.time()\n",
+    "\n",
+    "fcst_df = nixtla_client.forecast(\n",
+    "    df=input_df,\n",
+    "    X_df=futr_exog_df,\n",
+    "    h=28,\n",
+    "    level=[80],  # Generate an 80% confidence interval\n",
+    "    finetune_steps=10,  # Specify the number of steps for fine-tuning\n",
+    "    finetune_loss=\"mae\",  # Use the MAE as the loss function for fine-tuning\n",
+    "    time_col=\"ds\",\n",
+    "    target_col=\"y\",\n",
+    "    id_col=\"unique_id\",\n",
+    ")\n",
+    "\n",
+    "end = time.time()\n",
+    "\n",
+    "TimeGEN_duration = end - start\n",
+    "\n",
+    "print(f\"Time (TimeGEN): {TimeGEN_duration}\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Great! Remember that the predictions are transformed, so we have to invert the transformation again."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "fcst_df.rename(\n",
+    "    columns={\n",
+    "        \"TimeGPT\": \"TimeGPT_ex\",\n",
+    "    },\n",
+    "    inplace=True,\n",
+    ")\n",
+    "\n",
+    "cols = [col for col in fcst_df.columns if col not in [\"ds\", \"unique_id\"]]\n",
+    "\n",
+    "for col in cols:\n",
+    "    fcst_df[col] = np.exp(fcst_df[col]) - 1\n",
+    "\n",
+    "fcst_df.head()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Evaluation"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Finally, let's evaluate the performance of TimeGEN with exogenous features."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "test_df[\"TimeGPT_ex\"] = fcst_df[\"TimeGPT_ex\"].values\n",
+    "test_df.head()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "evaluation = evaluate(\n",
+    "    test_df,\n",
+    "    metrics=[mae],\n",
+    "    models=[\n",
+    "        \"TimeGPT\",\n",
+    "        \"CrostonClassic\",\n",
+    "        \"CrostonOptimized\",\n",
+    "        \"IMAPA\",\n",
+    "        \"TSB\",\n",
+    "        \"TimeGPT_ex\",\n",
+    "    ],\n",
+    "    target_col=\"y\",\n",
+    "    id_col=\"unique_id\",\n",
+    ")\n",
+    "\n",
+    "average_metrics = evaluation.groupby(\"metric\")[\n",
+    "    [\"TimeGPT\", \"CrostonClassic\", \"CrostonOptimized\", \"IMAPA\", \"TSB\", \"TimeGPT_ex\"]\n",
+    "].mean()\n",
+    "average_metrics"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "From the table above, we can see that using exogenous features improved the performance of TimeGEN. It now represents a 14% improvement over the best statistical model.\n",
+    "\n",
+    "Using TimeGEN with exogenous features took 6.8 seconds. This is 1.6 seconds slower than the statistical models, but it resulted in much better predictions."
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "python3",
+   "language": "python",
+   "name": "python3"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}