In [None]:
import os

import pandas as pd
from nixtla import NixtlaClient

# Anomaly detection

## Import packages

Import required libraries for data manipulation and Nixtla client initialization.

In [None]:
# Read the API key from the environment so it is never hardcoded in the notebook.
# Indexing os.environ raises KeyError when NIXTLA_API_KEY is not set.
NIXTLA_API_KEY = os.environ["NIXTLA_API_KEY"]
# Client object used by every detect_anomalies / plot call in the cells below.
nixtla_client = NixtlaClient(api_key=NIXTLA_API_KEY)

Initialize the Nixtla client with the API key read from the `NIXTLA_API_KEY` environment variable.

## Load dataset

Now, let's load the dataset for this tutorial.

In [None]:
# Download the CSV and parse the `ds` column as datetimes while reading
wikipedia = pd.read_csv(
    "https://datasets-nixtla.s3.amazonaws.com/peyton-manning.csv",
    parse_dates=["ds"],
)
wikipedia.head(10)

Load the Peyton Manning Wikipedia page views dataset and display the first 10 rows.

In [None]:
# Plot the raw series; the trailing bare expression makes the notebook render the figure
wikipedia_plot = nixtla_client.plot(df=wikipedia)
wikipedia_plot

Plot the time series data to visualize the patterns.

In [None]:
# Baseline run: daily frequency, timegpt-1 model, no extra features
anomalies_df = nixtla_client.detect_anomalies(
    df=wikipedia,
    model="timegpt-1",
    freq="D",
)
# Preview the first rows of the result
anomalies_df.head()

## Anomaly detection

Detect anomalies in the time series using the TimeGPT model with default settings.

In [None]:
# Plot the series together with the baseline detection result
anomaly_plot = nixtla_client.plot(df=wikipedia, forecasts_df=anomalies_df)
anomaly_plot

Visualize the detected anomalies on the time series plot.

In [None]:
# Same series, frequency and model as the baseline run, but additionally
# request month/year date features with one-hot encoding enabled.
anomalies_df_exogenous = nixtla_client.detect_anomalies(
    df=wikipedia,
    model="timegpt-1",
    freq="D",
    date_features_to_one_hot=True,
    date_features=["month", "year"],
)

## Anomaly detection with exogenous features

Detect anomalies using TimeGPT with additional date-based features (month and year).

In [None]:
# Horizontal bar chart: one bar per feature, bar length given by its weight.
# NOTE(review): weights_x is presumably populated by the preceding
# detect_anomalies call that used date features — confirm before reordering cells.
feature_plot = nixtla_client.weights_x.plot(kind="barh", x="features", y="weights")
feature_plot

Plot the feature importance weights to understand which features contribute most to anomaly detection.

In [None]:
# Baseline run (no date features): count the rows flagged in the `anomaly` column
print("Number of anomalies without exogenous features:", anomalies_df["anomaly"].sum())

# Run with month/year date features: same count, for comparison
print("Number of anomalies with exogenous features:", anomalies_df_exogenous["anomaly"].sum())

Compare the number of anomalies detected with and without exogenous features.

In [None]:
# Plot the series together with the result of the run that used date features
anomalies_exogenous_plot = nixtla_client.plot(
    df=wikipedia,
    forecasts_df=anomalies_df_exogenous,
)
anomalies_exogenous_plot

Visualize the anomalies detected using the model with exogenous features.

In [None]:
# Create the output directory first: savefig does not create missing parent
# directories, so this cell would otherwise fail with FileNotFoundError on a
# fresh checkout. exist_ok=True keeps the cell safe to re-run.
os.makedirs("images", exist_ok=True)
# Save the figure as SVG with a tight bounding box.
anomalies_exogenous_plot.savefig("images/anomalies_exogenous_plot.svg", format="svg", bbox_inches="tight")

In [None]:
# Re-run detection at level=70. The model is pinned explicitly, as in the other
# detect_anomalies calls in this notebook, so the comparison against
# `anomalies_df` differs only in `level` and does not depend on the
# library's default model.
anomalies_df_70 = nixtla_client.detect_anomalies(
    wikipedia,
    freq="D",
    level=70,
    model="timegpt-1",
)

## Modifying the confidence intervals

Detect anomalies using a lower confidence level (70%) to see how it affects the results. A lower level gives a narrower interval, so more observations are expected to fall outside it.

In [None]:
# Count the rows flagged in the `anomaly` column for each run and print them side by side
print("Number of anomalies with 99% confidence interval:", anomalies_df["anomaly"].sum())
print("Number of anomalies with 70% confidence interval:", anomalies_df_70["anomaly"].sum())

Compare the number of anomalies detected with different confidence intervals (99% vs 70%).

In [None]:
# Plot the series together with the result of the level=70 run
anomalies_70_plot = nixtla_client.plot(
    df=wikipedia,
    forecasts_df=anomalies_df_70,
)
anomalies_70_plot

Visualize the anomalies detected using the 70% confidence interval.

In [None]:
# Create the output directory first: savefig does not create missing parent
# directories, so this cell would otherwise fail with FileNotFoundError on a
# fresh checkout. exist_ok=True keeps the cell safe to re-run.
os.makedirs("images", exist_ok=True)
# Save the figure as SVG with a tight bounding box.
anomalies_70_plot.savefig("images/anomalies_70_plot.svg", format="svg", bbox_inches="tight")