# Nixtla Client

In [None]:
#| default_exp nixtla_client

In [None]:
#| hide 
%load_ext autoreload
%autoreload 2

In [None]:
#| export
import logging
import math
import os
import warnings
from concurrent.futures import ThreadPoolExecutor, as_completed
from typing import Any, Callable, Dict, List, Literal, Optional, Tuple, Union

import httpx
import numpy as np
import pandas as pd
import utilsforecast.processing as ufp
from pydantic import NonNegativeInt, PositiveInt
from utilsforecast.compat import DataFrame, pl_DataFrame
from utilsforecast.feature_engineering import _add_time_features, time_features
from utilsforecast.validation import validate_format, validate_freq

from nixtla.core.api_error import ApiError
from nixtla.core.http_client import HttpClient
from nixtla.utils import _restrict_input_samples

In [None]:
#| export
# Uncomment the next line to surface this module's INFO-level progress messages.
# logging.basicConfig(level=logging.INFO)
# Module-level logger used by every method below for progress messages.
logger = logging.getLogger(__name__)
# Only let ERROR (and above) records from the `httpx` logger through.
logging.getLogger('httpx').setLevel(logging.ERROR)

In [None]:
#| hide
from itertools import product

from dotenv import load_dotenv
from fastcore.test import test_eq, test_fail, test_warns
from nbdev.showdoc import show_doc

In [None]:
#| hide
load_dotenv()
logging.getLogger('statsforecast').setLevel(logging.ERROR)

In [None]:
#| exporti
# Accepted values for the `finetune_loss` argument of `NixtlaClient.forecast`.
_LOSS = Literal["default", "mae", "mse", "rmse", "mape", "smape"]
# Accepted values for the `model` argument of `NixtlaClient.forecast`.
_MODEL = Literal["azureai", "timegpt-1", "timegpt-1-long-horizon"]

# Default date features per frequency alias. Looked up by
# `NixtlaClient._maybe_add_date_features` when the caller asks for date
# features without giving an explicit list. A missing alias or an empty list
# makes that method emit a warning asking for an explicit list.
_date_features_by_freq = {
    # Daily frequencies
    'B': ['year', 'month', 'day', 'weekday'],
    'C': ['year', 'month', 'day', 'weekday'],
    'D': ['year', 'month', 'day', 'weekday'],
    # Weekly
    'W': ['year', 'week', 'weekday'],
    # Monthly
    'M': ['year', 'month'],
    'SM': ['year', 'month', 'day'],
    'BM': ['year', 'month'],
    'CBM': ['year', 'month'],
    'MS': ['year', 'month'],
    'SMS': ['year', 'month', 'day'],
    'BMS': ['year', 'month'],
    'CBMS': ['year', 'month'],
    # Quarterly
    'Q': ['year', 'quarter'],
    'BQ': ['year', 'quarter'],
    'QS': ['year', 'quarter'],
    'BQS': ['year', 'quarter'],
    # Yearly
    'A': ['year'],
    'Y': ['year'],
    'BA': ['year'],
    'BY': ['year'],
    'AS': ['year'],
    'YS': ['year'],
    'BAS': ['year'],
    'BYS': ['year'],
    # Hourly
    'BH': ['year', 'month', 'day', 'hour', 'weekday'],
    'H': ['year', 'month', 'day', 'hour'],
    # Minutely
    'T': ['year', 'month', 'day', 'hour', 'minute'],
    'min': ['year', 'month', 'day', 'hour', 'minute'],
    # Secondly
    'S': ['year', 'month', 'day', 'hour', 'minute', 'second'],
    # Milliseconds
    'L': ['year', 'month', 'day', 'hour', 'minute', 'second', 'millisecond'],
    'ms': ['year', 'month', 'day', 'hour', 'minute', 'second', 'millisecond'],
    # Microseconds
    'U': ['year', 'month', 'day', 'hour', 'minute', 'second', 'microsecond'],
    'us': ['year', 'month', 'day', 'hour', 'minute', 'second', 'microsecond'],
    # Nanoseconds (no defaults: the empty list triggers the warning described above)
    'N': []
}

In [None]:
#| export
class NixtlaClient:

    def __init__(
        self,
        api_key: Optional[str] = None,
        base_url: Optional[str] = None,
        timeout: int = 60,
        max_retries: int = 6,
    ):
        if api_key is None:
            api_key = os.environ['NIXTLA_API_KEY']
        if base_url is None:
            base_url = os.getenv('NIXTLA_BASE_URL', 'https://api.nixtla.io')
        self._client_kwargs = {
            'base_url': base_url,
            'headers': {'Authorization': f'Bearer {api_key}'},
            'timeout': timeout,
        }
        self.max_retries = max_retries
        self._model_params: Dict[Tuple[str, str], Tuple[int, int]] = {}

    def _make_request(self, client: HttpClient, endpoint: str, payload: Dict[str, Any]) -> Dict[str, Any]:
        resp = client.request(
            method='post',
            url=endpoint,
            json=payload,
            max_retries=self.max_retries,
        )
        resp_body = resp.json()
        if resp.status_code != 200:
            raise ApiError(status_code=resp.status_code, body=resp_body)
        if 'data' in resp_body:
            resp_body = resp_body['data']
        return resp_body

    def _make_partitioned_requests(
        self,
        client: HttpClient,
        endpoint: str,
        payloads: List[Dict[str, Any]],
    ) -> Dict[str, Any]:
        """Send one request per payload concurrently and merge the responses.

        Parameters
        ----------
        client : HttpClient
            Client shared by all the requests.
        endpoint : str
            URL every payload is posted to.
        payloads : list of dict
            One request body per partition.

        Returns
        -------
        dict
            'mean' (and 'sizes', 'anomaly', 'y' when present in the first
            response) concatenated in payload order, 'intervals' as a dict of
            concatenated arrays or None, and 'weights_x' as a list with one
            entry per partition or None.

        Raises
        ------
        ValueError
            If `payloads` is empty.
        """
        from tqdm.auto import tqdm

        num_partitions = len(payloads)
        if num_partitions == 0:
            raise ValueError('At least one payload is required.')
        results = num_partitions * [None]
        max_workers = min(10, num_partitions)
        with ThreadPoolExecutor(max_workers) as executor:
            future2pos = {
                executor.submit(self._make_request, client, endpoint, payload): i
                for i, payload in enumerate(payloads)
            }
            # responses arrive in completion order; store each one at its
            # payload position so the merged output follows the input order
            for future in tqdm(as_completed(future2pos), total=len(future2pos)):
                pos = future2pos[future]
                results[pos] = future.result()
        first_res = results[0]
        resp = {"mean": np.hstack([res["mean"] for res in results])}
        for k in ('sizes', 'anomaly', 'y'):
            if k in first_res:
                resp[k] = np.hstack([res[k] for res in results])
        first_intervals = first_res.get("intervals")
        if first_intervals is None:
            resp["intervals"] = None
        else:
            resp["intervals"] = {
                k: np.hstack([res["intervals"][k] for res in results])
                for k in first_intervals
            }
        # a key holding None means "no weights"; collecting it per partition
        # would produce [None, None, ...], which downstream code would treat
        # as actual weights
        if first_res.get("weights_x") is None:
            resp["weights_x"] = None
        else:
            resp["weights_x"] = [res["weights_x"] for res in results]
        return resp

    def _get_model_params(self, model: str, freq: str) -> Tuple[int, int]:
        key = (model, freq)
        if key not in self._model_params:
            logger.info('Querying model metadata...')
            payload = {'model': model, 'freq': freq}
            with httpx.Client(**self._client_kwargs) as httpx_client:
                client = HttpClient(httpx_client=httpx_client)
                params = self._make_request(client, 'model_params', payload)['detail']
            self._model_params[key] = (params['input_size'], params['horizon'])
        return self._model_params[key]

    @staticmethod
    def _maybe_infer_freq(
        df: DataFrame,
        freq: Optional[str],
        id_col: str,
        time_col: str,
    ) -> str:
        if freq is not None and freq not in ['W', 'M', 'Q', 'Y', 'A']:
            return freq
        if isinstance(df, pl_DataFrame):
            raise ValueError(
                "Cannot infer frequency for a polars DataFrame, please set the "
                "`freq` argument to a valid polars offset.\nYou can find them at "
                "https://pola-rs.github.io/polars/py-polars/html/reference/expressions/api/polars.Expr.dt.offset_by.html"
            )
        assert isinstance(df, pd.DataFrame)
        sizes = df[id_col].value_counts(sort=True)
        times = df.loc[df[id_col] == sizes.index[0], time_col]
        if times.dt.tz is not None:
            times = times.dt.tz_convert('UTC').dt.tz_localize(None)
        inferred_freq = pd.infer_freq(times)
        if inferred_freq is None:
            raise RuntimeError(
                'Could not infer the frequency of the time column. This could be due '
                'to inconsistent intervals. Please check your data for missing, '
                'duplicated or irregular timestamps'
            )
        if freq is not None:
            # check we have the same base frequency
            # except when we have yearly frequency (A, and Y means the same)
            if (freq != inferred_freq[0] and freq != 'Y') or (freq == 'Y' and inferred_freq[0] != 'A'):
                raise RuntimeError(f'Failed to infer special date, inferred freq {inferred_freq}')
        logger.info(f'Inferred freq: {inferred_freq}')
        return inferred_freq

    @staticmethod
    def _standardize_freq(freq: str) -> str:
        return freq.replace('mo', 'MS')

    @staticmethod
    def _tail(proc: ufp.ProcessedDF, n: int) -> ufp.ProcessedDF:
        """Keep at most the last `n` rows of every serie in `proc`.

        The result reuses `proc.uids` and `proc.last_times`, holds freshly
        allocated `data`/`indptr` and has `sort_idxs=None`.
        """
        serie_ends = proc.indptr[1:]
        kept_sizes = np.minimum(np.diff(proc.indptr), n)
        offsets = np.append(0, kept_sizes.cumsum())
        trimmed = np.empty_like(proc.data, shape=(offsets[-1], proc.data.shape[1]))
        # copy the trailing window of each serie into its slot of the output
        for start, stop, src_end, size in zip(
            offsets[:-1], offsets[1:], serie_ends, kept_sizes
        ):
            trimmed[start:stop] = proc.data[src_end - size : src_end]
        return ufp.ProcessedDF(
            uids=proc.uids,
            last_times=proc.last_times,
            data=trimmed,
            indptr=offsets,
            sort_idxs=None,
        )

    @staticmethod
    def _partition_series(
        payload: Dict[str, Any], n_part: int, h: int
    ) -> List[Dict[str, Any]]:
        parts = []
        series = payload.pop("series")
        n_series = len(series["sizes"])
        n_part = min(n_part, n_series)
        series_per_part = math.ceil(n_series / n_part)
        prev_size = 0
        for i in range(0, n_series, series_per_part):
            sizes = series["sizes"][i : i + series_per_part]
            curr_size = sum(sizes)
            part_idxs = slice(prev_size, prev_size + curr_size)
            prev_size += curr_size
            part_series = {
                "y": series["y"][part_idxs],
                "sizes": sizes,
            }
            if series["X"] is None:
                part_series["X"] = None
                if h > 0:
                    part_series["X_future"] = None
            else:
                part_series["X"] = [x[part_idxs] for x in series["X"]]
                if h > 0:
                    part_series["X_future"] = [
                        x[i * h : (i + series_per_part) * h] for x in series["X_future"]
                    ]
            parts.append({"series": part_series, **payload})
        return parts

    @staticmethod
    def _maybe_add_date_features(
        df: DataFrame,
        X_df: Optional[DataFrame],
        features: Union[bool, List[Union[str, Callable]]],
        one_hot: Union[bool, List[str]],
        freq: str,
        h: int,
        id_col: str,
        time_col: str,
        target_col: str,
    ) -> Tuple[DataFrame, Optional[DataFrame]]:
        if not features:
            return df, X_df
        if isinstance(features, list):
            date_features = features
        else:
            date_features = _date_features_by_freq.get(freq, [])
            if not date_features:
                warnings.warn(
                    f'Non default date features for {freq} '
                    'please provide a list of date features'
                )
        # add features
        if X_df is None:
            df, X_df = time_features(
                df=df,
                freq=freq,
                features=date_features,
                h=h,
                id_col=id_col,
                time_col=time_col,
            )
        else:
            df = _add_time_features(df, features=date_features, time_col=time_col)
            X_df = _add_time_features(X_df, features=date_features,time_col=time_col)
        # one hot
        if isinstance(one_hot, list):
            features_one_hot = one_hot
        elif one_hot:
            features_one_hot = [f for f in date_features if not callable(f)]
        else:
            features_one_hot = []
        if features_one_hot:
            X_df = ufp.assign_columns(X_df, target_col, 0)
            full_df = ufp.vertical_concat([df, X_df])
            if isinstance(full_df, pd.DataFrame):
                full_df = pd.get_dummies(
                    full_df, columns=features_one_hot, dtype='float32'
                )
            else:
                full_df = full_df.to_dummies(columns=features_one_hot)
            df = ufp.take_rows(full_df, slice(0, df.shape[0]))
            X_df = ufp.take_rows(full_df, slice(df.shape[0], full_df.shape[0]))
            X_df = ufp.drop_columns(X_df, target_col)
        return df, X_df

    @staticmethod
    def _validate_exog(
        df: DataFrame,
        X_df: Optional[DataFrame],
        id_col: str,
        time_col: str,
        target_col: str,
    ) -> Tuple[DataFrame, Optional[DataFrame], Optional[List[str]]]:
        exogs_df = [c for c in df.columns if c not in (id_col, time_col, target_col)]
        if X_df is None:
            if exogs_df:
                warnings.warn(
                    f'`df` contains the following exogenous features: {exogs_df}, '
                    'but `X_df` was not provided. They will be ignored.'
                )
                df = df[[id_col, time_col, target_col]]
            return df, None, None
        exogs_X = [c for c in X_df.columns if c not in (id_col, time_col)]
        missing_df = set(exogs_X) - set(exogs_df)
        if missing_df:
            raise ValueError(
                'The following exogenous features are present in `X_df` '
                f'but not in `df`: {missing_df}.'
            )
        missing_X_df = set(exogs_df) - set(exogs_X)
        if missing_X_df:
            raise ValueError(
                'The following exogenous features are present in `df` '
                f'but not in `X_df`: {missing_X_df}.'
            )
        if exogs_df != exogs_X:
            # rearrange columns
            X_df = X_df[[id_col, time_col, *exogs_df]]
        return df, X_df, exogs_df

    @staticmethod
    def _validate_input_size(
        df: DataFrame,
        id_col: str,
        model_input_size: int,
        model_horizon: int,
    ) -> None:
        """Raise `ValueError` if the shortest serie has fewer than
        `model_input_size + model_horizon` rows."""
        required = model_input_size + model_horizon
        shortest = ufp.counts_by_id(df, id_col)['counts'].min()
        if shortest >= required:
            return
        raise ValueError(
            'Your time series data is too short '
            'Please make sure that your each serie contains '
            f'at least {required} observations.'
        )

    @staticmethod
    def _prepare_level_and_quantiles(
        level: Optional[List[Union[int, float]]], 
        quantiles: Optional[List[float]],
    ) -> Tuple[List[Union[int, float]], Optional[List[float]]]:
        if level is not None and quantiles is not None:
            raise ValueError(
                "You should provide `level` or `quantiles`, but not both."
            )
        if quantiles is None:
            return level, quantiles
        # we recover level from quantiles
        if not all(0 < q < 1 for q in quantiles):
            raise ValueError("`quantiles` should be floats between 0 and 1.")
        level = [abs(int(100 - 200 * q)) for q in quantiles]
        return level, quantiles

    @staticmethod
    def _maybe_convert_level_to_quantiles(
        df: DataFrame,
        quantiles: Optional[List[float]],
    ) -> DataFrame:
        if quantiles is None:
            return df
        out_cols = [c for c in df.columns if '-lo-' not in c and '-hi-' not in c]
        df = ufp.copy_if_pandas(df, deep=False)
        for q in sorted(quantiles):
            if q == 0.5:
                col = 'TimeGPT'
            else:
                lv = int(100 - 200 * q)
                hi_or_lo = 'lo' if lv > 0 else 'hi'
                lv = abs(lv)
                col = f"TimeGPT-{hi_or_lo}-{lv}"
            q_col = f"TimeGPT-q-{int(q * 100)}"
            df = ufp.assign_columns(df, q_col, df[col])
            out_cols.append(q_col)
        return df[out_cols]

    @staticmethod
    def _forecast_payload_to_in_sample(payload):
        in_sample_payload = {
            k: v
            for k, v in payload.items()
            if k not in ('h', 'finetune_steps', 'finetune_loss')
        }
        del in_sample_payload['series']['X_future']
        return in_sample_payload

    @staticmethod
    def _maybe_add_intervals(
        df: DataFrame,
        intervals: Optional[Dict[str, list[float]]],
    ) -> DataFrame:
        if intervals is None:
            return df
        intervals_df = type(df)({f'TimeGPT-{k}': v for k, v in intervals.items()})
        return ufp.horizontal_concat([df, intervals_df])

    @staticmethod
    def _parse_in_sample_output(
        in_sample_output: Dict[str, Union[list[float], Dict[str, list[float]]]],
        df: DataFrame,
        processed: ufp.ProcessedDF,
        id_col: str,
        time_col: str,
        target_col: str,
    ) -> DataFrame:
        """Turn the historic forecast response into a dataframe.

        For each serie, the last `size` rows (as reported in
        `in_sample_output['sizes']`) of its times and targets are paired with
        the returned fitted values ('TimeGPT') and intervals, if any.
        """
        sizes = in_sample_output['sizes']
        times = df[time_col].to_numpy()
        targets = df[target_col].to_numpy()
        order = processed.sort_idxs
        if order is not None:
            # bring the raw columns to the order used by `processed`
            times, targets = times[order], targets[order]
        # positions of the trailing `size` rows of every serie
        windows = [
            np.arange(end - size, end)
            for end, size in zip(processed.indptr[1:], sizes, strict=True)
        ]
        keep_idxs = np.hstack(windows)
        columns = {
            id_col: ufp.repeat(processed.uids, sizes),
            time_col: times[keep_idxs],
            target_col: targets[keep_idxs],
            'TimeGPT': in_sample_output['mean'],
        }
        fitted = type(df)(columns)
        return NixtlaClient._maybe_add_intervals(fitted, in_sample_output['intervals'])

    def _maybe_assign_weights(
        self,
        weights: Optional[Union[list[float, list[list[float]]]]],
        df: DataFrame,
        x_cols: Optional[List[str]],
    ) -> None:
        if weights is None:
            return
        if isinstance(weights[0], list):
            self.weights_x = [
                type(df)({'features': x_cols, 'weights': w}) for w in weights
            ]
        else:
            self.weights_x = type(df)(
                {'features': x_cols, 'weights': weights}
            )

    @staticmethod
    def _maybe_drop_id(df: DataFrame, id_col: str, drop: bool) -> DataFrame:
        if drop:
            df = ufp.drop_columns(df, id_col)
        return df

    def _run_validations(
        self,
        df: DataFrame,
        id_col: str,
        time_col: str,
        target_col: str,
        validate_api_key: bool,
    ) -> Tuple[DataFrame, bool]:
        """Validate the input dataframe and bring it to the expected layout.

        Parameters
        ----------
        df : pandas or polars DataFrame
            Input data.
        id_col, time_col, target_col : str
            Names of the id, time and target columns.
        validate_api_key : bool
            Whether to check the API key before anything else.

        Returns
        -------
        tuple
            The validated dataframe and a flag telling whether `id_col` was
            added here (a constant 0 for single-serie inputs) and should be
            dropped from the results.

        Raises
        ------
        Exception
            If `validate_api_key` is True and the key isn't valid.
        ValueError
            If the target column contains missing values.
        """
        if validate_api_key and not self.validate_api_key(log=False):
            raise Exception('API Key not valid, please email ops@nixtla.io')
        drop_id = id_col not in df.columns
        if drop_id:
            df = ufp.copy_if_pandas(df, deep=False)
            df = ufp.assign_columns(df, id_col, 0)
        if (
            isinstance(df, pd.DataFrame)
            and time_col not in df
            and pd.api.types.is_datetime64_any_dtype(df.index)
        ):
            # the timestamps live in the index: expose them as `time_col`.
            # `rename_axis` returns a new object, so the index of the frame
            # the caller passed in keeps its original name
            df = df.rename_axis(index=time_col).reset_index()
        validate_format(df=df, id_col=id_col, time_col=time_col, target_col=target_col)
        if ufp.is_nan_or_none(df[target_col]).any():
            raise ValueError(f'Target column ({target_col}) cannot contain missing values.')
        return df, drop_id

    def _preprocess(
        self,
        df: DataFrame,
        X_df: Optional[DataFrame],
        h: int,
        freq: str,
        date_features: Union[bool, List[Union[str, Callable]]],
        date_features_to_one_hot: Union[bool, List[str]],
        id_col: str,
        time_col: str,
        target_col: str,
    ) -> Tuple[ufp.ProcessedDF, Optional[List[List[Any]]]]:
        """Add the date features and convert the dataframes to request inputs.

        Parameters
        ----------
        df : pandas or polars DataFrame
            Historic data.
        X_df : pandas or polars DataFrame, optional
            Future exogenous features.
        h : int
            Forecast horizon.
        freq : str
            Frequency of the data.
        date_features : bool or list of str or callable
            Date features to add.
        date_features_to_one_hot : bool or list of str
            Date features to one-hot encode.
        id_col, time_col, target_col : str
            Names of the id, time and target columns.

        Returns
        -------
        tuple
            The processed historic data and the future exogenous values as a
            nested list with one inner list per feature, or None when there
            are no future exogenous features.
        """
        df, X_df = self._maybe_add_date_features(
            df=df,
            X_df=X_df,
            features=date_features,
            one_hot=date_features_to_one_hot,
            freq=freq,
            h=h,
            id_col=id_col,
            time_col=time_col,
            target_col=target_col,
        )
        processed = ufp.process_df(
            df=df, id_col=id_col, time_col=time_col, target_col=target_col
        )
        if X_df is None:
            return processed, None
        processed_X = ufp.process_df(
            df=X_df, id_col=id_col, time_col=time_col, target_col=None,
        )
        # transpose so that each inner list holds the values of one feature
        X_future = processed_X.data.T.tolist()
        return processed, X_future

    def validate_api_key(self, log: bool = True) -> bool:
        """Returns True if your api_key is valid.

        Parameters
        ----------
        log : bool (default=True)
            Whether to log the support message included in the response.

        Returns
        -------
        bool
            False when the request fails for any reason or the response
            doesn't report success.
        """
        try:
            with httpx.Client(**self._client_kwargs) as httpx_client:
                client = HttpClient(httpx_client=httpx_client)
                validation = self._make_request(client, 'validate_token', {})
        except Exception:
            # best effort: any failure counts as an invalid key. Catching
            # Exception (not a bare except) lets KeyboardInterrupt and
            # SystemExit propagate.
            validation = {}
        if 'support' in validation and log:
            logger.info(f'Happy Forecasting! :), {validation["support"]}')
        return (
            validation.get('message', '') == 'success'
            or 'Forecasting! :)' in validation.get('detail', '')
        )

    def forecast(
        self,
        df: DataFrame,
        h: PositiveInt,
        freq: Optional[str] = None,    
        id_col: str = 'unique_id',
        time_col: str = 'ds',
        target_col: str = 'y',
        X_df: Optional[DataFrame] = None,
        level: Optional[List[Union[int, float]]] = None,
        quantiles: Optional[List[float]] = None,
        finetune_steps: NonNegativeInt = 0,
        finetune_loss: _LOSS = 'default',
        clean_ex_first: bool = True,
        validate_api_key: bool = False,
        add_history: bool = False,
        date_features: Union[bool, List[Union[str, Callable]]] = False,
        date_features_to_one_hot: Union[bool, List[str]] = False,
        model: _MODEL = 'timegpt-1',
        num_partitions: Optional[int] = None,
    ) -> DataFrame:
        """Forecast your time series using TimeGPT.

        Parameters
        ----------
        df : pandas or polars DataFrame
            The DataFrame on which the function will operate. Expected to contain at least the following columns:
            - time_col:
                Column name in `df` that contains the time indices of the time series. This is typically a datetime
                column with regular intervals, e.g., hourly, daily, monthly data points.
            - target_col:
                Column name in `df` that contains the target variable of the time series, i.e., the variable we
                wish to predict or analyze.
            Additionally, you can pass multiple time series (stacked in the dataframe) considering an additional column:
            - id_col:
                Column name in `df` that identifies unique time series. Each unique value in this column
                corresponds to a unique time series.
        h : int
            Forecast horizon.
        freq : str, optional (default=None)
            Frequency of the data. By default, the freq will be inferred automatically
            (only possible for pandas dataframes).
            See [pandas' available frequencies](https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html#offset-aliases).
        id_col : str (default='unique_id')
            Column that identifies each serie.
        time_col : str (default='ds')
            Column that identifies each timestep, its values can be timestamps or integers.
        target_col : str (default='y')
            Column that contains the target.
        X_df : pandas or polars DataFrame, optional (default=None)
            DataFrame with [`unique_id`, `ds`] columns and `df`'s future exogenous.
        level : List[float], optional (default=None)
            Confidence levels between 0 and 100 for prediction intervals.
        quantiles : List[float], optional (default=None)
            Quantiles to forecast, list between (0, 1).
            `level` and `quantiles` should not be used simultaneously.
            The output dataframe will have the quantile columns
            formatted as TimeGPT-q-(100 * q) for each q.
            100 * q represents percentiles but we choose this notation
            to avoid having dots in column names.
        finetune_steps : int (default=0)
            Number of steps used to finetune learning TimeGPT in the
            new data.
        finetune_loss : str (default='default')
            Loss function to use for finetuning. Options are: `default`, `mae`, `mse`, `rmse`, `mape`, and `smape`.
        clean_ex_first : bool (default=True)
            Clean exogenous signal before making forecasts using TimeGPT.
        validate_api_key : bool (default=False)
            If True, validates api_key before sending requests.
        add_history : bool (default=False)
            Return fitted values of the model.
        date_features : bool or list of str or callable, optional (default=False)
            Features computed from the dates.
            Can be pandas date attributes or functions that will take the dates as input.
            If True automatically adds most used date features for the
            frequency of `df`.
        date_features_to_one_hot : bool or list of str (default=False)
            Apply one-hot encoding to these date features.
            If True, all the non-callable date features are one-hot encoded.
        model : str (default='timegpt-1')
            Model to use as a string. Options are: `azureai`, `timegpt-1`, and `timegpt-1-long-horizon`.
            We recommend using `timegpt-1-long-horizon` for forecasting
            if you want to predict more than one seasonal
            period given the frequency of your data.
        num_partitions : int (default=None)
            Number of partitions to split the series into. Each partition is sent
            in a separate request and up to 10 requests are made concurrently.
            If None, a single request with all the series is made.

        Returns
        -------
        pandas or polars DataFrame
            DataFrame with TimeGPT forecasts for point predictions and probabilistic
            predictions (if level is not None). If `add_history=True` the fitted
            values are included as well.

        Raises
        ------
        ValueError
            If both `level` and `quantiles` are provided, a quantile is outside (0, 1),
            the target has missing values, or the series are too short for the requested settings.
        Exception
            If `validate_api_key=True` and the API key isn't valid.

        Notes
        -----
        Any previous `weights_x` attribute is removed at the start of the call.
        When the response includes weights for the exogenous features they are
        stored in `self.weights_x`.
        """
        # discard the weights of a previous call
        self.__dict__.pop('weights_x', None)
        logger.info('Validating inputs...')
        df, drop_id = self._run_validations(
            df=df,
            id_col=id_col,
            time_col=time_col,
            target_col=target_col,
            validate_api_key=validate_api_key,
        )
        df, X_df, x_cols = self._validate_exog(
            df, X_df, id_col=id_col, time_col=time_col, target_col=target_col
        )
        level, quantiles = self._prepare_level_and_quantiles(level, quantiles)
        freq = self._maybe_infer_freq(df, freq=freq, id_col=id_col, time_col=time_col)
        # `standard_freq` is what gets sent to the API and used for the date
        # features; `freq` is kept to build the future timestamps of the output
        standard_freq = self._standardize_freq(freq)
        model_input_size, model_horizon = self._get_model_params(model, standard_freq)
        if finetune_steps > 0 or level is not None or add_history:
            self._validate_input_size(df, id_col, model_input_size, model_horizon)
        if h > model_horizon:
            logger.warning(
                'The specified horizon "h" exceeds the model horizon. '
                'This may lead to less accurate forecasts. '
                'Please consider using a smaller horizon.'  
            )

        logger.info('Preprocessing dataframes...')
        processed, X_future = self._preprocess(
            df=df,
            X_df=X_df,
            h=h,
            freq=standard_freq,
            date_features=date_features,
            date_features_to_one_hot=date_features_to_one_hot,
            id_col=id_col,
            time_col=time_col,
            target_col=target_col,
        )
        # only the tail of each serie is sent when there's no finetuning,
        # no user-provided exogenous features and no history was requested
        restrict_input = finetune_steps == 0 and X_df is None and not add_history
        if restrict_input:
            logger.info('Restricting input...')
            new_input_size = _restrict_input_samples(
                level=level,
                input_size=model_input_size,
                model_horizon=model_horizon,
                h=h,
            )
            processed = self._tail(processed, new_input_size)
        # the first column of `processed.data` is sent as the target ('y');
        # any remaining columns are sent as the historic exogenous features
        if processed.data.shape[1] > 1:
            X = processed.data[:, 1:].T.tolist()
        else:
            X = None

        logger.info('Calling Forecast Endpoint...')
        payload = {
            'series': {
                'y': processed.data[:, 0].tolist(),
                'sizes': np.diff(processed.indptr).tolist(),
                'X': X,
                'X_future': X_future,
            },
            'model': model,
            'h': h,
            'freq': standard_freq,
            'clean_ex_first': clean_ex_first,
            'level': level,
            'finetune_steps': finetune_steps,
            'finetune_loss': finetune_loss,
        }
        with httpx.Client(**self._client_kwargs) as httpx_client:
            client = HttpClient(httpx_client=httpx_client)
            if num_partitions is None:
                # single request with all the series
                resp = self._make_request(client, 'v2/forecast', payload)
                if add_history:
                    in_sample_payload = self._forecast_payload_to_in_sample(payload)
                    logger.info('Calling Historical Forecast Endpoint...')
                    in_sample_resp = self._make_request(
                        client, 'v2/historic_forecast', in_sample_payload,
                    )
            else:
                # one request per partition, merged back in the original order
                payloads = self._partition_series(payload, num_partitions, h)
                resp = self._make_partitioned_requests(client, 'v2/forecast', payloads)
                if add_history:
                    in_sample_payloads = [
                        self._forecast_payload_to_in_sample(p) for p in payloads
                    ]
                    logger.info('Calling Historical Forecast Endpoint...')
                    in_sample_resp = self._make_partitioned_requests(
                        client, 'v2/historic_forecast', in_sample_payloads,
                    )

        # assemble result
        out = ufp.make_future_dataframe(
            uids=processed.uids,
            last_times=type(processed.uids)(processed.last_times),
            freq=freq,
            h=h,
            id_col=id_col,
            time_col=time_col,
        )
        out = ufp.assign_columns(out, 'TimeGPT', resp['mean'])
        out = self._maybe_add_intervals(out, resp['intervals'])
        out = self._maybe_convert_level_to_quantiles(out, quantiles)
        if add_history:
            in_sample_df = self._parse_in_sample_output(
                in_sample_output=in_sample_resp,
                df=df,
                processed=processed,
                id_col=id_col,
                time_col=time_col,
                target_col=target_col,
            )
            # the fitted values are stacked with the forecasts, which have no target
            in_sample_df = ufp.drop_columns(in_sample_df, target_col)
            out = ufp.vertical_concat([in_sample_df, out])
            out = ufp.sort(out, by=[id_col, time_col])
        out = self._maybe_drop_id(df=out, id_col=id_col, drop=drop_id)
        self._maybe_assign_weights(weights=resp['weights_x'], df=df, x_cols=x_cols)
        return out

    def detect_anomalies(
        self,
        df: DataFrame,
        freq: Optional[str] = None,
        id_col: str = 'unique_id',
        time_col: str = 'ds',
        target_col: str = 'y',
        level: Union[int, float] = 99,
        clean_ex_first: bool = True,
        validate_api_key: bool = False,
        date_features: Union[bool, List[str]] = False,
        date_features_to_one_hot: Union[bool, List[str]] = False,
        model: str = 'timegpt-1',
        num_partitions: Optional[int] = None,
    ):
        """Flag anomalous observations of your time series with TimeGPT.

        Parameters
        ----------
        df : pandas or polars DataFrame
            Input data. It must contain at least these columns:
            - time_col:
                Time index of the series, typically a datetime column
                with regular intervals (hourly, daily, monthly, ...).
            - target_col:
                Target variable of the series, i.e. the values to analyze.
            Several series can be stacked in the same dataframe by adding:
            - id_col:
                Identifier of the series each row belongs to.
        freq : str, optional (default=None)
            Frequency of the data. When not given it is inferred from `df`.
            See [pandas' available frequencies](https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html#offset-aliases).
        id_col : str (default='unique_id')
            Column that identifies each serie.
        time_col : str (default='ds')
            Column that identifies each timestep, its values can be timestamps or integers.
        target_col : str (default='y')
            Column that contains the target.
        level : float (default=99)
            Confidence level between 0 and 100 for detecting the anomalies.
        clean_ex_first : bool (default=True)
            Clean the exogenous signal before running TimeGPT.
        validate_api_key : bool (default=False)
            If True, validates api_key before sending requests.
        date_features : bool or list of str or callable, optional (default=False)
            Features computed from the dates.
            Can be pandas date attributes or functions that take the dates as input.
            If True, the most used date features for the frequency
            of `df` are added automatically.
        date_features_to_one_hot : bool or list of str (default=False)
            Date features to one-hot encode.
            If `date_features=True`, all date features are
            one-hot encoded by default.
        model : str (default='timegpt-1')
            Model to use as a string. Options are: `timegpt-1`, and `timegpt-1-long-horizon`.
        num_partitions : int (default=None)
            Number of partitions to split the request into.
            If None, a single request is sent.

        Returns
        -------
        pandas or polars DataFrame
            In-sample output of the service plus an `anomaly` column
            holding the flags returned by TimeGPT.
        """
        # forget the weights stored by a previous call on this client
        self.__dict__.pop('weights_x', None)
        df, drop_id = self._run_validations(
            df=df,
            id_col=id_col,
            time_col=time_col,
            target_col=target_col,
            validate_api_key=validate_api_key,
        )
        freq = self._maybe_infer_freq(df, freq=freq, id_col=id_col, time_col=time_col)
        standard_freq = self._standardize_freq(freq)
        # the returned sizes are not needed here; the call itself is kept
        _input_size, _horizon = self._get_model_params(model, standard_freq)

        logger.info('Preprocessing dataframes...')
        processed, _ = self._preprocess(
            df=df,
            X_df=None,
            h=0,
            freq=standard_freq,
            date_features=date_features,
            date_features_to_one_hot=date_features_to_one_hot,
            id_col=id_col,
            time_col=time_col,
            target_col=target_col,
        )
        values = processed.data
        # column 0 is sent as the target, any further column as exogenous data
        has_exogenous = values.shape[1] > 1
        if has_exogenous:
            reserved_cols = (id_col, time_col, target_col)
            X = values[:, 1:].T.tolist()
            x_cols = [col for col in df.columns if col not in reserved_cols]
        else:
            X = None
            x_cols = None

        logger.info('Calling Anomaly Detector Endpoint...')
        series = {
            'y': values[:, 0].tolist(),
            'sizes': np.diff(processed.indptr).tolist(),
            'X': X,
        }
        payload = {
            'series': series,
            'model': model,
            'freq': standard_freq,
            'clean_ex_first': clean_ex_first,
            'level': level,
        }
        endpoint = 'v2/anomaly_detection'
        with httpx.Client(**self._client_kwargs) as httpx_client:
            client = HttpClient(httpx_client=httpx_client)
            if num_partitions is not None:
                payloads = self._partition_series(payload, num_partitions, h=0)
                resp = self._make_partitioned_requests(client, endpoint, payloads)
            else:
                resp = self._make_request(client, endpoint, payload)

        # assemble result
        result = self._parse_in_sample_output(
            in_sample_output=resp,
            df=df,
            processed=processed,
            id_col=id_col,
            time_col=time_col,
            target_col=target_col,
        )
        result = ufp.assign_columns(result, 'anomaly', resp['anomaly'])
        result = self._maybe_drop_id(df=result, id_col=id_col, drop=drop_id)
        self._maybe_assign_weights(weights=resp['weights_x'], df=df, x_cols=x_cols)
        return result


    def cross_validation(
        self,
        df: DataFrame,
        h: int,
        freq: Optional[str] = None,
        id_col: str = "unique_id",
        time_col: str = "ds",
        target_col: str = "y",
        level: Optional[List[Union[int, float]]] = None,
        quantiles: Optional[List[float]] = None,
        validate_api_key: bool = False,
        n_windows: int = 1,
        step_size: Optional[int] = None,
        finetune_steps: int = 0,
        finetune_loss: str = 'default',
        clean_ex_first: bool = True,
        date_features: Union[bool, List[str]] = False,
        date_features_to_one_hot: Union[bool, List[str]] = False,
        model: str = 'timegpt-1',
        num_partitions: Optional[int] = None,
    ):
        """Perform cross validation in your time series using TimeGPT.
        
        Parameters
        ----------
        df : pandas or polars DataFrame
            The DataFrame on which the function will operate. Expected to contain at least the following columns:
            - time_col:
                Column name in `df` that contains the time indices of the time series. This is typically a datetime
                column with regular intervals, e.g., hourly, daily, monthly data points.
            - target_col:
                Column name in `df` that contains the target variable of the time series, i.e., the variable we
                wish to predict or analyze.
            Additionally, you can pass multiple time series (stacked in the dataframe) considering an additional column:
            - id_col:
                Column name in `df` that identifies unique time series. Each unique value in this column
                corresponds to a unique time series.
        h : int
            Forecast horizon.
        freq : str, optional (default=None)
            Frequency of the data. By default, the freq will be inferred automatically.
            See [pandas' available frequencies](https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html#offset-aliases).
        id_col : str (default='unique_id')
            Column that identifies each serie.
        time_col : str (default='ds')
            Column that identifies each timestep, its values can be timestamps or integers.
        target_col : str (default='y')
            Column that contains the target.
        level : List[float], optional (default=None)
            Confidence levels between 0 and 100 for prediction intervals.
        quantiles : List[float], optional (default=None)
            Quantiles to forecast, list between (0, 1).
            `level` and `quantiles` should not be used simultaneously.
            The output dataframe will have the quantile columns
            formatted as TimeGPT-q-(100 * q) for each q.
            100 * q represents percentiles but we choose this notation
            to avoid having dots in column names.
        validate_api_key : bool (default=False)
            If True, validates api_key before sending requests.
        n_windows : int (default=1)
            Number of windows to evaluate.
        step_size : int, optional (default=None)
            Step size between each cross validation window. If None it will be equal to `h`.
        finetune_steps : int (default=0)
            Number of steps used to finetune TimeGPT in the
            new data.
        finetune_loss : str (default='default')
            Loss function to use for finetuning. Options are: `default`, `mae`, `mse`, `rmse`, `mape`, and `smape`.
        clean_ex_first : bool (default=True)
            Clean exogenous signal before making forecasts
            using TimeGPT.
        date_features : bool or list of str or callable, optional (default=False)
            Features computed from the dates.
            Can be pandas date attributes or functions that will take the dates as input.
            If True automatically adds most used date features for the
            frequency of `df`.
        date_features_to_one_hot : bool or list of str (default=False)
            Apply one-hot encoding to these date features.
            If `date_features=True`, then all date features are
            one-hot encoded by default.
        model : str (default='timegpt-1')
            Model to use as a string. Options are: `timegpt-1`, and `timegpt-1-long-horizon`. 
            We recommend using `timegpt-1-long-horizon` for forecasting 
            if you want to predict more than one seasonal 
            period given the frequency of your data.
        num_partitions : int (default=None)
            Number of partitions to split the request into.
            If None, a single request is sent.
        
        Returns
        -------
        pandas or polars DataFrame
            DataFrame with cross validation forecasts.
        """
        df, drop_id = self._run_validations(
            df=df,
            id_col=id_col,
            time_col=time_col,
            target_col=target_col,
            validate_api_key=validate_api_key,
        )
        freq = self._maybe_infer_freq(df, freq=freq, id_col=id_col, time_col=time_col)
        standard_freq = self._standardize_freq(freq)
        level, quantiles = self._prepare_level_and_quantiles(level, quantiles)
        # The returned sizes are not used by this method; the call is kept
        # because it presumably rejects unsupported models (TODO confirm).
        self._get_model_params(model, standard_freq)
        if step_size is None:
            step_size = h

        logger.info('Preprocessing dataframes...')
        processed, _ = self._preprocess(
            df=df,
            X_df=None,
            h=0,
            freq=standard_freq,
            date_features=date_features,
            date_features_to_one_hot=date_features_to_one_hot,
            id_col=id_col,
            time_col=time_col,
            target_col=target_col,
        )
        # column 0 is sent as the target, any further column as exogenous data
        if processed.data.shape[1] > 1:
            X = processed.data[:, 1:].T.tolist()
        else:
            X = None

        logger.info('Calling Cross Validation Endpoint...')
        payload = {
            'series': {
                'y': processed.data[:, 0].tolist(),
                'sizes': np.diff(processed.indptr).tolist(),
                'X': X,
            },
            'model': model,
            'h': h,
            'n_windows': n_windows,
            'step_size': step_size,
            'freq': standard_freq,
            'clean_ex_first': clean_ex_first,
            'level': level,
            'finetune_steps': finetune_steps,
            'finetune_loss': finetune_loss,
        }
        with httpx.Client(**self._client_kwargs) as httpx_client:
            client = HttpClient(httpx_client=httpx_client)
            if num_partitions is None:
                resp = self._make_request(client, 'v2/cross_validation', payload)
            else:
                payloads = self._partition_series(payload, num_partitions, h=0)
                resp = self._make_partitioned_requests(client, 'v2/cross_validation', payloads)

        # assemble result: build the (id, time, cutoff) frame for every window
        # and attach the values returned by the service
        out = ufp.cv_times(
            times=df[time_col].to_numpy(),
            uids=processed.uids,
            indptr=processed.indptr,
            h=h,
            test_size=h + step_size * (n_windows - 1),
            step_size=step_size,
        )
        out = ufp.assign_columns(out, 'y', resp['y'])
        out = ufp.assign_columns(out, 'TimeGPT', resp['mean'])
        out = self._maybe_add_intervals(out, resp['intervals'])
        out = self._maybe_drop_id(df=out, id_col=id_col, drop=drop_id)
        return self._maybe_convert_level_to_quantiles(out, quantiles)

    def plot(
        self,
        df: Optional[DataFrame] = None,
        forecasts_df: Optional[DataFrame] = None,
        id_col: str = 'unique_id',
        time_col: str = 'ds',
        target_col: str = 'y',
        unique_ids: Union[Optional[List[str]], np.ndarray] = None,
        plot_random: bool = True,
        models: Optional[List[str]] = None,
        level: Optional[List[float]] = None,
        max_insample_length: Optional[int] = None,
        plot_anomalies: bool = False,
        engine: str = 'matplotlib',
        resampler_kwargs: Optional[Dict] = None,
    ):
        """Plot forecasts and insample values.

        Parameters
        ----------
        df : pandas or polars DataFrame, optional (default=None)
            The DataFrame on which the function will operate. Expected to contain at least the following columns:
            - time_col:
                Column name in `df` that contains the time indices of the time series. This is typically a datetime
                column with regular intervals, e.g., hourly, daily, monthly data points.
            - target_col:
                Column name in `df` that contains the target variable of the time series, i.e., the variable we 
                wish to predict or analyze.
            Additionally, you can pass multiple time series (stacked in the dataframe) considering an additional column:
            - id_col:
                Column name in `df` that identifies unique time series. Each unique value in this column
                corresponds to a unique time series.
        forecasts_df : pandas or polars DataFrame, optional (default=None)
            DataFrame with columns [`unique_id`, `ds`] and models.
            If it contains an `anomaly` column (output of `detect_anomalies`),
            `df` is ignored and the anomalies are plotted using the first
            `TimeGPT-lo-<level>` column to determine the level.
        id_col : str (default='unique_id')
            Column that identifies each serie. If missing from a dataframe,
            a constant id `'ts_0'` is added to a copy of it.
        time_col : str (default='ds')
            Column that identifies each timestep, its values can be timestamps or integers.
        target_col : str (default='y')
            Column that contains the target.
        unique_ids : List[str], optional (default=None)
            Time Series to plot.
            If None, time series are selected randomly.
        plot_random : bool (default=True)
            Select time series to plot randomly.
        models : List[str], optional (default=None)
            List of models to plot.
        level : List[float], optional (default=None)
            List of prediction intervals to plot if passed.
        max_insample_length : int, optional (default=None)
            Max number of train/insample observations to be plotted.
        plot_anomalies : bool (default=False)
            Plot anomalies for each prediction interval.
        engine : str (default='matplotlib')
            Library used to plot. 'plotly', 'plotly-resampler' or 'matplotlib'.
        resampler_kwargs : dict
            Kwargs to be passed to plotly-resampler constructor.
            For further customization ("show_dash") call the method,
            store the plotting object and add the extra arguments to
            its `show_dash` method.

        Returns
        -------
        The object returned by `utilsforecast.plotting.plot_series`.
        """
        try:
            from utilsforecast.plotting import plot_series
        except ModuleNotFoundError as err:
            # keep the original cause so the missing module is visible
            raise Exception(
                'You have to install additional dependencies to use this method, '
                'please install them using `pip install "nixtla[plotting]"`'
            ) from err
        if df is not None and id_col not in df.columns:
            df = ufp.copy_if_pandas(df, deep=False)
            df = ufp.assign_columns(df, id_col, 'ts_0')
        if forecasts_df is not None:
            # only add the placeholder id when there is none; overwriting an
            # existing id column would merge all the series into a single one
            if id_col not in forecasts_df.columns:
                forecasts_df = ufp.copy_if_pandas(forecasts_df, deep=False)
                forecasts_df = ufp.assign_columns(forecasts_df, id_col, 'ts_0')
            if 'anomaly' in forecasts_df.columns:
                # special case to plot outputs
                # from detect_anomalies
                df = None
                forecasts_df = ufp.drop_columns(forecasts_df, 'anomaly')
                # plain iteration works for both a pandas Index and the
                # list of column names of a polars dataframe
                lo_levels = [
                    col.replace('TimeGPT-lo-', '')
                    for col in forecasts_df.columns
                    if isinstance(col, str) and 'TimeGPT-lo-' in col
                ]
                first_level = lo_levels[0]
                level = [float(first_level) if '.' in first_level else int(first_level)]
                plot_anomalies = True
                models = ['TimeGPT']
        return plot_series(
            df=df,
            forecasts_df=forecasts_df,
            ids=unique_ids,
            plot_random=plot_random,
            models=models,
            level=level,
            max_insample_length=max_insample_length,
            plot_anomalies=plot_anomalies,
            engine=engine,
            resampler_kwargs=resampler_kwargs,
            palette="tab20b",
            id_col=id_col,
            time_col=time_col,
            target_col=target_col,
        )

In [None]:
show_doc(NixtlaClient.__init__, title_level=2, name='NixtlaClient')

In [None]:
#| hide
from contextlib import contextmanager

In [None]:
#| hide
@contextmanager
def delete_env_var(key):
    """Temporarily remove the environment variable `key`.

    The variable is deleted on entry and, if it existed, restored with
    its previous value on exit (also when the body raises). A variable
    that did not exist beforehand is left untouched on exit.
    """
    # values in os.environ are always strings, so None means "was not set"
    previous = os.environ.pop(key, None)
    try:
        yield
    finally:
        if previous is not None:
            os.environ[key] = previous
# test api_key fail
with delete_env_var('NIXTLA_API_KEY'), delete_env_var('TIMEGPT_TOKEN'):
    test_fail(
        lambda: NixtlaClient(),
        contains='NIXTLA_API_KEY',
    )

In [None]:
#| hide
nixtla_client = NixtlaClient()

In [None]:
show_doc(NixtlaClient.validate_api_key, title_level=2, name='NixtlaClient.validate_api_key')

In [None]:
#| hide
nixtla_client.validate_api_key()

In [None]:
#| hide
_nixtla_client = NixtlaClient(api_key="invalid")
test_eq(_nixtla_client.validate_api_key(), False)

In [None]:
#| hide
_nixtla_client = NixtlaClient(
    api_key=os.environ['NIXTLA_API_KEY_CUSTOM'], 
    base_url=os.environ['NIXTLA_BASE_URL_CUSTOM'],
)
_nixtla_client.validate_api_key()

In [None]:
#| hide
test_fail(
    lambda: NixtlaClient(api_key='transphobic').forecast(df=pd.DataFrame(), h=None, validate_api_key=True),
    contains='nixtla'
)

In [None]:
#| hide
# test input_size
test_eq(
    nixtla_client._get_model_params(model='timegpt-1', freq='D'),
    (28, 7),
)

Now you can start to make forecasts! Let's import an example:

In [None]:
#| hide
df = pd.read_csv(
    'https://raw.githubusercontent.com/Nixtla/transfer-learning-time-series/main/datasets/air_passengers.csv',
    parse_dates=['timestamp'],
)
df.head()

In [None]:
#| hide
from utilsforecast.data import generate_series

In [None]:
#| hide
# test date_features with multiple series
# and different ends
test_series = generate_series(n_series=2, min_length=5, max_length=20)
h = 12
fcst_test_series = nixtla_client.forecast(test_series, h=h, date_features=['dayofweek'])
# check each series once; iterating the raw column would repeat the
# same comparison for every row of the series
uids = test_series['unique_id'].unique()
for uid in uids:
    last_ds = test_series.query('unique_id == @uid')['ds'].max()
    test_eq(
        fcst_test_series.query('unique_id == @uid')['ds'].tolist(),
        # the forecast must start right after the last observed date
        pd.date_range(periods=h + 1, start=last_ds)[1:].tolist(),
    )

In [None]:
nixtla_client = NixtlaClient()

In [None]:
#| hide
# test quantiles
test_fail(
    lambda: nixtla_client.forecast(
        df=df, 
        h=12, 
        time_col='timestamp', 
        target_col='value', 
        level=[80], 
        quantiles=[0.2, 0.3]
    ),
    contains='not both'
)
test_qls = list(np.arange(0.1, 1, 0.1))
exp_q_cols = [f"TimeGPT-q-{int(100 * q)}" for q in test_qls]
def test_method_qls(method, **kwargs):
    """Check that `method` returns every expected quantile column.

    `method` is called on the module-level `df` with `quantiles=test_qls`
    plus any extra `kwargs`. The output must contain all the columns in
    `exp_q_cols` and, row by row, the quantile values must not decrease
    as the quantile grows.
    """
    df_qls = method(
        df=df, 
        h=12, 
        time_col='timestamp', 
        target_col='value', 
        quantiles=test_qls,
        **kwargs
    )
    assert all(col in df_qls.columns for col in exp_q_cols)
    # test monotonicity of quantiles
    # only the quantile columns are compared (in increasing quantile order);
    # rows with missing values cannot be ordered and are left out
    quantile_values = df_qls[exp_q_cols].dropna()
    monotonic_rows = quantile_values.apply(lambda row: row.is_monotonic_increasing, axis=1)
    assert monotonic_rows.all(), 'quantile columns are not monotonically increasing'
test_method_qls(nixtla_client.forecast)
test_method_qls(nixtla_client.forecast, add_history=True)
test_method_qls(nixtla_client.cross_validation)

In [None]:
#| hide
# test num partitions
# we need to be sure that we can recover the same results
# using a for loop
# A: be aware that num_partitions can produce different results
# when it is used together with finetune_steps
def test_num_partitions_same_results(method: Callable, num_partitions: int, **kwargs):
    res_partitioned = method(**kwargs, num_partitions=num_partitions)
    res_no_partitioned = method(**kwargs, num_partitions=1)
    sort_by = ['unique_id', 'ds']
    if 'cutoff' in res_partitioned:
        sort_by.extend(['cutoff'])
    pd.testing.assert_frame_equal(
        res_partitioned.sort_values(sort_by).reset_index(drop=True), 
        res_no_partitioned.sort_values(sort_by).reset_index(drop=True),
        rtol=1e-2,
        atol=1e-2,
    )

freqs = {'D': 7, 'W-THU': 52, 'Q-DEC': 8, '15T': 4 * 24 * 7}
for freq, h in freqs.items():
    df_freq = generate_series(
        10, 
        min_length=500 if freq != '15T' else 1_200, 
        max_length=550 if freq != '15T' else 2_000,
    )
    #df_freq['y'] = df_freq['y'].astype(np.float32)
    df_freq['ds'] = df_freq.groupby('unique_id', observed=True)['ds'].transform(
        lambda x: pd.date_range(periods=len(x), freq=freq, end='2023-01-01')
    )
    min_size = df_freq.groupby('unique_id', observed=True).size().min()
    test_num_partitions_same_results(
        nixtla_client.detect_anomalies,
        level=98,
        df=df_freq,
        num_partitions=2,
    )
    test_num_partitions_same_results(
        nixtla_client.cross_validation,
        h=7,
        n_windows=2,
        df=df_freq,
        num_partitions=2,
    )
    test_num_partitions_same_results(
        nixtla_client.forecast,
        df=df_freq,
        h=7,
        add_history=True,
        num_partitions=2,
    )

In [None]:
#| hide
from itertools import product
from time import time, sleep
from unittest.mock import patch
import httpx

from requests.exceptions import HTTPError

In [None]:
#| hide
def test_retry_behavior(side_effect, max_retries=6, max_wait_time=60, should_retry=True, sleep_seconds=5):
    """Check how long a failing forecast takes when every request fails.

    `httpx.Client.request` is patched with `side_effect`, a forecast is
    expected to fail, and the elapsed time is compared against bounds
    derived from `max_retries`, `max_wait_time` and `sleep_seconds`.
    """
    client_under_test = NixtlaClient(max_retries=max_retries, timeout=max_wait_time)
    started_at = time()
    with patch('httpx.Client.request', side_effect=side_effect):
        test_fail(
            lambda: client_under_test.forecast(df=df, h=12, time_col='timestamp', target_col='value'),
        )
    elapsed = time() - started_at
    if not should_retry:
        assert elapsed <= max_wait_time
        return
    lower_bound = min(max_retries - 1, max_wait_time)
    upper_bound = min(max_retries, max_wait_time)
    assert elapsed >= lower_bound, "It is not retrying as expected"
    # whatever is left after the retries and the mocked sleeps
    # must stay below one extra sleep
    overhead = elapsed - upper_bound - (max_retries - 1) * sleep_seconds
    assert overhead <= sleep_seconds

In [None]:
#| hide
# we want the api to retry in these cases
def raise_api_error_with_text(*args, **kwargs):
    """Side effect that always raises a 503 `ApiError` with an HTML body."""
    html_body = """
        <html><head>
        <meta http-equiv="content-type" content="text/html;charset=utf-8">
        <title>503 Server Error</title>
        </head>
        <body text=#000000 bgcolor=#ffffff>
        <h1>Error: Server Error</h1>
        <h2>The service you requested is not available at this time.<p>Service error -27.</h2>
        <h2></h2>
        </body></html>
        """
    raise ApiError(status_code=503, body=html_body)
test_retry_behavior(raise_api_error_with_text)

In [None]:
#| hide
# we want the api to not retry in these cases
# here A is assuming that the endpoint responds always
# with a json
def raise_api_error_with_json(*args, **kwargs):
    """Side effect that always raises a 503 `ApiError` with a dict body."""
    json_body = {'detail': 'Please use numbers'}
    raise ApiError(status_code=503, body=json_body)
test_retry_behavior(raise_api_error_with_json, should_retry=False)

In [None]:
#| hide
# test resilience of api calls

# seconds each mocked request blocks before timing out; also handed to
# test_retry_behavior so its time bounds use the same value
sleep_seconds = 5

def raise_read_timeout_error(*args, **kwargs):
    """Side effect that blocks for `sleep_seconds` and then times out."""
    print(f'raising ReadTimeout error after {sleep_seconds} seconds')
    sleep(sleep_seconds)
    # httpx exceptions need an explicit message
    raise httpx.ReadTimeout('mocked read timeout')
    
def raise_http_error(*args, **kwargs):
    """Side effect that fails immediately with a requests `HTTPError`."""
    print('raising HTTP error')
    raise HTTPError(response=dict(status_code=503))
    
# (max_retries, retry_interval, max_wait_time)
combs = [
    (2, 5, 30),
    (10, 1, 5),
]
side_effects = [raise_read_timeout_error, raise_http_error]

# test_retry_behavior has no `retry_interval` parameter, so that value
# is unpacked but not forwarded
for (max_retries, _retry_interval, max_wait_time), side_effect in product(combs, side_effects):
    test_retry_behavior(
        max_retries=max_retries, 
        max_wait_time=max_wait_time, 
        side_effect=side_effect,
        sleep_seconds=sleep_seconds,
    )
    

In [None]:
show_doc(NixtlaClient.plot, name='NixtlaClient.plot', title_level=2)

In [None]:
nixtla_client = NixtlaClient()

In [None]:
#| hide
nixtla_client.plot(df, time_col='timestamp', target_col='value', engine='plotly')

In [None]:
show_doc(NixtlaClient.forecast, title_level=2)

In [None]:
#| hide
# test we recover the same <mean> forecasts
# with and without restricting input
# (add_history)
def test_equal_fcsts_add_history(**kwargs):
    """Check that `add_history=True` does not change the forecasts.

    The last `h` rows per series of the forecast with history must be
    equal to the plain forecast, which is returned.
    """
    with_history = nixtla_client.forecast(**kwargs, add_history=True)
    # keep only the forecasted part of each series
    horizon_rows = with_history.groupby('unique_id', observed=True).tail(kwargs['h'])
    horizon_rows = horizon_rows.reset_index(drop=True)
    plain_fcst = nixtla_client.forecast(**kwargs)
    pd.testing.assert_frame_equal(horizon_rows, plain_fcst)
    return plain_fcst

freqs = {'D': 7, 'W-THU': 52, 'Q-DEC': 8, '15T': 4 * 24 * 7}
for freq, h in freqs.items():
    df_freq = generate_series(
        10, 
        min_length=500 if freq != '15T' else 1_200, 
        max_length=550 if freq != '15T' else 2_000,
    )
    df_freq['ds'] = df_freq.groupby('unique_id', observed=True)['ds'].transform(
        lambda x: pd.date_range(periods=len(x), freq=freq, end='2023-01-01')
    )
    kwargs = dict(
        df=df_freq,
        h=h,
    )
    fcst_1_df = test_equal_fcsts_add_history(**{**kwargs, 'model': 'timegpt-1'})
    fcst_2_df = test_equal_fcsts_add_history(**{**kwargs, 'model': 'timegpt-1-long-horizon'})
    test_fail(
        lambda: pd.testing.assert_frame_equal(fcst_1_df, fcst_2_df),
        contains='(column name="TimeGPT") are different',
    )
    # add test num_partitions    

In [None]:
#| hide
#test same results custom url
nixtla_client_custom = NixtlaClient(
    api_key=os.environ['NIXTLA_API_KEY_CUSTOM'], 
    base_url=os.environ['NIXTLA_BASE_URL_CUSTOM'],
)
# forecast method
fcst_kwargs = dict(
    df=df, 
    h=12, 
    level=[90, 95], 
    add_history=True, 
    time_col='timestamp', 
    target_col='value',
)
fcst_df = nixtla_client.forecast(**fcst_kwargs)
fcst_df_custom = nixtla_client_custom.forecast(**fcst_kwargs)
pd.testing.assert_frame_equal(
    fcst_df,
    fcst_df_custom,
)
# anomalies method
anomalies_kwargs = dict(
    df=df, 
    level=99,
    time_col='timestamp', 
    target_col='value',
)
anomalies_df = nixtla_client.detect_anomalies(**anomalies_kwargs)
anomalies_df_custom = nixtla_client_custom.detect_anomalies(**anomalies_kwargs)
pd.testing.assert_frame_equal(
    anomalies_df,
    anomalies_df_custom,
)

In [None]:
#| hide
# test different results for different models
fcst_kwargs['model'] = 'timegpt-1'
fcst_timegpt_1 = nixtla_client.forecast(**fcst_kwargs)
fcst_kwargs['model'] = 'timegpt-1-long-horizon'
fcst_timegpt_long = nixtla_client.forecast(**fcst_kwargs)
test_fail(
    lambda: pd.testing.assert_frame_equal(fcst_timegpt_1[['TimeGPT']], fcst_timegpt_long[['TimeGPT']]),
    contains='(column name="TimeGPT") are different'
)

In [None]:
#| hide
# test different results for different models
# cross validation
cv_kwargs = dict(
    df=df, 
    h=12, 
    level=[90, 95], 
    time_col='timestamp', 
    target_col='value',
)
cv_kwargs['model'] = 'timegpt-1'
cv_timegpt_1 = nixtla_client.cross_validation(**cv_kwargs)
cv_kwargs['model'] = 'timegpt-1-long-horizon'
cv_timegpt_long = nixtla_client.cross_validation(**cv_kwargs)
test_fail(
    lambda: pd.testing.assert_frame_equal(cv_timegpt_1[['TimeGPT']], cv_timegpt_long[['TimeGPT']]),
    contains='(column name="TimeGPT") are different'
)

In [None]:
#| hide
# test different results for different models
# anomalies
anomalies_kwargs['model'] = 'timegpt-1'
anomalies_timegpt_1 = nixtla_client.detect_anomalies(**anomalies_kwargs)
anomalies_kwargs['model'] = 'timegpt-1-long-horizon'
anomalies_timegpt_long = nixtla_client.detect_anomalies(**anomalies_kwargs)
test_fail(
    lambda: pd.testing.assert_frame_equal(anomalies_timegpt_1[['TimeGPT']], anomalies_timegpt_long[['TimeGPT']]),
    contains='(column name="TimeGPT") are different'
)

In [None]:
#| hide
# test unsupported model
fcst_kwargs['model'] = 'a-model'
test_fail(
    lambda: nixtla_client.forecast(**fcst_kwargs),
    contains='unsupported model',
)

In [None]:
#| hide
# test unsupported model
anomalies_kwargs['model'] = 'my-awesome-model'
test_fail(
    lambda: nixtla_client.detect_anomalies(**anomalies_kwargs),
    contains='unsupported model',
)

In [None]:
#| hide
# test make future dataframe for one series
df_ = df.rename(columns={'timestamp': 'ds', 'value': 'y'})
df_.insert(0, 'unique_id', 'AirPassengers')
df_actual_future = df_.tail(12)[['unique_id', 'ds']]
df_history = df_.drop(df_actual_future.index)
df_future = _NixtlaClientModel(client=nixtla_client.client, h=12, freq='MS').make_future_dataframe(df_history)
pd.testing.assert_frame_equal(
    df_actual_future.reset_index(drop=True),
    df_future,
)

In [None]:
#| hide
# test add date features
date_features = ['year', 'month']
df_date_features, future_df = _NixtlaClientModel(
    client=nixtla_client.client,
    h=12, 
    freq='MS', 
    date_features=date_features,
    date_features_to_one_hot=None,
).add_date_features(df_,  X_df=None)
assert all(col in df_date_features for col in date_features)
assert all(col in future_df for col in date_features)

In [None]:
show_doc(NixtlaClient.cross_validation, title_level=2)

In [None]:
#| hide
# cross validation tests
df_copy = df_.copy()
pd.testing.assert_frame_equal(
    df_copy,
    df_,
)
df_test = df_.groupby('unique_id').tail(12)
df_train = df_.drop(df_test.index)
hyps = [
    # finetune steps is unstable due
    # to numerical reasons
    # dict(finetune_steps=2),
    dict(),
    dict(clean_ex_first=False),
    dict(date_features=['month']),
    dict(level=[80, 90]),
    #dict(level=[80, 90], finetune_steps=2),
]

In [None]:
#| hide
# test exogenous variables cv
# a single-window cross validation must match a forecast made
# from the train split using the test split's exogenous values
df_ex_ = df_.copy()
df_ex_['exogenous_var'] = df_ex_['y'] + np.random.normal(size=len(df_ex_))
x_df_test = df_test.drop(columns='y').merge(df_ex_.drop(columns='y'))
for hyp in hyps:
    # the module-level logger is named `logger`
    logger.info(f'Hyperparameters: {hyp}')
    logger.info('\n\nPerforming forecast\n')
    fcst_test = nixtla_client.forecast(df_train.merge(df_ex_.drop(columns='y')), h=12, X_df=x_df_test, **hyp)
    fcst_test = df_test[['unique_id', 'ds', 'y']].merge(fcst_test)
    fcst_test = fcst_test.sort_values(['unique_id', 'ds']).reset_index(drop=True)
    logger.info('\n\nPerforming Cross validation\n')
    fcst_cv = nixtla_client.cross_validation(df_ex_, h=12, **hyp)
    fcst_cv = fcst_cv.sort_values(['unique_id', 'ds']).reset_index(drop=True)
    logger.info('\n\nVerify difference\n')
    pd.testing.assert_frame_equal(
        fcst_test,
        fcst_cv.drop(columns='cutoff'),
        #rtol=1e-2,
    )


In [None]:
#| hide
for hyp in hyps:
    fcst_test = nixtla_client.forecast(df_train, h=12, **hyp)
    fcst_test = df_test[['unique_id', 'ds', 'y']].merge(fcst_test)
    fcst_test = fcst_test.sort_values(['unique_id', 'ds']).reset_index(drop=True)
    fcst_cv = nixtla_client.cross_validation(df_, h=12, **hyp)
    fcst_cv = fcst_cv.sort_values(['unique_id', 'ds']).reset_index(drop=True)
    pd.testing.assert_frame_equal(
        fcst_test,
        fcst_cv.drop(columns='cutoff'),
        rtol=1e-2,
    )

In [None]:
#| hide
for hyp in hyps:
    fcst_test = nixtla_client.forecast(df_train, h=12, **hyp)
    fcst_test.insert(2, 'y', df_test['y'].values)
    fcst_test = fcst_test.sort_values(['unique_id', 'ds']).reset_index(drop=True)
    fcst_cv = nixtla_client.cross_validation(df_, h=12, **hyp)
    fcst_cv = fcst_cv.sort_values(['unique_id', 'ds']).reset_index(drop=True)
    pd.testing.assert_frame_equal(
        fcst_test,
        fcst_cv.drop(columns='cutoff'),
        rtol=1e-2,
    )

In [None]:
#| hide
from nixtla.date_features import SpecialDates

In [None]:
#| hide
# test add callables
# SpecialDates is passed as a callable date feature; both the historical and
# the future frames must expose one column per key of the dates mapping.
date_features = [SpecialDates({'first_dates': ['2021-01-1'], 'second_dates': ['2021-01-01']})]
# Re-stamp the data with daily timestamps so it agrees with freq='D' below.
df_daily = df_.copy()
df_daily['ds'] = pd.date_range(end='2021-01-01', periods=len(df_daily))
df_date_features, future_df = _NixtlaClientModel(
    client=nixtla_client.client,
    h=12,
    freq='D',
    date_features=date_features,
    date_features_to_one_hot=None,
# Feed the daily frame built above; it was previously created but never used.
).add_date_features(df_daily, X_df=None)
assert all(col in df_date_features for col in ['first_dates', 'second_dates'])
assert all(col in future_df for col in ['first_dates', 'second_dates'])

In [None]:
#| hide
# test add date features one hot encoded
# Smoke test: only checks that the call completes when `month` is requested
# as a one-hot encoded date feature.
date_features = ['year', 'month']
date_features_to_one_hot = ['month']
one_hot_model = _NixtlaClientModel(
    client=nixtla_client.client,
    h=12,
    freq='D',
    date_features=date_features,
    date_features_to_one_hot=date_features_to_one_hot,
)
df_date_features, future_df = one_hot_model.add_date_features(df_, X_df=None)

In [None]:
#| hide
# test future dataframe for multiple series
# The generated future frame must equal the id/time columns of the reference
# future dataset.
df_ = pd.read_csv('https://raw.githubusercontent.com/Nixtla/transfer-learning-time-series/main/datasets/electricity-short-with-ex-vars.csv')
df_actual_future = pd.read_csv('https://raw.githubusercontent.com/Nixtla/transfer-learning-time-series/main/datasets/electricity-short-future-ex-vars.csv')
future_model = _NixtlaClientModel(client=nixtla_client.client, h=24, freq='H')
df_future = future_model.make_future_dataframe(df_[['unique_id', 'ds', 'y']])
expected_future = df_actual_future[['unique_id', 'ds']]
pd.testing.assert_frame_equal(expected_future, df_future)

In [None]:
#| hide
# test pass dataframe with index
# Results obtained from a frame whose DatetimeIndex holds the timestamps must
# match the results obtained from the same data with `ds` as a regular column.
df_ds_index = df_.set_index('ds')[['unique_id', 'y']]
df_ds_index.index = pd.DatetimeIndex(df_ds_index.index)
fcst_inferred_df_index = nixtla_client.forecast(df_ds_index, h=10)
anom_inferred_df_index = nixtla_client.detect_anomalies(df_ds_index)
fcst_inferred_df = nixtla_client.forecast(df_[['ds', 'unique_id', 'y']], h=10)
anom_inferred_df = nixtla_client.detect_anomalies(df_[['ds', 'unique_id', 'y']])
pd.testing.assert_frame_equal(fcst_inferred_df_index, fcst_inferred_df, atol=1e-3)
pd.testing.assert_frame_equal(anom_inferred_df_index, anom_inferred_df, atol=1e-3)

# Repeat the forecast comparison for several frequencies, re-stamping the last
# 80 rows of every series with a synthetic date range of that frequency.
df_ds_index = df_ds_index.groupby('unique_id').tail(80)
n_series = df_ds_index['unique_id'].nunique()
for freq in ['Y', 'W-MON', 'Q-DEC', 'H']:
    df_ds_index.index = np.concatenate(
        n_series * [pd.date_range(end='2023-01-01', periods=80, freq=freq)]
    )
    df_ds_index.index.name = 'ds'
    fcst_inferred_df_index = nixtla_client.forecast(df_ds_index, h=10)
    # Dedicated name: do not overwrite the notebook-wide `df_test` frame.
    df_ds_column = df_ds_index.reset_index()
    fcst_inferred_df = nixtla_client.forecast(df_ds_column, h=10)
    pd.testing.assert_frame_equal(fcst_inferred_df_index, fcst_inferred_df, atol=1e-3)

In [None]:
#| hide
# test add date features with exogenous variables
# and multiple series
# The date feature columns must be added to both frames while the columns
# that were already present stay untouched.
date_features = ['year', 'month']
exog_model = _NixtlaClientModel(
    client=nixtla_client.client,
    h=24,
    freq='H',
    date_features=date_features,
    date_features_to_one_hot=None,
)
df_date_features, future_df = exog_model.add_date_features(df_, X_df=df_actual_future)
missing_in_history = [col for col in date_features if col not in df_date_features]
assert not missing_in_history
missing_in_future = [col for col in date_features if col not in future_df]
assert not missing_in_future
pd.testing.assert_frame_equal(df_date_features[df_.columns], df_)
pd.testing.assert_frame_equal(future_df[df_actual_future.columns], df_actual_future)

In [None]:
#| hide
# test add date features one hot with exogenous variables
# and multiple series
# With every date feature one-hot encoded, the columns that were already
# present in both frames must stay untouched.
date_features = ['month', 'day']
one_hot_exog_model = _NixtlaClientModel(
    client=nixtla_client.client,
    h=24,
    freq='H',
    date_features=date_features,
    date_features_to_one_hot=date_features,
)
df_date_features, future_df = one_hot_exog_model.add_date_features(
    df_,
    X_df=df_actual_future,
)
pd.testing.assert_frame_equal(df_date_features[df_.columns], df_)
pd.testing.assert_frame_equal(future_df[df_actual_future.columns], df_actual_future)

In [None]:
#| hide
# test warning horizon too long
# Only three observations are supplied for a horizon of 100 steps; the call is
# expected to complete (no assertion is made on the emitted warning here).
short_history = df.tail(3)
nixtla_client.forecast(
    df=short_history,
    h=100,
    time_col='timestamp',
    target_col='value',
)

In [None]:
#| hide
# test short horizon with add_history
def _forecast_short_series_with_history():
    """Forecast from three observations while also requesting the history."""
    return nixtla_client.forecast(
        df=df.tail(3),
        h=12,
        time_col='timestamp',
        target_col='value',
        add_history=True,
    )


# The call must fail with an error message containing 'be sure'.
test_fail(_forecast_short_series_with_history, contains='be sure')

In [None]:
#| hide
# test short horizon with finetuning
def _forecast_short_series_with_finetuning():
    """Forecast from three observations while requesting fine-tuning steps."""
    return nixtla_client.forecast(
        df=df.tail(3),
        h=12,
        time_col='timestamp',
        target_col='value',
        finetune_steps=10,
        finetune_loss='mae',
    )


# The call must fail with an error message containing 'be sure'.
test_fail(_forecast_short_series_with_finetuning, contains='be sure')

In [None]:
#| hide
# test short horizon with level
def _forecast_short_series_with_level():
    """Forecast from three observations while requesting prediction intervals."""
    return nixtla_client.forecast(
        df=df.tail(3),
        h=12,
        time_col='timestamp',
        target_col='value',
        level=[80, 90],
    )


# The call must fail with an error message containing 'be sure'.
test_fail(_forecast_short_series_with_level, contains='be sure')

In [None]:
#| hide
# test custom url
# Both client instances must return exactly the same forecast for the same
# request.
forecast_kwargs = dict(df=df, h=12, time_col='timestamp', target_col='value')
_timegpt_fcst_df = _nixtla_client.forecast(**forecast_kwargs)
timegpt_fcst_df = nixtla_client.forecast(**forecast_kwargs)
pd.testing.assert_frame_equal(_timegpt_fcst_df, timegpt_fcst_df)

In [None]:
#| hide
# test using index as time_col
# Anomaly detection must give the same result whether the timestamps live in
# a column (with an explicit freq) or in the index of the frame.
df_test = df.assign(timestamp=pd.to_datetime(df["timestamp"])).set_index("timestamp")

anomaly_kwargs = dict(time_col='timestamp', target_col='value')
# Timestamps in a column, frequency provided by the caller.
timegpt_anomalies_df_1 = nixtla_client.detect_anomalies(df, freq='M', **anomaly_kwargs)
# Timestamps in the index, frequency left to be inferred.
timegpt_anomalies_df_2 = nixtla_client.detect_anomalies(df_test, **anomaly_kwargs)

pd.testing.assert_frame_equal(timegpt_anomalies_df_1, timegpt_anomalies_df_2)

In [None]:
#| hide
# test for showing the correct warning if X_df is missing but df has exogenous columns
class WarningsHandler(logging.Handler):
    """Logging handler that re-raises WARNING records through ``warnings.warn``.

    Attaching an instance to a logger lets ``warnings.catch_warnings`` capture
    messages that the code under test reports via ``logging`` instead of the
    ``warnings`` module. Records of any other level are ignored.
    """

    def emit(self, record):
        """Forward ``record`` to ``warnings.warn`` if its level is WARNING.

        ``emit`` is the documented extension point of ``logging.Handler``;
        implementing it (rather than overriding ``handle``) keeps the handler's
        filters and lock in effect.
        """
        if record.levelno == logging.WARNING:
            warnings.warn(record.getMessage())

# Attach the handler to the root logger so that WARNING records logged during
# the forecast are turned into Python warnings that can be captured below.
log = logging.getLogger()
log.addHandler(WarningsHandler())

df = generate_series(n_series=2, min_length=5, max_length=20, n_static_features=3)
# Every column other than id/time/target counts as exogenous here.
missing_exogenous = df.drop(columns=['unique_id', 'ds', 'y']).columns
missing_exogenous_str = ', '.join(missing_exogenous)
# Must match the text of the emitted warning character for character.
expected_warning = ('You did not provide X_df. '
            'Exogenous variables in df are ignored. '
            'To surpress this warning, please add X_df '
          f'with exogenous variables: {missing_exogenous_str}')

with warnings.catch_warnings(record=True) as w:
    # Record every warning regardless of filters installed earlier in the
    # session; otherwise an inherited filter could hide the expected one.
    warnings.simplefilter('always')
    forecasts = nixtla_client.forecast(df, h=5)
is_expected_warning_raised = [(expected_warning in str(warning.message)) for warning in w]
assert any(is_expected_warning_raised)

In [None]:
# Render the documentation entry for NixtlaClient.detect_anomalies
# (title_level=2 presumably selects the heading level; confirm against nbdev).
show_doc(NixtlaClient.detect_anomalies, title_level=2)