In [None]:
#default_exp models

In [None]:
#hide
from nbdev import *
%load_ext autoreload
%autoreload 2

# Models

> Implementations of univariate (single-series) forecasting models.

In [None]:
#hide
import warnings
warnings.filterwarnings('ignore', category=FutureWarning)

In [None]:
#export
from itertools import count
from numbers import Number
from typing import Collection, List, Optional, Sequence, Tuple

import numpy as np
import pandas as pd
from numba import njit
from scipy.optimize import minimize

from statsforecast.arima import auto_arima_f, forecast_arima

In [None]:
#exporti
@njit
def _ses_fcst_mse(x: np.ndarray, alpha: float) -> Tuple[float, float]:
    """Perform simple exponential smoothing on a series.

    This function returns the one step ahead prediction
    as well as the mean squared error of the fit.

    Parameters
    ----------
    x : np.ndarray
        Series to smooth. ``x[0]`` seeds the smoothed level, so the series
        must contain at least one element.
    alpha : float
        Weight given to the most recent observation; ``1 - alpha`` is the
        weight given to the previous smoothed level.

    Returns
    -------
    forecast : float
        ``alpha * x[-1] + (1 - alpha) * smoothed`` using the level reached
        at the end of the loop.
    mse : float
        Sum of the squared one-step-ahead errors divided by ``x.size``
        (``x.size - 1`` errors are accumulated, the divisor is ``x.size``).
    """
    # The first observation is used as the initial smoothed level.
    smoothed = x[0]
    n = x.size
    mse = 0.

    for i in range(1, n):
        # `smoothed` becomes the prediction for x[i]: it only uses x[i - 1]
        # and the previous level.
        # NOTE(review): `.item()` presumably unwraps the one-element array that
        # results when `alpha` is passed as a length-1 array by the optimizer
        # driving `_ses_mse` -- confirm before removing it.
        smoothed = (alpha * x[i - 1] + (1 - alpha) * smoothed).item()
        error = x[i] - smoothed
        mse += error * error

    mse /= n
    # One more smoothing step, now including the last observation.
    forecast = alpha * x[-1] + (1 - alpha) * smoothed
    return forecast, mse


def _ses_mse(alpha: float, x: np.ndarray) -> float:
    """Return only the fit error of simple exponential smoothing.

    The smoothing weight comes first in the signature so the function can be
    handed to an optimizer as the objective, with the series ``x`` supplied
    as an extra argument.
    """
    return _ses_fcst_mse(x, alpha)[1]


@njit
def _ses_forecast(x: np.ndarray, alpha: float) -> float:
    """Return only the one step ahead simple exponential smoothing forecast.

    The fit error computed alongside the forecast is discarded.
    """
    return _ses_fcst_mse(x, alpha)[0]


@njit
def _demand(x: np.ndarray) -> np.ndarray:
    """Keep the strictly positive entries of a vector, in their original order."""
    is_positive = x > 0
    return x[is_positive]


@njit
def _intervals(x: np.ndarray) -> np.ndarray:
    """Count the number of steps between consecutive non zero elements.

    One value is produced per non zero element: the number of positions
    since the previous non zero element (or since the start of the vector
    for the first one), counting the non zero element itself. Zeros after
    the last non zero element produce no value.

    Example: ``[0, 3, 0, 0, 5]`` gives ``[2, 3]``.
    """
    gaps = []
    steps = 1
    for value in x:
        if value != 0:
            # Close the current gap and start counting the next one.
            gaps.append(steps)
            steps = 1
        else:
            steps += 1
    return np.array(gaps)


@njit
def _probability(x: np.ndarray) -> np.ndarray:
    """Flag the non zero elements of a vector.

    Returns an ``int32`` array with 1 where ``x`` is non zero and 0 elsewhere.
    """
    is_nonzero = x != 0
    return is_nonzero.astype(np.int32)


def _optimized_ses_forecast(x: np.ndarray,
                            bounds: Sequence[Tuple[float, float]] = [(0.1, 0.3)]
                            ) -> float:
    """Searches for the optimal alpha and computes SES one step forecast."""
    alpha = minimize(
        fun=_ses_mse,
        x0=(0,),
        args=(x,),
        bounds=bounds,
        method='L-BFGS-B'
    ).x[0]
    forecast = _ses_forecast(x, alpha)
    return forecast


@njit
def _chunk_sums(array: np.ndarray, chunk_size: int) -> np.ndarray:
    """Splits an array into chunks and returns the sum of each chunk.

    Only complete chunks are summed: when ``array.size`` is not a multiple of
    ``chunk_size`` the trailing ``array.size % chunk_size`` elements are
    ignored. The output therefore always has ``array.size // chunk_size``
    elements.

    Parameters
    ----------
    array : np.ndarray
        Values to aggregate.
    chunk_size : int
        Number of consecutive elements per chunk; must be positive.

    Returns
    -------
    np.ndarray
        Sum of every complete chunk, in order.
    """
    n_chunks = array.size // chunk_size
    sums = np.empty(n_chunks)
    # Iterate over the number of complete chunks rather than over every start
    # offset: stepping through range(0, n, chunk_size) yields one extra
    # iteration for a partial trailing chunk and would write past the end of
    # `sums`, which is not bounds-checked in compiled code.
    for i in range(n_chunks):
        start = i * chunk_size
        sums[i] = array[start : start + chunk_size].sum()
    return sums

In [50]:
#export
@njit
def ses(X, h, future_xreg, residuals, alpha):
    """Simple exponential smoothing forecast repeated over the horizon.

    ``X`` may be 1-d (the series) or 2-d (the series is column 0).
    Returns ``{'mean': array}`` with ``h`` float32 copies of the one step
    ahead forecast. ``future_xreg`` is not used. Requesting residuals raises
    ``NotImplementedError``.
    """
    if residuals:
        raise NotImplementedError('return residuals')
    y = X[:, 0] if X.ndim == 2 else X
    one_step = _ses_forecast(y, alpha)
    return {'mean': np.full(h, one_step, np.float32)}


def adida(X, h, future_xreg, residuals):
    """Forecast an intermittent series by aggregating it before smoothing.

    The series is summed in non-overlapping buckets whose length is the
    rounded mean interval between non zero values. The bucket sums are
    forecast one step ahead with optimized simple exponential smoothing, and
    the result is divided by the bucket length to return to the original
    scale.

    Parameters
    ----------
    X : np.ndarray
        1-d series, or 2-d array whose column 0 is the series.
    h : int
        Forecast horizon.
    future_xreg
        Not used.
    residuals : bool
        Must be falsy; residuals are not implemented.

    Returns
    -------
    dict
        ``{'mean': array}`` with ``h`` copies of the forecast (all zeros when
        the series contains only zeros).

    Raises
    ------
    NotImplementedError
        If ``residuals`` is truthy.
    """
    if residuals:
        raise NotImplementedError('return residuals')
    y = X[:, 0] if X.ndim == 2 else X
    if (y == 0).all():
        return {'mean': np.repeat(np.float32(0), h)}
    y_intervals = _intervals(y)
    # Convert to a Python float before rounding (as `imapa` does) so that
    # `round` always yields a Python int; the value is used below as a modulus
    # and as a slice bound.
    mean_interval = y_intervals.mean().item()
    aggregation_level = round(mean_interval)
    # Drop the oldest observations so the remaining length is an exact
    # multiple of the bucket length.
    lost_remainder_data = len(y) % aggregation_level
    y_cut = y[lost_remainder_data:]
    aggregation_sums = _chunk_sums(y_cut, aggregation_level)
    sums_forecast = _optimized_ses_forecast(aggregation_sums)
    forecast = sums_forecast / aggregation_level
    mean = np.repeat(forecast, h)
    return {'mean': mean}


@njit
def historic_average(X, h, future_xreg, residuals):
    """Forecast every step of the horizon with the mean of the whole series.

    ``X`` may be 1-d (the series) or 2-d (the series is column 0).
    Returns ``{'mean': array}`` with ``h`` copies of the series average.
    ``future_xreg`` is not used. Requesting residuals raises
    ``NotImplementedError``.
    """
    if residuals:
        raise NotImplementedError('return residuals')
    y = X[:, 0] if X.ndim == 2 else X
    average = y.mean()
    return {'mean': np.repeat(average, h)}


@njit
def croston_classic(X, h, future_xreg, residuals):
    """Ratio of smoothed demand size to smoothed demand interval.

    The positive values of the series and the intervals between its non zero
    values are each smoothed with simple exponential smoothing (alpha fixed
    at 0.1); the forecast is the first divided by the second.

    ``X`` may be 1-d (the series) or 2-d (the series is column 0).
    Returns ``{'mean': value}`` where ``value`` is the single ratio; it is
    not expanded to ``h`` elements here. ``h`` and ``future_xreg`` are not
    used. Requesting residuals raises ``NotImplementedError``.
    """
    if residuals:
        raise NotImplementedError('return residuals')
    y = X[:, 0] if X.ndim == 2 else X
    demand_forecast = _ses_forecast(_demand(y), 0.1)
    interval_forecast = _ses_forecast(_intervals(y), 0.1)
    return {'mean': demand_forecast / interval_forecast}


@njit
def croston_sba(X, h, future_xreg, residuals):
    """`croston_classic` forecast scaled by a factor of 0.95.

    ``X`` may be 1-d (the series) or 2-d (the series is column 0).
    Returns the dictionary produced by `croston_classic` with its ``'mean'``
    entry multiplied by 0.95. Requesting residuals raises
    ``NotImplementedError``.
    """
    if residuals:
        raise NotImplementedError('return residuals')
    y = X[:, 0] if X.ndim == 2 else X
    result = croston_classic(y, h, future_xreg, residuals)
    result['mean'] *= 0.95
    return result


def croston_optimized(X, h, future_xreg, residuals):
    """Ratio of smoothed demand size to smoothed demand interval, alpha optimized.

    Same construction as `croston_classic`, except that the smoothing weight
    of each of the two series is searched with `_optimized_ses_forecast`
    instead of being fixed.

    ``X`` may be 1-d (the series) or 2-d (the series is column 0).
    Returns ``{'mean': value}`` where ``value`` is the single ratio; it is
    not expanded to ``h`` elements here. ``h`` and ``future_xreg`` are not
    used. Requesting residuals raises ``NotImplementedError``.
    """
    if residuals:
        raise NotImplementedError('return residuals')
    y = X[:, 0] if X.ndim == 2 else X
    demand_forecast = _optimized_ses_forecast(_demand(y))
    interval_forecast = _optimized_ses_forecast(_intervals(y))
    return {'mean': demand_forecast / interval_forecast}


@njit
def seasonal_window_average(
    X: np.ndarray,
    h: int,
    future_xreg,
    residuals,
    season_length: int,
    window_size: int,
) -> np.ndarray:
    """Average the last `window_size` observations of each seasonal position.

    Only the most recent ``season_length * window_size`` observations are
    used. The per-position averages are then cycled to fill the horizon.

    ``X`` may be 1-d (the series) or 2-d (the series is column 0).
    Returns a dictionary ``{'mean': array}`` holding a float32 array of
    length ``h``; the array is all NaN when the series is shorter than
    ``season_length * window_size``. ``future_xreg`` is not used.
    Requesting residuals raises ``NotImplementedError``.
    """
    if residuals:
        raise NotImplementedError('return residuals')
    y = X[:, 0] if X.ndim == 2 else X
    required = season_length * window_size
    if y.size < required:
        return {'mean': np.full(h, np.nan, np.float32)}
    recent = y[-required:]
    season_avgs = np.zeros(season_length, np.float32)
    for position in range(recent.size):
        # Each observation contributes 1 / window_size of its value to the
        # average of its seasonal position.
        season_avgs[position % season_length] += recent[position] / window_size
    out = np.empty(h, np.float32)
    for step in range(h):
        out[step] = season_avgs[step % season_length]
    return {'mean': out}


@njit
def seasonal_naive(X, h, future_xreg, residuals, season_length):
    """Repeat the last observed season over the horizon.

    Implemented as `seasonal_window_average` with a window of one
    observation per seasonal position.
    """
    single_observation_window = 1
    return seasonal_window_average(
        X, h, future_xreg, residuals, season_length, single_observation_window
    )


def imapa(X, h, future_xreg, residuals):
    """Average of aggregate-then-smooth forecasts over several bucket lengths.

    For every bucket length from 1 up to the rounded mean interval between
    non zero values, the series is summed in non-overlapping buckets, the
    sums are forecast with optimized simple exponential smoothing and the
    result is divided by the bucket length. The final forecast is the mean
    of these per-length forecasts.

    ``X`` may be 1-d (the series) or 2-d (the series is column 0).
    Returns ``{'mean': array}`` with ``h`` copies of the forecast (all zeros
    when the series contains only zeros). ``future_xreg`` is not used.
    Requesting residuals raises ``NotImplementedError``.
    """
    if residuals:
        raise NotImplementedError('return residuals')
    y = X[:, 0] if X.ndim == 2 else X
    if (y == 0).all():
        return {'mean': np.repeat(np.float32(0), h)}
    max_level = round(_intervals(y).mean().item())
    per_level = np.empty(max_level, np.float32)
    for slot, level in enumerate(range(1, max_level + 1)):
        # Discard the oldest observations so the length divides evenly.
        trimmed = y[len(y) % level:]
        level_sums = _chunk_sums(trimmed, level)
        per_level[slot] = (_optimized_ses_forecast(level_sums) / level)
    return {'mean': np.repeat(per_level.mean(), h)}


@njit
def naive(X, h, future_xreg, residuals):
    """Repeat the last observation over the horizon.

    ``X`` may be 1-d (the series) or 2-d (the series is column 0).
    Returns ``{'mean': array}`` with ``h`` float32 copies of the last value.
    When ``residuals`` is truthy the dictionary also has a ``'residuals'``
    float32 array of the series length: NaN in the first position and the
    first differences of the series afterwards. ``future_xreg`` is not used.
    """
    y = X[:, 0] if X.ndim == 2 else X
    mean = np.repeat(y[-1], h).astype(np.float32)
    if not residuals:
        return {'mean': mean}
    res = np.full(y.size, np.nan, np.float32)
    # Each observation minus the one before it.
    res[1:] = y[1:] - y[:-1]
    return {'mean': mean, 'residuals': res}


@njit
def random_walk_with_drift(X, h, future_xreg, residuals):
    """Extend the line joining the first and last observations.

    The slope is ``(last - first) / (size - 1)``; step ``k`` of the horizon
    (starting at 1) is forecast as ``last + k * slope``.

    ``X`` may be 1-d (the series) or 2-d (the series is column 0).
    Returns ``{'mean': array}`` with ``h`` values. ``future_xreg`` is not
    used. Requesting residuals raises ``NotImplementedError``.
    """
    if residuals:
        raise NotImplementedError('return residuals')
    y = X[:, 0] if X.ndim == 2 else X
    last = y[-1]
    slope = (last - y[0]) / (y.size - 1)
    steps_ahead = 1 + np.arange(h)
    return {'mean': slope * steps_ahead + last}


@njit
def window_average(X, h, future_xreg, residuals, window_size):
    """Forecast every step with the mean of the last `window_size` observations.

    ``X`` may be 1-d (the series) or 2-d (the series is column 0).
    Returns ``{'mean': array}`` with ``h`` copies of the window average, or
    ``h`` float32 NaNs when the series is shorter than the window.
    ``future_xreg`` is not used. Requesting residuals raises
    ``NotImplementedError``.
    """
    if residuals:
        raise NotImplementedError('return residuals')
    y = X[:, 0] if X.ndim == 2 else X
    if y.size < window_size:
        return {'mean': np.full(h, np.nan, np.float32)}
    recent = y[-window_size:]
    return {'mean': np.repeat(recent.mean(), h)}


@njit
def seasonal_exponential_smoothing(X, h, future_xreg, residuals, season_length, alpha):
    """Simple exponential smoothing applied separately to each seasonal position.

    The series is split into ``season_length`` sub-series (one per seasonal
    position), each one is forecast one step ahead with simple exponential
    smoothing, and the resulting values are cycled to fill the horizon.

    Seasonal positions are aligned with the END of the series, so the first
    forecast step uses the sub-series that is one full season behind it. When
    the series length is not a multiple of ``season_length`` the oldest
    ``size % season_length`` observations are left out.

    Parameters
    ----------
    X : np.ndarray
        1-d series, or 2-d array whose column 0 is the series.
    h : int
        Forecast horizon.
    future_xreg
        Not used.
    residuals : bool
        Must be falsy; residuals are not implemented.
    season_length : int
        Number of observations per season.
    alpha : float
        Smoothing weight passed to the exponential smoothing of every
        sub-series.

    Returns
    -------
    dict
        ``{'mean': array}`` with a float32 array of length ``h``; all NaN when
        the series is shorter than one season.

    Raises
    ------
    NotImplementedError
        If ``residuals`` is truthy.
    """
    if residuals:
        raise NotImplementedError('return residuals')
    y = X[:, 0] if X.ndim == 2 else X
    n = y.size
    if n < season_length:
        return {'mean': np.full(h, np.nan, np.float32)}
    # Offset of the first observation that belongs to a complete season when
    # counting back from the end. Starting the sub-series at `i` (counted from
    # the beginning) would pair forecast step 1 with the wrong season whenever
    # n is not a multiple of season_length.
    offset = n % season_length
    season_vals = np.empty(season_length, np.float32)
    for i in range(season_length):
        season_vals[i] = _ses_forecast(y[offset + i::season_length], alpha)
    out = np.empty(h, np.float32)
    for i in range(h):
        out[i] = season_vals[i % season_length]
    return {'mean': out}


@njit
def tsb(X, h, future_xreg, residuals, alpha_d, alpha_p):
    """Product of smoothed demand size and smoothed demand occurrence.

    The positive values of the series are smoothed with weight ``alpha_d``
    and the 0/1 indicator of non zero values with weight ``alpha_p``; the
    forecast is the product of the two one step ahead forecasts.

    ``X`` may be 1-d (the series) or 2-d (the series is column 0).
    Returns ``{'mean': array}`` with ``h`` float32 copies of the forecast
    (all zeros when the series contains only zeros). ``future_xreg`` is not
    used. Requesting residuals raises ``NotImplementedError``.
    """
    if residuals:
        raise NotImplementedError('return residuals')
    y = X[:, 0] if X.ndim == 2 else X
    if (y == 0).all():
        return {'mean': np.repeat(np.float32(0), h)}
    occurrence_forecast = _ses_forecast(_probability(y), alpha_p)
    size_forecast = _ses_forecast(_demand(y), alpha_d)
    combined = np.float32(occurrence_forecast * size_forecast)
    return {'mean': np.repeat(combined, h)}

In [None]:
#export
def auto_arima(X: np.ndarray, h: int, future_xreg=None, residuals: bool = False, season_length: int = 1, 
               approximation: bool = False, level: Optional[Tuple[int]] = None) -> np.ndarray:
    """Fit an automatically selected ARIMA model and forecast `h` steps.

    ``X`` may be 1-d (the series) or 2-d; in the 2-d case column 0 is the
    series and any further columns are passed to the fit as external
    regressors, whose future values are given by ``future_xreg``.

    The result is a dictionary that always contains ``'mean'``:

    * with ``residuals`` truthy it also has ``'residuals'`` (the output of
      ``fitted_arima``) and no interval entries, whatever ``level`` is;
    * otherwise, with ``level`` given, it has ``'lo-<l>'`` entries (levels in
      reverse order) followed by ``'hi-<l>'`` entries (levels in the given
      order);
    * otherwise it contains only ``'mean'``.
    """
    if X.ndim == 2:
        y = X[:, 0]
        xreg = X[:, 1:] if X.shape[1] > 1 else None
    else:
        y = X
        xreg = None
    fit_options = dict(
        xreg=xreg,
        period=season_length,
        approximation=approximation,
        allowmean=False, allowdrift=False,  # not implemented yet
    )
    mod = auto_arima_f(y, **fit_options)
    fcst = forecast_arima(mod, h, xreg=future_xreg, level=level)
    out = {'mean': fcst['mean']}
    if residuals:
        from statsforecast.arima import fitted_arima
        out['residuals'] = fitted_arima(mod)
        return out
    if level is None:
        return out
    for l in reversed(level):
        out[f'lo-{l}'] = fcst['lower'][f'{l}%']
    for l in level:
        out[f'hi-{l}'] = fcst['upper'][f'{l}%']
    return out

In [None]:
from statsforecast.utils import AirPassengers as ap

In [None]:
# Forecast 12 steps ahead using a seasonal period of 12.
auto_arima(ap, 12, season_length=12)

External regressors

In [None]:
# Time index 1..n, one value per observation of the series.
drift = np.arange(1, ap.size + 1)
# Column 0 is the series; the log and square root of the time index are the
# extra columns that auto_arima passes to the fit as external regressors.
X = np.vstack([ap, np.log(drift), np.sqrt(drift)]).T

In [None]:
# Time index for the next 7 steps, continuing where the training index ended.
newdrift = np.arange(ap.size + 1, ap.size + 7 + 1).reshape(-1, 1)
# Same two transformations, in the same order, as the training regressors.
newxreg = np.concatenate([np.log(newdrift), np.sqrt(newdrift)], axis=1)

In [None]:
# The horizon (7) matches the number of rows in `newxreg`.
auto_arima(X, 7, future_xreg=newxreg, season_length=12)

Prediction intervals (the `level` argument sets the coverage percentages)

In [None]:
# With `level` set, the result gains one `lo-<level>` and one `hi-<level>`
# entry per requested level; the DataFrame shows them as columns next to `mean`.
pd.DataFrame(auto_arima(ap, 12, season_length=12, level=(80, 95)))