In [None]:
#| default_exp theta

In [None]:
#| export
import math
import os

import numpy as np
from numba import njit
from statsforecast.ets import nelder_mead
from statsmodels.tsa.seasonal import seasonal_decompose
from tqdm.autonotebook import tqdm


# Theta Model

## thetacalc

In [None]:
#| exporti
# Module-level constants.
# Integer codes for the model types; DSTM and DOTM are the two codes that
# initstate and thetaupdate treat as "dynamic" models.
STM, OTM, DSTM, DOTM = 0, 1, 2, 3
# Tolerance used when comparing a forecast against the NA sentinel.
TOL = 1.0e-10
HUGEN = 1.0e10
# Sentinel value returned/checked by thetacalc to signal a failed forecast.
NA = -99999.0
# Machine epsilon of Python's float type.
smalno = np.finfo(float).eps
# Flags forwarded to numba's njit; enabled only when the environment variable
# is the string "true" (case-insensitive).
NOGIL = os.environ.get('NUMBA_RELEASE_GIL', 'False').lower() == 'true'
CACHE = os.environ.get('NUMBA_CACHE', 'False').lower() == 'true'

In [None]:
#| hide
from fastcore.test import test_eq
from statsforecast.utils import AirPassengers as ap

In [None]:
#| exporti
def initstate(y, modeltype, level_0, alpha, theta):
    """Build the first row of the state matrix.

    Returns a float32 array of shape (1, 5) whose columns are
    level, running mean of y, An, Bn and mu.

    For the dynamic model codes (DSTM, DOTM) An and mu start at y[0] and Bn
    at 0. For the other codes An and Bn are computed once from the whole
    series and mu is level_0 + (1 - 1 / theta) * (An + Bn).
    """
    first_obs = y[0]
    row = np.zeros((1, 5), dtype=np.float32)
    row[0, 0] = alpha * first_obs + (1 - alpha) * level_0  # level
    row[0, 1] = first_obs  # mean of y so far
    if modeltype not in [DSTM, DOTM]:
        # non-dynamic models: An / Bn come from the full sample
        n_obs = len(y)
        y_bar = np.mean(y)
        weighted_mean = np.mean(np.arange(1, n_obs + 1) * y)
        slope = 6 * (2 * weighted_mean - (1 + n_obs) * y_bar) / (n_obs ** 2 - 1)
        intercept = y_bar - (n_obs + 1) * slope / 2
        row[0, 2] = intercept  # An
        row[0, 3] = slope  # Bn
        row[0, 4] = level_0 + (1 - 1 / theta) * (intercept + slope)  # mu
        return row
    # dynamic models
    row[0, 2] = first_obs  # An
    row[0, 3] = 0  # Bn
    row[0, 4] = first_obs  # mu
    return row

In [None]:
#| hide
# Smoke-run initstate once for each of the four model-type codes.
# Only the value of the last call is displayed as the cell output.
level_0 = ap[0] / 2
alpha = 0.5
theta = 2
initstate(ap, modeltype=STM, level_0=level_0, alpha=alpha, theta=theta)
initstate(ap, modeltype=OTM, level_0=level_0, alpha=alpha, theta=theta)
initstate(ap, modeltype=DSTM, level_0=level_0, alpha=alpha, theta=theta)
initstate(ap, modeltype=DOTM, level_0=level_0, alpha=alpha, theta=theta)

array([[ 84., 112., 112.,   0., 112.]], dtype=float32)

In [None]:
#| exporti
#@njit(nogil=NOGIL, cache=CACHE)
def thetacalc(y: np.ndarray,
              states: np.ndarray, # states
              modeltype: int, 
              level_0: float, 
              alpha: float,
              theta: float, 
              e: np.ndarray, 
              amse: np.ndarray, 
              nmse: int) -> float:
    """Run the model over `y`, filling `states`, `e` and `amse` in place.

    Parameters
    ----------
    y : series being fitted.
    states : state matrix; row 0 is overwritten with initstate(...) and
        row i is written by thetaupdate for every later observation.
    modeltype : one of the model-type codes (STM, OTM, DSTM, DOTM).
    level_0, alpha, theta : model parameters forwarded to the helpers.
    e : output, one-step residuals y[i] - forecast.
    amse : output, running mean of squared errors for horizons 1..nmse
        (its first `nmse` entries are reset to 0 here).
    nmse : number of forecast horizons evaluated at each step.

    Returns
    -------
    Sum of squared residuals from index 3 onwards divided by the mean
    absolute value of `y`, or the NA sentinel if a one-step forecast equals
    NA (within TOL).
    """
    denom = np.zeros(nmse)
    f = np.zeros(nmse)
    # update first state
    states[0, :] = initstate(y=y, modeltype=modeltype, 
                             level_0=level_0, 
                             alpha=alpha, theta=theta) 
    
    amse[:nmse] = 0.
    e[0] = y[0] - states[0, 4]
    n = len(y)
    for i in range(1, n):
        # forecasts for horizons 1..nmse from the states known up to row i - 1
        thetafcst(states=states, i=i, modeltype=modeltype, f=f, h=nmse, alpha=alpha, theta=theta)
        if math.fabs(f[0] - NA) < TOL:
            mse = NA
            return mse
        e[i] = y[i] - f[0]
        for j in range(nmse):
            if (i + j) < n:
                # incremental mean of the squared j-step-ahead error
                denom[j] += 1.
                tmp = y[i + j] - f[j]
                amse[j] = (amse[j] * (denom[j] - 1.0) + (tmp * tmp)) / denom[j]
        # update state with the actual observation
        thetaupdate(states=states, i=i, modeltype=modeltype, 
                    alpha=alpha, theta=theta, y=y[i], usemu=0)
    # Scale by the mean absolute level of the series. Floor the denominator at
    # TOL so an all-zero series does not produce a 0/0 (nan) objective.
    mean_abs_y = np.mean(np.abs(y))
    if mean_abs_y < TOL:
        mean_abs_y = TOL
    mse = np.sum(e[3:] ** 2) / mean_abs_y
    return mse

In [None]:
#| exporti
#@njit(nogil=NOGIL, cache=CACHE)
def thetafcst(states, i, 
              modeltype, 
              f, h, 
              alpha, theta):
    """Compute `h` recursive forecasts starting after row `i - 1` of `states`.

    The input `states` is not modified: its first `i` rows are copied and `h`
    zero rows are appended. Each appended row is filled by thetaupdate with
    usemu=1, i.e. the forecast itself is fed back as the observation.

    Parameters
    ----------
    states : state matrix with at least `i` filled rows.
    i : index of the first row to forecast.
    modeltype, alpha, theta : forwarded to thetaupdate.
    f : output array; f[k] receives the (k + 1)-step-ahead forecast.
    h : number of steps to forecast.

    Returns
    -------
    The extended state matrix (the first `i` rows of `states` followed by
    the `h` forecast rows).
    """
    # obs:
    # forecast are obtained in a recursive manner
    # this is not standard, for example in ets
    #forecasts
    new_states = np.zeros((h, states.shape[1]), dtype=np.float32)
    new_states = np.vstack([states[:i], new_states])
    for i_h in range(h):
        thetaupdate(states=new_states, i=i + i_h, modeltype=modeltype, 
                    alpha=alpha, theta=theta, y=0, usemu=1)
        f[i_h] = new_states[i + i_h, 4]  # mu is the forecast
    # Return the extended states so callers such as thetaforecast, which
    # forward this function's result, no longer get None.
    return new_states

In [None]:
#| exporti
#@njit(nogil=NOGIL, cache=CACHE)
def thetaupdate(states, i,
                modeltype, # kind of model 
                alpha, theta,
                y, usemu):
    """Write row `i` of `states` in place from row `i - 1`.

    State columns: 0 level, 1 running mean of y, 2 An, 3 Bn, 4 mu.

    Parameters
    ----------
    states : state matrix; row i - 1 is read, row i is written.
    i : index of the row to write.
    modeltype : model-type code; DSTM and DOTM update An/Bn at every step,
        any other code carries An/Bn forward unchanged.
    alpha, theta : model parameters.
    y : observation for step i (ignored when `usemu` is truthy).
    usemu : when truthy the freshly computed mu replaces `y`, which is how
        thetafcst produces recursive forecasts.
    """
    # previous state
    level = states[i - 1, 0]
    meany = states[i - 1, 1]
    An = states[i - 1, 2]
    Bn = states[i - 1, 3]
    # update mu
    states[i, 4] = level + (1 - 1 / theta) * (An * ((1 - alpha) ** i) + Bn * (1 - (1 - alpha)**(i + 1)) / alpha)
    if usemu:
        y = states[i, 4]
    # update level
    states[i, 0] = alpha * y + (1 - alpha) * level
    # update meany
    states[i, 1] = (i * meany + y) / (i + 1)
    # update Bn and An
    if modeltype in [DSTM, DOTM]:
        # dynamic models
        states[i, 3] = ((i - 1) * Bn + 6 * (y - meany) / (i + 1)) / (i + 2)
        # An must use the mean and Bn just written to row i; row i + 1 has not
        # been computed yet and does not exist when i is the last row.
        states[i, 2] = states[i, 1] - states[i, 3] * (i + 2) / 2
    else:
        states[i, 2] = An
        states[i, 3] = Bn


In [None]:
#| exporti
#@njit(nogil=NOGIL, cache=CACHE)
def thetaforecast(states, n, modeltype, 
                  f, h, alpha, theta):
    """Forecast `h` steps beyond the first `n` rows of `states`.

    Thin wrapper around thetafcst called with i=n: `f` is filled in place by
    that call and whatever thetafcst returns is passed back unchanged.
    """
    return thetafcst(states=states, i=n, modeltype=modeltype,
                     f=f, h=h, alpha=alpha, theta=theta)

In [None]:
#| hide
# Simple theta model (STM) fit test on AirPassengers with fixed parameters.
# NOTE(review): nmse_ is assigned but not used in this cell; thetacalc is
# called with nmse=3.
nmse_ = len(ap)
amse_ = np.zeros(30)
e_ = np.zeros(len(ap))
level_0 = ap[0] / 2
alpha = 0.5
theta = 2.
# thetacalc fills init_states, e_ and amse_ in place
init_states = np.zeros((len(ap), 5), dtype=np.float32)
mse = thetacalc(
    y=ap,
    states=init_states, 
    modeltype=STM, 
    level_0=level_0, alpha=alpha, theta=theta,
    e=e_, amse=amse_, nmse=3
)
# verify we recover the fitted values (last state column is mu)
np.testing.assert_array_equal(
    ap - e_,
    init_states[:, -1]
)
# verify we get the same fitted values as R
# use stm(AirPassengers, s=F, estimation=F, h = 12)
# to reproduce the reference numbers
np.testing.assert_array_almost_equal(
    init_states[:, -1][[0, 1, -1]],
    np.array([101.1550, 107.9061, 449.1692]), 
    decimal=2
)
# the returned value must equal the scaled sum of squared residuals
test_eq(np.sum(e_[3:] ** 2) / np.mean(np.abs(ap)), mse)

In [None]:
#| hide
# Non-seasonal forecast test.
# Relies on init_states, alpha and theta left by the STM fit test cell.
h = 5
# thetaforecast writes the forecasts into fcsts in place
fcsts = np.zeros(h, dtype=np.float32)
thetaforecast(
    states=init_states, n=len(ap), 
    modeltype=STM, 
    f=fcsts, h=h, 
    alpha=alpha,
    theta=theta
)
# forecasts must match the reference values obtained from R
np.testing.assert_array_almost_equal(
    fcsts,
    np.array([441.9132, 443.2418, 444.5704, 445.8990, 447.2276]),
    decimal=3
)

In [None]:
#| hide
# Simple seasonal test.
# NOTE(review): this cell uses cescalc and SIMPLE and calls
# initstate(ap, m, 'S') with three arguments. None of these match anything
# defined in the visible part of this notebook (initstate here takes
# y, modeltype, level_0, alpha, theta). Presumably a leftover from the CES
# notebook -- confirm, then port or remove it so Run All works.
nmse_ = len(ap)
amse_ = np.zeros(30)
lik_ = 0.
e_ = np.zeros(len(ap))
alpha_0 = 1.996411
alpha_1 = 1.206694
beta_0 = 0.
beta_1 = 0.
m = 12
init_states_s_ses = np.zeros((12 * 2 + len(ap), 2), dtype=np.float32)
init_states_s_ses[:m] = initstate(ap, m, 'S')
cescalc(y=ap, 
        states=init_states_s_ses, m=12, 
        season=SIMPLE, alpha_0=alpha_0, 
        alpha_1=alpha_1, beta_0=beta_0, 
        beta_1=beta_1,
        e=e_, amse=amse_, nmse=3, backfit=1)
# spot-check four rows of the fitted state matrix
np.testing.assert_array_equal(
    init_states_s_ses[[0, 11, 145, 143 + 12]],
    np.array([
        [130.49458 ,  36.591137],
        [135.21922 , 121.62022 ],
        [423.57788 , 252.81241 ],
        [505.3621  ,  95.29781 ]
    ], dtype=np.float32)
)

In [None]:
#| hide
# Simple seasonal forecast test.
# NOTE(review): cesforecast and SIMPLE are not defined in the visible part of
# this notebook -- presumably a leftover from the CES notebook; confirm.
h = 13
fcsts = np.zeros(h, dtype=np.float32)
cesforecast(states=init_states_s_ses, n=len(ap), m=12, 
            season=SIMPLE, 
            f=fcsts, h=h, 
            alpha_0=alpha_0, alpha_1=alpha_1, 
            beta_0=beta_0, beta_1=beta_1)
# reference values taken from R using ces(AirPassengers, h=13, seasonality = 'simple')
np.testing.assert_array_almost_equal(
    fcsts,
    np.array([
        446.2768, 423.5779, 481.4365, 514.7730, 533.5008,
        589.0500, 688.2703, 674.5891, 580.9486, 516.0776,
        449.7246, 505.3621, 507.9884
    ], dtype=np.float32), 
    decimal=2
)

In [None]:
#| hide
# Partial seasonal test.
# NOTE(review): this cell uses cescalc and calls initstate(ap, m, 'P') with
# three arguments. Neither matches anything defined in the visible part of
# this notebook (initstate here takes y, modeltype, level_0, alpha, theta).
# Presumably a leftover from the CES notebook -- confirm.
nmse_ = len(ap)
amse_ = np.zeros(30)
lik_ = 0.
e_ = np.zeros(len(ap))
alpha_0 = 1.476837
alpha_1 = 1.
beta_0 = 0.91997
beta_1 = 0.
m = 12
init_states_p_seas = np.zeros((12 + len(ap), 3), dtype=np.float32)
init_states_p_seas[:m] = initstate(ap, m, 'P')
cescalc(y=ap, 
        states=init_states_p_seas, m=12, 
        season=2, alpha_0=alpha_0, 
        alpha_1=alpha_1, beta_0=beta_0, 
        beta_1=beta_1,
        e=e_, amse=amse_, nmse=3, backfit=1)
# spot-check four rows of the fitted state matrix
np.testing.assert_array_equal(
    init_states_p_seas[[0, 11, 145, 143 + 12]],
    np.array([
        [122.580666,  83.00358 ,  -9.710966],
        [122.580666,  78.11936 ,  -4.655848],
        [438.5037  , 300.70374 , -25.55726 ],
        [438.5037  , 296.92316 ,  -7.581563]
    ], dtype=np.float32)
)

In [None]:
#| hide
# Partial seasonal forecast test.
# NOTE(review): cesforecast and PARTIAL are not defined in the visible part
# of this notebook -- presumably a leftover from the CES notebook; confirm.
h = 13
fcsts = np.zeros(h, dtype=np.float32)
cesforecast(states=init_states_p_seas, n=len(ap), m=12, 
            season=PARTIAL, 
            f=fcsts, h=h, 
            alpha_0=alpha_0, alpha_1=alpha_1, 
            beta_0=beta_0, beta_1=beta_1)
# reference values taken from R using ces(AirPassengers, h=13, seasonality = 'partial')
np.testing.assert_array_almost_equal(
    fcsts,
    np.array([
        437.6247, 412.9464, 445.5811, 498.5370, 493.0405, 550.7443, 
        629.2205, 607.1793, 512.3455, 462.1260, 383.4097, 430.9221, 437.6247
    ], dtype=np.float32), 
    decimal=2
)

In [None]:
#| hide
# Full seasonal test.
# NOTE(review): this cell uses cescalc and calls initstate(ap, m, 'F') with
# three arguments. Neither matches anything defined in the visible part of
# this notebook (initstate here takes y, modeltype, level_0, alpha, theta).
# Presumably a leftover from the CES notebook -- confirm.
nmse_ = len(ap)
amse_ = np.zeros(30)
lik_ = 0.
e_ = np.zeros(len(ap))
alpha_0 = 1.350795
alpha_1 = 1.009169
beta_0 = 1.777909
beta_1 = 0.973739
m = 12
init_states_f_seas = np.zeros((12 * 2 + len(ap), 4), dtype=np.float32)
init_states_f_seas[:m] = initstate(ap, m, 'F')
cescalc(y=ap,
        states=init_states_f_seas, m=12, 
        season=3, alpha_0=alpha_0, 
        alpha_1=alpha_1, beta_0=beta_0, 
        beta_1=beta_1,
        e=e_, amse=amse_, nmse=3, backfit=1)
# spot-check four rows of the fitted state matrix
np.testing.assert_array_equal(
    init_states_f_seas[[0, 11, 145, 143 + 12]],
    np.array([
        [ 227.74284 ,  167.7603  ,  -94.299805,  -39.623283],
        [ 211.48921 ,  155.72342 ,  -91.62251 ,  -82.953064],
        [ 533.1726  ,  372.95758 , -139.31824 , -125.856834],
        [ 564.9041  ,  404.3251  , -130.9048  , -137.33    ]
    ], dtype=np.float32)
)

In [None]:
#| hide
# Full seasonal forecast test.
# NOTE(review): cesforecast and FULL are not defined in the visible part of
# this notebook -- presumably a leftover from the CES notebook; confirm.
h = 13
fcsts = np.zeros(h, dtype=np.float32)
cesforecast(states=init_states_f_seas, n=len(ap), m=12, 
            season=FULL, 
            f=fcsts, h=h, 
            alpha_0=alpha_0, alpha_1=alpha_1, 
            beta_0=beta_0, beta_1=beta_1)
# reference values taken from R using ces(AirPassengers, h=13, seasonality = 'full')
np.testing.assert_array_almost_equal(
    fcsts,
    np.array([
        450.9262, 429.2925, 465.4771, 510.1799, 517.9913, 578.5654,
        655.9219, 638.6218, 542.0985, 498.1064, 431.3293, 477.3273,
        501.3757
    ], dtype=np.float32), 
    decimal=2
)

In [None]:
#| exporti
@njit(nogil=NOGIL, cache=CACHE)
def initparamces(alpha_0: float, alpha_1: float, 
                 beta_0: float, beta_1: float,
                 seasontype: str):
    """Fill in starting values for parameters passed as NaN.

    A NaN parameter is replaced by its starting value and flagged for
    optimisation (flag 1); a parameter that was supplied keeps its value and
    is not optimised (flag 0). Which betas are used depends on `seasontype`:
    'P' uses beta_0 only (beta_1 is set to NaN), 'F' uses both, and any other
    value sets both betas to NaN with no optimisation.

    Returns a dict with the four parameter values and the matching
    'optimize_<name>' flags.
    """
    # Every flag starts at "do not optimise" and is raised where needed.
    optimize_alpha_0 = 0
    optimize_alpha_1 = 0
    optimize_beta_0 = 0
    optimize_beta_1 = 0

    if np.isnan(alpha_0):
        optimize_alpha_0 = 1
        alpha_0 = 1.3
    if np.isnan(alpha_1):
        optimize_alpha_1 = 1
        alpha_1 = 1.

    if seasontype == 'P':
        if np.isnan(beta_0):
            optimize_beta_0 = 1
            beta_0 = 0.1
        # beta_1 is never optimised in this case
        beta_1 = np.nan
    elif seasontype == 'F':
        if np.isnan(beta_0):
            optimize_beta_0 = 1
            beta_0 = 1.3
        if np.isnan(beta_1):
            optimize_beta_1 = 1
            beta_1 = 1.
    else:
        # no betas at all
        beta_0 = np.nan
        beta_1 = np.nan

    return {'alpha_0': alpha_0, 'optimize_alpha_0': optimize_alpha_0,
            'alpha_1': alpha_1, 'optimize_alpha_1': optimize_alpha_1,
            'beta_0': beta_0, 'optimize_beta_0': optimize_beta_0,
            'beta_1': beta_1, 'optimize_beta_1': optimize_beta_1}

In [None]:
#| hide
# Smoke-run initparamces with every parameter left as NaN for seasontype 'N'.
initparamces(alpha_0=np.nan, alpha_1=np.nan, 
             beta_0=np.nan, beta_1=np.nan, 
             seasontype='N')

In [None]:
#| exporti
@njit(nogil=NOGIL, cache=CACHE)
def switch_ces(x: str):
    """Map a season-type letter ('N', 'S', 'P', 'F') to its integer code 0-3."""
    season_codes = {'N': 0, 'S': 1, 'P': 2, 'F': 3}
    return season_codes[x]

In [None]:
#| hide
# Smoke-run switch_ces; the returned code is shown as the cell output.
switch_ces('N')

In [None]:
#| exporti
@njit(nogil=NOGIL, cache=CACHE)
def pegelsresid_ces(y: np.ndarray, 
                    m: int, 
                    init_states: np.ndarray, 
                    n_components: int,
                    seasontype: str, 
                    alpha_0: float, alpha_1: float,
                    beta_0: float, beta_1: float, 
                    nmse: int):
    """Run cescalc once with the given parameters and collect its outputs.

    A float32 state matrix with len(y) + 2 * m rows is allocated, its first
    `m` rows are seeded from `init_states`, and cescalc is called with
    backfit=1. Returns (amse, e, states, lik); `lik` is turned into NaN when
    cescalc returned the -99999 sentinel.
    """
    n_rows = len(y) + 2 * m
    states = np.zeros((n_rows, n_components), dtype=np.float32)
    states[:m] = init_states
    # output buffers handed to cescalc
    e = np.full_like(y, fill_value=np.nan)
    amse = np.full(nmse, fill_value=np.nan)
    season_code = switch_ces(seasontype)
    lik = cescalc(y=y, states=states, m=m,
                  season=season_code,
                  alpha_0=alpha_0, alpha_1=alpha_1,
                  beta_0=beta_0, beta_1=beta_1, e=e,
                  amse=amse, nmse=nmse, backfit=1)
    # the -99999 sentinel is reported to the caller as NaN
    hit_sentinel = (not np.isnan(lik)) and np.abs(lik + 99999) < 1e-7
    if hit_sentinel:
        lik = np.nan
    return amse, e, states, lik

In [None]:
#| export
@njit(nogil=NOGIL, cache=CACHE)
def ces_target_fn(
        optimal_param,
        init_alpha_0,
        init_alpha_1,
        init_beta_0,
        init_beta_1,
        opt_alpha_0,
        opt_alpha_1,
        opt_beta_0,
        opt_beta_1,
        y,
        m,
        init_states, 
        n_components, 
        seasontype,
        nmse
    ):
    """Objective function handed to nelder_mead by optimize_ces_target_fn.

    `optimal_param` holds only the parameters being optimised, packed in the
    order alpha_0, alpha_1, beta_0, beta_1; a parameter whose `opt_*` flag is
    falsy takes its `init_*` value instead. The value returned is the result
    of cescalc after the clamping applied at the end of this function.
    """
    n_rows = len(y) + 2 * m
    states = np.zeros((n_rows, n_components), dtype=np.float32)
    states[:m] = init_states

    # Start from the fixed values, then overwrite the optimised ones by
    # walking through optimal_param in order.
    pos = 0
    alpha_0 = init_alpha_0
    if opt_alpha_0:
        alpha_0 = optimal_param[pos]
        pos += 1

    alpha_1 = init_alpha_1
    if opt_alpha_1:
        alpha_1 = optimal_param[pos]
        pos += 1

    beta_0 = init_beta_0
    if opt_beta_0:
        beta_0 = optimal_param[pos]
        pos += 1

    beta_1 = init_beta_1
    if opt_beta_1:
        beta_1 = optimal_param[pos]

    e = np.full_like(y, fill_value=np.nan)
    amse = np.full(nmse, fill_value=np.nan)
    season_code = switch_ces(seasontype)
    lik = cescalc(y=y, states=states, m=m,
                  season=season_code,
                  alpha_0=alpha_0, alpha_1=alpha_1,
                  beta_0=beta_0, beta_1=beta_1, e=e,
                  amse=amse, nmse=nmse, backfit=1)
    # values below -1e10 are clipped to -1e10
    if lik < -1e10:
        lik = -1e10
    # NaN and the -99999 sentinel are both mapped to -inf
    if math.isnan(lik) or math.fabs(lik + 99999) < 1e-7:
        lik = -np.inf
    return lik

In [None]:
#| exporti
def optimize_ces_target_fn(
        init_par, optimize_params, y, m, init_states,
        n_components, seasontype, nmse
    ):
    """Optimise the flagged parameters with nelder_mead.

    Parameters
    ----------
    init_par : dict with keys 'alpha_0', 'alpha_1', 'beta_0', 'beta_1'
        holding the starting (or fixed) value of each parameter.
    optimize_params : dict with the same keys; a truthy value marks the
        parameter as one to optimise.
    y, m, init_states, n_components, seasontype, nmse : forwarded unchanged
        to ces_target_fn through `args`.

    Returns
    -------
    The result object returned by nelder_mead, or None when no parameter is
    flagged for optimisation.
    """
    # (name, lower bound, upper bound), listed in the order in which
    # ces_target_fn unpacks the optimised vector.
    param_bounds = (
        ('alpha_0', 0.01, 1.8),
        ('alpha_1', 0.01, 1.9),
        ('beta_0', 0.01, 1.5),
        ('beta_1', 0.01, 1.5),
    )
    # Keep only the optimised parameters so that x0, lower and upper always
    # have the same length and refer to the same parameter at each position.
    # A fixed 4-element bounds array would shift the bounds whenever an
    # earlier parameter is held fixed.
    free = [entry for entry in param_bounds if optimize_params.get(entry[0])]
    if not free:
        return None
    x0 = np.array([init_par[name] for name, _, _ in free], dtype=np.float32)
    lower = np.array([low for _, low, _ in free])
    upper = np.array([high for _, _, high in free])

    init_alpha_0 = init_par['alpha_0']
    init_alpha_1 = init_par['alpha_1']
    init_beta_0 = init_par['beta_0']
    init_beta_1 = init_par['beta_1']

    opt_alpha_0 = optimize_params['alpha_0']
    opt_alpha_1 = optimize_params['alpha_1']
    opt_beta_0 = optimize_params['beta_0']
    opt_beta_1 = optimize_params['beta_1']

    res = nelder_mead(
        ces_target_fn, x0, 
        args=(init_alpha_0, init_alpha_1, init_beta_0, init_beta_1,
              opt_alpha_0, opt_alpha_1, opt_beta_0, opt_beta_1,
              y, m, init_states, n_components, seasontype, nmse),
        tol_std=1e-4, 
        lower=lower,
        upper=upper,
        max_iter=1_000,
        adaptive=True,
    )
    return res

In [None]:
#| exporti
def cesmodel(y: np.ndarray, m: int, 
             seasontype: str, 
             alpha_0: float, alpha_1: float,
             beta_0: float, beta_1: float, nmse: int):
    """Fit one model for a given season type and return a summary dict.

    Parameters passed as NaN are given starting values by initparamces and
    optimised through optimize_ces_target_fn; the others are held fixed. The
    model is then evaluated once with pegelsresid_ces.

    Returns a dict with keys loglik, aic, bic, aicc, mse, amse, fit,
    residuals, m, states, par, n and seasontype. `loglik` is -0.5 times the
    value returned by pegelsresid_ces, `mse` is the first entry of its amse
    vector and `amse` is that vector's mean.
    """
    if seasontype == 'N':
        m = 1
    # initial parameters -- passed by keyword so beta_0 and beta_1 cannot be
    # swapped relative to initparamces' signature
    par = initparamces(alpha_0=alpha_0, alpha_1=alpha_1,
                       beta_0=beta_0, beta_1=beta_1,
                       seasontype=seasontype)
    optimize_params = {key.replace('optimize_', ''): val for key, val in par.items() if 'optim' in key}
    par = {key: val for key, val in par.items() if 'optim' not in key}
    # initial states
    # NOTE(review): initstate is called with (y, m, seasontype); the initstate
    # visible in this notebook has a different signature -- confirm which
    # implementation is expected to be in scope here.
    init_state = initstate(y, m, seasontype)
    n_components = init_state.shape[1]
    # parameter optimization
    fred = optimize_ces_target_fn(
        init_par=par, optimize_params=optimize_params, y=y, m=m, init_states=init_state, 
        n_components=n_components, seasontype=seasontype, nmse=nmse
    )
    if fred is not None:
        # fred.x holds the optimised values in the order
        # alpha_0, alpha_1, beta_0, beta_1 (optimised parameters only)
        fit_par = fred.x
        j = 0
        for name in ('alpha_0', 'alpha_1', 'beta_0', 'beta_1'):
            if optimize_params[name]:
                par[name] = fit_par[j]
                j += 1
    
    amse, e, states, lik = pegelsresid_ces(
        y=y, m=m, init_states=init_state, 
        n_components=n_components, seasontype=seasontype,
        nmse=nmse, **par
    )
    # information criteria
    np_ = n_components + 1
    ny = len(y)
    aic = lik + 2 * np_
    bic = lik + np.log(ny) * np_
    if ny - np_ - 1 != 0.:
        aicc = aic + 2 * np_ * (np_ + 1) / (ny - np_ - 1)
    else:
        # the small-sample correction is undefined when its denominator is 0
        aicc = np.inf
    
    mse = amse[0]
    amse = np.mean(amse)
    
    return dict(loglik=-0.5 * lik, aic=aic, bic=bic, aicc=aicc,
                mse=mse, amse=amse, fit=fred, residuals=e,
                m=m, states=states, par=par, n=len(y), 
                seasontype=seasontype)

In [None]:
#| hide
# Fit a non-seasonal ('N') model on AirPassengers with every parameter left
# as NaN, i.e. to be optimised. The result is reused by a later cell.
res = cesmodel(
    y=ap, m=12, seasontype='N',
    alpha_0=np.nan,
    alpha_1=np.nan,
    beta_0=np.nan, 
    beta_1=np.nan,
    nmse=3
)

In [None]:
#| exporti
def pegelsfcast_C(h, obj, npaths=None, level=None, bootstrap=None):
    """Return `h` point forecasts for a fitted model dict.

    `obj` must provide the keys 'm', 'n', 'states', 'seasontype' and 'par'.
    A NaN-filled array of length `h` is passed to cesforecast as `f` and
    then returned. `npaths`, `level` and `bootstrap` are accepted but not
    used.
    """
    point_fcst = np.full(h, fill_value=np.nan)
    cesforecast(m=obj['m'], n=obj['n'], states=obj['states'],
                season=switch_ces(obj['seasontype']),
                h=h, f=point_fcst, **obj['par'])
    return point_fcst

In [None]:
#| exporti
def forecast_ces(obj, h):
    """Forecast `h` steps for the fitted model `obj`.

    Returns a dict whose single key 'mean' holds the array produced by
    pegelsfcast_C.
    """
    return {'mean': pegelsfcast_C(h, obj)}

In [None]:
# 12-step forecast from `res`, the model fitted in the cesmodel cell above.
forecast_ces(res, 12)

In [None]:
#| exporti
def auto_ces(y, m, model='Z', 
             alpha_0=None, alpha_1=None, 
             beta_0=None, beta_1=None,
             opt_crit='lik', nmse=3, 
             ic='aicc'):
    """Fit one or several season types with cesmodel and keep the best.

    Parameters
    ----------
    y : series to fit.
    m : seasonal period; the model is forced to 'N' when m <= 1 or when the
        series is not longer than m.
    model : 'N', 'S', 'P' or 'F' to fit that season type only, or 'Z' to try
        all four and keep the one with the lowest `ic`.
    alpha_0, alpha_1, beta_0, beta_1 : fixed parameter values; None means
        the parameter is passed on as NaN.
    opt_crit : accepted but not used in this function.
    nmse : forwarded to cesmodel; must lie in [1, 30].
    ic : key of the cesmodel result used to compare fits.

    Raises
    ------
    ValueError : `nmse` out of range or unknown `model`.
    NotImplementedError : the series has no more points than parameters.
    Exception : no fit produced a usable `ic` value.
    """
    # None -> NaN so the parameters are always floats for numba
    alpha_0 = np.nan if alpha_0 is None else alpha_0
    alpha_1 = np.nan if alpha_1 is None else alpha_1
    beta_0 = np.nan if beta_0 is None else beta_0
    beta_1 = np.nan if beta_1 is None else beta_1

    if nmse < 1 or nmse > 30:
        raise ValueError('nmse out of range')
    # refitting an existing model is not implemented yet
    if model not in ('Z', 'N', 'S', 'P', 'F'):
        raise ValueError('Invalid model type')

    non_seasonal = m < 1 or len(y) <= m or m == 1
    seasontype = 'N' if non_seasonal else model

    n = len(y)
    # two alphas always; one extra beta for 'P', two for 'F' (and 'Z')
    npars = 2 + {'P': 1, 'F': 2, 'Z': 2}.get(seasontype, 0)
    # ses for non-optimized tiny datasets
    if n <= npars:
        # we need HoltWintersZZ function
        raise NotImplementedError('tiny datasets')

    candidates = ['N', 'S', 'P', 'F'] if seasontype == 'Z' else seasontype
    best_fit = None
    best_ic = np.inf
    for stype in candidates:
        fit = cesmodel(y=y, m=m, seasontype=stype,
                       alpha_0=alpha_0, alpha_1=alpha_1,
                       beta_0=beta_0, beta_1=beta_1, nmse=nmse)
        fit_ic = fit[ic]
        # skip fits with a NaN criterion or one that is not an improvement
        if np.isnan(fit_ic) or not fit_ic < best_ic:
            continue
        best_fit = fit
        best_ic = fit_ic
    if np.isinf(best_ic):
        raise Exception('no model able to be fitted')
    return best_fit

In [None]:
#| hide
# Visual check: fit the full seasonal model ('F') with auto_ces and plot the
# series followed by its 12-step forecast.
import matplotlib.pyplot as plt
res = auto_ces(ap, m=12, model='F')
fcst = forecast_ces(res, 12)
# observed series on indices 0..len(ap)-1, forecast on the next 12 indices
plt.plot(np.arange(0, len(ap)), ap)
plt.plot(np.arange(len(ap), len(ap) + 12), fcst['mean'])