In [7]:
# Libraries
# ==============================================================================
import pandas as pd
import numpy as np
import statsmodels
import scipy
import cython
import pmdarima
from statsmodels.tsa.statespace.sarimax import SARIMAX
from pmdarima.arima import ARIMA
np.set_printoptions(precision=10)
pd.set_option("display.precision", 10)

print(f"Numpy version: {np.__version__}")
print(f"Statsmodels version: {statsmodels.__version__}")
print(f"Scipy version: {scipy.__version__}")
print(f"cython version: {cython.__version__}")
print(f"pdmarima version: {pmdarima.__version__}")

Numpy version: 1.23.5
Statsmodels version: 0.13.5
Scipy version: 1.8.1
cython version: 0.29.33
pdmarima version: 2.0.2


In [8]:
# Download data
# ==============================================================================
url = ('https://raw.githubusercontent.com/JoaquinAmatRodrigo/skforecast/master/data/h2o.csv')
data = pd.read_csv(url, sep=',', header=0, names=['y', 'datetime'])

# Data preprocessing
# ==============================================================================
data['datetime'] = pd.to_datetime(data['datetime'], format='%Y/%m/%d')
data = data.set_index('datetime')
data = data.asfreq('MS')
data = data['y']
data = data.sort_index()

# Train-test dates
# ==============================================================================
end_train = '2005-06-01 23:59:00'

print(f"Train dates : {data.index.min()} --- {data.loc[:end_train].index.max()}  (n={len(data.loc[:end_train])})")
print(f"Test dates  : {data.loc[end_train:].index.min()} --- {data.index.max()}  (n={len(data.loc[end_train:])})")


Train dates : 1991-07-01 00:00:00 --- 2005-06-01 00:00:00  (n=168)
Test dates  : 2005-07-01 00:00:00 --- 2008-06-01 00:00:00  (n=36)


## SARIMAX(endog = data, trend=None, order=(1, 0, 0))

In [9]:
# SARIMAX(endog = data, trend=None, order=(1, 0, 0))
# ======================================================================================
model = SARIMAX(endog = data, trend=None, order=(1, 0, 0))
model_fit = model.fit(
    disp = False,
    maxiter = 1000,
    start_params = [0.98257102, 0.02409318],
    method = 'nm', ftol = 1e-19,
    #method = 'lbfgs', pgtol = 1e-19
)
print(model.start_params)
print(model_fit.arparams)
print(model_fit.maparams)
print(model_fit.fittedvalues)

[0.9825710245 0.0240931825]
[0.979504292]
[]
datetime
1991-07-01    0.0000000000
1991-08-01    0.4209860472
1991-09-01    0.3926891477
1991-10-01    0.4233015953
1991-11-01    0.4824479825
                  ...     
2008-02-01    1.1949374455
2008-03-01    0.7462079187
2008-04-01    0.6361243699
2008-05-01    0.8109188698
2008-06-01    0.7995252759
Freq: MS, Length: 204, dtype: float64




## SARIMAX(endog = data, trend=None, order=(1, 1, 0))

In [10]:
# SARIMAX(endog = data, trend=None, order=(1, 1, 0))
# ======================================================================================
model = SARIMAX(endog = data, trend=None, order=(1, 1, 0))
model_fit = model.fit(
    disp=False,
    maxiter=1000,
    start_params=[-0.102745578, 0.024146424],
    method = 'nm', ftol = 1e-19,
    #method = 'lbfgs', pgtol = 1e-19
)
print(model.start_params)
print(model_fit.arparams)
print(model_fit.maparams)
print(model_fit.fittedvalues)

[-0.102745578  0.024146424]
[-0.1022599468]
[]
datetime
1991-07-01    0.0000000000
1991-08-01    0.4297949989
1991-09-01    0.4038601876
1991-10-01    0.4289630699
1991-11-01    0.4863681354
                  ...     
2008-02-01    1.2155078268
2008-03-01    0.8086692246
2008-04-01    0.6609276886
2008-05-01    0.8096385080
2008-06-01    0.8174444877
Freq: MS, Length: 204, dtype: float64


## SARIMAX(endog = data, trend=None, order=(1, 1, 1))

In [11]:
# SARIMAX(endog = data, trend=None, order=(1, 1, 1))
# ======================================================================================
model = SARIMAX(endog = data, trend=None, order=(1, 1, 1))
model_fit = model.fit(
    disp=False,
    maxiter=1000,
    start_params=[0.4875082624, -0.5989362163,0.0243603503],
    method = 'nm', ftol = 1e-19,
    #method = 'lbfgs', pgtol = 1e-19
    )
print(model.start_params)
print(model_fit.arparams)
print(model_fit.maparams)
print(model_fit.fittedvalues)

[ 0.4875082624 -0.5989362163  0.0243603503]
[0.6177132372]
[-0.9557200142]
datetime
1991-07-01    0.0000000000
1991-08-01    0.4297949980
1991-09-01    0.4063654072
1991-10-01    0.4298862871
1991-11-01    0.4760827058
                  ...     
2008-02-01    1.1274931215
2008-03-01    0.8283149805
2008-04-01    0.7509712113
2008-05-01    0.8646092162
2008-06-01    0.8552828441
Freq: MS, Length: 204, dtype: float64


## SARIMAX(endog = data, trend='t', order=(1, 1, 1))

In [12]:
# SARIMAX(endog = data, trend=None, order=(1, 1, 1))
# ======================================================================================
model = SARIMAX(endog = data, trend='t', order=(1, 1, 1))
model_fit = model.fit(
    disp=False,
    maxiter=1000,
    start_params=[ 0.0017420639,  0.4889999095, -0.6005869814,  0.0243575915],
    method = 'nm', ftol = 1e-19,
    #method = 'lbfgs', pgtol = 1e-19
    )
print(model.start_params)
print(model_fit.arparams)
print(model_fit.maparams)
print(model_fit.fittedvalues)

[-5.0512540678e-07  4.8750952742e-01 -5.9893656779e-01  2.4360346789e-02]
[0.6230455177]
[-0.9767430564]
datetime
1991-07-01    0.0000173640
1991-08-01    0.4298123621
1991-09-01    0.4063608818
1991-10-01    0.4299647832
1991-11-01    0.4762422798
                  ...     
2008-02-01    1.1525708038
2008-03-01    0.8593619961
2008-04-01    0.7857724962
2008-05-01    0.8992579770
2008-06-01    0.8914088057
Freq: MS, Length: 204, dtype: float64




## SARIMAX(endog = data, trend='c', order=(5, 1, 1))

In [13]:
model = SARIMAX(endog = data, trend='c', order=(5, 1, 1))
model_fit = model.fit(
    disp=False,
    maxiter=1000,
    start_params=[0.0021900556, -0.064092174, -0.0753559759, -0.0571171281, -0.1501473047, -0.1703502764, -0.0757780361, 0.0234484813],
    method = 'nm', ftol = 1e-19,
    #method = 'lbfgs', pgtol = 1e-19
)
print(model.start_params)
print(model_fit.arparams)
print(model_fit.maparams)
print(model_fit.fittedvalues)

[ 0.0021900556 -0.064092174  -0.0753559759 -0.0571171281 -0.1501473047
 -0.1703502764 -0.0757780361  0.0234484813]
[ 0.5212214715 -0.0115796151 -0.0182522661 -0.133766377  -0.2364905463]
[-0.9767836515]
datetime
1991-07-01    0.0024623379
1991-08-01    0.4322573368
1991-09-01    0.4065795367
1991-10-01    0.4334624015
1991-11-01    0.4876379688
                  ...     
2008-02-01    1.0752513473
2008-03-01    0.8236887871
2008-04-01    0.7533300582
2008-05-01    0.8510151391
2008-06-01    0.8973227134
Freq: MS, Length: 204, dtype: float64




## pmd.arima

In [15]:
model = ARIMA(
    order = (5, 1, 1),
    trend = 'c',
    maxiter = 1000,
    start_params=[0.0021900556, -0.064092174, -0.0753559759, -0.0571171281, -0.1501473047, -0.1703502764, -0.0757780361, 0.0234484813],
    method = 'nm',
    ftol = 1e-19,
)

model.fit(data)
print(model.start_params)
print(model.arparams)
print(model.maparams)
print(model.fittedvalues)

[0.0021900556, -0.064092174, -0.0753559759, -0.0571171281, -0.1501473047, -0.1703502764, -0.0757780361, 0.0234484813]
<bound method ARIMA.arparams of ARIMA(maxiter=1000, method='nm', order=(5, 1, 1),
      start_params=[0.0021900556, -0.064092174, -0.0753559759, -0.0571171281,
                    -0.1501473047, -0.1703502764, -0.0757780361, 0.0234484813],
      trend='c')>
<bound method ARIMA.maparams of ARIMA(maxiter=1000, method='nm', order=(5, 1, 1),
      start_params=[0.0021900556, -0.064092174, -0.0753559759, -0.0571171281,
                    -0.1501473047, -0.1703502764, -0.0757780361, 0.0234484813],
      trend='c')>
<bound method ARIMA.fittedvalues of ARIMA(maxiter=1000, method='nm', order=(5, 1, 1),
      start_params=[0.0021900556, -0.064092174, -0.0753559759, -0.0571171281,
                    -0.1501473047, -0.1703502764, -0.0757780361, 0.0234484813],
      trend='c')>




# JAVI

In [1]:
# Libraries
# ==============================================================================
import pandas as pd
import numpy as np
import statsmodels
import scipy
import cython
import pmdarima
from statsmodels.tsa.statespace.sarimax import SARIMAX
from pmdarima.arima import ARIMA
np.set_printoptions(precision=10)
pd.set_option("display.precision", 10)

print(f"Numpy version: {np.__version__}")
print(f"Statsmodels version: {statsmodels.__version__}")
print(f"Scipy version: {scipy.__version__}")
print(f"cython version: {cython.__version__}")
print(f"pdmarima version: {pmdarima.__version__}")

Numpy version: 1.23.5
Statsmodels version: 0.13.5
Scipy version: 1.8.1
cython version: 0.29.32
pdmarima version: 2.0.1


In [2]:
# Download data
# ==============================================================================
url = ('https://raw.githubusercontent.com/JoaquinAmatRodrigo/skforecast/master/data/h2o.csv')
data = pd.read_csv(url, sep=',', header=0, names=['y', 'datetime'])

# Data preprocessing
# ==============================================================================
data['datetime'] = pd.to_datetime(data['datetime'], format='%Y/%m/%d')
data = data.set_index('datetime')
data = data.asfreq('MS')
data = data['y']
data = data.sort_index()

# Train-test dates
# ==============================================================================
end_train = '2005-06-01 23:59:00'

print(f"Train dates : {data.index.min()} --- {data.loc[:end_train].index.max()}  (n={len(data.loc[:end_train])})")
print(f"Test dates  : {data.loc[end_train:].index.min()} --- {data.index.max()}  (n={len(data.loc[end_train:])})")


Train dates : 1991-07-01 00:00:00 --- 2005-06-01 00:00:00  (n=168)
Test dates  : 2005-07-01 00:00:00 --- 2008-06-01 00:00:00  (n=36)


## SARIMAX(endog = data, trend=None, order=(1, 0, 0))

In [None]:
[0.9825710245 0.0240931825]
[0.979504292]
[]
datetime
1991-07-01    0.0000000000
1991-08-01    0.4209860472
1991-09-01    0.3926891477
1991-10-01    0.4233015953
1991-11-01    0.4824479825
                  ...     
2008-02-01    1.1949374455
2008-03-01    0.7462079187
2008-04-01    0.6361243699
2008-05-01    0.8109188698
2008-06-01    0.7995252759
Freq: MS, Length: 204, dtype: float64

In [3]:
# SARIMAX(endog = data, trend=None, order=(1, 0, 0))
# ======================================================================================
model = SARIMAX(endog = data, trend=None, order=(1, 0, 0))
model_fit = model.fit(
    disp = False,
    maxiter = 1000,
    start_params = [0.98257102, 0.02409318],
    method = 'nm', ftol = 1e-19,
    #method = 'lbfgs', pgtol = 1e-19
)
print(model.start_params)
print(model_fit.arparams)
print(model_fit.maparams)
print(model_fit.fittedvalues)

[0.9825710245 0.0240931825]
[0.979504292]
[]
datetime
1991-07-01    0.0000000000
1991-08-01    0.4209860472
1991-09-01    0.3926891477
1991-10-01    0.4233015953
1991-11-01    0.4824479825
                  ...     
2008-02-01    1.1949374455
2008-03-01    0.7462079187
2008-04-01    0.6361243699
2008-05-01    0.8109188698
2008-06-01    0.7995252759
Freq: MS, Length: 204, dtype: float64




## SARIMAX(endog = data, trend=None, order=(1, 1, 0))

In [None]:
[-0.102745578  0.024146424]
[-0.1022599468]
[]
datetime
1991-07-01    0.0000000000
1991-08-01    0.4297949989
1991-09-01    0.4038601876
1991-10-01    0.4289630699
1991-11-01    0.4863681354
                  ...     
2008-02-01    1.2155078268
2008-03-01    0.8086692246
2008-04-01    0.6609276886
2008-05-01    0.8096385080
2008-06-01    0.8174444877

In [4]:
# SARIMAX(endog = data, trend=None, order=(1, 1, 0))
# ======================================================================================
model = SARIMAX(endog = data, trend=None, order=(1, 1, 0))
model_fit = model.fit(
    disp=False,
    maxiter=1000,
    start_params=[-0.102745578, 0.024146424],
    method = 'nm', ftol = 1e-19,
    #method = 'lbfgs', pgtol = 1e-19
)
print(model.start_params)
print(model_fit.arparams)
print(model_fit.maparams)
print(model_fit.fittedvalues)

[-0.102745578  0.024146424]
[-0.1022599468]
[]
datetime
1991-07-01    0.0000000000
1991-08-01    0.4297949989
1991-09-01    0.4038601876
1991-10-01    0.4289630699
1991-11-01    0.4863681354
                  ...     
2008-02-01    1.2155078268
2008-03-01    0.8086692246
2008-04-01    0.6609276886
2008-05-01    0.8096385080
2008-06-01    0.8174444877
Freq: MS, Length: 204, dtype: float64


## SARIMAX(endog = data, trend=None, order=(1, 1, 1))

In [None]:
[ 0.4875082624 -0.5989362163  0.0243603503]
[0.6177132372]
[-0.9557200142]
datetime
1991-07-01    0.0000000000
1991-08-01    0.4297949980
1991-09-01    0.4063654072
1991-10-01    0.4298862871
1991-11-01    0.4760827058
                  ...     
2008-02-01    1.1274931215
2008-03-01    0.8283149805
2008-04-01    0.7509712113
2008-05-01    0.8646092162
2008-06-01    0.8552828441
Freq: MS, Length: 204, dtype: float64

In [5]:
# SARIMAX(endog = data, trend=None, order=(1, 1, 1))
# ======================================================================================
model = SARIMAX(endog = data, trend=None, order=(1, 1, 1))
model_fit = model.fit(
    disp=False,
    maxiter=1000,
    start_params=[0.4875082624, -0.5989362163,0.0243603503],
    method = 'nm', ftol = 1e-19,
    #method = 'lbfgs', pgtol = 1e-19
    )
print(model.start_params)
print(model_fit.arparams)
print(model_fit.maparams)
print(model_fit.fittedvalues)

[ 0.4875082624 -0.5989362163  0.0243603503]
[0.6177132372]
[-0.9557200142]
datetime
1991-07-01    0.0000000000
1991-08-01    0.4297949980
1991-09-01    0.4063654072
1991-10-01    0.4298862871
1991-11-01    0.4760827058
                  ...     
2008-02-01    1.1274931215
2008-03-01    0.8283149805
2008-04-01    0.7509712113
2008-05-01    0.8646092162
2008-06-01    0.8552828441
Freq: MS, Length: 204, dtype: float64


## SARIMAX(endog = data, trend='t', order=(1, 1, 1))

In [None]:
[-5.0512540678e-07  4.8750952742e-01 -5.9893656779e-01  2.4360346789e-02]
[0.6230455177]
[-0.9767430564]
datetime
1991-07-01    0.0000173640
1991-08-01    0.4298123621
1991-09-01    0.4063608818
1991-10-01    0.4299647832
1991-11-01    0.4762422798
                  ...     
2008-02-01    1.1525708038
2008-03-01    0.8593619961
2008-04-01    0.7857724962
2008-05-01    0.8992579770
2008-06-01    0.8914088057
Freq: MS, Length: 204, dtype: float64

In [6]:
# SARIMAX(endog = data, trend=None, order=(1, 1, 1))
# ======================================================================================
model = SARIMAX(endog = data, trend='t', order=(1, 1, 1))
model_fit = model.fit(
    disp=False,
    maxiter=1000,
    start_params=[ 0.0017420639,  0.4889999095, -0.6005869814,  0.0243575915],
    method = 'nm', ftol = 1e-19,
    #method = 'lbfgs', pgtol = 1e-19
    )
print(model.start_params)
print(model_fit.arparams)
print(model_fit.maparams)
print(model_fit.fittedvalues)

[-5.0512540678e-07  4.8750952742e-01 -5.9893656779e-01  2.4360346789e-02]
[0.6230455356]
[-0.9767430577]
datetime
1991-07-01    0.0000173640
1991-08-01    0.4298123621
1991-09-01    0.4063608815
1991-10-01    0.4299647834
1991-11-01    0.4762422807
                  ...     
2008-02-01    1.1525708083
2008-03-01    0.8593619929
2008-04-01    0.7857724912
2008-05-01    0.8992579753
2008-06-01    0.8914088039
Freq: MS, Length: 204, dtype: float64


## SARIMAX(endog = data, trend='c', order=(5, 1, 1))

In [None]:
[ 0.0021900556 -0.064092174  -0.0753559759 -0.0571171281 -0.1501473047
 -0.1703502764 -0.0757780361  0.0234484813]
[ 0.5212214715 -0.0115796151 -0.0182522661 -0.133766377  -0.2364905463]
[-0.9767836515]
datetime
1991-07-01    0.0024623379
1991-08-01    0.4322573368
1991-09-01    0.4065795367
1991-10-01    0.4334624015
1991-11-01    0.4876379688
                  ...     
2008-02-01    1.0752513473
2008-03-01    0.8236887871
2008-04-01    0.7533300582
2008-05-01    0.8510151391
2008-06-01    0.8973227134
Freq: MS, Length: 204, dtype: float64

In [7]:
model = SARIMAX(endog = data, trend='c', order=(5, 1, 1))
model_fit = model.fit(
    disp=False,
    maxiter=1000,
    start_params=[0.0021900556, -0.064092174, -0.0753559759, -0.0571171281, -0.1501473047, -0.1703502764, -0.0757780361, 0.0234484813],
    method = 'nm', ftol = 1e-19,
    #method = 'lbfgs', pgtol = 1e-19
)
print(model.start_params)
print(model_fit.arparams)
print(model_fit.maparams)
print(model_fit.fittedvalues)

[ 0.0021900556 -0.064092174  -0.0753559759 -0.0571171281 -0.1501473047
 -0.1703502764 -0.0757780361  0.0234484813]
[ 0.5212214715 -0.0115796151 -0.0182522661 -0.133766377  -0.2364905463]
[-0.9767836515]
datetime
1991-07-01    0.0024623379
1991-08-01    0.4322573368
1991-09-01    0.4065795367
1991-10-01    0.4334624015
1991-11-01    0.4876379688
                  ...     
2008-02-01    1.0752513473
2008-03-01    0.8236887871
2008-04-01    0.7533300582
2008-05-01    0.8510151391
2008-06-01    0.8973227134
Freq: MS, Length: 204, dtype: float64




## pmd.arima

In [None]:
[0.0021900556, -0.064092174, -0.0753559759, -0.0571171281, -0.1501473047, -0.1703502764, -0.0757780361, 0.0234484813]
<bound method ARIMA.arparams of ARIMA(maxiter=1000, method='nm', order=(5, 1, 1),
      start_params=[0.0021900556, -0.064092174, -0.0753559759, -0.0571171281,
                    -0.1501473047, -0.1703502764, -0.0757780361, 0.0234484813],
      trend='c')>
<bound method ARIMA.maparams of ARIMA(maxiter=1000, method='nm', order=(5, 1, 1),
      start_params=[0.0021900556, -0.064092174, -0.0753559759, -0.0571171281,
                    -0.1501473047, -0.1703502764, -0.0757780361, 0.0234484813],
      trend='c')>
<bound method ARIMA.fittedvalues of ARIMA(maxiter=1000, method='nm', order=(5, 1, 1),
      start_params=[0.0021900556, -0.064092174, -0.0753559759, -0.0571171281,
                    -0.1501473047, -0.1703502764, -0.0757780361, 0.0234484813],
      trend='c')>

In [8]:
model = ARIMA(
    order = (5, 1, 1),
    trend = 'c',
    maxiter = 1000,
    start_params=[0.0021900556, -0.064092174, -0.0753559759, -0.0571171281, -0.1501473047, -0.1703502764, -0.0757780361, 0.0234484813],
    method = 'nm',
    ftol = 1e-19,
)

model.fit(data)
print(model.start_params)
print(model.arparams)
print(model.maparams)
print(model.fittedvalues)

[0.0021900556, -0.064092174, -0.0753559759, -0.0571171281, -0.1501473047, -0.1703502764, -0.0757780361, 0.0234484813]
<bound method ARIMA.arparams of ARIMA(maxiter=1000, method='nm', order=(5, 1, 1),
      start_params=[0.0021900556, -0.064092174, -0.0753559759, -0.0571171281,
                    -0.1501473047, -0.1703502764, -0.0757780361, 0.0234484813],
      trend='c')>
<bound method ARIMA.maparams of ARIMA(maxiter=1000, method='nm', order=(5, 1, 1),
      start_params=[0.0021900556, -0.064092174, -0.0753559759, -0.0571171281,
                    -0.1501473047, -0.1703502764, -0.0757780361, 0.0234484813],
      trend='c')>
<bound method ARIMA.fittedvalues of ARIMA(maxiter=1000, method='nm', order=(5, 1, 1),
      start_params=[0.0021900556, -0.064092174, -0.0753559759, -0.0571171281,
                    -0.1501473047, -0.1703502764, -0.0757780361, 0.0234484813],
      trend='c')>


