In [1]:
import pmdarima.arima
from pmdarima.arima import auto_arima

import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
from statsmodels.tsa.statespace.sarimax import SARIMAX

from data.utils import get_week_range_df, to_week_range
from models.ar_model import ARModelSpecification, create_ar_models_report
from models.persistence_model import create_persistence

# Global set-up (applied once; the same three statements used to be pasted twice).
# Turn off pandas' chained-assignment (SettingWithCopy) warning for the whole notebook.
pd.options.mode.chained_assignment = None  # default='warn'
# Draw a grid on every matplotlib axes and use seaborn's "whitegrid" theme.
plt.rcParams['axes.grid'] = True
sns.set_style("whitegrid")

In [2]:
# Column of DF that holds the observed series; it is what gets passed to auto_arima as `y`.
GROUND_TRUTH_COLUMN = 'Disease Rate'

# Persistence baseline: both values are handed to create_persistence when DF is built.
# presumably BASELINE_SHIFT is the number of weeks the series is shifted by — TODO confirm
PERSISTENCE_COL_NAME = 'Persistence'
BASELINE_SHIFT = 1

# Not referenced by the cells visible here.
# presumably the number of autoregressive lags for the AR models — TODO confirm
LAGS = 20

In [3]:
# Load the weekly data and attach the persistence-baseline column in one step.
# NOTE(review): 'week range' is presumably the name of the index/column carrying the
# week labels — confirm against data.utils.get_week_range_df.
# The keyword `persistance_col_name` (sic) is spelled the way this notebook already
# calls the helper; keep it in sync with models.persistence_model.
DF = create_persistence(
    get_week_range_df('week range'),
    BASELINE_SHIFT,
    persistance_col_name=PERSISTENCE_COL_NAME,
)

In [4]:
# Single train/test split as (start, end) bounds; these are the same bounds as the
# first entries of TRAIN_INTERVALS and TEST_INTERVALS defined further down.
# presumably to_week_range(year, week) returns the label of that week — TODO confirm
train_interval = (to_week_range(2004, 2), to_week_range(2008, 52))
# NOTE(review): this one is a list while train_interval is a tuple — harmless for
# unpacking or indexing, but inconsistent; confirm nothing depends on the list type.
test_interval = [to_week_range(2009, 1), to_week_range(2009, 52)]

In [5]:
# Rolling training windows: each one starts at (first_year, first_week) and ends at
# week 52 four calendar years later, giving one window per start year 2004..2013.
# NOTE(review): the 2005 window starts at week 1 while every other one starts at
# week 2 — confirm this is intentional.
TRAIN_INTERVALS = [
    (to_week_range(first_year, first_week), to_week_range(first_year + 4, 52))
    for first_year, first_week in [
        (2004, 2), (2005, 1), (2006, 2), (2007, 2), (2008, 2),
        (2009, 2), (2010, 2), (2011, 2), (2012, 2), (2013, 2),
    ]
]
print('# training windows = %d' % len(TRAIN_INTERVALS))

# Matching test windows: the calendar year right after each training window,
# from its first listed week up to week 52.
# NOTE(review): 2009 and 2010 start at week 1, the remaining years at week 2 — confirm.
TEST_INTERVALS = [
    (to_week_range(year, first_week), to_week_range(year, 52))
    for year, first_week in [
        (2009, 1), (2010, 1), (2011, 2), (2012, 2), (2013, 2),
        (2014, 2), (2015, 2), (2016, 2), (2017, 2), (2018, 2),
    ]
]
print('# testing windows = %d' % len(TEST_INTERVALS))

# training windows = 10
# testing windows = 10


In [31]:
def try_intervals(output_path, test_type, start_p, start_q, max_p, max_q, d, seasonality, start_P, start_Q, max_P, max_Q, D, trace, error_action, stepwise, n_fits, n_jobs, maxiter, m=52):
    """Run ``auto_arima`` on every window in ``TRAIN_INTERVALS`` and log the output to a file.

    Everything printed while the searches run (including whatever ``auto_arima``
    prints through ``sys.stdout`` when ``trace`` is enabled) is redirected into
    ``output_path``. After each window the line ``"<begin>  --->  <end>"`` is
    written, so a window's trace appears *before* its label in the file.

    Parameters
    ----------
    output_path : str or path-like
        File to write the log to. It is opened in ``'w'`` mode (an existing file
        is overwritten); its parent directory is created if it does not exist.
    test_type : str
        Forwarded to ``auto_arima`` as ``test``.
    start_p, start_q, max_p, max_q, d : int
        Forwarded to ``auto_arima`` under the same names.
    seasonality : bool
        Forwarded to ``auto_arima`` as ``seasonal``. When truthy, a
        ``Seasonality:<value>`` header line is written first.
    start_P, start_Q, max_P, max_Q, D : int or None
        Forwarded to ``auto_arima`` under the same names.
    trace, error_action, stepwise, n_fits, n_jobs, maxiter
        Forwarded to ``auto_arima`` under the same names.
    m : int, default 52
        Forwarded to ``auto_arima`` as ``m``. The default keeps the value that
        used to be hard-coded.

    Returns
    -------
    None
        The fitted models are discarded; the log file is the only result.

    Notes
    -----
    Reads the notebook globals ``DF``, ``GROUND_TRUTH_COLUMN`` and
    ``TRAIN_INTERVALS``. ``suppress_warnings=True`` is always passed.
    """
    import contextlib
    from pathlib import Path

    # Make sure e.g. 'outputs/' exists so that open() below cannot fail on a fresh checkout.
    Path(output_path).parent.mkdir(parents=True, exist_ok=True)

    # NOTE(review): redirect_stdout only replaces sys.stdout in this process. If
    # auto_arima emits its trace from worker processes when n_jobs > 1, those lines
    # may not reach the file — confirm with the pmdarima version in use.
    with open(output_path, 'w') as log_file, contextlib.redirect_stdout(log_file):
        if seasonality:
            # `seasonality` is a boolean flag; concatenating it to a str directly
            # raised TypeError, so convert it explicitly.
            print('Seasonality:' + str(seasonality))
        print('Start INTERVALS: \n\n\n')
        for begin, end in TRAIN_INTERVALS:
            auto_arima(y=DF[GROUND_TRUTH_COLUMN][begin:end], start_p=start_p, start_q=start_q,
                       test=test_type,
                       max_p=max_p, max_q=max_q,
                       seasonal=seasonality,
                       m=m,
                       d=d,
                       start_P=start_P,
                       max_P=max_P,
                       start_Q=start_Q,
                       max_Q=max_Q,
                       D=D,
                       trace=trace,
                       error_action=error_action,
                       suppress_warnings=True,
                       stepwise=stepwise,
                       n_fits=n_fits,
                       n_jobs=n_jobs,
                       maxiter=maxiter)
            print(str(begin) + "  --->  " + str(end))
            print('\n\n')
        print('End INTERVALS.')

In [35]:
# Stationarity tests handed to auto_arima through its `test` argument.
# TODO: are these the only two we want to try?
test_types = ['adf', 'kpss']

# Search settings shared by every run (they do not depend on the test type).
# Author's note: with stepwise=False the search brute-forces the combinations of
# p, q and P, Q up to these maxima, so no additional outer loops are needed.
max_p = 10
max_q = 10
max_P = 10
max_Q = 10
seasonality = False
d = 1
D = 1

# NOTE(review): `d` is fixed at 1 here. If auto_arima only consults `test` when it
# has to estimate d itself, both iterations run the same search — confirm in the
# pmdarima documentation.
for test in test_types:
    # One log file per test type; the name encodes the orders that were searched.
    if seasonality:
        path = 'outputs/output_seasonal_{test}_({max_p},{d},{max_q})_s({max_P}, {D}, {max_Q}).txt'.format(
            test=test, max_p=max_p, d=d, max_q=max_q, max_P=max_P, D=D, max_Q=max_Q)
    else:
        path = 'outputs/output_{test}_({max_p},{d},{max_q}).txt'.format(
            test=test, max_p=max_p, d=d, max_q=max_q)
    try_intervals(
        path, test,
        start_p=1, start_q=1, max_p=max_p, max_q=max_q, d=d,
        seasonality=seasonality,
        start_P=1, start_Q=1, max_P=max_P, max_Q=max_Q, D=D,
        trace=True, error_action='ignore', stepwise=False,
        n_fits=50, n_jobs=5, maxiter=400,
    )



In [12]:
# Example: one auto_arima search on a single training window.
#
# `begin` and `end` were not defined at notebook scope in any visible cell (they are
# only loop variables inside try_intervals), so this cell raised NameError on
# Restart & Run All. They are bound explicitly here.
# NOTE(review): using train_interval is an assumption — substitute another
# (begin, end) pair, e.g. an entry of TRAIN_INTERVALS, if a different window was meant.
begin, end = train_interval

# `seasonality` is the flag set in the test-type search cell above.
# `test` is left at auto_arima's default (an explicit test='adf' was tried and disabled).
model = auto_arima(y=DF[GROUND_TRUTH_COLUMN][begin:end], start_p=5, start_q=5,
                   max_p=12, max_q=12,
                   seasonal=seasonality,
                   m=52,
                   d=0,
                   start_P=0,
                   max_P=10,
                   start_Q=1,
                   max_Q=10,
                   D=None,
                   trace=True,
                   error_action='ignore',
                   suppress_warnings=True,
                   stepwise=False,
                   n_fits=50,
                   n_jobs=5,
                   maxiter=400)

