# Build ARMA models

This notebook tunes ARMA models over multiple train/test splits and selects the $(p, q)$ order with the lowest error across all folds.

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import sys
sys.path.append('../script/')

from loading import load_data    
from functions import mean_absolute_percentage_error, normalized_root_mean_squared_error
from feature_functions import create_date_features
from model_functions import create_local_train_test
from model_functions import build_arma_model, pred_arma_train, pred_arma_test, build_arima_model
import statsmodels.api as sm
from model_functions import get_df_arma

%load_ext autoreload
%autoreload 2

# Load the raw frame, expose the 'puissance' column under the name 'y',
# and discard the record stamped 2015-09-13 00:00:00.
data = (
    load_data()
    .rename(columns={'puissance': 'y'})
    .loc[lambda frame: frame['date'] != '2015-09-13 00:00:00']
    .reset_index(drop=True)
)

# Split on the 'type' flag; the flag column itself is dropped from both parts.
is_train_row = data['type'] == 'train'
is_test_row = data['type'] == 'test'
train = data[is_train_row].drop('type', axis=1)
test = data[is_test_row].drop('type', axis=1)

# The first test row is appended to the end of the training frame.
train = pd.concat([train, test.iloc[[0]]])

# Lag-1 first difference of the target.
train['diff_y_h'] = train['y'].diff()

  from pandas.core import datetools


In [5]:
def evaluate_model(n_test, train, test_size, target, params, maxiter=50):
    """Score one model order on ``n_test`` hold-out folds taken from the end of ``train``.

    Fold ``i`` holds out the ``test_size`` rows that end ``test_size * i`` rows
    before the end of ``train`` and fits on every row preceding that window.
    Fold 0 is therefore the most recent window and each later fold steps one
    window further back.

    Parameters
    ----------
    n_test : int
        Number of folds to evaluate.
    train : pandas.DataFrame
        Frame that is split positionally into local train/test parts. It must
        contain a ``'y'`` column, which is what the metrics are computed against.
    test_size : int
        Number of rows in each hold-out window.
    target : str
        Forwarded to ``build_arima_model`` and ``pred_arma_test``.
    params : tuple
        Forwarded to ``build_arima_model`` as ``params``.
    maxiter : int, default 50
        Forwarded to ``build_arima_model`` as ``maxiter``.

    Returns
    -------
    tuple of (list, list)
        ``(mape_errors, nrmse_errors)`` with one entry per fold, fold 0 first.

    Raises
    ------
    ValueError
        If ``test_size`` is smaller than 1, or if the requested folds would
        leave no training rows for the earliest fold.
    """
    train_size = train.shape[0]

    if n_test > 0:
        if test_size < 1:
            raise ValueError("test_size must be at least 1, got %d" % test_size)
        # Without this check a zero/negative start position would be read by
        # iloc as "count from the end" and silently produce wrong folds.
        if n_test * test_size >= train_size:
            raise ValueError(
                "%d folds of %d rows do not fit in %d rows with training data left over"
                % (n_test, test_size, train_size)
            )

    mape_errors = []
    nrmse_errors = []
    for i in range(n_test):
        start_test = train_size - test_size * (i + 1)
        end_test = start_test + test_size
        # Copies so the prediction column never leaks into the caller's frame.
        local_train = train.iloc[:start_test].copy()
        local_test = train.iloc[start_test:end_test].copy()

        model = build_arima_model(local_train, target, params=params, adfuller_test=False, maxiter=maxiter)
        # The in-sample predictions are stored before pred_arma_test runs because
        # that helper receives local_train and may read the column.
        local_train['y_hat_arma'] = pred_arma_train(model, local_train, local_test)
        local_test['y_hat_arma'] = pred_arma_test(model, local_train, local_test, target)

        # NOTE(review): metrics are taken against 'y' even when `target` is a
        # different column; presumably pred_arma_test returns predictions on
        # the 'y' scale -- confirm.
        mape_errors.append(mean_absolute_percentage_error(local_test['y'], local_test['y_hat_arma']))
        nrmse_errors.append(normalized_root_mean_squared_error(local_test['y'], local_test['y_hat_arma']))
    return mape_errors, nrmse_errors

In [None]:
# Fold configuration for the grid search.
# NOTE(review): test_size is one row shorter than `test`; presumably because
# the first test row was appended to `train` earlier -- confirm.
test_size = test.shape[0] - 1
train_size = train.shape[0]
n_test = 5
maxiter = 100
target = 'diff_y_h'

# Per-fold errors keyed by the (p, d, q) order that produced them.
mape_errors = {}
nrmse_errors = {}
for q in range(22, 27):
    for d in range(1):
        for p in range(22, 27):
            # An order with neither AR nor MA terms is skipped.
            if p == 0 and q == 0:
                continue
            params = (p, d, q)
            try:
                print("Computing ARIMA for p=%d, d=%d, q=%d\n" % params)
                mape_error, nrmse_error = evaluate_model(n_test, train, test_size, target, params, maxiter=maxiter)
                mape_errors[p, d, q] = mape_error
                nrmse_errors[p, d, q] = nrmse_error
                print('MAPE: %.2f%%' % (np.mean(mape_error)))
                print('NRMSE: %.2f%%' % (np.mean(nrmse_error)))
            except ValueError as err:
                # Report why the order failed; without the reason a run where
                # every order fails cannot be diagnosed from the output.
                print("Can't compute for p=%d, d=%d, q=%d: %s" % (params + (err,)))
            print()

Computing ARIMA for p=22, d=0, q=22

ADF Statistic: -17.374665
p-value: 0.000000
Critical Values:
	1%: -3.431
	5%: -2.862
	10%: -2.567
Loading...


  if issubdtype(paramsdtype, float):


Can't compute for p=22, d=0, q=22

Computing ARIMA for p=23, d=0, q=22

ADF Statistic: -17.374665
p-value: 0.000000
Critical Values:
	1%: -3.431
	5%: -2.862
	10%: -2.567
Loading...
Can't compute for p=23, d=0, q=22

Computing ARIMA for p=24, d=0, q=22

ADF Statistic: -17.374665
p-value: 0.000000
Critical Values:
	1%: -3.431
	5%: -2.862
	10%: -2.567
Loading...
Can't compute for p=24, d=0, q=22

Computing ARIMA for p=25, d=0, q=22

ADF Statistic: -17.374665
p-value: 0.000000
Critical Values:
	1%: -3.431
	5%: -2.862
	10%: -2.567
Can't compute for p=25, d=0, q=22

Computing ARIMA for p=26, d=0, q=22

ADF Statistic: -17.374665
p-value: 0.000000
Critical Values:
	1%: -3.431
	5%: -2.862
	10%: -2.567
Can't compute for p=26, d=0, q=22

Computing ARIMA for p=22, d=0, q=23

ADF Statistic: -17.374665
p-value: 0.000000
Critical Values:
	1%: -3.431
	5%: -2.862
	10%: -2.567


  if issubdtype(paramsdtype, float):
  elif issubdtype(paramsdtype, complex):


         Current function value: 4.756995
         Iterations: 100
         Function evaluations: 108
         Gradient evaluations: 108




Can't compute for p=22, d=0, q=23

Computing ARIMA for p=23, d=0, q=23

ADF Statistic: -17.374665
p-value: 0.000000
Critical Values:
	1%: -3.431
	5%: -2.862
	10%: -2.567
         Current function value: 4.709322
         Iterations: 100
         Function evaluations: 107
         Gradient evaluations: 107




Can't compute for p=23, d=0, q=23

Computing ARIMA for p=24, d=0, q=23

ADF Statistic: -17.374665
p-value: 0.000000
Critical Values:
	1%: -3.431
	5%: -2.862
	10%: -2.567
         Current function value: 4.709157
         Iterations: 100
         Function evaluations: 105
         Gradient evaluations: 105




Can't compute for p=24, d=0, q=23

Computing ARIMA for p=25, d=0, q=23

ADF Statistic: -17.374665
p-value: 0.000000
Critical Values:
	1%: -3.431
	5%: -2.862
	10%: -2.567
Can't compute for p=25, d=0, q=23

Computing ARIMA for p=26, d=0, q=23

ADF Statistic: -17.374665
p-value: 0.000000
Critical Values:
	1%: -3.431
	5%: -2.862
	10%: -2.567
Can't compute for p=26, d=0, q=23

Computing ARIMA for p=22, d=0, q=24

ADF Statistic: -17.374665
p-value: 0.000000
Critical Values:
	1%: -3.431
	5%: -2.862
	10%: -2.567
         Current function value: 4.756586
         Iterations: 100
         Function evaluations: 107
         Gradient evaluations: 107


  return np.sqrt(np.diag(-inv(hess)))
  return (self.a < x) & (x < self.b)
  return (self.a < x) & (x < self.b)
  cond2 = cond0 & (x <= self.a)


Can't compute for p=22, d=0, q=24

Computing ARIMA for p=23, d=0, q=24

ADF Statistic: -17.374665
p-value: 0.000000
Critical Values:
	1%: -3.431
	5%: -2.862
	10%: -2.567
Can't compute for p=23, d=0, q=24

Computing ARIMA for p=24, d=0, q=24

ADF Statistic: -17.374665
p-value: 0.000000
Critical Values:
	1%: -3.431
	5%: -2.862
	10%: -2.567
Loading...
Can't compute for p=24, d=0, q=24

Computing ARIMA for p=25, d=0, q=24

ADF Statistic: -17.374665
p-value: 0.000000
Critical Values:
	1%: -3.431
	5%: -2.862
	10%: -2.567
         Current function value: 4.701789
         Iterations: 100
         Function evaluations: 104
         Gradient evaluations: 104




Can't compute for p=25, d=0, q=24

Computing ARIMA for p=26, d=0, q=24

ADF Statistic: -17.374665
p-value: 0.000000
Critical Values:
	1%: -3.431
	5%: -2.862
	10%: -2.567
