# Optimización de Parámetros - ARIMA (uno)

In [None]:
## Get current work directory to load all the custom modules
from pathlib import Path
import sys
# Resolve the directory three levels above the current working directory and
# register it on sys.path (only once), presumably so the project packages
# imported further down can be found.
BASE_DIR = Path().absolute().resolve(strict=True).parent.parent.parent
if not any(entry == str(BASE_DIR) for entry in sys.path):
    sys.path.append(str(BASE_DIR))

In [None]:
# `os` is not imported by any earlier cell; without this import the cell (and
# every later `os.path.join` call in the notebook) fails with NameError.
import os

# Data folder for this notebook: the input CSV is read from it and the result
# file is written below it.
DATA_PATH = os.path.join(BASE_DIR, 'data', 'autos')
print(DATA_PATH)

In [None]:
import pandas as pd
import numpy as np
from sklearn.impute import SimpleImputer

In [None]:
import statsmodels.api as sm
# from statsmodels.tsa.arima_model import ARIMA
from statsmodels.tsa.arima.model import ARIMA
from sklearn.metrics import r2_score
from matplotlib import pyplot as plt
import optuna
import datetime

In [None]:

from helpers.general import  print_forecasting_results
from sklearn.model_selection import cross_val_score
from modulos.arima.gruas.general import format_timeseries, show_results_r2, arima_forecasting, total_forecasting, show_optimizer_results

In [None]:
# Load the product CSV from the data folder, parsing 'Periodo' as dates,
# and preview the first rows.
csv_path = os.path.join(DATA_PATH, 'producto.csv')
data = pd.read_csv(csv_path, parse_dates=['Periodo'])
data.head()

In [None]:
# Display the first 'Periodo' value (label 0) -- presumably a sanity check
# that `parse_dates` produced timestamps; confirm intent with the author.
data['Periodo'][0]

In [None]:
# Pivot the tables and fill sales that do not exist with zeros
# (as described by the original note; the work happens in `format_timeseries`).
df_time = format_timeseries(data)
df_time.tail()

## Optimizando un solo Producto

## Baseline ARIMA

In [None]:
# Print the frame summary (assuming df_time is a pandas DataFrame: index,
# columns, dtypes, memory usage).
df_time.info()

In [None]:
from sklearn.base import BaseEstimator, RegressorMixin
import statsmodels.api as sm
class SMWrapper(BaseEstimator, RegressorMixin):
    """Scikit-learn style wrapper around a statsmodels regressor class.

    Parameters
    ----------
    model_class : callable
        Statsmodels model class; instantiated in ``fit`` as
        ``model_class(y, X)``.
    fit_intercept : bool, default=True
        When true, a constant column is added to ``X`` in both ``fit`` and
        ``predict``. Pass ``False`` if ``X`` already carries its own constant
        column, otherwise the design matrix ends up with two of them.

    Attributes
    ----------
    model_
        The instantiated statsmodels model (set by ``fit``).
    results_
        The object returned by ``model_.fit()`` (set by ``fit``).
    """

    def __init__(self, model_class, fit_intercept=True):
        self.model_class = model_class
        self.fit_intercept = fit_intercept

    def _design(self, X):
        """Return ``X`` with the intercept column added when requested."""
        if not self.fit_intercept:
            return X
        # add_constant defaults to has_constant='skip', which adds nothing
        # when X already *looks* constant -- e.g. a single-row X passed to
        # predict. 'add' forces the column, so fit and predict always build
        # the same design.
        return sm.add_constant(X, has_constant='add')

    def fit(self, X, y):
        """Instantiate ``model_class(y, X)``, fit it and return ``self``."""
        self.model_ = self.model_class(y, self._design(X))
        self.results_ = self.model_.fit()
        return self

    def predict(self, X):
        """Return ``results_.predict`` evaluated on the (augmented) ``X``."""
        return self.results_.predict(self._design(X))

In [None]:
   
# Article (column of df_time) used for the baseline run.
idArticulo = 4827

# Baseline: `total_forecasting` is called without explicit order arguments,
# i.e. with whatever defaults the helper defines.
result = total_forecasting(df_time[[idArticulo]])
def arima_forecasting(data, ar=2, ii=1, ma=2):
    """Fit an ARIMA model with order ``(ar, ii, ma)`` on a copy of ``data``.

    The copy's index is converted to a monthly ``PeriodIndex`` before
    fitting, so the caller's ``data`` is left untouched.

    Args:
        data: Series/DataFrame whose index can be turned into a
            ``DatetimeIndex``.
        ar: First element of the ARIMA ``order`` tuple.
        ii: Second element of the ARIMA ``order`` tuple.
        ma: Third element of the ARIMA ``order`` tuple.

    Returns:
        The object returned by ``ARIMA(...).fit()``.
    """
    series = data.copy()
    monthly_index = pd.DatetimeIndex(series.index).to_period('M')
    series.index = monthly_index
    return ARIMA(series, order=(ar, ii, ma)).fit()

# Optimización por [OPTUNA](https://github.com/optuna/optuna/blob/40b097650d312913989b5825aa9c9adf4b15c396/docs/source/index.rst)

In [None]:
# You can increase iteration number.
# Number of Optuna trials (passed as n_trials below); increase for a wider search.
iteration = 10

optuna.logging.disable_default_handler()  # do not display Optuna's log output
# optuna.logging.enable_default_handler()  # uncomment to display the log again

In [None]:

%%time

def objective(trial):
    """Optuna objective: R² of the rounded forecast for article 4827.

    Asks the trial for three integer orders named ``ar``, ``ii`` and ``ma``
    (each bounded by 0 and 6), forwards them to ``total_forecasting`` and
    scores the prediction, rounded to whole numbers, against the observed
    series with ``r2_score``.
    """
    lower, upper = 0, 6
    # Parameters are suggested in the same order as before: ar, ii, ma.
    orders = [trial.suggest_int(name, lower, upper) for name in ('ar', 'ii', 'ma')]
    article_id = 4827
    forecast = total_forecasting(df_time[[article_id]], *orders)
    rounded = forecast.apply(lambda value: round(value, 0))
    return r2_score(df_time[article_id], rounded)
SEED = 5050

# Maximize the objective with a TPE sampler seeded by SEED.
# Unseeded alternative: optuna.create_study(direction='maximize')
study = optuna.create_study(
    direction='maximize',
    sampler=optuna.samplers.TPESampler(seed=SEED),
)
study.optimize(objective, n_trials=iteration)

In [None]:
from modulos.LR.gruas.generals import make_lags, make_timeserie_arima, make_timeserie, cross_validation_ts_mape_r2, split_data_train, cross_validation_ts_mape_r2_ARIMA

In [None]:
# Run the project's ARIMA cross-validation helper on the selected article with
# a fixed order (2, 3, 3) and test_size=.2; it returns a (mape, r2) pair.
# NOTE(review): the order is hard-coded rather than taken from
# study.best_params -- confirm this is intentional.
mape, score_r2 = cross_validation_ts_mape_r2_ARIMA(
        ARIMA, order=(2, 3, 3), ts=df_time[[idArticulo]], test_size=.2)

In [None]:
# Show the last row of the selected article's series wrapped in a DataFrame.
pd.DataFrame(df_time[[idArticulo]].iloc[-1])

In [None]:
# Best objective value found by the study (it was created with direction='maximize').
study.best_value

In [None]:
# Parameters (ar, ii, ma) of the best trial.
study.best_params

In [None]:
# Hand every trial's objective value to the project helper (presumably a plot
# or summary of the optimization history -- see its definition).
show_optimizer_results([trial.value for trial in study.trials])

In [None]:
idArticulo = 4827
# Example of a best_params dict: {'ar': 7, 'ii': 1, 'ma': 10}
# NOTE(review): those values lie outside the 0..6 range sampled by
# `objective`, so they presumably come from an earlier run with wider bounds.
# Refit with the best parameters found by the study.
result = arima_forecasting(df_time[[idArticulo]],  **study.best_params)

In [None]:
# Round the fitted values to whole numbers, then pass them with the observed
# series and the article id to the project's R² helper.
fitted_rounded = result.fittedvalues.apply(lambda value: round(value, 0))
r2 = show_results_r2(df_time[[idArticulo]], fitted_rounded, idArticulo)


In [None]:
# Collect the outcome of this run as a single-row table.
# DataFrame.append was deprecated in pandas 1.4 and removed in 2.0, so the
# frame is built directly from the row dict instead.
row = {'idArticulo': idArticulo, 'hyper': study.best_params, 'r2': r2, 'model': 'ARIMA'}
results = pd.DataFrame([row])

# Make sure the output folder exists; to_csv does not create directories.
output_dir = os.path.join(DATA_PATH, 'result', 'arima')
os.makedirs(output_dir, exist_ok=True)
results.to_csv(os.path.join(output_dir, 'arima_one.csv'), index=False)

# Kept as the last expression so the notebook actually displays the preview.
results.head()