In [1]:
import pandas as pd
# Importando datos
train = pd.read_csv('data/train_cleaned_v2.csv')
train.drop(['train_idx', 'valid'], axis=1, inplace=True)
# Importo test
test = pd.read_csv('data/testing_set.csv')
test.drop('test_idx', axis=1, inplace=True)

# PMDARIMA

In [2]:
# pip install pmdarima
from pmdarima.arima import auto_arima

In [17]:
# Creo el modelo
model = auto_arima(train['Close'], trace=True, error_action='ignore', suppress_warnings=True)

Performing stepwise search to minimize aic
 ARIMA(2,1,2)(0,0,0)[0] intercept   : AIC=-24389.508, Time=1.28 sec
 ARIMA(0,1,0)(0,0,0)[0] intercept   : AIC=-24397.508, Time=0.44 sec
 ARIMA(1,1,0)(0,0,0)[0] intercept   : AIC=-24396.447, Time=1.30 sec
 ARIMA(0,1,1)(0,0,0)[0] intercept   : AIC=-24396.452, Time=0.68 sec
 ARIMA(0,1,0)(0,0,0)[0]             : AIC=-24399.399, Time=0.18 sec
 ARIMA(1,1,1)(0,0,0)[0] intercept   : AIC=-24394.415, Time=2.03 sec

Best model:  ARIMA(0,1,0)(0,0,0)[0]          
Total fit time: 5.916 seconds


In [21]:
# Hago la predicción sobre el conjunto de testeo
forecast = model.predict(n_periods=1500)

In [19]:
# Creo el dataframe de predicciones
forecast = pd.DataFrame(forecast, index=test.index, columns=['Close'])
forecast

Unnamed: 0,Close
0,
1,
2,
3,
4,
...,...
1495,
1496,
1497,
1498,


In [23]:
forecast.unique()

array([1.17069])

# ARIMA Kaggle

In [5]:
import itertools
import statsmodels.api as sm

In [3]:
p = d = q = range(0, 3)
pdq = list(itertools.product(p, d, q))

In [4]:
seasonal_pdq = [(0, 0, 0, 0)]

print('Examples of parameter combinations for Seasonal ARIMA...')
print('SARIMAX: {} x {}'.format(pdq[1], seasonal_pdq[0]))
print('SARIMAX: {} x {}'.format(pdq[3], seasonal_pdq[0]))

Examples of parameter combinations for Seasonal ARIMA...
SARIMAX: (0, 0, 1) x (0, 0, 0, 0)
SARIMAX: (0, 1, 0) x (0, 0, 0, 0)


In [6]:
best_score = 0
best_params = None
best_seasonal_params = None

for param in pdq:
    for param_seasonal in seasonal_pdq:
        try:
            
            mod = sm.tsa.statespace.SARIMAX(train['Close'],
                                            order=param,
                                            seasonal_order=param_seasonal,
                                            enforce_stationarity = False,
                                            enforce_invertibility = False)

            results = mod.fit()

            print('ARIMA{}x{}12 - AIC:{}'.format(param, param_seasonal, results.aic))
            
            # guardamos el mejor resultado
            if best_score == 0:
                best_score = results.aic
                best_params = param
                best_seasonal_params = param_seasonal
                
            elif abs(results.aic) < abs(best_score):
                best_score = results.aic
                best_params = param
                best_seasonal_params = param_seasonal
            
        # alguna combinación de parámetros en SARIMAX, no son válidos
        # y los vamos a cazar con un except
        except:
            continue

ARIMA(0, 0, 0)x(0, 0, 0, 0)12 - AIC:11788.159376129855
ARIMA(0, 0, 1)x(0, 0, 0, 0)12 - AIC:6952.29939890713
ARIMA(0, 0, 2)x(0, 0, 0, 0)12 - AIC:2393.6951855067123
ARIMA(0, 1, 0)x(0, 0, 0, 0)12 - AIC:-24391.55516605069
ARIMA(0, 1, 1)x(0, 0, 0, 0)12 - AIC:-24382.679235505682
ARIMA(0, 1, 2)x(0, 0, 0, 0)12 - AIC:-24372.741072234283
ARIMA(0, 2, 0)x(0, 0, 0, 0)12 - AIC:-22012.49511915057
ARIMA(0, 2, 1)x(0, 0, 0, 0)12 - AIC:-24363.43643944328




ARIMA(0, 2, 2)x(0, 0, 0, 0)12 - AIC:-24355.34534344596
ARIMA(1, 0, 0)x(0, 0, 0, 0)12 - AIC:-24397.635628444743
ARIMA(1, 0, 1)x(0, 0, 0, 0)12 - AIC:-24388.72687449026
ARIMA(1, 0, 2)x(0, 0, 0, 0)12 - AIC:-24378.91455370395
ARIMA(1, 1, 0)x(0, 0, 0, 0)12 - AIC:-24390.49765518552
ARIMA(1, 1, 1)x(0, 0, 0, 0)12 - AIC:-24380.641355932796
ARIMA(1, 1, 2)x(0, 0, 0, 0)12 - AIC:-24370.782190266204




ARIMA(1, 2, 0)x(0, 0, 0, 0)12 - AIC:-22974.024776241997
ARIMA(1, 2, 1)x(0, 0, 0, 0)12 - AIC:-24361.994854616652
ARIMA(1, 2, 2)x(0, 0, 0, 0)12 - AIC:-24353.51236284585




ARIMA(2, 0, 0)x(0, 0, 0, 0)12 - AIC:-24388.726289230748
ARIMA(2, 0, 1)x(0, 0, 0, 0)12 - AIC:-24386.604925344123
ARIMA(2, 0, 2)x(0, 0, 0, 0)12 - AIC:-24376.644784443117
ARIMA(2, 1, 0)x(0, 0, 0, 0)12 - AIC:-24380.70622425469
ARIMA(2, 1, 1)x(0, 0, 0, 0)12 - AIC:-21746.10748596854
ARIMA(2, 1, 2)x(0, 0, 0, 0)12 - AIC:-24366.30690229743
ARIMA(2, 2, 0)x(0, 0, 0, 0)12 - AIC:-23334.108559019656
ARIMA(2, 2, 1)x(0, 0, 0, 0)12 - AIC:-24359.38641159521
ARIMA(2, 2, 2)x(0, 0, 0, 0)12 - AIC:-24351.905777840482


In [7]:
mod = sm.tsa.statespace.SARIMAX(train['Close'],
                                order = best_params,
                                seasonal_order = param_seasonal,
                                enforce_stationarity = False,
                                enforce_invertibility = False)

results = mod.fit()

In [8]:
len(test)

1500

In [11]:
pred_uc = results.get_forecast(steps = len(test))
pred_ci = pred_uc.conf_int()

In [15]:
y_pred = pred_ci.iloc[:, 0]
y_pred

3508    0.231561
3509   -1.111609
3510   -1.575124
3511   -1.575124
3512   -1.575124
          ...   
5003   -1.575124
5004   -1.575124
5005   -1.575124
5006   -1.575124
5007   -1.575124
Name: lower Close, Length: 1500, dtype: float64