In [1]:
import pandas as pd
import numpy as np
import os
import matplotlib.pyplot as plt
from pmdarima import auto_arima
from sklearn.preprocessing import StandardScaler
import warnings
warnings.filterwarnings('ignore')

In [2]:
# Load the aggregated dataset; the path is relative to the notebook's working directory.
file_path = os.path.join('..', 'aggregated_data.csv')
data = pd.read_csv(file_path)

# Parse and sort by date BEFORE picking the CPI base, so that "last row" is the
# most recent observation regardless of the row order in the CSV.
data['DATE'] = pd.to_datetime(data['DATE'])
data = data.sort_values('DATE', ignore_index=True)

# inflation adjustment: rescale nominal series by (CPI at latest date / CPI at row date)
base_cpi = data['CPI_US'].iloc[-1]
cpi_factor = base_cpi / data['CPI_US']

# Some of these columns are dropped again below; they are still adjusted here so
# that un-dropping one later does not silently reintroduce a nominal series.
# NOTE(review): US_MXN_RATES is an exchange rate rather than a USD price --
# confirm that scaling it by US CPI is intended.
inflation_adjusted_columns = [
    'AVG_PRICE_AUCTION',
    'CORN_FUTURES_TYPICAL_PRICE',
    'FEEDER_CATTLE_FUTURES_TYPICAL_PRICE',
    'US_MXN_RATES',
    'DIESEL_RETAIL_PRICE',
]
for column in inflation_adjusted_columns:
    data[column] = data[column] * cpi_factor

# Index by date and drop the columns not used for training.
# US_MXN_RATES is not in this list, so it stays in the frame.
unused_columns = [
    'avg_price_steers_ML1',
    'avg_price_steers_ML1_2',
    'avg_price_heifers_ML1',
    'avg_price_heifers_ML1_2',
    'NATIONAL_AUCTION_SALES',
    'NATIONAL_TOTAL_SALES',
    'TEXAS_AUCTION_SALES',
    'CORN_FUTURES_TYPICAL_PRICE',
    'DIESEL_RETAIL_PRICE',
    'ALFAFA_HAY_PPI',
    'CORN_PPI',
]
data = data.set_index('DATE').drop(columns=unused_columns)


In [3]:
# Target as a column vector of shape (n, 1); every other column is an exogenous regressor.
y = data['AVG_PRICE_AUCTION'].to_numpy().reshape(-1, 1)
X = data.drop(columns=['AVG_PRICE_AUCTION'])

# Positional split without shuffling: the first 90% of rows train, the rest test.
train_size = int(0.9 * len(data))

X_train, X_test = X.iloc[:train_size], X.iloc[train_size:]
y_train, y_test = y[:train_size], y[train_size:]

# Fit the scaler on the training rows only, then apply those same statistics
# to both partitions so nothing from the test rows enters the scaling.
scaler_X = StandardScaler()
scaler_X.fit(X_train)

X_train_scaled, X_test_scaled = (
    scaler_X.transform(partition) for partition in (X_train, X_test)
)

In [4]:
# Stepwise, non-seasonal auto_arima search over AR/MA orders up to 3,
# selecting by BIC, with the scaled exogenous regressors passed as X.
model = auto_arima(
    y_train,
    X=X_train_scaled,
    # order search space
    start_p=1,
    max_p=3,
    start_q=1,
    max_q=3,
    seasonal=False,
    # search strategy and selection criterion
    stepwise=True,
    information_criterion='bic',
    # optimiser settings
    # NOTE(review): `solver` is passed alongside `method`; confirm the installed
    # pmdarima version actually uses it.
    method='lbfgs',
    solver='lbfgs',
    # reporting / failure handling: print each candidate, skip fits that error out
    trace=True,
    error_action='ignore',
    suppress_warnings=True,
)

Performing stepwise search to minimize bic
 ARIMA(1,0,1)(0,0,0)[0]             : BIC=2235.318, Time=0.17 sec
 ARIMA(0,0,0)(0,0,0)[0]             : BIC=4443.145, Time=0.03 sec
 ARIMA(1,0,0)(0,0,0)[0]             : BIC=inf, Time=0.06 sec
 ARIMA(0,0,1)(0,0,0)[0]             : BIC=inf, Time=0.07 sec
 ARIMA(2,0,1)(0,0,0)[0]             : BIC=2241.567, Time=0.21 sec
 ARIMA(1,0,2)(0,0,0)[0]             : BIC=2241.349, Time=0.24 sec
 ARIMA(0,0,2)(0,0,0)[0]             : BIC=inf, Time=0.28 sec
 ARIMA(2,0,0)(0,0,0)[0]             : BIC=inf, Time=0.14 sec
 ARIMA(2,0,2)(0,0,0)[0]             : BIC=inf, Time=0.26 sec
 ARIMA(1,0,1)(0,0,0)[0] intercept   : BIC=2299.327, Time=0.22 sec

Best model:  ARIMA(1,0,1)(0,0,0)[0]          
Total fit time: 1.679 seconds
