In [37]:
import pandas as pd
from pandas import Series as Series
import numpy as np
import matplotlib.pyplot as plt  

from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
from statsmodels.tsa.stattools import adfuller, acf, pacf, arma_order_select_ic
import pmdarima as pm
from pmdarima.arima.utils import ndiffs

import sklearn.metrics as sme
import math

# Forecast one step per row of the hold-out data and score the forecast.
# NOTE(review): `sarima_model` and `test_data_sarima` are not defined in this
# cell; presumably an earlier cell creates them — confirm the notebook still
# runs top to bottom after a kernel restart.
yPredS = sarima_model.predict(n_periods = len(test_data_sarima))

# RMSE: square root of the mean squared error between actual values and forecast.
rmse = math.sqrt(sme.mean_squared_error(test_data_sarima, yPredS ))
print("The root mean squared error is {}.".format(rmse))



In [38]:
def data_split(data, train_frac=0.8):
    """Split a price table chronologically into train/test features and target.

    The rows are sorted by the ``Date`` column, ``Date`` becomes the index,
    and every row containing a missing value is dropped. The first
    ``train_frac`` share of the remaining rows forms the training set and the
    rest forms the test set; rows are never shuffled.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain the columns ``Date``, ``Open``, ``High``, ``Low``,
        ``Volume``, ``Turnover`` and ``Close``. The frame is not modified.
    train_frac : float, default 0.8
        Fraction of the cleaned rows assigned to the training set. Must lie
        strictly between 0 and 1. The row count is truncated with ``int``.

    Returns
    -------
    X_train, Y_train, X_test, Y_test : pandas.DataFrame
        ``X_*`` hold the five feature columns, ``Y_*`` hold the single
        ``Close`` column (as one-column DataFrames), all indexed by ``Date``.

    Raises
    ------
    ValueError
        If ``train_frac`` is not strictly between 0 and 1.
    KeyError
        If a required column is missing (raised by pandas).
    """
    if not 0 < train_frac < 1:
        raise ValueError(
            "train_frac must be strictly between 0 and 1, got {!r}".format(train_frac)
        )

    feature_cols = ['Open', 'High', 'Low', 'Volume', 'Turnover']
    target_cols = ['Close']

    # sort_values returns a new frame, so the caller's data is left untouched.
    # NOTE(review): 'Date' is sorted as stored; if it holds strings in a
    # non-ISO format the order is lexicographic, not chronological — confirm.
    cleaned = data.sort_values('Date').set_index('Date').dropna()

    split_val = int(cleaned.shape[0] * train_frac)
    train_data = cleaned.iloc[:split_val]
    test_data = cleaned.iloc[split_val:]

    X_train = train_data[feature_cols]
    Y_train = train_data[target_cols]

    X_test = test_data[feature_cols]
    Y_test = test_data[target_cols]

    return X_train, Y_train, X_test, Y_test
    

In [39]:
# Load the NIFTY 50 CSV and build the train/test split with data_split.
# NOTE(review): the directory name 'inidices' looks like a typo for 'indices' —
# confirm against the repository layout before changing the path.
data = pd.read_csv('../datasets/inidices/NIFTY 50.csv')
X_train,Y_train,X_test,Y_test = data_split(data)


In [40]:
def test_stationarity(timeseries):
    """Run an Augmented Dickey-Fuller test, print a labelled report, return the p-value.

    Parameters
    ----------
    timeseries : array-like
        Series handed unchanged to ``adfuller`` (lag length chosen by AIC).

    Returns
    -------
    float
        The ``p-value`` entry of the printed report.
    """
    print('Results of Dickey-Fuller Test:')
    adf_result = adfuller(timeseries, autolag='AIC')

    # The first four entries of the result are scalars; the fifth maps a
    # significance level to its critical value.
    headline_labels = ('Test Statistic', 'p-value', '#Lags Used', 'Number of Observations Used')
    report = dict(zip(headline_labels, adf_result[:4]))
    report.update(
        ('Critical Value (%s)' % level, threshold)
        for level, threshold in adf_result[4].items()
    )

    summary = pd.Series(report)
    print(summary)
    return summary["p-value"]

In [48]:
def SMXModel(data, seasonal_period=16, alpha=0.05):
    """Fit a seasonal ARIMA model with exogenous regressors and score it on the hold-out set.

    Pipeline:
      1. ``data_split`` produces the chronological train/test split.
      2. ``test_stationarity`` runs an ADF test on the training target.
      3. ``pm.auto_arima`` runs a stepwise order search with ``X_train`` as
         exogenous regressors. The differencing order is left to auto_arima
         when the ADF test rejects a unit root, otherwise it is taken from
         ``ndiffs``.
      4. The fitted model forecasts ``len(Y_test)`` steps using ``X_test`` and
         the RMSE against ``Y_test`` is printed and returned.

    Parameters
    ----------
    data : pandas.DataFrame
        Raw table accepted by ``data_split``.
    seasonal_period : int, default 16
        Value passed to auto_arima as ``m``. The default keeps the
        notebook's original setting.
        NOTE(review): nothing visible here justifies a period of 16, and
        large ``m`` makes the search slow — confirm the intended seasonality.
    alpha : float, default 0.05
        Significance level for the ADF test; the target is treated as
        stationary when ``p-value <= alpha``.

    Returns
    -------
    rmse : float
        Root mean squared error of the forecast on the test set.
    sxmodel
        The fitted model object returned by ``pm.auto_arima``.
    """
    X_train, Y_train, X_test, Y_test = data_split(data)

    pval = test_stationarity(Y_train)
    print('\n\n')

    # The original compared against 0.5, which would label almost any series
    # stationary; the conventional significance level is used instead.
    if pval <= alpha:
        print("Going without any de-trending/differencing")
        # d=None lets auto_arima determine the order with its own ADF test.
        diff_order = None
    else:
        num_diff = ndiffs(Y_train.values, test='adf')
        print("Needs de-trending/differencing :", num_diff)
        # Use the estimated order instead of a hard-coded 1, but never go
        # below the single difference the original always applied here.
        diff_order = max(1, num_diff)

    # n_jobs and random are omitted: per the pmdarima documentation they only
    # take effect when stepwise=False.
    # NOTE(review): the keyword `exogenous` is kept from the original call;
    # newer pmdarima releases name this argument `X` — confirm the installed
    # version.
    sxmodel = pm.auto_arima(Y_train, exogenous=X_train,
                            start_p=1, start_q=0,
                            test='adf',
                            max_p=2, max_q=2, m=seasonal_period,
                            start_P=0,
                            d=diff_order, D=1, trace=True,
                            error_action='ignore',
                            suppress_warnings=True,
                            stepwise=True)

    # A bare `sxmodel.summary()` inside a function displays nothing.
    print(sxmodel.summary())

    # A model fitted with exogenous regressors needs the matching future
    # regressors to forecast.
    yPredS = sxmodel.predict(n_periods=len(Y_test), exogenous=X_test)

    rmse = math.sqrt(sme.mean_squared_error(Y_test, yPredS))
    print("The root mean squared error is {}.".format(rmse))

    return rmse, sxmodel

In [49]:
# Run the split / stationarity check / auto_arima search / RMSE pipeline on the loaded data.
# The saved output of this cell ends in KeyboardInterrupt during the stepwise
# search, so no RMSE was recorded for this run.
SMXModel(data)

Results of Dickey-Fuller Test:
Test Statistic                  -19.224828
p-value                           0.000000
#Lags Used                       19.000000
Number of Observations Used    4857.000000
Critical Value (1%)              -3.431697
Critical Value (5%)              -2.862135
Critical Value (10%)             -2.567087
dtype: float64



Going without any de-trending/differencing
Performing stepwise search to minimize aic




KeyboardInterrupt: 