In [1]:
import pandas as pd
from pandas import Series as Series
import numpy as np
import matplotlib.pyplot as plt  

from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
from statsmodels.tsa.stattools import adfuller, acf, pacf, arma_order_select_ic
import pmdarima as pm
from pmdarima.arima.utils import ndiffs

import sklearn.metrics as sme
import math



In [29]:
def data_split(data, test_size=365):
    """Split a price frame into train/test features and target by row position.

    The last ``test_size`` rows form the test set and every earlier row forms
    the training set. Rows are never shuffled, so the split is chronological
    only if ``data`` is already in chronological order (assumed; confirm with
    the caller). Note that this is a fixed-size hold-out, not a percentage
    split.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain the columns 'Open', 'High', 'Low', 'Volume', 'Turnover'
        and 'Close'.
    test_size : int, default 365
        Number of trailing rows to hold out for testing.

    Returns
    -------
    tuple of pandas.DataFrame
        ``(X_train, Y_train, X_test, Y_test)`` where the X frames hold the
        five feature columns and the Y frames hold the single 'Close' column.

    Raises
    ------
    ValueError
        If ``test_size`` is not positive. ``data[:-0]`` would silently yield
        an empty training set and ``data[-0:]`` the whole frame.
    """
    if test_size <= 0:
        raise ValueError("test_size must be a positive number of rows")

    feature_cols = ['Open', 'High', 'Low', 'Volume', 'Turnover']
    target_cols = ['Close']

    # Selecting columns with a list already yields new frames, so no
    # up-front copy of the whole input is needed to protect the caller's data.
    train_data = data.iloc[:-test_size]
    test_data = data.iloc[-test_size:]

    X_train = train_data[feature_cols]
    Y_train = train_data[target_cols]

    X_test = test_data[feature_cols]
    Y_test = test_data[target_cols]

    return X_train, Y_train, X_test, Y_test
    

In [21]:
# Read the raw index CSV as-is (no date parsing or indexing here) and take a
# first train/test split of it. `data` is reassigned by a later cell that
# loads the same file through readData/preprocess.
data = pd.read_csv('../datasets/inidices/NIFTY 50.csv')
X_train,Y_train,X_test,Y_test = data_split(data)


In [22]:
def preprocess(data, start_date="2012-03-01", end_date="2021-01-07"):
    """Restrict a price frame to a date window and the modelling columns.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame whose index supports label slicing by date strings (assumed to
        be the 'Date' index set by ``readData``) and which contains the
        columns 'Open', 'High', 'Low', 'Close', 'Volume' and 'Turnover'.
    start_date, end_date : str
        Bounds of the label-based ``.loc`` slice applied to the index.

    Returns
    -------
    pandas.DataFrame
        The selected rows and columns with any row containing a missing
        value removed.
    """
    df = data.loc[start_date:end_date]

    df = df[["Open", "High", "Low", "Close", "Volume", "Turnover"]]
    # dropna() returns a new frame; the result must be kept, otherwise rows
    # with missing values are silently left in place.
    return df.dropna()

In [5]:
def readData(path, start_date='2012-03-01', end_date='2021-01-07'):
    """Load a CSV with a 'Date' column into a date-indexed, NaN-free frame.

    Parameters
    ----------
    path : str
        Location of the CSV file; it must contain a 'Date' column.
    start_date, end_date : str
        Bounds of the label-based ``.loc`` slice applied to the 'Date' index.

    Returns
    -------
    pandas.DataFrame
        Rows within the date window, indexed by the parsed 'Date' column,
        with every row containing a missing value dropped.
    """
    data = pd.read_csv(path)
    # NOTE(review): the frame displayed further down this notebook steps
    # 2012-03-01, 2012-04-01, 2012-05-01 ... in its first rows but day by day
    # in later rows, which looks like day-first dates being read month-first.
    # Confirm the CSV's date format and pass `dayfirst=True` or an explicit
    # `format=` here if so.
    data['Date'] = pd.to_datetime(data['Date'])
    data = data.set_index('Date')
    # A date *range* needs a slice. `.loc[a, b]` would instead look up row
    # label `a` and column label `b`, and fail with a KeyError.
    data = data.loc[start_date:end_date]
    return data.dropna()

In [23]:
# Load the date-indexed frame and narrow it to the modelling columns in one
# step; the bare name on the last line renders the result as the cell output.
data = preprocess(readData("../datasets/inidices/NIFTY 50.csv"))
data

Unnamed: 0_level_0,Open,High,Low,Close,Volume,Turnover
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2012-03-01,4675.80,4773.10,4675.80,4765.30,146621115,5.021290e+10
2012-04-01,4774.95,4782.85,4728.85,4749.65,165938849,5.661160e+10
2012-05-01,4749.00,4779.80,4730.15,4749.95,177862936,5.873790e+10
2012-06-01,4724.15,4794.90,4686.85,4754.10,176057282,5.234690e+10
2012-07-01,4755.60,4759.40,4743.05,4746.90,18783880,4.148800e+09
...,...,...,...,...,...,...
2021-01-02,13758.60,14336.35,13661.75,14281.20,870529495,5.184020e+11
2021-01-03,14702.50,14806.80,14638.55,14761.55,507302977,3.003060e+11
2021-01-04,14798.40,14883.20,14692.45,14867.35,430028476,2.958520e+11
2021-01-06,15629.65,15660.75,15528.30,15574.85,409562322,2.796410e+11


In [24]:
def test_stationarity(timeseries):
    pval=-1
    #Perform Dickey-Fuller test:
    print('Results of Dickey-Fuller Test:')
    dftest = adfuller(timeseries, autolag='AIC')
    dfoutput = pd.Series(dftest[0:4], index=['Test Statistic','p-value','#Lags Used','Number of Observations Used'])
    for key,value in dftest[4].items():
        dfoutput['Critical Value (%s)'%key] = value
    print (dfoutput)
    return dfoutput["p-value"]

In [32]:
def SMXModel(data):
    """Fit a seasonal ARIMA with exogenous regressors on 'Close' and score it.

    Steps:
      1. Split ``data`` with ``data_split`` into train/test features and target.
      2. Run ``test_stationarity`` on the training target.
      3. Choose the non-seasonal differencing order ``d``: left to
         ``auto_arima`` when the series tests stationary, otherwise taken
         from ``ndiffs``.
      4. Fit ``pm.auto_arima`` with the training features as exogenous input.
      5. Forecast over the test horizon and report the RMSE against the
         held-out 'Close' values.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame accepted by ``data_split``.

    Returns
    -------
    tuple
        ``(rmse, sxmodel)``: the root mean squared error on the test set and
        the fitted model returned by ``pm.auto_arima``.
    """
    X_train, Y_train, X_test, Y_test = data_split(data)

    pval = test_stationarity(Y_train)
    print('\n\n')

    # The ADF null hypothesis is non-stationarity (a unit root), so the series
    # is treated as stationary only when the p-value clears the usual 5%
    # significance level. A 0.5 cut-off would accept clearly non-stationary
    # series.
    if pval <= 0.05:
        print("Going without any de-trending/differencing")
        d = None  # let auto_arima select d with its own 'adf' test
    else:
        # Use the differencing order that was actually estimated instead of
        # computing it and then hard-coding d=1.
        d = ndiffs(Y_train.values, test='adf')
        print("Needs de-trending/differencing :", d)

    # `n_jobs` and `random` only apply to the non-stepwise search, so they are
    # not passed together with stepwise=True.
    sxmodel = pm.auto_arima(Y_train, exogenous=X_train,
                            start_p=1, start_q=0,
                            test='adf',
                            max_p=2, max_q=2, m=16,
                            start_P=0,
                            d=d, D=1, trace=True,
                            error_action='ignore',
                            suppress_warnings=True,
                            stepwise=True)

    # summary() only returns the report; inside a function it has to be
    # printed to be seen.
    print(sxmodel.summary())

    yPredS = sxmodel.predict(n_periods=len(Y_test), exogenous=X_test)

    rmse = math.sqrt(sme.mean_squared_error(Y_test, yPredS))
    print("The root mean squared error is {}.".format(rmse))

    return rmse, sxmodel

In [33]:
# Fit and evaluate on the preprocessed frame. Because the call is the cell's
# last expression, the returned (rmse, model) tuple is echoed as cell output.
SMXModel(data)

Results of Dickey-Fuller Test:
Test Statistic                   -1.020242
p-value                           0.745810
#Lags Used                        1.000000
Number of Observations Used    1832.000000
Critical Value (1%)              -3.433925
Critical Value (5%)              -2.863119
Critical Value (10%)             -2.567611
dtype: float64



Needs de-trending/differencing : 1
Performing stepwise search to minimize aic




 ARIMA(1,1,0)(0,1,1)[16]             : AIC=18433.351, Time=8.95 sec
 ARIMA(0,1,0)(0,1,0)[16]             : AIC=31428.365, Time=1.65 sec
 ARIMA(1,1,0)(1,1,0)[16]             : AIC=18767.116, Time=9.92 sec
 ARIMA(0,1,1)(0,1,1)[16]             : AIC=18326.738, Time=10.51 sec
 ARIMA(0,1,1)(0,1,0)[16]             : AIC=19099.073, Time=2.38 sec
 ARIMA(0,1,1)(1,1,1)[16]             : AIC=18409.361, Time=12.68 sec
 ARIMA(0,1,1)(0,1,2)[16]             : AIC=inf, Time=28.51 sec
 ARIMA(0,1,1)(1,1,0)[16]             : AIC=18651.913, Time=10.76 sec
 ARIMA(0,1,1)(1,1,2)[16]             : AIC=inf, Time=33.75 sec
 ARIMA(0,1,0)(0,1,1)[16]             : AIC=18652.324, Time=9.37 sec
 ARIMA(1,1,1)(0,1,1)[16]             : AIC=18329.101, Time=12.57 sec
 ARIMA(0,1,2)(0,1,1)[16]             : AIC=18324.479, Time=12.30 sec
 ARIMA(0,1,2)(0,1,0)[16]             : AIC=19101.800, Time=2.77 sec
 ARIMA(0,1,2)(1,1,1)[16]             : AIC=18407.436, Time=12.96 sec
 ARIMA(0,1,2)(0,1,2)[16]             : AIC=inf, Time

In [30]:
# Re-run the split at top level so the pieces can be inspected directly;
# the bare `Xtr` renders the training feature frame.
Xtr,Ytr,Xtest,YtTest = data_split(data)
Xtr

Unnamed: 0_level_0,Open,High,Low,Volume,Turnover
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2012-03-01,4675.80,4773.10,4675.80,146621115,5.021290e+10
2012-04-01,4774.95,4782.85,4728.85,165938849,5.661160e+10
2012-05-01,4749.00,4779.80,4730.15,177862936,5.873790e+10
2012-06-01,4724.15,4794.90,4686.85,176057282,5.234690e+10
2012-07-01,4755.60,4759.40,4743.05,18783880,4.148800e+09
...,...,...,...,...,...
2019-07-16,11596.65,11670.05,11573.95,480156167,1.596790e+11
2019-07-17,11670.75,11706.65,11651.15,464827734,1.525050e+11
2019-07-18,11675.60,11677.15,11582.40,498258158,1.668400e+11
2019-07-19,11627.95,11640.35,11399.30,446049198,1.732640e+11


In [31]:
# Render the held-out feature frame produced by the data_split call above.
Xtest

Unnamed: 0_level_0,Open,High,Low,Volume,Turnover
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2019-07-23,11372.25,11398.15,11302.80,458931191,1.919930e+11
2019-07-24,11322.45,11359.75,11229.80,413202832,1.835310e+11
2019-07-25,11290.40,11361.40,11239.35,553676897,2.432920e+11
2019-07-26,11247.45,11307.60,11210.05,522670420,2.035040e+11
2019-07-29,11307.50,11310.95,11152.40,482862376,1.870590e+11
...,...,...,...,...,...
2021-01-02,13758.60,14336.35,13661.75,870529495,5.184020e+11
2021-01-03,14702.50,14806.80,14638.55,507302977,3.003060e+11
2021-01-04,14798.40,14883.20,14692.45,430028476,2.958520e+11
2021-01-06,15629.65,15660.75,15528.30,409562322,2.796410e+11


In [19]:
# Scratch arithmetic (evaluates to 440). The operands are not defined anywhere
# in this notebook; presumably row counts. TODO: confirm, or delete the cell.
2199-1759

440