In [1]:
import pandas as pd
import yfinance as yf
import matplotlib.pyplot as plt
import numpy as np
import datetime as dt
import warnings

from sklearn.preprocessing import MinMaxScaler
from statsmodels.tsa.stattools import adfuller
from statsmodels.tsa.statespace.varmax import VARMAX
from sklearn.metrics import mean_squared_error, mean_absolute_percentage_error

In [2]:
start=dt.datetime(2025,1,1)
end=dt.datetime(2025,7,15)
stock_data=yf.download('WIPRO.NS',start=start,end=end)

  stock_data=yf.download('WIPRO.NS',start=start,end=end)
[*********************100%***********************]  1 of 1 completed


In [3]:
stock_data

Price,Close,High,Low,Open,Volume
Ticker,WIPRO.NS,WIPRO.NS,WIPRO.NS,WIPRO.NS,WIPRO.NS
Date,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
2025-01-01,288.769073,291.701977,287.470934,288.480605,7878201
2025-01-02,292.134705,292.759769,285.740057,286.557424,8800435
2025-01-03,283.143738,292.134694,282.711005,291.653893,12535720
2025-01-06,282.999481,286.365088,279.393473,283.672614,9173687
2025-01-07,282.759064,286.076602,280.114658,282.999464,7381943
...,...,...,...,...,...
2025-07-08,264.448395,265.085850,262.192774,262.634076,5519016
2025-07-09,262.634094,264.203236,259.741018,262.830248,11564433
2025-07-10,259.937134,262.388908,258.073791,262.388908,6911031
2025-07-11,253.317337,256.994999,252.777959,255.965266,13109857


In [4]:
stock_data.columns=stock_data.columns.droplevel(1)

start_date=dt.datetime(2025,1,1)
end_date=dt.datetime(2025,5,13)
all_date=pd.date_range(start_date,end_date,freq='d')
stock_data.index=all_date
stock_data

Price,Close,High,Low,Open,Volume
2025-01-01,288.769073,291.701977,287.470934,288.480605,7878201
2025-01-02,292.134705,292.759769,285.740057,286.557424,8800435
2025-01-03,283.143738,292.134694,282.711005,291.653893,12535720
2025-01-04,282.999481,286.365088,279.393473,283.672614,9173687
2025-01-05,282.759064,286.076602,280.114658,282.999464,7381943
...,...,...,...,...,...
2025-05-09,264.448395,265.085850,262.192774,262.634076,5519016
2025-05-10,262.634094,264.203236,259.741018,262.830248,11564433
2025-05-11,259.937134,262.388908,258.073791,262.388908,6911031
2025-05-12,253.317337,256.994999,252.777959,255.965266,13109857


#Stationarity Test

In [5]:
dftest=adfuller(stock_data['Close'],autolag='AIC')
dfoutput=pd.Series(dftest[0:4],index=('Test Statistic','p-value','#lags used','#observations used'))
for key,value in dftest[4].items():
    dfoutput['Critical value (%s)' %key]=value
dfoutput

print('Condition for stationarity using ADF:')
print('If Test Statistic < Critical value and p-value < 0.05 --> Reject Null Hypothesis --> Stationary data')
print('Else --> Accept Null Hypothesis --> Non-Stationary data')

if (
    (dfoutput['Test Statistic'] < dfoutput ['Critical value (1%)'] or 
     dfoutput['Test Statistic'] < dfoutput ['Critical value (5%)'] or 
     dfoutput['Test Statistic'] < dfoutput ['Critical value (10%)']) and 
     dfoutput['p-value'] < 0.05):
    print('\nCondition: Test Statistic < Critical value and p-value < 0.05 is true. Stationary as per ADF')
else:
    print('\nCondition: Test Statistic < Critical value and p-value < 0.05 is not true. Non- Stationary as per ADF')

Condition for stationarity using ADF:
If Test Statistic < Critical value and p-value < 0.05 --> Reject Null Hypothesis --> Stationary data
Else --> Accept Null Hypothesis --> Non-Stationary data

Condition: Test Statistic < Critical value and p-value < 0.05 is not true. Non- Stationary as per ADF


#Data Preprocessing

In [6]:
ms=MinMaxScaler()
prestock_data=ms.fit_transform(stock_data)

In [7]:
prestock_data

array([[0.71023605, 0.69510054, 0.74132445, 0.70944231, 0.1435652 ],
       [0.75065809, 0.70767162, 0.72123427, 0.68739612, 0.16037116],
       [0.6426744 , 0.70024305, 0.68607626, 0.74581899, 0.22843961],
       [0.64094185, 0.63167557, 0.64756988, 0.65432639, 0.16717297],
       [0.63805438, 0.62824712, 0.65594063, 0.6466098 , 0.13452185],
       [0.67847605, 0.63910372, 0.63361804, 0.63227966, 0.14465101],
       [0.61784354, 0.63396113, 0.65649893, 0.67637269, 0.14514152],
       [0.71312315, 0.69395751, 0.6894242 , 0.64660962, 0.3554516 ],
       [0.6126466 , 0.64824597, 0.64422142, 0.69786807, 0.17387107],
       [0.62419539, 0.59453429, 0.60292438, 0.61905143, 0.13977241],
       [0.62188556, 0.59624888, 0.63808271, 0.63227956, 0.08496871],
       [0.56875981, 0.61681907, 0.60459901, 0.66424691, 0.18853005],
       [0.49831069, 0.5173959 , 0.53539905, 0.57495863, 0.17054912],
       [0.70965877, 0.71852769, 0.71900198, 0.73148881, 0.71762848],
       [0.68771538, 0.72367017, 0.

In [8]:
prestock_data.shape

(133, 5)

In [10]:
stock_table=pd.DataFrame(prestock_data,columns=['Close','High','Low','Open','Volume'])
stock_table

Unnamed: 0,Close,High,Low,Open,Volume
0,0.710236,0.695101,0.741324,0.709442,0.143565
1,0.750658,0.707672,0.721234,0.687396,0.160371
2,0.642674,0.700243,0.686076,0.745819,0.228440
3,0.640942,0.631676,0.647570,0.654326,0.167173
4,0.638054,0.628247,0.655941,0.646610,0.134522
...,...,...,...,...,...
128,0.418139,0.378788,0.447922,0.413153,0.100574
129,0.396349,0.368298,0.419465,0.415402,0.210740
130,0.363957,0.346736,0.400114,0.410343,0.125940
131,0.284452,0.282634,0.338645,0.336706,0.238902


In [11]:
warnings.filterwarnings('ignore')

#Order selection & Model Creation

In [12]:
performance={'Model':[],'RMSE':[],'MaPe':[],'Lag':[],'Test':[]}
list=['Close','High','Low','Open']

In [13]:
dataset=stock_table[list]
dataset

Unnamed: 0,Close,High,Low,Open
0,0.710236,0.695101,0.741324,0.709442
1,0.750658,0.707672,0.721234,0.687396
2,0.642674,0.700243,0.686076,0.745819
3,0.640942,0.631676,0.647570,0.654326
4,0.638054,0.628247,0.655941,0.646610
...,...,...,...,...
128,0.418139,0.378788,0.447922,0.413153
129,0.396349,0.368298,0.419465,0.415402
130,0.363957,0.346736,0.400114,0.410343
131,0.284452,0.282634,0.338645,0.336706


In [14]:
test_obs=30
train_data=dataset[:-test_obs]
test_data=dataset[-test_obs:]

In [15]:
train_data

Unnamed: 0,Close,High,Low,Open
0,0.710236,0.695101,0.741324,0.709442
1,0.750658,0.707672,0.721234,0.687396
2,0.642674,0.700243,0.686076,0.745819
3,0.640942,0.631676,0.647570,0.654326
4,0.638054,0.628247,0.655941,0.646610
...,...,...,...,...
98,0.166078,0.153730,0.227547,0.224171
99,0.156419,0.143823,0.215822,0.208432
100,0.188339,0.162471,0.228799,0.209106
101,0.182803,0.155128,0.233580,0.213041


In [16]:
train_data.shape

(103, 4)

In [17]:
test_data.shape

(30, 4)

In [19]:
combinations=[['Close','High'],['Close','High','Low'],['Close','High','Low','Open']]
order=[(1,1),(1,2),(2,1),(2,2)]
exog=stock_table.loc[train_data.index,['Volume']]
exog_forecast=stock_table.loc[test_data.index,['Volume']]

for columns in combinations:
    best_aic=float("inf")
    best_results=None
    best_order=None
    print('\nFor the combination of ',columns)
    for pq in order:
        model=VARMAX(endog=train_data,exog=exog,order=pq)
        results=model.fit()
        print('Order=',pq)
        print('AIC=',results.aic)
        print('BIC=',results.bic)
        
        if results.aic<best_aic:
            best_aic=results.aic
            best_results=results
            best_order=pq
    #Forecasting the future values equivalent to test set
    prediction=best_results.forecast(steps=30,exog=exog_forecast)
    pred_table=pd.DataFrame(prediction,columns=columns)
    print('\nThe corresponding predicted values: ')
    print(pred_table)
    #Calculating the test metrics
    rmse=round(mean_squared_error(test_data[columns],prediction[columns],squared=False))
    mape=mean_absolute_percentage_error(test_data[columns],prediction[columns])
    #Appending the corresponding values to the performance table
    performance['Model'].append(columns)
    performance['RMSE'].append(rmse)
    performance['MaPe'].append(mape)
    performance['Lag'].append(best_order)
    performance['Test'].append(test_obs)
    perform_table=pd.DataFrame(performance)
print('\nThe performance table:')
print(perform_table)


For the combination of  ['Close', 'High']
Order= (1, 1)
AIC= -1677.5347075978416
BIC= -1545.7982581863598
Order= (1, 2)
AIC= -1661.771066166375
BIC= -1487.878952943219
Order= (2, 1)
AIC= -1654.2980840315838
BIC= -1480.4059708084278
Order= (2, 2)
AIC= -1632.5115826686183
BIC= -1416.463805633788

The corresponding predicted values: 
        Close      High
103  0.166268  0.155560
104  0.175316  0.160557
105  0.182937  0.168451
106  0.191593  0.176042
107  0.196020  0.184202
108  0.202135  0.189987
109  0.204672  0.196415
110  0.212408  0.200652
111  0.221754  0.207415
112  0.226462  0.215317
113  0.234732  0.220793
114  0.242372  0.228059
115  0.242973  0.235209
116  0.249550  0.238342
117  0.258945  0.244241
118  0.264488  0.251906
119  0.270942  0.257689
120  0.274687  0.263893
121  0.279989  0.268547
122  0.286308  0.273830
123  0.293514  0.279650
124  0.295865  0.286086
125  0.300650  0.289853
126  0.306782  0.294631
127  0.312975  0.300149
128  0.317480  0.305809
129  0.318628  0.3