In [1]:
import pandas as pd
import yfinance as yf
import matplotlib.pyplot as plt
import numpy as np
import datetime as dt
import warnings

from sklearn.preprocessing import MinMaxScaler
from statsmodels.tsa.stattools import adfuller
from statsmodels.tsa.statespace.varmax import VARMAX
from sklearn.metrics import mean_squared_error, mean_absolute_percentage_error

In [2]:
start=dt.datetime(2025,1,1)
end=dt.datetime(2025,7,15)
stock_data=yf.download('WIPRO.NS',start=start,end=end)

  stock_data=yf.download('WIPRO.NS',start=start,end=end)
[*********************100%***********************]  1 of 1 completed


In [3]:
stock_data.columns=stock_data.columns.droplevel(1)
stock_data.drop('Volume',axis=1,inplace=True)
stock_data

Price,Close,High,Low,Open
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2025-01-01,288.769073,291.701977,287.470934,288.480605
2025-01-02,292.134705,292.759769,285.740057,286.557424
2025-01-03,283.143738,292.134694,282.711005,291.653893
2025-01-06,282.999481,286.365088,279.393473,283.672614
2025-01-07,282.759064,286.076602,280.114658,282.999464
...,...,...,...,...
2025-07-08,264.448395,265.085850,262.192774,262.634076
2025-07-09,262.634094,264.203236,259.741018,262.830248
2025-07-10,259.937134,262.388908,258.073791,262.388908
2025-07-11,253.317337,256.994999,252.777959,255.965266


In [4]:
start_date=dt.datetime(2025,1,1)
end_date=dt.datetime(2025,5,13)
all_date=pd.date_range(start_date,end_date,freq='d')
stock_data.index=all_date
stock_data

Price,Close,High,Low,Open
2025-01-01,288.769073,291.701977,287.470934,288.480605
2025-01-02,292.134705,292.759769,285.740057,286.557424
2025-01-03,283.143738,292.134694,282.711005,291.653893
2025-01-04,282.999481,286.365088,279.393473,283.672614
2025-01-05,282.759064,286.076602,280.114658,282.999464
...,...,...,...,...
2025-05-09,264.448395,265.085850,262.192774,262.634076
2025-05-10,262.634094,264.203236,259.741018,262.830248
2025-05-11,259.937134,262.388908,258.073791,262.388908
2025-05-12,253.317337,256.994999,252.777959,255.965266


#Stationarity Test

In [5]:
dftest=adfuller(stock_data['Close'],autolag='AIC')
dfoutput=pd.Series(dftest[0:4],index=('Test Statistic','p-value','#lags used','#observations used'))
for key,value in dftest[4].items():
    dfoutput['Critical value (%s)' %key]=value
dfoutput

print('Condition for stationarity using ADF:')
print('If Test Statistic < Critical value and p-value < 0.05 --> Reject Null Hypothesis --> Stationary data')
print('Else --> Accept Null Hypothesis --> Non-Stationary data')

if (
    (dfoutput['Test Statistic'] < dfoutput ['Critical value (1%)'] or 
     dfoutput['Test Statistic'] < dfoutput ['Critical value (5%)'] or 
     dfoutput['Test Statistic'] < dfoutput ['Critical value (10%)']) and 
     dfoutput['p-value'] < 0.05):
    print('\nCondition: Test Statistic < Critical value and p-value < 0.05 is true. Stationary as per ADF')
else:
    print('\nCondition: Test Statistic < Critical value and p-value < 0.05 is not true. Non- Stationary as per ADF')

Condition for stationarity using ADF:
If Test Statistic < Critical value and p-value < 0.05 --> Reject Null Hypothesis --> Stationary data
Else --> Accept Null Hypothesis --> Non-Stationary data

Condition: Test Statistic < Critical value and p-value < 0.05 is not true. Non- Stationary as per ADF


#Data Preprocessing

In [6]:
ms=MinMaxScaler()
prestock_data=ms.fit_transform(stock_data)

In [7]:
prestock_data

array([[0.71023605, 0.69510054, 0.74132445, 0.70944231],
       [0.75065809, 0.70767162, 0.72123427, 0.68739612],
       [0.6426744 , 0.70024305, 0.68607626, 0.74581899],
       [0.64094185, 0.63167557, 0.64756988, 0.65432639],
       [0.63805438, 0.62824712, 0.65594063, 0.6466098 ],
       [0.67847605, 0.63910372, 0.63361804, 0.63227966],
       [0.61784354, 0.63396113, 0.65649893, 0.67637269],
       [0.71312315, 0.69395751, 0.6894242 , 0.64660962],
       [0.6126466 , 0.64824597, 0.64422142, 0.69786807],
       [0.62419539, 0.59453429, 0.60292438, 0.61905143],
       [0.62188556, 0.59624888, 0.63808271, 0.63227956],
       [0.56875981, 0.61681907, 0.60459901, 0.66424691],
       [0.49831069, 0.5173959 , 0.53539905, 0.57495863],
       [0.70965877, 0.71852769, 0.71900198, 0.73148881],
       [0.68771538, 0.72367017, 0.72848856, 0.73589782],
       [0.81186825, 0.77109634, 0.74578903, 0.70227742],
       [0.91119026, 0.93794406, 0.87191097, 0.83180029],
       [0.93890786, 0.91394525,

In [8]:
prestock_data.shape

(133, 4)

In [9]:
stock_table=pd.DataFrame(prestock_data,columns=['Close','High','Low','Open'])
stock_table

Unnamed: 0,Close,High,Low,Open
0,0.710236,0.695101,0.741324,0.709442
1,0.750658,0.707672,0.721234,0.687396
2,0.642674,0.700243,0.686076,0.745819
3,0.640942,0.631676,0.647570,0.654326
4,0.638054,0.628247,0.655941,0.646610
...,...,...,...,...
128,0.418139,0.378788,0.447922,0.413153
129,0.396349,0.368298,0.419465,0.415402
130,0.363957,0.346736,0.400114,0.410343
131,0.284452,0.282634,0.338645,0.336706


In [10]:
warnings.filterwarnings('ignore')

#Order selection & Model Creation

In [11]:
performance={'Model':[],'RMSE':[],'MaPe':[],'Lag':[],'Test':[]}
list=['Close','High','Low','Open']

In [12]:
dataset=stock_table[list]
dataset

Unnamed: 0,Close,High,Low,Open
0,0.710236,0.695101,0.741324,0.709442
1,0.750658,0.707672,0.721234,0.687396
2,0.642674,0.700243,0.686076,0.745819
3,0.640942,0.631676,0.647570,0.654326
4,0.638054,0.628247,0.655941,0.646610
...,...,...,...,...
128,0.418139,0.378788,0.447922,0.413153
129,0.396349,0.368298,0.419465,0.415402
130,0.363957,0.346736,0.400114,0.410343
131,0.284452,0.282634,0.338645,0.336706


In [13]:
test_obs=30
train_data=dataset[:-test_obs]
test_data=dataset[-test_obs:]

In [14]:
train_data

Unnamed: 0,Close,High,Low,Open
0,0.710236,0.695101,0.741324,0.709442
1,0.750658,0.707672,0.721234,0.687396
2,0.642674,0.700243,0.686076,0.745819
3,0.640942,0.631676,0.647570,0.654326
4,0.638054,0.628247,0.655941,0.646610
...,...,...,...,...
98,0.166078,0.153730,0.227547,0.224171
99,0.156419,0.143823,0.215822,0.208432
100,0.188339,0.162471,0.228799,0.209106
101,0.182803,0.155128,0.233580,0.213041


In [15]:
train_data.shape

(103, 4)

In [16]:
test_data.shape

(30, 4)

In [17]:
combinations=[['Close','High'],['Close','High','Low'],['Close','High','Low','Open']]
order=[(1,1),(1,2),(2,1),(2,2)]

for columns in combinations:
    best_aic=float("inf")
    best_results=None
    best_order=None
    print('\nFor the combination of ',columns)
    for pq in order:
        model=VARMAX(train_data,order=pq)
        results=model.fit()
        print('Order=',pq)
        print('AIC=',results.aic)
        print('BIC=',results.bic)
        
        if results.aic<best_aic:
            best_aic=results.aic
            best_results=results
            best_order=pq
    #Forecasting the future values equivalent to test set
    prediction=best_results.forecast(steps=30)
    pred_table=pd.DataFrame(prediction,columns=columns)
    print('\nThe corresponding predicted values: ')
    print(pred_table)
    #Calculating the test metrics
    rmse=round(mean_squared_error(test_data[columns],prediction[columns],squared=False))
    mape=mean_absolute_percentage_error(test_data[columns],prediction[columns])
    #Appending the corresponding values to the performance table
    performance['Model'].append(columns)
    performance['RMSE'].append(rmse)
    performance['MaPe'].append(mape)
    performance['Lag'].append(best_order)
    performance['Test'].append(test_obs)
    perform_table=pd.DataFrame(performance)
print('\nThe performance table:')
print(perform_table)


For the combination of  ['Close', 'High']
Order= (1, 1)
AIC= -1680.6386473138664
BIC= -1559.4411138553032
Order= (1, 2)
AIC= -1665.609300173446
BIC= -1502.2561029032086
Order= (2, 1)
AIC= -1656.8596863989612
BIC= -1493.5064891287238
Order= (2, 2)
AIC= -1634.6445618959865
BIC= -1429.1357008140749

The corresponding predicted values: 
        Close      High
103  0.151317  0.143642
104  0.156317  0.145571
105  0.160979  0.150136
106  0.165625  0.154842
107  0.170206  0.159507
108  0.174718  0.164106
109  0.179161  0.168634
110  0.183535  0.173093
111  0.187841  0.177483
112  0.192081  0.181806
113  0.196256  0.186062
114  0.200366  0.190252
115  0.204413  0.194377
116  0.208398  0.198439
117  0.212321  0.202438
118  0.216184  0.206376
119  0.219987  0.210253
120  0.223731  0.214070
121  0.227418  0.217828
122  0.231048  0.221529
123  0.234622  0.225172
124  0.238141  0.228759
125  0.241605  0.232291
126  0.245017  0.235768
127  0.248375  0.239192
128  0.251682  0.242563
129  0.254938  0