In [1]:
import pandas as pd
import datetime as dt
import yfinance as yf
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error
from statsmodels.tsa.ar_model import AutoReg

In [2]:
from nsepy import get_history as gh
import datetime as dt

In [3]:
# Fetching data
#historical stock data for AXISBANK
start = dt.datetime(2021, 7, 1)
end = dt.datetime(2022, 1, 27)
symbol = 'AXISBANK.NS'
stk_data = yf.download(symbol, start=start, end=end)

[*********************100%%**********************]  1 of 1 completed


In [4]:
#Selecting Features ,The data only the columns: Open, High, Low, and Close.
stk_data=stk_data[["Open","High","Low","Close"]]

In [5]:
stk_data

Unnamed: 0_level_0,Open,High,Low,Close
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2021-07-01,750.000000,754.400024,744.500000,746.450012
2021-07-02,747.599976,752.849976,744.000000,750.650024
2021-07-05,757.099976,762.000000,753.400024,760.349976
2021-07-06,757.000000,767.000000,755.200012,758.349976
2021-07-07,755.900024,764.000000,752.250000,762.900024
...,...,...,...,...
2022-01-19,725.200012,731.950012,718.700012,729.599976
2022-01-20,731.049988,737.150024,722.150024,728.900024
2022-01-21,722.000000,725.250000,708.049988,712.500000
2022-01-24,715.000000,716.400024,688.200012,704.599976


In [6]:
#The stock prices are normalized using MinMaxScaler to scale the values between 0 and 1, which is often necessary for time series models.
from sklearn.preprocessing import MinMaxScaler
Ms = MinMaxScaler()
data1= Ms.fit_transform(stk_data)
print("Len:",data1.shape)

Len: (143, 4)


## Splitting Data

In [7]:
data1=pd.DataFrame(data1,columns=["Open","High","Low","Close"])

In [8]:
#Training and Testing data is split into training (80%) and testing (20%) sets to evaluate the model's performance.
#The same data is used as both features (X) and targets (y) because VAR models forecast multiple time series at once.

In [9]:
training_size = round(len(data1 ) * 0.80)
print(training_size)
X_train=data1[:training_size]
X_test=data1[training_size:]
print("X_train length:",X_train.shape)
print("X_test length:",X_test.shape)
y_train=data1[:training_size]
y_test=data1[training_size:]
print("y_train length:",y_train.shape)
print("y_test length:",y_test.shape)

114
X_train length: (114, 4)
X_test length: (29, 4)
y_train length: (114, 4)
y_test length: (29, 4)


## VAR Model Setup and Forecasting

In [10]:
import warnings
warnings.filterwarnings("ignore")

In [11]:
performance={"Model":[],"RMSE":[],"MaPe":[],"Lag":[],"Test":[]}

#"cominbation" function 
#pass in the time series data and the columns to forecast (listt)
#VAR Model Fitting,The model is fit using different lags (up to 12), and the best lag order is selected based on the AIC (Akaike Information Criterion)
#Forecasting model then forecasts the next 28 time steps using the best lag order
#Performance Metrics,The forecast is compared to actual values, and two key performance metrics are calculated:
## RMSE (Root Mean Squared Error): Measures the average error magnitude.
## MAPE (Mean Absolute Percentage Error): Measures the percentage error on average.

## VAR: Models relationships between multiple time series variables, considering past values of all variables.

In [18]:
def cominbation(dataset,listt):
    print(listt)
    datasetTwo=dataset[listt]
    test_obs = 28
    train =datasetTwo[:-test_obs]
    test = datasetTwo[-test_obs:]
    from statsmodels.tsa.api import VAR
    for i in [1,2,3,4,5,6,7,8,9,10]:
        model = VAR(train)
        results = model.fit(i)
        print('Order =', i)
        print('AIC: ', results.aic)
        print('BIC: ', results.bic)
        print()
    x = model.select_order(maxlags=12)
    order=x.selected_orders["aic"]
    result = model.fit(order)
    #result.summary()
    lagged_Values = train.values[-order:]
    pred = result.forecast(y=lagged_Values,steps=28) 
    preds=pd.DataFrame(pred,columns=listt)
    preds.to_csv("varforecasted_{}.csv".format(test_obs))
    from sklearn.metrics import mean_squared_error
    rmse= round(mean_squared_error(test,pred,squared=False))
    from sklearn.metrics import mean_absolute_percentage_error
    mape=mean_absolute_percentage_error(test,pred)
    performance["Model"].append(listt)
    performance["RMSE"].append(rmse)
    performance["MaPe"].append(mape)
    performance["Lag"].append(order)
    performance["Test"].append(test_obs)
    perf=pd.DataFrame(performance)
    return perf,result,pred

In [19]:
listt=["Close","High","Open","Low"]
#The function returns a DataFrame showing the performance of the model (perf), the fitted model (result), and the predictions (pred).

In [20]:
perf,result,pred=cominbation(data1,listt)

['Close', 'High', 'Open', 'Low']
Order = 1
AIC:  -28.077293879121832
BIC:  -27.597259063614025

Order = 2
AIC:  -28.205346860941308
BIC:  -27.33644454701525

Order = 3
AIC:  -28.039955787578787
BIC:  -26.777795597334634

Order = 4
AIC:  -27.85286676152596
BIC:  -26.19297438594723

Order = 5
AIC:  -27.683290249236755
BIC:  -25.621105242631636

Order = 6
AIC:  -27.571034319145937
BIC:  -25.10190782168801

Order = 7
AIC:  -27.472116161730515
BIC:  -24.59130854741191

Order = 8
AIC:  -27.593825360225576
BIC:  -24.29650380743145

Order = 9
AIC:  -27.458850227430602
BIC:  -23.740086209236395

Order = 10
AIC:  -27.32959530545205
BIC:  -23.184361996634586



In [15]:
data1

Unnamed: 0,Open,High,Low,Close
0,0.508982,0.441549,0.543815,0.491495
1,0.497006,0.433854,0.540997,0.513145
2,0.544411,0.479275,0.593970,0.563144
3,0.543912,0.504095,0.604114,0.552835
4,0.538423,0.489203,0.587489,0.576289
...,...,...,...,...
138,0.385230,0.330107,0.398422,0.404639
139,0.414421,0.355920,0.417864,0.401031
140,0.369261,0.296848,0.338405,0.316495
141,0.334331,0.252916,0.226543,0.275773


In [16]:
#forecasting multiple interdependent time series, such as different stock price components.

In [17]:
perf

Unnamed: 0,Model,RMSE,MaPe,Lag,Test
0,"[Close, High, Open, Low]",0,1.172438,2,28
