In [19]:
import pandas as pd
import datetime as dt
import yfinance as yf
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error
from statsmodels.tsa.ar_model import AutoReg

In [20]:
from nsepy import get_history as gh
import datetime as dt

In [21]:
# Fetching data
#historical stock data for AXISBANK
start = dt.datetime(2021, 7, 1)
end = dt.datetime(2022, 1, 27)
symbol = 'AXISBANK.NS'
stk_data = yf.download(symbol, start=start, end=end)

[*********************100%%**********************]  1 of 1 completed


In [22]:
# Selecting relevant columns for analysis
stk_data = stk_data[["Open", "High", "Low", "Close"]]

In [23]:
stk_data

Unnamed: 0_level_0,Open,High,Low,Close
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2021-07-01,750.000000,754.400024,744.500000,746.450012
2021-07-02,747.599976,752.849976,744.000000,750.650024
2021-07-05,757.099976,762.000000,753.400024,760.349976
2021-07-06,757.000000,767.000000,755.200012,758.349976
2021-07-07,755.900024,764.000000,752.250000,762.900024
...,...,...,...,...
2022-01-19,725.200012,731.950012,718.700012,729.599976
2022-01-20,731.049988,737.150024,722.150024,728.900024
2022-01-21,722.000000,725.250000,708.049988,712.500000
2022-01-24,715.000000,716.400024,688.200012,704.599976


In [24]:
# Scaling the data to the range [0, 1]
Ms = MinMaxScaler()
data1 = Ms.fit_transform(stk_data)
print("Len:", data1.shape)

Len: (143, 4)


In [25]:
# Converting scaled data back to DataFrame for easier manipulation
data1 = pd.DataFrame(data1, columns=["Open", "High", "Low", "Close"])

In [26]:
# Splitting the data into training and testing sets (80% train, 20% test)
training_size = round(len(data1) * 0.80)
print(training_size)
X_train = data1[:training_size]
X_test = data1[training_size:]
print("X_train length:", X_train.shape)
print("X_test length:", X_test.shape)
y_train = data1[:training_size]
y_test = data1[training_size:]
print("y_train length:", y_train.shape)
print("y_test length:", y_test.shape)

114
X_train length: (114, 4)
X_test length: (29, 4)
y_train length: (114, 4)
y_test length: (29, 4)


In [27]:
import warnings
warnings.filterwarnings("ignore")

In [28]:
# Dictionary to store performance metrics
performance = {"Model": [], "RMSE": [], "MaPe": [], "Lag": [], "Test": []}

## VARMAX: Extends VARMA by including additional exogenous variables to account for external factors affecting the time series

In [29]:
import pandas as pd
from statsmodels.tsa.api import VARMAX
from sklearn.metrics import mean_squared_error, mean_absolute_percentage_error

def cominbation(dataset, listt):
    print(listt)
    datasetTwo = dataset[listt]
    test_obs = 28
    train = datasetTwo[:-test_obs]
    test = datasetTwo[-test_obs:]
    
    best_aic = float('inf')
    best_order = None
    best_model = None
    
    # Iterate through possible AR and MA orders to find the best model
    for i in range(1, 5):  # Check AR orders from 1 to 4
        for j in range(1, 5):  # Check MA orders from 1 to 4
            try:
                model = VARMAX(train, order=(i, j))  # VARMAX model with AR order=i and MA order=j
                results = model.fit(disp=False)
                
                print('AR Order =', i)
                print('MA Order =', j)
                print('AIC: ', results.aic)
                print('BIC: ', results.bic)
                print()
                
                # Track the best model based on AIC
                if results.aic < best_aic:
                    best_aic = results.aic
                    best_order = (i, j)
                    best_model = results
            
            except Exception as e:
                print(f"Error for order ({i}, {j}):", e)
    
    if best_model is not None:
        # Forecast using the best model
        pred = best_model.forecast(steps=test_obs)
        preds = pd.DataFrame(pred, columns=listt)
        preds.to_csv("varmaxforecasted_{}.csv".format(test_obs))
        
        # Evaluate performance
        rmse = round(mean_squared_error(test, pred, squared=False))
        mape = mean_absolute_percentage_error(test, pred)
        
        # Performance metrics
        performance = {"Model": [], "RMSE": [], "MaPe": [], "Lag": [], "Test": []}
        performance["Model"].append(listt)
        performance["RMSE"].append(rmse)
        performance["MaPe"].append(mape)
        performance["Lag"].append(best_order)
        performance["Test"].append(test_obs)
        
        perf = pd.DataFrame(performance)
        return perf, best_model, pred

In [30]:
listt=["Close","High","Open","Low"]

In [31]:
perf,result,pred=cominbation(data1,listt)

['Close', 'High', 'Open', 'Low']
AR Order = 1
MA Order = 1
AIC:  -1876.9422103816091
BIC:  -1750.6753324768997

AR Order = 1
MA Order = 2
AIC:  -1883.7730522291959
BIC:  -1713.5872602706743

AR Order = 1
MA Order = 3
AIC:  -1862.9961806468016
BIC:  -1648.891474634468

AR Order = 1
MA Order = 4
AIC:  -1825.9387752296711
BIC:  -1567.9151551635257

AR Order = 2
MA Order = 1
AIC:  -1888.8109726667565
BIC:  -1718.625180708235

AR Order = 2
MA Order = 2
AIC:  -1860.4496750720912
BIC:  -1646.3449690597577

AR Order = 2
MA Order = 3
AIC:  -1839.0285217237217
BIC:  -1581.0049016575763

AR Order = 2
MA Order = 4
AIC:  -1818.6467549862632
BIC:  -1516.7042208663056

AR Order = 3
MA Order = 1
AIC:  -1870.1663163142175
BIC:  -1656.061610301884

AR Order = 3
MA Order = 2
AIC:  -1840.1248426306145
BIC:  -1582.101222564469

AR Order = 3
MA Order = 3
AIC:  -1811.550982087627
BIC:  -1509.6084479676697

AR Order = 3
MA Order = 4
AIC:  -1791.3103918412994
BIC:  -1445.4489436675299

AR Order = 4
MA Order = 

In [32]:
data1

Unnamed: 0,Open,High,Low,Close
0,0.508982,0.441549,0.543815,0.491495
1,0.497006,0.433854,0.540997,0.513145
2,0.544411,0.479275,0.593970,0.563144
3,0.543912,0.504095,0.604114,0.552835
4,0.538423,0.489203,0.587489,0.576289
...,...,...,...,...
138,0.385230,0.330107,0.398422,0.404639
139,0.414421,0.355920,0.417864,0.401031
140,0.369261,0.296848,0.338405,0.316495
141,0.334331,0.252916,0.226543,0.275773


In [33]:
perf

Unnamed: 0,Model,RMSE,MaPe,Lag,Test
0,"[Close, High, Open, Low]",0,1.246002,"(2, 1)",28
