In [1]:
import pandas as pd
import datetime as dt
import yfinance as yf
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error
from statsmodels.tsa.ar_model import AutoReg
from nsepy import get_history as gh
import datetime as dt
from statsmodels.tsa.api import VARMAX
from sklearn.metrics import mean_squared_error, mean_absolute_percentage_error

In [2]:
# Fetching data
#historical stock data for AXISBANK
start = dt.datetime(2021, 7, 1)
end = dt.datetime(2022, 1, 27)
symbol = 'AXISBANK.NS'
stk_data = yf.download(symbol, start=start, end=end)

[*********************100%%**********************]  1 of 1 completed


In [3]:
# Selecting relevant columns for analysis
stk_data = stk_data[["Open", "High", "Low", "Close"]]

In [4]:
stk_data

Unnamed: 0_level_0,Open,High,Low,Close
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2021-07-01,750.000000,754.400024,744.500000,746.450012
2021-07-02,747.599976,752.849976,744.000000,750.650024
2021-07-05,757.099976,762.000000,753.400024,760.349976
2021-07-06,757.000000,767.000000,755.200012,758.349976
2021-07-07,755.900024,764.000000,752.250000,762.900024
...,...,...,...,...
2022-01-19,725.200012,731.950012,718.700012,729.599976
2022-01-20,731.049988,737.150024,722.150024,728.900024
2022-01-21,722.000000,725.250000,708.049988,712.500000
2022-01-24,715.000000,716.400024,688.200012,704.599976


In [5]:
stk_data.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 143 entries, 2021-07-01 to 2022-01-25
Data columns (total 4 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   Open    143 non-null    float64
 1   High    143 non-null    float64
 2   Low     143 non-null    float64
 3   Close   143 non-null    float64
dtypes: float64(4)
memory usage: 5.6 KB


In [6]:
# Scaling the data to the range [0, 1]
Ms = MinMaxScaler()
data1 = Ms.fit_transform(stk_data)
print("Len:", data1.shape)

Len: (143, 4)


In [7]:
# Converting scaled data back to DataFrame for easier manipulation
data1 = pd.DataFrame(data1, columns=["Open", "High", "Low", "Close"])

In [8]:
# Splitting the data into training and testing sets (80% train, 20% test)
training_size = round(len(data1) * 0.80)
print(training_size)
X_train = data1[:training_size]
X_test = data1[training_size:]
print("X_train length:", X_train.shape)
print("X_test length:", X_test.shape)
y_train = data1[:training_size]
y_test = data1[training_size:]
print("y_train length:", y_train.shape)
print("y_test length:", y_test.shape)

114
X_train length: (114, 4)
X_test length: (29, 4)
y_train length: (114, 4)
y_test length: (29, 4)


In [9]:
import warnings
warnings.filterwarnings("ignore")

In [10]:
# Dictionary to store performance metrics
performance = {"Model": [], "RMSE": [], "MaPe": [], "Lag": [], "Test": []}

## VARMA: Combines both past values (VAR) and past errors (VMA) of the variables for forecasting.

In [11]:
def cominbation(dataset, listt):
    print(listt)
    datasetTwo = dataset[listt]
    test_obs = 28
    train = datasetTwo[:-test_obs]
    test = datasetTwo[-test_obs:]
    best_aic = float('inf')
    best_order = None
    best_model = None
    
    # Iterate through possible AR and MA orders to find the best model
    for i in range(1,5):  # Check VARMA(1,1) to VARMA(10,10)
        for j in range(1,):
            try:
                model = VARMAX(train, order=(i, j))  # VARMA model with AR order=i and MA order=j
                results = model.fit(disp=False)
                
                print('AR Order =', i)
                print('MA Order =', j)
                print('AIC: ', results.aic)
                print('BIC: ', results.bic)
                print()
                
                # Track the best model based on AIC
                if results.aic < best_aic:
                    best_aic = results.aic
                    best_order = (i, j)
                    best_model = results
            
            except Exception as e:
                print(f"Error for order ({i}, {j}):", e)
    
    # Forecast using the best model
    pred = best_model.forecast(steps=test_obs)
    preds = pd.DataFrame(pred, columns=listt)
    preds.to_csv("varmaforecasted_{}.csv".format(test_obs))
    
    # Evaluate performance
    rmse = round(mean_squared_error(test, pred, squared=False))
    mape = mean_absolute_percentage_error(test, pred)
    
    # Performance metrics
    performance = {"Model": [], "RMSE": [], "MaPe": [], "Lag": [], "Test": []}
    performance["Model"].append(listt)
    performance["RMSE"].append(rmse)
    performance["MaPe"].append(mape)
    performance["Lag"].append(best_order)
    performance["Test"].append(test_obs)
    
    perf = pd.DataFrame(performance)
    return perf, best_model, pred


In [12]:
listt = ["Close", "High", "Open", "Low"]

In [13]:
perf, result, pred = cominbation(data1, listt)

['Close', 'High', 'Open', 'Low']
AR Order = 1
MA Order = 0
AIC:  -1901.5893375342425
BIC:  -1819.241373683345

AR Order = 2
MA Order = 0
AIC:  -1918.7183229343543
BIC:  -1792.451445029645

AR Order = 3
MA Order = 0
AIC:  -1902.2288997568244
BIC:  -1732.0431077983028

AR Order = 4
MA Order = 0
AIC:  -1883.234662516965
BIC:  -1669.1299565046315



In [16]:
data1

Unnamed: 0,Open,High,Low,Close
0,0.508982,0.441549,0.543815,0.491495
1,0.497006,0.433854,0.540997,0.513145
2,0.544411,0.479275,0.593970,0.563144
3,0.543912,0.504095,0.604114,0.552835
4,0.538423,0.489203,0.587489,0.576289
...,...,...,...,...
138,0.385230,0.330107,0.398422,0.404639
139,0.414421,0.355920,0.417864,0.401031
140,0.369261,0.296848,0.338405,0.316495
141,0.334331,0.252916,0.226543,0.275773


In [17]:
perf

Unnamed: 0,Model,RMSE,MaPe,Lag,Test
0,"[Close, High, Open, Low]",0,1.256465,"(2, 0)",28
