In [1]:
import pandas as pd
import datetime as dt
import yfinance as yf
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error
from statsmodels.tsa.ar_model import AutoReg

In [2]:
from nsepy import get_history as gh
import datetime as dt

In [3]:
from statsmodels.tsa.statespace.sarimax import SARIMAX
from sklearn.metrics import mean_squared_error, mean_absolute_percentage_error

In [4]:
# Fetching data
#historical stock data for AXISBANK
start = dt.datetime(2021, 7, 1)
end = dt.datetime(2022, 1, 27)
symbol = 'AXISBANK.NS'
stk_data = yf.download(symbol, start=start, end=end)

[*********************100%%**********************]  1 of 1 completed


In [5]:
# Selecting relevant columns for analysis
stk_data = stk_data[["Open", "High", "Low", "Close"]]

In [6]:
stk_data

Unnamed: 0_level_0,Open,High,Low,Close
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2021-07-01,750.000000,754.400024,744.500000,746.450012
2021-07-02,747.599976,752.849976,744.000000,750.650024
2021-07-05,757.099976,762.000000,753.400024,760.349976
2021-07-06,757.000000,767.000000,755.200012,758.349976
2021-07-07,755.900024,764.000000,752.250000,762.900024
...,...,...,...,...
2022-01-19,725.200012,731.950012,718.700012,729.599976
2022-01-20,731.049988,737.150024,722.150024,728.900024
2022-01-21,722.000000,725.250000,708.049988,712.500000
2022-01-24,715.000000,716.400024,688.200012,704.599976


In [7]:
# Scaling the data to the range [0, 1]
Ms = MinMaxScaler()
data1 = Ms.fit_transform(stk_data)
print("Len:", data1.shape)

Len: (143, 4)


In [8]:
# Converting scaled data back to DataFrame for easier manipulation
data1 = pd.DataFrame(data1, columns=["Open", "High", "Low", "Close"])

In [9]:
# Splitting the data into training and testing sets (80% train, 20% test)
training_size = round(len(data1) * 0.80)
print(training_size)
X_train = data1[:training_size]
X_test = data1[training_size:]
print("X_train length:", X_train.shape)
print("X_test length:", X_test.shape)
y_train = data1[:training_size]
y_test = data1[training_size:]
print("y_train length:", y_train.shape)
print("y_test length:", y_test.shape)

114
X_train length: (114, 4)
X_test length: (29, 4)
y_train length: (114, 4)
y_test length: (29, 4)


In [10]:
import warnings
warnings.filterwarnings("ignore")

In [11]:
# Dictionary to store performance metrics
performance = {"Model": [], "RMSE": [], "MaPe": [], "Lag": [], "Test": []}

## VMA: Focuses on modeling the errors or noise in a time series, which helps to smooth and capture short-term dynamics.

In [17]:
def cominbation(dataset, listt):
    print(listt)
    datasetTwo = dataset[listt]
    test_obs = 28
    train = datasetTwo[:-test_obs]
    test = datasetTwo[-test_obs:]
    
    from statsmodels.tsa.api import VARMAX
    best_aic = float('inf')
    best_order = None
    best_model = None
    
    # Iterate through possible MA orders to find the best model
    for i in range(1, 11):  # Check VMA(1) to VMA(10)
        model = VARMAX(train, order=(0, i))  # VMA model with order=(0, i)
        results = model.fit(disp=False)
        print('Order =', i)
        print('AIC: ', results.aic)
        print('BIC: ', results.bic)
        print()
        
        # Track the best model based on AIC
        if results.aic < best_aic:
            best_aic = results.aic
            best_order = i
            best_model = results
    
    # Forecast using the best model
    lagged_Values = train.values[-best_order:]
    pred = best_model.forecast(steps=28)
    preds = pd.DataFrame(pred, columns=listt)
    preds.to_csv("vmaforecasted_{}.csv".format(test_obs))
    
    # Evaluate performance
    from sklearn.metrics import mean_squared_error, mean_absolute_percentage_error
    rmse = round(mean_squared_error(test, pred, squared=False))
    mape = mean_absolute_percentage_error(test, pred)
    
    # Performance metrics
    performance = {"Model": [], "RMSE": [], "MaPe": [], "Lag": [], "Test": []}
    performance["Model"].append(listt)
    performance["RMSE"].append(rmse)
    performance["MaPe"].append(mape)
    performance["Lag"].append(best_order)
    performance["Test"].append(test_obs)
    
    perf = pd.DataFrame(performance)
    return perf, best_model, pred

In [18]:
listt = ["Close", "High", "Open", "Low"]

In [19]:
perf, result, pred = cominbation(data1, listt)

['Close', 'High', 'Open', 'Low']
Order = 1
AIC:  -1482.9937654843438
BIC:  -1400.6458016334464

Order = 2
AIC:  -1520.1659000972327
BIC:  -1393.8990221925233

Order = 3
AIC:  -1052.116481238967
BIC:  -881.9306892804454

Order = 4
AIC:  -1137.8188819796048
BIC:  -923.7141759672713

Order = 5
AIC:  -1048.77652379655
BIC:  -790.7529037304046

Order = 6
AIC:  -1068.8548777924395
BIC:  -766.912343672482

Order = 7
AIC:  -1014.8767372328261
BIC:  -669.0152890590566

Order = 8
AIC:  -955.8768264004907
BIC:  -566.0964641729091

Order = 9
AIC:  -1215.2370017018343
BIC:  -781.5377254204408

Order = 10
AIC:  -1012.1659499241191
BIC:  -534.5477595889136



In [20]:
data1

Unnamed: 0,Open,High,Low,Close
0,0.508982,0.441549,0.543815,0.491495
1,0.497006,0.433854,0.540997,0.513145
2,0.544411,0.479275,0.593970,0.563144
3,0.543912,0.504095,0.604114,0.552835
4,0.538423,0.489203,0.587489,0.576289
...,...,...,...,...
138,0.385230,0.330107,0.398422,0.404639
139,0.414421,0.355920,0.417864,0.401031
140,0.369261,0.296848,0.338405,0.316495
141,0.334331,0.252916,0.226543,0.275773


In [21]:
perf

Unnamed: 0,Model,RMSE,MaPe,Lag,Test
0,"[Close, High, Open, Low]",0,2.022461,2,28
