In [27]:
import pandas as pd
import numpy as np
from nsepy import get_history as gh
import datetime as dt
import yfinance as yf

In [28]:
dataset=pd.read_csv("Tatacoffee13_21.csv", parse_dates=['Date'], index_col='Date')
dataset

Unnamed: 0_level_0,Open,High,Low,Close
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2013-01-01,1410.60,1427.90,1408.30,1415.10
2013-01-02,1421.00,1626.60,1416.15,1607.40
2013-01-03,1632.55,1673.90,1613.05,1626.20
2013-01-04,1627.75,1627.75,1574.60,1579.05
2013-01-07,1580.00,1639.50,1565.50,1595.65
...,...,...,...,...
2021-12-22,202.90,207.80,201.35,205.00
2021-12-23,206.00,206.85,202.05,202.95
2021-12-24,203.90,203.90,199.35,201.00
2021-12-27,200.00,222.00,196.00,218.35


## Normalization

In [29]:
from sklearn.preprocessing import MinMaxScaler
Ms = MinMaxScaler()
data = Ms.fit_transform(dataset)
print("Len:", data.shape)

Len: (2225, 4)


## New DataFrame after applying MinMax normalization 

In [30]:
data=pd.DataFrame(data,columns=["Open","High","Low","Close"])

In [31]:
training_size = round(len(data) * 0.80)
print(training_size)
x_train=data[:training_size]
x_test=data[training_size:]
print("x_train Length:", x_train.shape)
print("x_test Length:", x_test.shape)
y_train=data[:training_size]
y_test=data[training_size:]
print("y_train Length:", y_train.shape)
print("y_test Length:", y_test.shape)

1780
x_train Length: (1780, 4)
x_test Length: (445, 4)
y_train Length: (1780, 4)
y_test Length: (445, 4)


In [32]:
import warnings
warnings.filterwarnings("ignore")

## Create dictionary with 5 keys

In [33]:
performance={"Model":[],"RMSE":[],"MaPe":[],"Lag":[],"Test":[]}

## Create list OHLC

In [34]:
listt=["Close","High","Open","Low"]
print(listt)

['Close', 'High', 'Open', 'Low']


In [35]:
datasetTwo=data[listt]
test_obs = 100

## Spilit Train and Test

In [36]:
train =datasetTwo[:-test_obs]
test = datasetTwo[-test_obs:]

## Create a Model

In [None]:
# AIC (Akaike Information Criterion):
# BIC (Bayesian Information Criterion): 
from statsmodels.tsa.api import VAR
for i in range(1, 11):
        model = VAR(train)
        model_fit = model.fit(i)
        print('Order =', i)
        print('AIC: ', model_fit.aic)
        print('BIC: ', model_fit.bic)
        print()

## Selecting the best one based on AIC

In [46]:
x = model.select_order(maxlags=12)
order=x.selected_orders["aic"]
result = model.fit(order)

In [39]:
order

np.int64(11)

## Prediction and saved result to csv files

In [40]:
lagged_Values = train.values[-order:]
pred = result.forecast(y=lagged_Values,steps=100) 
preds=pd.DataFrame(pred,columns=listt)
preds.to_csv("varforecasted_{}.csv".format(test_obs))

In [41]:
lagged_Values

array([[0.08737803, 0.08625982, 0.08803696, 0.08779383],
       [0.08443833, 0.08555378, 0.0875999 , 0.08448976],
       [0.09357018, 0.09276768, 0.08903596, 0.08977627],
       [0.09941831, 0.09798625, 0.09805819, 0.09660468],
       [0.10389042, 0.10277505, 0.09965035, 0.1010101 ],
       [0.11627471, 0.11714145, 0.10620629, 0.10705183],
       [0.10364023, 0.1202112 , 0.11831918, 0.10088423],
       [0.10138854, 0.10366528, 0.10133616, 0.09953114],
       [0.10201401, 0.10277505, 0.10121129, 0.10207999],
       [0.10151364, 0.10105599, 0.10280345, 0.10207999],
       [0.09841756, 0.09985879, 0.10180445, 0.09880739]])

In [42]:
preds

Unnamed: 0,Close,High,Open,Low
0,0.097534,0.098006,0.099270,0.097856
1,0.095685,0.095652,0.096348,0.094995
2,0.094177,0.095693,0.096198,0.092455
3,0.095152,0.095120,0.094630,0.094319
4,0.095111,0.094974,0.094697,0.094379
...,...,...,...,...
95,0.089419,0.089085,0.089534,0.089313
96,0.089313,0.088978,0.089427,0.089206
97,0.089206,0.088872,0.089320,0.089100
98,0.089100,0.088766,0.089214,0.088995


## Evaluation Metrics

In [43]:
from sklearn.metrics import mean_squared_error
rmse= round(mean_squared_error(test,pred,squared=False))
from sklearn.metrics import mean_absolute_percentage_error
mape=mean_absolute_percentage_error(test,pred)

## Append the Values to the Dictionary performance

In [44]:
performance["Model"].append(listt)
performance["RMSE"].append(rmse)
performance["MaPe"].append(mape)
performance["Lag"].append(order)
performance["Test"].append(test_obs)
perf=pd.DataFrame(performance)

In [45]:
perf

Unnamed: 0,Model,RMSE,MaPe,Lag,Test
0,"[Close, High, Open, Low]",0,0.07746,11,100
