## In this notebook the VAR model for the multivariate forecasting is tested. The input to the model is the closing price time series of all 12 banks which are correlated with each other and the output is the forecast for 5 days ahead.



In [66]:
import pandas as pd
import matplotlib.pyplot as plt
import datetime as dt
from scipy import stats
from statsmodels.graphics.gofplots import qqplot
import statsmodels.api as sm
from statsmodels.tsa.stattools import adfuller
import numpy as np
from statsmodels.tsa.stattools import grangercausalitytests
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import mean_squared_error
from sklearn.metrics import r2_score
plt.rcParams["figure.figsize"] = (15,7)
from scipy.stats import shapiro
from statsmodels.tsa.api import VAR
import seaborn as sns
import os
import tensorflow as tf
import random
from sklearn import preprocessing
from sklearn.preprocessing import MinMaxScaler
from keras.models import Sequential
from keras.layers import Dense, LSTM, GRU
import keras
import warnings
warnings.filterwarnings("ignore")
from pandas_datareader.data import DataReader
import pandas as pd
from pandas import DataFrame
from datetime import datetime,timedelta
import matplotlib.pyplot as plt

In [67]:
tickers=['BNS', 'RY', 'TD','BMO','CM','C','JPM','IBN', 'WTBA','BAC','AXP','PNC']
m_df=pd.DataFrame()
for item in tickers:
    df = DataReader(item, data_source='yahoo', start='2011-01-01', end=datetime.now())
    m_df[item]=df["Close"]

In [68]:
X_train=m_df[:-5]
X_test=m_df[-5:]
print(X_train.shape)
print(X_test.shape)
X_diff=X_train.diff().dropna()
print(X_diff.shape)

(2671, 12)
(5, 12)
(2670, 12)


In [69]:
mod = VAR(X_diff)
res = mod.fit(maxlags=30, ic='aic')
print(res.summary())
y_fitted = res.fittedvalues

  Summary of Regression Results   
Model:                         VAR
Method:                        OLS
Date:           Thu, 19, Aug, 2021
Time:                     18:37:46
--------------------------------------------------------------------
No. of Equations:         12.0000    BIC:                   -18.6373
Nobs:                     2667.00    HQIC:                  -19.2629
Log likelihood:          -18807.6    FPE:                3.02127e-09
AIC:                     -19.6176    Det(Omega_mle):     2.56086e-09
--------------------------------------------------------------------
Results for equation BNS
             coefficient       std. error           t-stat            prob
--------------------------------------------------------------------------
const           0.002550         0.013088            0.195           0.846
L1.BNS         -0.058776         0.042691           -1.377           0.169
L1.RY          -0.043122         0.036926           -1.168           0.243
L1.TD      

In [70]:
lag_order = res.k_ar
# Input data for forecasting which is 30 days data input
input_data = X_diff.values[-lag_order:]
print(input_data.shape)

(3, 12)


In [71]:
pred = res.forecast(y=input_data, steps=5)
pred = pd.DataFrame(pred, index=X_test.index, columns=X_diff.columns + '_pred')

In [80]:


def sliding_data(df,index):
    df=df[:-index]
    return df

def test(pred,true):
    rmse = np.sqrt(np.mean(((pred - true) ** 2)))
    mae = mean_absolute_error(pred, true)
    mape= np.mean(mae/true) *100
    return rmse, mae, mape
def invert_transformation(X_train, pred):
    forecast = pred.copy()
    columns = X_train.columns
    for col in columns:
        forecast[str(col)+'_pred'] = X_train[col].iloc[-1] + forecast[str(col)+'_pred'].cumsum()
    return forecast
output = invert_transformation(X_train, pred)

In [82]:
tickers=['BNS', 'RY', 'TD','BMO','CM','C','JPM','IBN', 'WTBA','BAC','AXP','PNC']
m_df=pd.DataFrame()
for item in tickers:
    df = DataReader(item, data_source='yahoo', start='2011-01-01', end=datetime.now())
    m_df[item]=df["Close"]
df=m_df

# Fitting and Testing the VAR model
## The testing is performed using the sliding window testing 10 times for 5 values i.e 50 samples. Thus the model is tested on the 10 weeks data (50/5=10)

In [83]:
mape_err=[]
for idx in range(5,50,5):  
    X_train=df[:-5]
    X_test=df[-5:]
    print(X_train.shape)
    print(X_test.shape)
    X_diff=X_train.diff().dropna()
    print(X_diff.shape)
    mod = VAR(X_diff)
    res = mod.fit(maxlags=30, ic='aic')
    #print(res.summary())
    y_fitted = res.fittedvalues
    lag_order = res.k_ar
    # Input data for forecasting which is 30 days data input
    input_data = X_diff.values[-lag_order:]
    print(input_data.shape)
    pred = res.forecast(y=input_data, steps=5)
    pred = pd.DataFrame(pred, index=X_test.index, columns=X_diff.columns + '_pred')
    output = invert_transformation(X_train, pred)
    mape=[]
    for i,j in zip(output.columns,X_test.columns):
        print("For, ",j)
        combine = pd.concat([output[i], X_test[j]], axis=1)
        combine['MAPE'] = combine.apply(lambda row: abs(row[i]-row[j])/row[j] *100, axis = 1)
        #combine['MAPE'] = pd.Series(["{0:.2f}%".format(val) for val in combine['MAPE']],index = combine.index)
        #combine = combine.reset_index()
        print(combine['MAPE'].mean())
        mape.append(combine['MAPE'].mean())
    mape_err.append(mape)
    df=sliding_data(df,idx)

(2671, 12)
(5, 12)
(2670, 12)
(3, 12)
For,  BNS
1.7086919185427114
For,  RY
1.3605446190711998
For,  TD
1.6546744612086592
For,  BMO
2.1119597568263786
For,  CM
1.4967510812881901
For,  C
4.278958311875523
For,  JPM
3.2466355112483227
For,  IBN
3.000402784994548
For,  WTBA
2.468144361848249
For,  BAC
3.25538311557007
For,  AXP
3.3121603379706555
For,  PNC
3.4794224763994324
(2666, 12)
(5, 12)
(2665, 12)
(3, 12)
For,  BNS
1.895129869742093
For,  RY
1.3173633971411278
For,  TD
1.7877593720021143
For,  BMO
1.2987960643850005
For,  CM
1.1727483948043358
For,  C
3.6986299032018017
For,  JPM
3.6479727082063773
For,  IBN
1.0676079323793877
For,  WTBA
2.059128146279866
For,  BAC
5.1886782899583626
For,  AXP
0.6634146076738059
For,  PNC
2.612048890432569
(2656, 12)
(5, 12)
(2655, 12)
(3, 12)
For,  BNS
1.1272255725917464
For,  RY
0.6345534016600769
For,  TD
1.048223341502839
For,  BMO
1.6208431196929147
For,  CM
1.5186961608578584
For,  C
1.3236810625072033
For,  JPM
0.4919512096194317
For,  IBN

In [87]:
mape_df=pd.DataFrame(mape_err).mean()
mape_df.index=df.columns

## The values below are the Mean Absolute Percentage Error values obtained for all the banks when tested using the sliding window testing 10 times for 5 values for the Statistical Model

In [95]:
pd.DataFrame(mape_df,columns=["Close"]).T

Unnamed: 0,BNS,RY,TD,BMO,CM,C,JPM,IBN,WTBA,BAC,AXP,PNC
Close,1.475972,1.264431,1.168521,1.259598,1.148736,2.671106,1.895053,2.054463,2.846624,2.617496,2.632643,2.486249


## Below is the average MAPE of this model which is around 2%

In [91]:
mape_df.mean()

1.960074424165546