[Reference](https://medium.com/@ignaciozamanilloherreros/lstms-vs-var-models-5af891325647)

VAR models (vector autoregressive models) are used for multivariate time series.

We will first estimate the Encoder-Decoder model (this will be our LSTM structure) and then the VAR model.

# Encoder-Decoder LSTM overview (Python)

## Import the necessary libraries


In [20]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from numpy import hstack,array
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import TimeDistributed, Dropout,LSTM,Dense,RepeatVector
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import plotly.express as px
import matplotlib.pyplot as plt

## Transform our series into samples:

In [21]:
def split_sequences(sequences, n_steps_in, n_steps_out):
    """Cut a 2-D series into overlapping (input, output) window pairs.

    Window ``k`` pairs rows ``k .. k + n_steps_in - 1`` (the input part) with
    the ``n_steps_out`` rows that immediately follow them (the output part).
    Every column is kept in both parts.

    Args:
        sequences: 2-D array that supports ``[rows, :]`` slicing; windows are
            taken along its first axis.
        n_steps_in: Number of rows in each input window.
        n_steps_out: Number of rows in each output window.

    Returns:
        A tuple ``(X, y)`` of arrays, the first built from the list of input
        windows and the second from the list of output windows. When no
        complete window fits, both are built from empty lists.
    """
    n_rows = len(sequences)
    span = n_steps_in + n_steps_out
    # A window may start at row k only while k + span <= n_rows, and there
    # can never be more starting rows than there are rows.
    n_windows = max(min(n_rows, n_rows - span + 1), 0)
    inputs = [sequences[k:k + n_steps_in, :] for k in range(n_windows)]
    targets = [sequences[k + n_steps_in:k + span, :] for k in range(n_windows)]
    return array(inputs), array(targets)

In [22]:
def plot_graphs(history, string, skip=50):
    """Plot a training metric and its validation counterpart against epochs.

    Args:
        history: Object exposing a ``history`` dict that maps metric names to
            per-epoch lists of values (for example what ``model.fit``
            returns in this notebook).
        string: Name of the metric to plot. The validation curve is read from
            the ``'val_' + string`` entry of the same dict.
        skip: Number of initial epochs to leave out of the plot. Defaults to
            50, the value that used to be hard-coded.

    Raises:
        KeyError: If ``string`` or ``'val_' + string`` is missing from
            ``history.history``.
    """
    train_values = history.history[string]
    val_values = history.history['val_' + string]

    # An out-of-range skip would leave an empty figure (or misalign the
    # epoch axis), so in that case the whole run is shown instead.
    first = skip if 0 <= skip < len(train_values) else 0

    plt.figure(figsize=(11, 5))
    # Plot against the real epoch index so the "Epochs" axis stays truthful
    # after the first `first` epochs have been dropped.
    plt.plot(range(first, len(train_values)), train_values[first:])
    plt.plot(range(first, len(val_values)), val_values[first:])
    plt.xlabel("Epochs")
    plt.ylabel(string)
    plt.legend([string, 'val_' + string])
    plt.show()

In [23]:
# XS, yS = split_sequences(trainS_df, n_steps_in, n_steps_out)
# XS_test,yS_test =split_sequences(testS_df, n_steps_in, n_steps_out)

## Load and plot our data

In [24]:
# df1 = pd.read_csv('SPAIN/Data_for_Python.csv', header=0, infer_datetime_format=True, parse_dates=['DATE'], index_col=['DATE'])

# fig = make_subplots(rows=2, cols=2,subplot_titles=("Unemployment (%)", "GDP (10^) ", "i rate(%)", "Debt"))
                    
# # Top left
# fig.add_trace(
#     go.Scatter(x=df1.index, y=df1["Unem"], name="Unemployment"),
#     row=1, col=1, secondary_y=False)

# # Top right
# fig.add_trace(
#     go.Scatter(x=df1.index, y=df1["GDP"], name="GDP"),
#     row=1, col=2, secondary_y=False,
# )

# # Bottom left
# fig.add_trace(
#     go.Scatter(x=df1.index, y=df1["i_rate"], name="i rate"),
#     row=2, col=1, secondary_y=False,
# )

# # Bottom right
# fig.add_trace(
#     go.Scatter(x=df1.index, y=df1["Debt"], name="Debt"),
#     row=2, col=2, secondary_y=False,
# )
# fig.update_layout(height=600, width=800, title_text="Quarterly Data: Spain")



# fig.show()

## Define the number of steps in & out


In [25]:
# Window lengths handed to split_sequences: rows in each input window and
# rows in each output window.
n_steps_in = 10
n_steps_out = 8

## Create the samples

In [26]:
# trainS_df,testS_df = df1[0:72], df1[(72-n_steps_in):]

# trainS_df=pd.DataFrame(trainS_df[2:]).to_numpy()
# testS_df=pd.DataFrame(testS_df[2:]).to_numpy()

# XS, yS = split_sequences(trainS_df, n_steps_in, n_steps_out)
# XS_test,yS_test = split_sequences(testS_df, n_steps_in, n_steps_out)

## Load the data of the other countries


In [27]:
# # FRANCE
# df2 = pd.read_csv('France/Data_for_Python.csv', header=0, infer_datetime_format=True, parse_dates=['DATE'], index_col=['DATE'])
# trainF_df = df2[0:72]
# trainF_df=trainF_df.to_numpy()
# XF, yF = split_sequences(trainF_df, n_steps_in, n_steps_out)

# # ITALY
# df3 = pd.read_csv('Italy/Data_for_Python.csv', header=0, infer_datetime_format=True, parse_dates=['DATE'], index_col=['DATE'])
# trainIT_df = df3[0:72]
# trainIT_df=trainIT_df.to_numpy()
# XIT, yIT = split_sequences(trainIT_df, n_steps_in, n_steps_out)

# # GREECE
# df4 = pd.read_csv('Greece/Data_for_Python.csv', header=0, infer_datetime_format=True, parse_dates=['DATE'], index_col=['DATE'])
# trainGR_df = df4[0:68]
# trainGR_df=trainGR_df.to_numpy()
# XGR, yGR = split_sequences(trainGR_df, n_steps_in, n_steps_out)

# # IRELAND
# df5 = pd.read_csv('Ireland/Data_for_Python.csv', header=0, infer_datetime_format=True, parse_dates=['DATE'], index_col=['DATE'])
# trainIR_df = df5[0:68]
# trainIR_df=trainIR_df.to_numpy()
# XIR, yIR = split_sequences(trainIR_df, n_steps_in, n_steps_out)

# # PORTUGAL
# df6 = pd.read_csv('Portugal/Data_for_Python.csv', header=0, infer_datetime_format=True, parse_dates=['DATE'], index_col=['DATE'])
# trainPT_df = df6[0:68]
# trainPT_df=trainPT_df.to_numpy()
# XPT, yPT = split_sequences(trainPT_df, n_steps_in, n_steps_out)

## Scale the data:

In [28]:
# O=np.concatenate((trainS_df,trainF_df,trainIT_df,trainGR_df,trainIR_df,trainPT_df))
# scaler = MinMaxScaler()
# scaler.fit(O)

# #we see an example: FRANCE
# for i in range(len(XF)):
#    XF[i]=scaler.transform(XF[i])
# yF=yF[:,:,2].reshape(len(yF),n_steps_out,1)

#ITALY
# for i in range(len(XIT)):
#    XIT[i]=scaler.transform(XIT[i])
# yIT=yIT[:,:,2].reshape(len(yIT),n_steps_out,1)

# #GREECE
# for i in range(len(XGR)):
#    XGR[i]=scaler.transform(XGR[i])
# yGR=yGR[:,:,2].reshape(len(yGR),n_steps_out,1)

# #IRELAND
# for i in range(len(XIR)):
#    XIR[i]=scaler.transform(XIR[i])
# yIR=yIR[:,:,2].reshape(len(yIR),n_steps_out,1)

# #PORTUGAL
# for i in range(len(XPT)):
#    XPT[i]=scaler.transform(XPT[i])
# yPT=yPT[:,:,2].reshape(len(yPT),n_steps_out,1)

# """   SPAIN: TRAIN & TEST     """
# for i in range(len(XS)):
#    XS[i]=scaler.transform(XS[i])

# for i in range(len(XS_test)):
#    XS_test[i]=scaler.transform(XS_test[i])
   
# yS=yS[:,:,2].reshape(len(yS),n_steps_out,1)
# yS_test=yS_test[:,:,2].reshape(len(yS_test),n_steps_out,1)

In [29]:
# X=np.concatenate((XS,XF,XIT,XGR,XIR,XPT))
# y=np.concatenate((yS,yF,yIT,yGR,yIR,yPT))

## Model creation & hyperparameter tuning:


In [30]:
# tf.random.set_seed(1265)

# o1=tf.keras.optimizers.Adam(learning_rate=0.0001, beta_1=0.9, beta_2=0.999, epsilon=1e-07, amsgrad=False)

# n_features = X.shape[2]
# # define model
# model = Sequential()
# model.add(LSTM(12, activation='relu', input_shape=(n_steps_in, n_features)))
# model.add(RepeatVector(n_steps_out))
# model.add(LSTM(12, activation='relu', return_sequences=True))
# model.add(Dropout(0.45))
# model.add(TimeDistributed(Dense(1)))
# model.compile(optimizer=o1, loss='mse', metrics=["mae"])

# model.summary()

# # fit model
# history=model.fit(X, y, epochs=3000, verbose=0,validation_data=(XS_test,yS_test),batch_size=len(X))

In [31]:
# plot_graphs(history, 'loss')

## Prediction

In [32]:
# U=np.zeros((8, 17))
# Rs= [None] * 17

# j=0
# aux1=df1.diff()
# aux2=aux1.diff()
# aux3 = scaler.transform(aux2)
# aux4=scaler.transform(df1)
# i=88
# for i in range(72,89):
#     aux5=df1[0:i]  
#     datos_entrada=aux4[(i-n_steps_in):(i)]
#     datos_entrada=pd.DataFrame(datos_entrada).to_numpy()
#     datos_entrada2=datos_entrada.reshape(1,n_steps_in,n_features)
#     yhat = model.predict(datos_entrada2, verbose=0)
#     yhat=yhat.reshape(n_steps_out)
#     Yp=yhat
#     Yp2=np.concatenate((aux5["Unem"],Yp))
#     Rs[j]= Yp2
#     Rv=df1["Unem"][0:(i+n_steps_out)].to_numpy()
#     l=np.zeros((8))    
#     l2=Rv[(i):(i+n_steps_out)]
#     l[0:len(l2)]=l2
#     a2= Yp2[(i):(i+n_steps_out)]
#     a3=np.zeros(8) 
#     a3[0:len(l2)]=a2[0:len(l2)]
#     R=l-a3
#     U[:,j]=R
#     j=j+1
    
# U=abs(U)

# VAR model overview (in R)

![VAR](https://miro.medium.com/max/1056/1*pIHYGdT4AmMmHT-61UTUwQ.png)

## Data preprocessing

In [33]:
# y1<-diff(gdp$GDP[1:72])
# plot(y1,type="l")
# adf.test(y1)
# y2<-diff(y1)
# plot(y2,type="l")
# adf.test(y2)

## Model creation

In [34]:
# library(tseries)
# library(forcats)
# library(vars)

In [35]:
# #Model creation
# y<-matrix(c(y2,e2,u2,in2),c(length(y2),4))
# colnames(y) <- cbind("GDP","Debt","Unem", "i_rate")

In [36]:
# lagselect <- VARselect(y, lag.max = 16, type = "const")
# lagselect$selection
# Model1 <- VAR(y, p = 10, type = "const", season = NULL, exog = NULL) 
# summary(Model1)
# view raw

## Granger causality test

In [37]:
# #Granger causality test
# GrangerGDP<- causality(Model1, cause = "GDP")
# GrangerGDP
# Grangeri_rate<- causality(Model1, cause = "i_rate")
# Grangeri_rate
# GrangerUnem<- causality(Model1, cause = "Unem")
# GrangerUnem
# GrangerDebt<- causality(Model1, cause = "Debt")
# GrangerDebt

## Prediction

In [38]:
# WE MAKE ALL THE PERIODS PREDICTIONS
# j=0
# U=matrix(0, nrow = 8, ncol = 17)
# for(i in seq(72, 88, 1)){
# j=j+1
#  y1<-diff(gdp$GDP[1:i])
#  y2<-diff(y1)
  
#  e1<-diff(Debt$DEBT[1:i])
#  e2<-diff(e1)
# u1<-diff(Unem$Unem[1:i])
#  u2<-diff(u1)
# in1<-diff(Ecb$i_rate[1:i])
#  in2<-diff(in1)
# #Model creation
# y<-matrix(c(y2,e2,u2,in2),c(length(y2),4))
#  colnames(y) <- cbind("GDP","Debt","Unem", "i_rate")
# lagselect <- VARselect(y, lag.max = 16, type = "const")
#  lagselect$selection
#  Model1 <- VAR(y, p = 10, type = "const", season = NULL, exog = NULL)
# steps=8
#  forecast <- predict(Model1, n.ahead = steps, ci = 0.95)
# gdp_forecast=forecast$fcst$GDP[,1]
#  debt_forecast=forecast$fcst$Debt[,1]
#  unem_forecast=forecast$fcst$Unem[,1]
#  i_rate_forecast=forecast$fcst$i_rate[,1]
# #We focus on unemployment
#  unem_plus_forecast<-as.vector(c(u2,unem_forecast))
#  unem_plus_forecast_ad1<-diffinv(unem_plus_forecast, xi=u1[1])
#  unem_plus_forecast_ad2<-diffinv(unem_plus_forecast_ad1, xi=Unem$Unem[1])
 
#  unem_plus_forecast_ad2[(i+1):(i+steps)]#predictions
#  Unem$Unem[(i+1):(i+steps)]#real values
 
#  U[,j]= Unem$Unem[(i+1):(i+steps)]-unem_plus_forecast_ad2[(i+1):(i+steps)]
#  #In U we are going to store the residuals
 
 
# }
# U=abs(U)
# rowMeans(U,na.rm=TRUE)

# Performance comparison


## Load the error matrix of the VAR model


In [39]:
# U_R=pd.read_csv("SPAIN/U_R.csv")

## Calculate the MAE for each timestep for both models, and the mean of these values

In [40]:
# U[U == 0] = np.nan
# ULS=np.nanmean(U,axis=1)
# np.mean(ULS)
    
#     #     VAR model 
# UR=np.nanmean(U_R,axis=1)
# np.mean(UR)

## Interactive visualization of the performance

## Data preprocessing


In [41]:
# ULS=ULS.reshape(8,1)
# UR=UR.reshape(8,1)

# U1=pd.DataFrame(ULS).set_axis(["mae"],axis=1)
# U1["Model"]="LSTM"
# U1["N step ahead"]=np.arange(1,9)

# U2=pd.DataFrame(UR).set_axis(["mae"],axis=1)
# U2["Model"]="VAR"
# U2["N step ahead"]=np.arange(1,9)

# U3=pd.concat([U1,U2])

## Data visualization


In [42]:
# fig = px.scatter(U3, x="N step ahead", y="mae", color="Model",width=900, height=500,color_discrete_sequence=["red", "blue"])
# fig.update_traces(marker=dict(size=30,
#                               line=dict(width=2,
#                                         color='DarkSlateGrey')),
#                   selector=dict(mode='markers'))
# fig.update_layout(
#      title=dict(
#         text='<b>Model</b>',
#         x=0.45,
#         y=0.97,
#         font=dict(
#             family="Arial",
#             size=25,
#             color='#000000'
#         )
#     ),

# )


# fig.show()