In [1]:
#####################################################################################################################
#                                                                                                                   #
# Libraries                                                                                                         #
#                                                                                                                   #
#####################################################################################################################

# R Path - required by rpy2 prior to importing libraries
import os
os.environ["R_HOME"] = r"C:\R\R-4.0.3"
os.environ["PATH"] = r"C:\R\R-4.0.3\bin\x64" + ";" + os.environ["PATH"]
import rpy2

# Basics
import numpy as np
import pandas as pd
import scipy as sp
from scipy import stats
from scipy.stats import jarque_bera
from datetime import datetime

# R
import rpy2.robjects as robjects
from rpy2.robjects.packages import importr
from rpy2.robjects import r, DataFrame, FloatVector
from rpy2.robjects import pandas2ri

# Kalman Filter
from pykalman import KalmanFilter

# Wavelets
import pywt

# Machine Learning - tensorflow, keras, and sklearn
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from keras import backend as K
from keras import losses
from keras.models import Sequential, Model
from keras.layers import Input, Dense, LSTM, ConvLSTM2D, Flatten, BatchNormalization, Lambda 
from keras.layers import MaxPooling1D, MaxPooling2D, MaxPooling3D, Conv3D, RepeatVector, TimeDistributed, Bidirectional
from keras.layers import LeakyReLU
from keras.utils import plot_model
from sklearn.neural_network import MLPRegressor

# SKLearn Models
from sklearn.linear_model import Ridge, RidgeCV
from sklearn.linear_model import Lasso, LassoCV
from sklearn.linear_model import ElasticNetCV
from sklearn.linear_model import BayesianRidge
from sklearn.linear_model import ARDRegression
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from sklearn.ensemble import AdaBoostRegressor, BaggingRegressor
from sklearn.tree import export_graphviz
from sklearn.model_selection import train_test_split
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import RepeatedKFold
from sklearn.neighbors import KNeighborsRegressor
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import scale 
from sklearn.preprocessing import MinMaxScaler
from sklearn.decomposition import PCA
from sklearn.svm import SVR
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.datasets import make_regression
from sklearn.linear_model import HuberRegressor, TheilSenRegressor, LinearRegression 

# Statsmodels
import statsmodels.api as sm
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
from statsmodels.tsa.stattools import adfuller 
from statsmodels.tsa.statespace.sarimax import SARIMAX 
from statsmodels.stats.diagnostic import acorr_ljungbox 
from statsmodels.stats.diagnostic import het_arch
from statsmodels.tsa.api import VAR
from statsmodels.tsa.vector_ar.svar_model import SVAR
from statsmodels.tsa.vector_ar.vecm import coint_johansen
from statsmodels.tsa.vector_ar.vecm import VECM
from statsmodels.tsa.vector_ar.vecm import select_coint_rank
from statsmodels.tsa.vector_ar.output import VARSummary

# Univariate GARCH
from arch import arch_model

# Plots
from matplotlib import pyplot as plt
from matplotlib import dates as md
import seaborn as sns
from matplotlib.ticker import PercentFormatter

# Misc
import pydot
import warnings

KeyboardInterrupt: 

In [None]:
#####################################################################################################################
#                                                                                                                   #
# Constants and Parameters                                                                                          #
#                                                                                                                   #
#####################################################################################################################

# Folders and worksheet names
# All locations are absolute Windows paths; each ends with '/' because file names are
# appended to them by plain string concatenation.
# NOTE(review): str_Dir_Plan_Data and str_Dir_Plan_PC point to the same folder - confirm this is intended.
str_Dir_Plan_FRED = 'C:/Users/alext/Desktop/Modelo/FRED/'
str_Dir_Plan_Data = 'C:/Users/alext/Desktop/Modelo/PC/'
str_Dir_Plan_PC = 'C:/Users/alext/Desktop/Modelo/PC/'
str_Dir_Results = 'C:/Users/alext/Desktop/Modelo/Results/'
str_Nome_Plan_FRED_MD = 'FRED_MD_2020_04'
str_Nome_Plan_FRED_QD = 'FRED_QD_2020_04'
str_Nome_Plan_FRED_MD_Desc = 'Data_Description_MD'
str_Nome_Plan_FRED_QD_Desc = 'Data_Description_QD'

# How to display plots (IPython magic: render figures inline in the notebook)
%matplotlib inline 
plt.rcParams['figure.dpi'] = 200 # Plot resolution (dpi)

# Required to convert datatypes from Python to R and vice-versa
pandas2ri.activate()

# Silence every warning for the rest of the session.
# NOTE(review): this also hides deprecation and convergence warnings - confirm this is wanted.
warnings.filterwarnings('ignore')

# Color style (plots)
sns.set(color_codes = True)

# Statistical significance level for hypothesis testing
# Using 1% due to the high number of tests carried out
alfa = 0.01

# Test size (share of observations used to build the test sample)
share_test_size = 0.20

# Validation sample size (share of observations used to build the validation sample)
share_validation_size = 0.20

# Random state (used to split samples into training and test samples)
# It is also the prefix of the CSV file names read in the data cell.
rnd_state = 42

# Number of lags considered when splitting the data - see LSTM models
n_lags_lstm = 12

# Number of lags considered when splitting the data - see ConvLSTM models
n_lags_conv = 12

# Number of sub-sequences into which each sample is broken when fitting ConvLSTM
# Note: n_lags = n_seq * n_steps
n_seq_conv = 1

# Length of each sub-sequence into which a sample is broken when fitting ConvLSTM
# Note: n_lags = n_seq * n_steps, so n_lags_conv should be a multiple of n_seq_conv
n_steps_conv = int(n_lags_conv / n_seq_conv)

# Activation function
act_fun = 'selu'

# Number of epochs
num_epochs = 50

In [None]:
#####################################################################################################################
#                                                                                                                   #
# Auxiliary Functions                                                                                               #
#                                                                                                                   #
#####################################################################################################################

# Split a univariate sequence into samples
def split_sequence_uni(sequence, n_steps, per_ahead, cum = False):
    """Turn a univariate sequence into (window, target) training pairs.

    Parameters
    ----------
    sequence : indexable sequence (e.g. 1-D array)
        Observations in time order.
    n_steps : int
        Length of each input window.
    per_ahead : int
        Forecast horizon, counted from the first observation after the window.
    cum : bool, default False
        If False, the target is the single observation `per_ahead` periods
        after the window. Otherwise the target is the sum of the `per_ahead`
        observations that follow the window.

    Returns
    -------
    (np.ndarray, np.ndarray)
        Stacked input windows and their targets. Both are empty when the
        sequence is too short to produce a single pair.
    """
    n_obs = len(sequence)
    windows, targets = [], []
    for start in range(n_obs):
        # First index after the input window
        stop = start + n_steps
        # Index of the last observation needed to build the target
        last_needed = stop + per_ahead - 1
        if last_needed > n_obs - 1:
            # Not enough data left for a complete pair
            break
        window = sequence[start:stop]
        if cum == False:
            target = sequence[last_needed]
        else:
            target = np.sum(sequence[stop:(stop + per_ahead)])
        windows.append(window)
        targets.append(target)
    return np.array(windows), np.array(targets)

# Split a multivariate sequence into samples
def split_sequence_mult(sequences, n_steps, per_ahead, cum = False):
    """Turn a multivariate 2-D array into (window, target) training pairs.

    The last column of `sequences` is the target series; all other columns
    are predictors.

    Parameters
    ----------
    sequences : 2-D np.ndarray
        Rows are observations in time order (indexed as `sequences[rows, cols]`).
    n_steps : int
        Number of rows in each predictor window.
    per_ahead : int
        Forecast horizon.
    cum : bool, default False
        If False, the target is the last-column value at row
        `i + n_steps + per_ahead - 1`. Otherwise it is the sum of the
        last-column values over rows `i + n_steps` to
        `i + n_steps + per_ahead - 1`.

    Returns
    -------
    (np.ndarray, np.ndarray)
        Stacked predictor windows and their targets.

    Notes
    -----
    The predictor window for pair `i` covers rows `i + 1` to `i + n_steps`
    inclusive, so its last row is the same row as the first target row.
    NOTE(review): this is one row later than the window used by
    split_sequence_uni - confirm the alignment is intended.
    """
    n_rows = len(sequences)
    windows, targets = [], []
    for start in range(n_rows):
        stop = start + n_steps
        # Row index of the last observation needed to build the target
        last_needed = stop + per_ahead - 1
        if last_needed > n_rows - 1:
            # Not enough rows left for a complete pair
            break
        # Predictors: every column but the last, shifted one row forward
        window = sequences[(start + 1):(stop + 1), :-1]
        if cum == False:
            target = sequences[last_needed, -1]
        else:
            target = np.sum(sequences[stop:(stop + per_ahead), -1])
        windows.append(window)
        targets.append(target)
    return np.array(windows), np.array(targets)

# Kalman filter regression
# If EM = True, then EM algorithm is used for estimation
# delta is related to the variance of the betas. Delta -> 1 makes betas more volatile, which may lead to overfitting.
# However, delta -> 0 may increase the MSE.

def KFReg(X, y, delta, obs_cov, init_mean, init_cov, EM = False):
    """Regression with time-varying coefficients estimated by a Kalman filter.

    The hidden state is the vector of regression coefficients and the
    observation matrix at each time step is the corresponding row of X.

    Parameters
    ----------
    X : 2-D np.ndarray, shape (n_obs, n_features)
        Regressors. Must support `X[:, np.newaxis, :]` indexing.
    y : array-like
        Observed dependent variable, passed to the filter as the observations.
    delta : float
        Controls the coefficient (state) innovation variance through
        `delta / (1 - delta)`. Only used when EM is False; must differ from 1.
    obs_cov : float or array
        Observation noise covariance. Only used when EM is False.
    init_mean : array-like
        Initial state (coefficient) mean.
    init_cov : array-like
        Initial state covariance.
    EM : bool, default False
        If False, filter with the covariances implied by `delta` and `obs_cov`.
        If True, let the EM algorithm estimate the unspecified parameters
        before filtering.

    Returns
    -------
    state_means, state_covs, kf
        Filtered state means, filtered state covariances, and the
        KalmanFilter object that produced them.
    """
    n_features = X.shape[1]
    # One (1 x n_features) observation matrix per time step: row t of X
    obs_mat = X[:, np.newaxis, :]
    if EM == False:
        # Coefficients follow a random walk (identity transition) whose
        # innovation variance is set by delta
        trans_cov = (delta/(1 - delta))*np.eye(n_features)
        kf = KalmanFilter(n_dim_obs = 1, n_dim_state = n_features, 
                          initial_state_mean = init_mean,
                          initial_state_covariance = init_cov,
                          transition_matrices = np.eye(n_features),
                          observation_matrices = obs_mat,
                          observation_covariance = obs_cov,
                          transition_covariance = trans_cov)
        state_means, state_covs = kf.filter(y)
    else:
        kf = KalmanFilter(n_dim_obs = 1, n_dim_state = n_features, 
                          initial_state_mean = init_mean, 
                          initial_state_covariance = init_cov,
                          observation_matrices = obs_mat)
        # EM is run only on this branch. Previously this line sat at function
        # level, so EM was also run when EM was False and its result replaced
        # the filter output computed from delta and obs_cov.
        state_means, state_covs = kf.em(y).filter(y)
    return state_means, state_covs, kf

# Mean Absolute Error
def MAE(y_obs, y_hat):
    """Mean absolute error between observed values and predictions."""
    abs_errors = np.abs(y_obs - y_hat)
    return np.mean(abs_errors)

# Mean Squared Error
def MSE(y_obs, y_hat):
    """Mean squared error between observed values and predictions."""
    errors = y_obs - y_hat
    return np.mean(errors**2)

# RMSE
def RMSE(y_obs, y_hat):
    """Root mean squared error: the square root of MSE(y_obs, y_hat)."""
    mean_sq_error = MSE(y_obs, y_hat)
    return np.sqrt(mean_sq_error)

def MAPE(y_obs, y_hat):
    """Mean absolute percentage error, returned as a fraction (not multiplied by 100).

    Parameters
    ----------
    y_obs : np.ndarray
        Observed values. Zeros produce infinite or undefined terms.
    y_hat : np.ndarray
        Predicted values.

    Returns
    -------
    float
        Mean of |y_obs - y_hat| / |y_obs|.
    """
    # The absolute value is taken of the whole ratio. Dividing by the signed
    # y_obs made terms with negative observations negative, so they cancelled
    # against the positive ones.
    return np.mean(np.abs((y_obs - y_hat)/y_obs))

def cos_sim(y_obs, y_hat):
    """Cosine similarity: dot product of the inputs divided by the product of their norms."""
    inner = np.dot(y_obs, y_hat)
    norm_product = np.linalg.norm(y_obs)*np.linalg.norm(y_hat)
    return inner/norm_product

def cos_sim2(y_obs, y_hat):
    """Cosine similarity computed from the sum of element-wise products instead of np.dot."""
    summed_products = np.sum(y_obs*y_hat)
    norm_product = np.linalg.norm(y_obs)*np.linalg.norm(y_hat)
    return summed_products/norm_product
    
def R2(y_obs, y_hat):
    """Coefficient of determination: 1 minus residual sum of squares over total sum of squares."""
    residuals = y_obs - y_hat
    deviations = y_obs - np.mean(y_obs)
    residual_ss = np.sum(residuals**2)
    total_ss = np.sum(deviations**2)
    return (1 - residual_ss/total_ss)

In [None]:
#####################################################################################################################
#                                                                                                                   #
# Data                                                                                                              #
#                                                                                                                   #
#####################################################################################################################

# Raw data
# Files are read from str_Dir_Plan_Data and are named '<rnd_state> <name>.csv'.

y = pd.read_csv(filepath_or_buffer = str_Dir_Plan_Data + str(rnd_state) + ' ' + 'y.csv', index_col = 'Date', sep = ',')
X = pd.read_csv(filepath_or_buffer = str_Dir_Plan_Data + str(rnd_state) + ' ' + 'X.csv', index_col = 'Date', sep = ',')

# Lagged regressors, lags 1 to 12: every column of lag k gets the suffix '_L<k>'.
# add_suffix returns a renamed frame, so the array behind the column index is
# never modified in place.
(X_L1, X_L2, X_L3, X_L4, X_L5, X_L6,
 X_L7, X_L8, X_L9, X_L10, X_L11, X_L12) = (
    pd.read_csv(filepath_or_buffer = str_Dir_Plan_Data + str(rnd_state) + ' ' + 'X_L' + str(lag) + '.csv',
                index_col = 'Date', sep = ',').add_suffix('_L' + str(lag))
    for lag in range(1, 13)
)

# Raw data - split samples - Train
# Files are read from str_Dir_Plan_Data and are named '<rnd_state> <name>_train.csv'.

y_train = pd.read_csv(filepath_or_buffer = str_Dir_Plan_Data + str(rnd_state) + ' ' + 'y_train.csv', index_col = 'Date', sep = ',')
X_train = pd.read_csv(filepath_or_buffer = str_Dir_Plan_Data + str(rnd_state) + ' ' + 'X_train.csv', index_col = 'Date', sep = ',')

# Lagged regressors, lags 1 to 12: every column of lag k gets the suffix '_L<k>'.
# add_suffix returns a renamed frame, so the array behind the column index is
# never modified in place.
(X_L1_train, X_L2_train, X_L3_train, X_L4_train, X_L5_train, X_L6_train,
 X_L7_train, X_L8_train, X_L9_train, X_L10_train, X_L11_train, X_L12_train) = (
    pd.read_csv(filepath_or_buffer = str_Dir_Plan_Data + str(rnd_state) + ' ' + 'X_L' + str(lag) + '_train.csv',
                index_col = 'Date', sep = ',').add_suffix('_L' + str(lag))
    for lag in range(1, 13)
)

# Raw data - split samples - Test
# Files are read from str_Dir_Plan_Data and are named '<rnd_state> <name>_test.csv'.

X_test = pd.read_csv(filepath_or_buffer = str_Dir_Plan_Data + str(rnd_state) + ' ' + 'X_test.csv', index_col = 'Date', sep = ',')
y_test = pd.read_csv(filepath_or_buffer = str_Dir_Plan_Data + str(rnd_state) + ' ' + 'y_test.csv', index_col = 'Date', sep = ',')

# Lagged regressors, lags 1 to 12: every column of lag k gets the suffix '_L<k>'.
# add_suffix returns a renamed frame, so the array behind the column index is
# never modified in place.
(X_L1_test, X_L2_test, X_L3_test, X_L4_test, X_L5_test, X_L6_test,
 X_L7_test, X_L8_test, X_L9_test, X_L10_test, X_L11_test, X_L12_test) = (
    pd.read_csv(filepath_or_buffer = str_Dir_Plan_Data + str(rnd_state) + ' ' + 'X_L' + str(lag) + '_test.csv',
                index_col = 'Date', sep = ',').add_suffix('_L' + str(lag))
    for lag in range(1, 13)
)

# PCA samples - train
# Files are read from str_Dir_Plan_PC and are named '<rnd_state> X_full[_<code>]_pca_train.csv'.

X_train_pca = pd.read_csv(filepath_or_buffer = str_Dir_Plan_PC + str(rnd_state) + ' ' + 'X_full_pca_train' + '.csv', 
                          index_col = 'Date', sep = ',')

# One frame per subset code (presumably the FRED-MD variable groups - TODO confirm).
# Every column gets the suffix '_<code>'. The order of the target names must
# match the order of the codes in the loop below.
(X_OI_train_pca, X_LM_train_pca, X_H_train_pca, X_COI_train_pca,
 X_MC_train_pca, X_INTFX_train_pca, X_P_train_pca, X_S_train_pca) = (
    pd.read_csv(filepath_or_buffer = str_Dir_Plan_PC + str(rnd_state) + ' ' + 'X_full_' + grp + '_pca_train' + '.csv',
                index_col = 'Date', sep = ',').add_suffix('_' + grp)
    for grp in ('OI', 'LM', 'H', 'COI', 'MC', 'INTFX', 'P', 'S')
)

# PCA samples - test
# Files are read from str_Dir_Plan_PC and are named '<rnd_state> X_full[_<code>]_pca_test.csv'.

X_test_pca = pd.read_csv(filepath_or_buffer = str_Dir_Plan_PC + str(rnd_state) + ' ' + 'X_full_pca_test' + '.csv', 
                         index_col = 'Date', sep = ',')

# One frame per subset code (presumably the FRED-MD variable groups - TODO confirm).
# Every column gets the suffix '_<code>'. The order of the target names must
# match the order of the codes in the loop below.
(X_OI_test_pca, X_LM_test_pca, X_H_test_pca, X_COI_test_pca,
 X_MC_test_pca, X_INTFX_test_pca, X_P_test_pca, X_S_test_pca) = (
    pd.read_csv(filepath_or_buffer = str_Dir_Plan_PC + str(rnd_state) + ' ' + 'X_full_' + grp + '_pca_test' + '.csv',
                index_col = 'Date', sep = ',').add_suffix('_' + grp)
    for grp in ('OI', 'LM', 'H', 'COI', 'MC', 'INTFX', 'P', 'S')
)

# PCA samples - train L1
# Files are read from str_Dir_Plan_PC and are named '<rnd_state> X_full[_<code>]_L1_pca_train.csv'.

X_L1_train_pca = pd.read_csv(filepath_or_buffer = str_Dir_Plan_PC + str(rnd_state) + ' ' + 'X_full_L1_pca_train' + '.csv', 
                             index_col = 'Date', sep = ',').add_suffix('_L1')

# One frame per subset code (presumably the FRED-MD variable groups - TODO confirm).
# Every column gets the suffix '_<code>_L1'. The order of the target names must
# match the order of the codes in the loop below.
(X_OI_L1_train_pca, X_LM_L1_train_pca, X_H_L1_train_pca, X_COI_L1_train_pca,
 X_MC_L1_train_pca, X_INTFX_L1_train_pca, X_P_L1_train_pca, X_S_L1_train_pca) = (
    pd.read_csv(filepath_or_buffer = str_Dir_Plan_PC + str(rnd_state) + ' ' + 'X_full_' + grp + '_L1_pca_train' + '.csv',
                index_col = 'Date', sep = ',').add_suffix('_' + grp + '_L1')
    for grp in ('OI', 'LM', 'H', 'COI', 'MC', 'INTFX', 'P', 'S')
)

# PCA samples - test L1
# Files are read from str_Dir_Plan_PC and are named '<rnd_state> X_full[_<code>]_L1_pca_test.csv'.

X_L1_test_pca = pd.read_csv(filepath_or_buffer = str_Dir_Plan_PC + str(rnd_state) + ' ' + 'X_full_L1_pca_test' + '.csv', 
                            index_col = 'Date', sep = ',').add_suffix('_L1')

# One frame per subset code (presumably the FRED-MD variable groups - TODO confirm).
# Every column gets the suffix '_<code>_L1'. The order of the target names must
# match the order of the codes in the loop below.
(X_OI_L1_test_pca, X_LM_L1_test_pca, X_H_L1_test_pca, X_COI_L1_test_pca,
 X_MC_L1_test_pca, X_INTFX_L1_test_pca, X_P_L1_test_pca, X_S_L1_test_pca) = (
    pd.read_csv(filepath_or_buffer = str_Dir_Plan_PC + str(rnd_state) + ' ' + 'X_full_' + grp + '_L1_pca_test' + '.csv',
                index_col = 'Date', sep = ',').add_suffix('_' + grp + '_L1')
    for grp in ('OI', 'LM', 'H', 'COI', 'MC', 'INTFX', 'P', 'S')
)

# PCA samples - train L2
#
# Each CSV is read from str_Dir_Plan_PC with the rnd_state prefix and indexed by
# its 'Date' column; every column label then receives the tag taken from the
# file name (e.g. '_OI_L2').
# add_suffix() builds fresh labels instead of writing into the array behind the
# existing column Index (which `columns.values` exposes and pandas treats as
# immutable), and it leaves no scratch variables behind.

X_L2_train_pca = pd.read_csv(
    filepath_or_buffer=str_Dir_Plan_PC + str(rnd_state) + ' ' + 'X_full_L2_pca_train' + '.csv',
    index_col='Date', sep=',',
).add_suffix('_L2')

X_OI_L2_train_pca = pd.read_csv(
    filepath_or_buffer=str_Dir_Plan_PC + str(rnd_state) + ' ' + 'X_full_OI_L2_pca_train' + '.csv',
    index_col='Date', sep=',',
).add_suffix('_OI_L2')

X_LM_L2_train_pca = pd.read_csv(
    filepath_or_buffer=str_Dir_Plan_PC + str(rnd_state) + ' ' + 'X_full_LM_L2_pca_train' + '.csv',
    index_col='Date', sep=',',
).add_suffix('_LM_L2')

X_H_L2_train_pca = pd.read_csv(
    filepath_or_buffer=str_Dir_Plan_PC + str(rnd_state) + ' ' + 'X_full_H_L2_pca_train' + '.csv',
    index_col='Date', sep=',',
).add_suffix('_H_L2')

X_COI_L2_train_pca = pd.read_csv(
    filepath_or_buffer=str_Dir_Plan_PC + str(rnd_state) + ' ' + 'X_full_COI_L2_pca_train' + '.csv',
    index_col='Date', sep=',',
).add_suffix('_COI_L2')

X_MC_L2_train_pca = pd.read_csv(
    filepath_or_buffer=str_Dir_Plan_PC + str(rnd_state) + ' ' + 'X_full_MC_L2_pca_train' + '.csv',
    index_col='Date', sep=',',
).add_suffix('_MC_L2')

X_INTFX_L2_train_pca = pd.read_csv(
    filepath_or_buffer=str_Dir_Plan_PC + str(rnd_state) + ' ' + 'X_full_INTFX_L2_pca_train' + '.csv',
    index_col='Date', sep=',',
).add_suffix('_INTFX_L2')

X_P_L2_train_pca = pd.read_csv(
    filepath_or_buffer=str_Dir_Plan_PC + str(rnd_state) + ' ' + 'X_full_P_L2_pca_train' + '.csv',
    index_col='Date', sep=',',
).add_suffix('_P_L2')

X_S_L2_train_pca = pd.read_csv(
    filepath_or_buffer=str_Dir_Plan_PC + str(rnd_state) + ' ' + 'X_full_S_L2_pca_train' + '.csv',
    index_col='Date', sep=',',
).add_suffix('_S_L2')

# PCA samples - test L2
#
# Each CSV is read from str_Dir_Plan_PC with the rnd_state prefix and indexed by
# its 'Date' column; every column label then receives the tag taken from the
# file name (e.g. '_OI_L2').
# add_suffix() builds fresh labels instead of writing into the array behind the
# existing column Index (which `columns.values` exposes and pandas treats as
# immutable), and it leaves no scratch variables behind.

X_L2_test_pca = pd.read_csv(
    filepath_or_buffer=str_Dir_Plan_PC + str(rnd_state) + ' ' + 'X_full_L2_pca_test' + '.csv',
    index_col='Date', sep=',',
).add_suffix('_L2')

X_OI_L2_test_pca = pd.read_csv(
    filepath_or_buffer=str_Dir_Plan_PC + str(rnd_state) + ' ' + 'X_full_OI_L2_pca_test' + '.csv',
    index_col='Date', sep=',',
).add_suffix('_OI_L2')

X_LM_L2_test_pca = pd.read_csv(
    filepath_or_buffer=str_Dir_Plan_PC + str(rnd_state) + ' ' + 'X_full_LM_L2_pca_test' + '.csv',
    index_col='Date', sep=',',
).add_suffix('_LM_L2')

X_H_L2_test_pca = pd.read_csv(
    filepath_or_buffer=str_Dir_Plan_PC + str(rnd_state) + ' ' + 'X_full_H_L2_pca_test' + '.csv',
    index_col='Date', sep=',',
).add_suffix('_H_L2')

X_COI_L2_test_pca = pd.read_csv(
    filepath_or_buffer=str_Dir_Plan_PC + str(rnd_state) + ' ' + 'X_full_COI_L2_pca_test' + '.csv',
    index_col='Date', sep=',',
).add_suffix('_COI_L2')

X_MC_L2_test_pca = pd.read_csv(
    filepath_or_buffer=str_Dir_Plan_PC + str(rnd_state) + ' ' + 'X_full_MC_L2_pca_test' + '.csv',
    index_col='Date', sep=',',
).add_suffix('_MC_L2')

X_INTFX_L2_test_pca = pd.read_csv(
    filepath_or_buffer=str_Dir_Plan_PC + str(rnd_state) + ' ' + 'X_full_INTFX_L2_pca_test' + '.csv',
    index_col='Date', sep=',',
).add_suffix('_INTFX_L2')

X_P_L2_test_pca = pd.read_csv(
    filepath_or_buffer=str_Dir_Plan_PC + str(rnd_state) + ' ' + 'X_full_P_L2_pca_test' + '.csv',
    index_col='Date', sep=',',
).add_suffix('_P_L2')

X_S_L2_test_pca = pd.read_csv(
    filepath_or_buffer=str_Dir_Plan_PC + str(rnd_state) + ' ' + 'X_full_S_L2_pca_test' + '.csv',
    index_col='Date', sep=',',
).add_suffix('_S_L2')

# PCA samples - train L3
#
# Each CSV is read from str_Dir_Plan_PC with the rnd_state prefix and indexed by
# its 'Date' column; every column label then receives the tag taken from the
# file name (e.g. '_OI_L3').
# add_suffix() builds fresh labels instead of writing into the array behind the
# existing column Index (which `columns.values` exposes and pandas treats as
# immutable), and it leaves no scratch variables behind.

X_L3_train_pca = pd.read_csv(
    filepath_or_buffer=str_Dir_Plan_PC + str(rnd_state) + ' ' + 'X_full_L3_pca_train' + '.csv',
    index_col='Date', sep=',',
).add_suffix('_L3')

X_OI_L3_train_pca = pd.read_csv(
    filepath_or_buffer=str_Dir_Plan_PC + str(rnd_state) + ' ' + 'X_full_OI_L3_pca_train' + '.csv',
    index_col='Date', sep=',',
).add_suffix('_OI_L3')

X_LM_L3_train_pca = pd.read_csv(
    filepath_or_buffer=str_Dir_Plan_PC + str(rnd_state) + ' ' + 'X_full_LM_L3_pca_train' + '.csv',
    index_col='Date', sep=',',
).add_suffix('_LM_L3')

X_H_L3_train_pca = pd.read_csv(
    filepath_or_buffer=str_Dir_Plan_PC + str(rnd_state) + ' ' + 'X_full_H_L3_pca_train' + '.csv',
    index_col='Date', sep=',',
).add_suffix('_H_L3')

X_COI_L3_train_pca = pd.read_csv(
    filepath_or_buffer=str_Dir_Plan_PC + str(rnd_state) + ' ' + 'X_full_COI_L3_pca_train' + '.csv',
    index_col='Date', sep=',',
).add_suffix('_COI_L3')

X_MC_L3_train_pca = pd.read_csv(
    filepath_or_buffer=str_Dir_Plan_PC + str(rnd_state) + ' ' + 'X_full_MC_L3_pca_train' + '.csv',
    index_col='Date', sep=',',
).add_suffix('_MC_L3')

X_INTFX_L3_train_pca = pd.read_csv(
    filepath_or_buffer=str_Dir_Plan_PC + str(rnd_state) + ' ' + 'X_full_INTFX_L3_pca_train' + '.csv',
    index_col='Date', sep=',',
).add_suffix('_INTFX_L3')

X_P_L3_train_pca = pd.read_csv(
    filepath_or_buffer=str_Dir_Plan_PC + str(rnd_state) + ' ' + 'X_full_P_L3_pca_train' + '.csv',
    index_col='Date', sep=',',
).add_suffix('_P_L3')

X_S_L3_train_pca = pd.read_csv(
    filepath_or_buffer=str_Dir_Plan_PC + str(rnd_state) + ' ' + 'X_full_S_L3_pca_train' + '.csv',
    index_col='Date', sep=',',
).add_suffix('_S_L3')

# PCA samples - test L3
#
# Each CSV is read from str_Dir_Plan_PC with the rnd_state prefix and indexed by
# its 'Date' column; every column label then receives the tag taken from the
# file name (e.g. '_OI_L3').
# add_suffix() builds fresh labels instead of writing into the array behind the
# existing column Index (which `columns.values` exposes and pandas treats as
# immutable), and it leaves no scratch variables behind.

X_L3_test_pca = pd.read_csv(
    filepath_or_buffer=str_Dir_Plan_PC + str(rnd_state) + ' ' + 'X_full_L3_pca_test' + '.csv',
    index_col='Date', sep=',',
).add_suffix('_L3')

X_OI_L3_test_pca = pd.read_csv(
    filepath_or_buffer=str_Dir_Plan_PC + str(rnd_state) + ' ' + 'X_full_OI_L3_pca_test' + '.csv',
    index_col='Date', sep=',',
).add_suffix('_OI_L3')

X_LM_L3_test_pca = pd.read_csv(
    filepath_or_buffer=str_Dir_Plan_PC + str(rnd_state) + ' ' + 'X_full_LM_L3_pca_test' + '.csv',
    index_col='Date', sep=',',
).add_suffix('_LM_L3')

X_H_L3_test_pca = pd.read_csv(
    filepath_or_buffer=str_Dir_Plan_PC + str(rnd_state) + ' ' + 'X_full_H_L3_pca_test' + '.csv',
    index_col='Date', sep=',',
).add_suffix('_H_L3')

X_COI_L3_test_pca = pd.read_csv(
    filepath_or_buffer=str_Dir_Plan_PC + str(rnd_state) + ' ' + 'X_full_COI_L3_pca_test' + '.csv',
    index_col='Date', sep=',',
).add_suffix('_COI_L3')

X_MC_L3_test_pca = pd.read_csv(
    filepath_or_buffer=str_Dir_Plan_PC + str(rnd_state) + ' ' + 'X_full_MC_L3_pca_test' + '.csv',
    index_col='Date', sep=',',
).add_suffix('_MC_L3')

X_INTFX_L3_test_pca = pd.read_csv(
    filepath_or_buffer=str_Dir_Plan_PC + str(rnd_state) + ' ' + 'X_full_INTFX_L3_pca_test' + '.csv',
    index_col='Date', sep=',',
).add_suffix('_INTFX_L3')

X_P_L3_test_pca = pd.read_csv(
    filepath_or_buffer=str_Dir_Plan_PC + str(rnd_state) + ' ' + 'X_full_P_L3_pca_test' + '.csv',
    index_col='Date', sep=',',
).add_suffix('_P_L3')

X_S_L3_test_pca = pd.read_csv(
    filepath_or_buffer=str_Dir_Plan_PC + str(rnd_state) + ' ' + 'X_full_S_L3_pca_test' + '.csv',
    index_col='Date', sep=',',
).add_suffix('_S_L3')

# PCA samples - train L4
#
# Each CSV is read from str_Dir_Plan_PC with the rnd_state prefix and indexed by
# its 'Date' column; every column label then receives the tag taken from the
# file name (e.g. '_OI_L4').
# add_suffix() builds fresh labels instead of writing into the array behind the
# existing column Index (which `columns.values` exposes and pandas treats as
# immutable), and it leaves no scratch variables behind.

X_L4_train_pca = pd.read_csv(
    filepath_or_buffer=str_Dir_Plan_PC + str(rnd_state) + ' ' + 'X_full_L4_pca_train' + '.csv',
    index_col='Date', sep=',',
).add_suffix('_L4')

X_OI_L4_train_pca = pd.read_csv(
    filepath_or_buffer=str_Dir_Plan_PC + str(rnd_state) + ' ' + 'X_full_OI_L4_pca_train' + '.csv',
    index_col='Date', sep=',',
).add_suffix('_OI_L4')

X_LM_L4_train_pca = pd.read_csv(
    filepath_or_buffer=str_Dir_Plan_PC + str(rnd_state) + ' ' + 'X_full_LM_L4_pca_train' + '.csv',
    index_col='Date', sep=',',
).add_suffix('_LM_L4')

X_H_L4_train_pca = pd.read_csv(
    filepath_or_buffer=str_Dir_Plan_PC + str(rnd_state) + ' ' + 'X_full_H_L4_pca_train' + '.csv',
    index_col='Date', sep=',',
).add_suffix('_H_L4')

X_COI_L4_train_pca = pd.read_csv(
    filepath_or_buffer=str_Dir_Plan_PC + str(rnd_state) + ' ' + 'X_full_COI_L4_pca_train' + '.csv',
    index_col='Date', sep=',',
).add_suffix('_COI_L4')

X_MC_L4_train_pca = pd.read_csv(
    filepath_or_buffer=str_Dir_Plan_PC + str(rnd_state) + ' ' + 'X_full_MC_L4_pca_train' + '.csv',
    index_col='Date', sep=',',
).add_suffix('_MC_L4')

X_INTFX_L4_train_pca = pd.read_csv(
    filepath_or_buffer=str_Dir_Plan_PC + str(rnd_state) + ' ' + 'X_full_INTFX_L4_pca_train' + '.csv',
    index_col='Date', sep=',',
).add_suffix('_INTFX_L4')

X_P_L4_train_pca = pd.read_csv(
    filepath_or_buffer=str_Dir_Plan_PC + str(rnd_state) + ' ' + 'X_full_P_L4_pca_train' + '.csv',
    index_col='Date', sep=',',
).add_suffix('_P_L4')

X_S_L4_train_pca = pd.read_csv(
    filepath_or_buffer=str_Dir_Plan_PC + str(rnd_state) + ' ' + 'X_full_S_L4_pca_train' + '.csv',
    index_col='Date', sep=',',
).add_suffix('_S_L4')

# PCA samples - test L4
#
# Each CSV is read from str_Dir_Plan_PC with the rnd_state prefix and indexed by
# its 'Date' column; every column label then receives the tag taken from the
# file name (e.g. '_OI_L4').
# add_suffix() builds fresh labels instead of writing into the array behind the
# existing column Index (which `columns.values` exposes and pandas treats as
# immutable), and it leaves no scratch variables behind.

X_L4_test_pca = pd.read_csv(
    filepath_or_buffer=str_Dir_Plan_PC + str(rnd_state) + ' ' + 'X_full_L4_pca_test' + '.csv',
    index_col='Date', sep=',',
).add_suffix('_L4')

X_OI_L4_test_pca = pd.read_csv(
    filepath_or_buffer=str_Dir_Plan_PC + str(rnd_state) + ' ' + 'X_full_OI_L4_pca_test' + '.csv',
    index_col='Date', sep=',',
).add_suffix('_OI_L4')

X_LM_L4_test_pca = pd.read_csv(
    filepath_or_buffer=str_Dir_Plan_PC + str(rnd_state) + ' ' + 'X_full_LM_L4_pca_test' + '.csv',
    index_col='Date', sep=',',
).add_suffix('_LM_L4')

X_H_L4_test_pca = pd.read_csv(
    filepath_or_buffer=str_Dir_Plan_PC + str(rnd_state) + ' ' + 'X_full_H_L4_pca_test' + '.csv',
    index_col='Date', sep=',',
).add_suffix('_H_L4')

X_COI_L4_test_pca = pd.read_csv(
    filepath_or_buffer=str_Dir_Plan_PC + str(rnd_state) + ' ' + 'X_full_COI_L4_pca_test' + '.csv',
    index_col='Date', sep=',',
).add_suffix('_COI_L4')

X_MC_L4_test_pca = pd.read_csv(
    filepath_or_buffer=str_Dir_Plan_PC + str(rnd_state) + ' ' + 'X_full_MC_L4_pca_test' + '.csv',
    index_col='Date', sep=',',
).add_suffix('_MC_L4')

X_INTFX_L4_test_pca = pd.read_csv(
    filepath_or_buffer=str_Dir_Plan_PC + str(rnd_state) + ' ' + 'X_full_INTFX_L4_pca_test' + '.csv',
    index_col='Date', sep=',',
).add_suffix('_INTFX_L4')

X_P_L4_test_pca = pd.read_csv(
    filepath_or_buffer=str_Dir_Plan_PC + str(rnd_state) + ' ' + 'X_full_P_L4_pca_test' + '.csv',
    index_col='Date', sep=',',
).add_suffix('_P_L4')

X_S_L4_test_pca = pd.read_csv(
    filepath_or_buffer=str_Dir_Plan_PC + str(rnd_state) + ' ' + 'X_full_S_L4_pca_test' + '.csv',
    index_col='Date', sep=',',
).add_suffix('_S_L4')

# PCA samples - train L12
#
# Each CSV is read from str_Dir_Plan_PC with the rnd_state prefix and indexed by
# its 'Date' column; every column label then receives the tag taken from the
# file name (e.g. '_OI_L12').
# add_suffix() builds fresh labels instead of writing into the array behind the
# existing column Index (which `columns.values` exposes and pandas treats as
# immutable), and it leaves no scratch variables behind.

X_L12_train_pca = pd.read_csv(
    filepath_or_buffer=str_Dir_Plan_PC + str(rnd_state) + ' ' + 'X_full_L12_pca_train' + '.csv',
    index_col='Date', sep=',',
).add_suffix('_L12')

X_OI_L12_train_pca = pd.read_csv(
    filepath_or_buffer=str_Dir_Plan_PC + str(rnd_state) + ' ' + 'X_full_OI_L12_pca_train' + '.csv',
    index_col='Date', sep=',',
).add_suffix('_OI_L12')

X_LM_L12_train_pca = pd.read_csv(
    filepath_or_buffer=str_Dir_Plan_PC + str(rnd_state) + ' ' + 'X_full_LM_L12_pca_train' + '.csv',
    index_col='Date', sep=',',
).add_suffix('_LM_L12')

X_H_L12_train_pca = pd.read_csv(
    filepath_or_buffer=str_Dir_Plan_PC + str(rnd_state) + ' ' + 'X_full_H_L12_pca_train' + '.csv',
    index_col='Date', sep=',',
).add_suffix('_H_L12')

X_COI_L12_train_pca = pd.read_csv(
    filepath_or_buffer=str_Dir_Plan_PC + str(rnd_state) + ' ' + 'X_full_COI_L12_pca_train' + '.csv',
    index_col='Date', sep=',',
).add_suffix('_COI_L12')

X_MC_L12_train_pca = pd.read_csv(
    filepath_or_buffer=str_Dir_Plan_PC + str(rnd_state) + ' ' + 'X_full_MC_L12_pca_train' + '.csv',
    index_col='Date', sep=',',
).add_suffix('_MC_L12')

X_INTFX_L12_train_pca = pd.read_csv(
    filepath_or_buffer=str_Dir_Plan_PC + str(rnd_state) + ' ' + 'X_full_INTFX_L12_pca_train' + '.csv',
    index_col='Date', sep=',',
).add_suffix('_INTFX_L12')

X_P_L12_train_pca = pd.read_csv(
    filepath_or_buffer=str_Dir_Plan_PC + str(rnd_state) + ' ' + 'X_full_P_L12_pca_train' + '.csv',
    index_col='Date', sep=',',
).add_suffix('_P_L12')

X_S_L12_train_pca = pd.read_csv(
    filepath_or_buffer=str_Dir_Plan_PC + str(rnd_state) + ' ' + 'X_full_S_L12_pca_train' + '.csv',
    index_col='Date', sep=',',
).add_suffix('_S_L12')

# PCA samples - test L12
#
# Each CSV is read from str_Dir_Plan_PC with the rnd_state prefix and indexed by
# its 'Date' column; every column label then receives the tag taken from the
# file name (e.g. '_OI_L12').
# add_suffix() builds fresh labels instead of writing into the array behind the
# existing column Index (which `columns.values` exposes and pandas treats as
# immutable), and it leaves no scratch variables behind.

X_L12_test_pca = pd.read_csv(
    filepath_or_buffer=str_Dir_Plan_PC + str(rnd_state) + ' ' + 'X_full_L12_pca_test' + '.csv',
    index_col='Date', sep=',',
).add_suffix('_L12')

X_OI_L12_test_pca = pd.read_csv(
    filepath_or_buffer=str_Dir_Plan_PC + str(rnd_state) + ' ' + 'X_full_OI_L12_pca_test' + '.csv',
    index_col='Date', sep=',',
).add_suffix('_OI_L12')

X_LM_L12_test_pca = pd.read_csv(
    filepath_or_buffer=str_Dir_Plan_PC + str(rnd_state) + ' ' + 'X_full_LM_L12_pca_test' + '.csv',
    index_col='Date', sep=',',
).add_suffix('_LM_L12')

X_H_L12_test_pca = pd.read_csv(
    filepath_or_buffer=str_Dir_Plan_PC + str(rnd_state) + ' ' + 'X_full_H_L12_pca_test' + '.csv',
    index_col='Date', sep=',',
).add_suffix('_H_L12')

X_COI_L12_test_pca = pd.read_csv(
    filepath_or_buffer=str_Dir_Plan_PC + str(rnd_state) + ' ' + 'X_full_COI_L12_pca_test' + '.csv',
    index_col='Date', sep=',',
).add_suffix('_COI_L12')

X_MC_L12_test_pca = pd.read_csv(
    filepath_or_buffer=str_Dir_Plan_PC + str(rnd_state) + ' ' + 'X_full_MC_L12_pca_test' + '.csv',
    index_col='Date', sep=',',
).add_suffix('_MC_L12')

X_INTFX_L12_test_pca = pd.read_csv(
    filepath_or_buffer=str_Dir_Plan_PC + str(rnd_state) + ' ' + 'X_full_INTFX_L12_pca_test' + '.csv',
    index_col='Date', sep=',',
).add_suffix('_INTFX_L12')

X_P_L12_test_pca = pd.read_csv(
    filepath_or_buffer=str_Dir_Plan_PC + str(rnd_state) + ' ' + 'X_full_P_L12_pca_test' + '.csv',
    index_col='Date', sep=',',
).add_suffix('_P_L12')

X_S_L12_test_pca = pd.read_csv(
    filepath_or_buffer=str_Dir_Plan_PC + str(rnd_state) + ' ' + 'X_full_S_L12_pca_test' + '.csv',
    index_col='Date', sep=',',
).add_suffix('_S_L12')

# Autoencoder samples - train
#
# Each CSV is read from str_Dir_Plan_PC with the rnd_state prefix and indexed by
# its 'Date' column. The full-sample frame keeps its column labels as read; the
# per-prefix frames get the prefix from the file name appended to every column
# label (e.g. '_OI').
# add_suffix() builds fresh labels instead of writing into the array behind the
# existing column Index (which `columns.values` exposes and pandas treats as
# immutable), and it leaves no scratch variables behind.

X_train_ae = pd.read_csv(
    filepath_or_buffer=str_Dir_Plan_PC + str(rnd_state) + ' ' + 'X_full_ae_train' + '.csv',
    index_col='Date', sep=',',
)

X_OI_train_ae = pd.read_csv(
    filepath_or_buffer=str_Dir_Plan_PC + str(rnd_state) + ' ' + 'X_OI_ae_train' + '.csv',
    index_col='Date', sep=',',
).add_suffix('_OI')

X_LM_train_ae = pd.read_csv(
    filepath_or_buffer=str_Dir_Plan_PC + str(rnd_state) + ' ' + 'X_LM_ae_train' + '.csv',
    index_col='Date', sep=',',
).add_suffix('_LM')

X_H_train_ae = pd.read_csv(
    filepath_or_buffer=str_Dir_Plan_PC + str(rnd_state) + ' ' + 'X_H_ae_train' + '.csv',
    index_col='Date', sep=',',
).add_suffix('_H')

X_COI_train_ae = pd.read_csv(
    filepath_or_buffer=str_Dir_Plan_PC + str(rnd_state) + ' ' + 'X_COI_ae_train' + '.csv',
    index_col='Date', sep=',',
).add_suffix('_COI')

X_MC_train_ae = pd.read_csv(
    filepath_or_buffer=str_Dir_Plan_PC + str(rnd_state) + ' ' + 'X_MC_ae_train' + '.csv',
    index_col='Date', sep=',',
).add_suffix('_MC')

X_INTFX_train_ae = pd.read_csv(
    filepath_or_buffer=str_Dir_Plan_PC + str(rnd_state) + ' ' + 'X_INTFX_ae_train' + '.csv',
    index_col='Date', sep=',',
).add_suffix('_INTFX')

X_P_train_ae = pd.read_csv(
    filepath_or_buffer=str_Dir_Plan_PC + str(rnd_state) + ' ' + 'X_P_ae_train' + '.csv',
    index_col='Date', sep=',',
).add_suffix('_P')

X_S_train_ae = pd.read_csv(
    filepath_or_buffer=str_Dir_Plan_PC + str(rnd_state) + ' ' + 'X_S_ae_train' + '.csv',
    index_col='Date', sep=',',
).add_suffix('_S')

# Autoencoder samples - test
#
# Each CSV is read from str_Dir_Plan_PC with the rnd_state prefix and indexed by
# its 'Date' column. The full-sample frame keeps its column labels as read; the
# per-prefix frames get the prefix from the file name appended to every column
# label (e.g. '_OI').
# add_suffix() builds fresh labels instead of writing into the array behind the
# existing column Index (which `columns.values` exposes and pandas treats as
# immutable), and it leaves no scratch variables behind.

X_test_ae = pd.read_csv(
    filepath_or_buffer=str_Dir_Plan_PC + str(rnd_state) + ' ' + 'X_full_ae_test' + '.csv',
    index_col='Date', sep=',',
)

X_OI_test_ae = pd.read_csv(
    filepath_or_buffer=str_Dir_Plan_PC + str(rnd_state) + ' ' + 'X_OI_ae_test' + '.csv',
    index_col='Date', sep=',',
).add_suffix('_OI')

X_LM_test_ae = pd.read_csv(
    filepath_or_buffer=str_Dir_Plan_PC + str(rnd_state) + ' ' + 'X_LM_ae_test' + '.csv',
    index_col='Date', sep=',',
).add_suffix('_LM')

X_H_test_ae = pd.read_csv(
    filepath_or_buffer=str_Dir_Plan_PC + str(rnd_state) + ' ' + 'X_H_ae_test' + '.csv',
    index_col='Date', sep=',',
).add_suffix('_H')

X_COI_test_ae = pd.read_csv(
    filepath_or_buffer=str_Dir_Plan_PC + str(rnd_state) + ' ' + 'X_COI_ae_test' + '.csv',
    index_col='Date', sep=',',
).add_suffix('_COI')

X_MC_test_ae = pd.read_csv(
    filepath_or_buffer=str_Dir_Plan_PC + str(rnd_state) + ' ' + 'X_MC_ae_test' + '.csv',
    index_col='Date', sep=',',
).add_suffix('_MC')

X_INTFX_test_ae = pd.read_csv(
    filepath_or_buffer=str_Dir_Plan_PC + str(rnd_state) + ' ' + 'X_INTFX_ae_test' + '.csv',
    index_col='Date', sep=',',
).add_suffix('_INTFX')

X_P_test_ae = pd.read_csv(
    filepath_or_buffer=str_Dir_Plan_PC + str(rnd_state) + ' ' + 'X_P_ae_test' + '.csv',
    index_col='Date', sep=',',
).add_suffix('_P')

X_S_test_ae = pd.read_csv(
    filepath_or_buffer=str_Dir_Plan_PC + str(rnd_state) + ' ' + 'X_S_ae_test' + '.csv',
    index_col='Date', sep=',',
).add_suffix('_S')

# Variational Autoencoder (VAE) samples - train
#
# Each CSV is read from str_Dir_Plan_PC with the rnd_state prefix and indexed by
# its 'Date' column. The full-sample frame keeps its column labels as read; the
# per-prefix frames get the prefix from the file name appended to every column
# label (e.g. '_OI').
# add_suffix() builds fresh labels instead of writing into the array behind the
# existing column Index (which `columns.values` exposes and pandas treats as
# immutable), and it leaves no scratch variables behind.

X_train_vae = pd.read_csv(
    filepath_or_buffer=str_Dir_Plan_PC + str(rnd_state) + ' ' + 'X_full_vae_train' + '.csv',
    index_col='Date', sep=',',
)

X_OI_train_vae = pd.read_csv(
    filepath_or_buffer=str_Dir_Plan_PC + str(rnd_state) + ' ' + 'X_OI_vae_train' + '.csv',
    index_col='Date', sep=',',
).add_suffix('_OI')

X_LM_train_vae = pd.read_csv(
    filepath_or_buffer=str_Dir_Plan_PC + str(rnd_state) + ' ' + 'X_LM_vae_train' + '.csv',
    index_col='Date', sep=',',
).add_suffix('_LM')

X_H_train_vae = pd.read_csv(
    filepath_or_buffer=str_Dir_Plan_PC + str(rnd_state) + ' ' + 'X_H_vae_train' + '.csv',
    index_col='Date', sep=',',
).add_suffix('_H')

X_COI_train_vae = pd.read_csv(
    filepath_or_buffer=str_Dir_Plan_PC + str(rnd_state) + ' ' + 'X_COI_vae_train' + '.csv',
    index_col='Date', sep=',',
).add_suffix('_COI')

X_MC_train_vae = pd.read_csv(
    filepath_or_buffer=str_Dir_Plan_PC + str(rnd_state) + ' ' + 'X_MC_vae_train' + '.csv',
    index_col='Date', sep=',',
).add_suffix('_MC')

X_INTFX_train_vae = pd.read_csv(
    filepath_or_buffer=str_Dir_Plan_PC + str(rnd_state) + ' ' + 'X_INTFX_vae_train' + '.csv',
    index_col='Date', sep=',',
).add_suffix('_INTFX')

X_P_train_vae = pd.read_csv(
    filepath_or_buffer=str_Dir_Plan_PC + str(rnd_state) + ' ' + 'X_P_vae_train' + '.csv',
    index_col='Date', sep=',',
).add_suffix('_P')

X_S_train_vae = pd.read_csv(
    filepath_or_buffer=str_Dir_Plan_PC + str(rnd_state) + ' ' + 'X_S_vae_train' + '.csv',
    index_col='Date', sep=',',
).add_suffix('_S')

# Variational Autoencoder (VAE) samples - test
#
# Every file is indexed by its 'Date' column. The per-group frames get a
# '_<GROUP>' suffix on every column through add_suffix, which returns a
# relabelled frame instead of writing into the array that backs the
# (immutable) column Index. The full-sample frame keeps its original names.

X_test_vae = pd.read_csv(filepath_or_buffer = str_Dir_Plan_PC + str(rnd_state) + ' ' + 'X_full_vae_test' + '.csv',
                         index_col = 'Date', sep = ',')

X_OI_test_vae = pd.read_csv(filepath_or_buffer = str_Dir_Plan_PC + str(rnd_state) + ' ' + 'X_OI_vae_test' + '.csv',
                            index_col = 'Date', sep = ',').add_suffix('_OI')

X_LM_test_vae = pd.read_csv(filepath_or_buffer = str_Dir_Plan_PC + str(rnd_state) + ' ' + 'X_LM_vae_test' + '.csv',
                            index_col = 'Date', sep = ',').add_suffix('_LM')

X_H_test_vae = pd.read_csv(filepath_or_buffer = str_Dir_Plan_PC + str(rnd_state) + ' ' + 'X_H_vae_test' + '.csv',
                           index_col = 'Date', sep = ',').add_suffix('_H')

X_COI_test_vae = pd.read_csv(filepath_or_buffer = str_Dir_Plan_PC + str(rnd_state) + ' ' + 'X_COI_vae_test' + '.csv',
                             index_col = 'Date', sep = ',').add_suffix('_COI')

X_MC_test_vae = pd.read_csv(filepath_or_buffer = str_Dir_Plan_PC + str(rnd_state) + ' ' + 'X_MC_vae_test' + '.csv',
                            index_col = 'Date', sep = ',').add_suffix('_MC')

X_INTFX_test_vae = pd.read_csv(filepath_or_buffer = str_Dir_Plan_PC + str(rnd_state) + ' ' + 'X_INTFX_vae_test' + '.csv',
                               index_col = 'Date', sep = ',').add_suffix('_INTFX')

X_P_test_vae = pd.read_csv(filepath_or_buffer = str_Dir_Plan_PC + str(rnd_state) + ' ' + 'X_P_vae_test' + '.csv',
                           index_col = 'Date', sep = ',').add_suffix('_P')

X_S_test_vae = pd.read_csv(filepath_or_buffer = str_Dir_Plan_PC + str(rnd_state) + ' ' + 'X_S_vae_test' + '.csv',
                           index_col = 'Date', sep = ',').add_suffix('_S')

In [None]:
#####################################################################################################################
#                                                                                                                   #
# Datasets for ML Models                                                                                            #
#                                                                                                                   #
#####################################################################################################################

# Keeps the row labels of the full, test and training targets
index_full, index_test, index_train = y.index, y_test.index, y_train.index

# Single-column frame (column 0) holding the integer position of every label
# of the full sample; the two lookups below translate the test / training
# labels into those positions.
df_index = pd.DataFrame(data = range(len(index_full)), index = index_full)
num_index_test = df_index.loc[index_test][0]
num_index_train = df_index.loc[index_train][0]

###########
# LSTM v1 #
###########

# Data - first column of y as a plain array
data_lstm_m1 = np.array(y.iloc[:, 0])

# Split data into samples (input, output)
X_lstm_m1, y_lstm_m1 = split_sequence_uni(data_lstm_m1, n_lags_lstm, per_ahead = 1, cum = False)

# Indexation - the samples are labelled with the last len1 labels of y
len1, len2 = len(y_lstm_m1), len(y.index)
i = y.index[(len2 - len1):len2]

# Labels shared by the sample frames (both are indexed by i) and each split
dates_train = i.intersection(index_train)
dates_test = i.intersection(index_test)

X_lstm_m1 = pd.DataFrame(data = X_lstm_m1, index = i)
X_lstm_m1_train = X_lstm_m1.loc[dates_train]
X_lstm_m1_test = X_lstm_m1.loc[dates_test]

y_lstm_m1 = pd.DataFrame(data = y_lstm_m1, index = i)
y_lstm_m1_train = y_lstm_m1.loc[dates_train]
y_lstm_m1_test = y_lstm_m1.loc[dates_test]

# Rolling sums of the target over 3, 6 and 12 rows; the leading rows without
# a complete window are dropped, so each frame has its own (shorter) index.
y_lstm_m1_3m = y_lstm_m1.rolling(3).sum().dropna()
y_lstm_m1_3m_train = y_lstm_m1_3m.loc[y_lstm_m1_3m.index.intersection(index_train)]
y_lstm_m1_3m_test = y_lstm_m1_3m.loc[y_lstm_m1_3m.index.intersection(index_test)]

y_lstm_m1_6m = y_lstm_m1.rolling(6).sum().dropna()
y_lstm_m1_6m_train = y_lstm_m1_6m.loc[y_lstm_m1_6m.index.intersection(index_train)]
y_lstm_m1_6m_test = y_lstm_m1_6m.loc[y_lstm_m1_6m.index.intersection(index_test)]

y_lstm_m1_12m = y_lstm_m1.rolling(12).sum().dropna()
y_lstm_m1_12m_train = y_lstm_m1_12m.loc[y_lstm_m1_12m.index.intersection(index_train)]
y_lstm_m1_12m_test = y_lstm_m1_12m.loc[y_lstm_m1_12m.index.intersection(index_test)]

# Converts into array - inputs stay two-dimensional, targets keep column 0 only
X_lstm_m1, X_lstm_m1_train, X_lstm_m1_test = (
    np.array(frame) for frame in (X_lstm_m1, X_lstm_m1_train, X_lstm_m1_test))

y_lstm_m1, y_lstm_m1_train, y_lstm_m1_test = (
    np.array(frame)[:, 0] for frame in (y_lstm_m1, y_lstm_m1_train, y_lstm_m1_test))

# The rolling targets additionally go through scale(); full sample, training
# split and test split are each scaled on their own.
y_lstm_m1_3m, y_lstm_m1_3m_train, y_lstm_m1_3m_test = (
    scale(np.array(frame))[:, 0] for frame in (y_lstm_m1_3m, y_lstm_m1_3m_train, y_lstm_m1_3m_test))

y_lstm_m1_6m, y_lstm_m1_6m_train, y_lstm_m1_6m_test = (
    scale(np.array(frame))[:, 0] for frame in (y_lstm_m1_6m, y_lstm_m1_6m_train, y_lstm_m1_6m_test))

y_lstm_m1_12m, y_lstm_m1_12m_train, y_lstm_m1_12m_test = (
    scale(np.array(frame))[:, 0] for frame in (y_lstm_m1_12m, y_lstm_m1_12m_train, y_lstm_m1_12m_test))

# Reshape - appends a trailing feature axis of length n_features
n_features = 1
X_lstm_m1, X_lstm_m1_train, X_lstm_m1_test = (
    block.reshape(block.shape[0], block.shape[1], n_features)
    for block in (X_lstm_m1, X_lstm_m1_train, X_lstm_m1_test))

###########
# LSTM v2 #
###########

# Data - for every group, stacks the training and test autoencoder samples
# and sorts the result by 'Date'
X_OI_ae = pd.concat([X_OI_train_ae, X_OI_test_ae], axis = 0).sort_values(by = 'Date')
X_LM_ae = pd.concat([X_LM_train_ae, X_LM_test_ae], axis = 0).sort_values(by = 'Date')
X_H_ae = pd.concat([X_H_train_ae, X_H_test_ae], axis = 0).sort_values(by = 'Date')
X_COI_ae = pd.concat([X_COI_train_ae, X_COI_test_ae], axis = 0).sort_values(by = 'Date')
X_MC_ae = pd.concat([X_MC_train_ae, X_MC_test_ae], axis = 0).sort_values(by = 'Date')
X_INTFX_ae = pd.concat([X_INTFX_train_ae, X_INTFX_test_ae], axis = 0).sort_values(by = 'Date')
X_P_ae = pd.concat([X_P_train_ae, X_P_test_ae], axis = 0).sort_values(by = 'Date')
X_S_ae = pd.concat([X_S_train_ae, X_S_test_ae], axis = 0).sort_values(by = 'Date')

# Concatenate - all groups side by side, with the target y as the last column(s)
data_lstm_m2 = np.array(
    pd.concat([X_OI_ae, X_LM_ae, X_H_ae, X_COI_ae, X_MC_ae, X_INTFX_ae, X_P_ae, X_S_ae, y], axis = 1))

# Split data into samples (input, output)
X_lstm_m2, y_lstm_m2 = split_sequence_mult(data_lstm_m2, n_lags_lstm, per_ahead = 1, cum = False)

# Indexation - the samples are labelled with the last len1 labels of y
len1, len2 = len(y_lstm_m2), len(y.index)
i = y.index[(len2 - len1):len2]

y_lstm_m2 = pd.DataFrame(data = y_lstm_m2, index = i)
# Integer position of every sample label, used further down to slice X_lstm_m2
df_index_adj = pd.DataFrame(data = range(len(y_lstm_m2)), index = y_lstm_m2.index)
y_lstm_m2_train = y_lstm_m2.loc[i.intersection(index_train)]
y_lstm_m2_test = y_lstm_m2.loc[i.intersection(index_test)]

# Rolling sums of the target over 3, 6 and 12 rows (incomplete leading windows dropped)
y_lstm_m2_3m = y_lstm_m2.rolling(3).sum().dropna()
y_lstm_m2_3m_train = y_lstm_m2_3m.loc[y_lstm_m2_3m.index.intersection(index_train)]
y_lstm_m2_3m_test = y_lstm_m2_3m.loc[y_lstm_m2_3m.index.intersection(index_test)]

y_lstm_m2_6m = y_lstm_m2.rolling(6).sum().dropna()
y_lstm_m2_6m_train = y_lstm_m2_6m.loc[y_lstm_m2_6m.index.intersection(index_train)]
y_lstm_m2_6m_test = y_lstm_m2_6m.loc[y_lstm_m2_6m.index.intersection(index_test)]

y_lstm_m2_12m = y_lstm_m2.rolling(12).sum().dropna()
y_lstm_m2_12m_train = y_lstm_m2_12m.loc[y_lstm_m2_12m.index.intersection(index_train)]
y_lstm_m2_12m_test = y_lstm_m2_12m.loc[y_lstm_m2_12m.index.intersection(index_test)]

# Converts into array - the input splits are taken by sample position, looked
# up while the target splits still carry their labels
X_lstm_m2 = np.array(X_lstm_m2)
rows_train = df_index_adj.loc[y_lstm_m2_train.index][0]
rows_test = df_index_adj.loc[y_lstm_m2_test.index][0]
X_lstm_m2_train = X_lstm_m2[rows_train, :, :]
X_lstm_m2_test = X_lstm_m2[rows_test, :, :]

y_lstm_m2, y_lstm_m2_train, y_lstm_m2_test = (
    np.array(frame)[:, 0] for frame in (y_lstm_m2, y_lstm_m2_train, y_lstm_m2_test))

# Rolling targets go through scale(); each sample is scaled on its own
y_lstm_m2_3m, y_lstm_m2_3m_train, y_lstm_m2_3m_test = (
    scale(np.array(frame))[:, 0] for frame in (y_lstm_m2_3m, y_lstm_m2_3m_train, y_lstm_m2_3m_test))

y_lstm_m2_6m, y_lstm_m2_6m_train, y_lstm_m2_6m_test = (
    scale(np.array(frame))[:, 0] for frame in (y_lstm_m2_6m, y_lstm_m2_6m_train, y_lstm_m2_6m_test))

y_lstm_m2_12m, y_lstm_m2_12m_train, y_lstm_m2_12m_test = (
    scale(np.array(frame))[:, 0] for frame in (y_lstm_m2_12m, y_lstm_m2_12m_train, y_lstm_m2_12m_test))

###########
# LSTM v3 #
###########

# Data - regressors X and target y side by side, y in the last column(s)
data_lstm_m3 = np.array(pd.concat([X, y], axis = 1))

# Split data into samples (input, output)
X_lstm_m3, y_lstm_m3 = split_sequence_mult(data_lstm_m3, n_lags_lstm, per_ahead = 1, cum = False)

# Indexation - the samples are labelled with the last len1 labels of y
len1, len2 = len(y_lstm_m3), len(y.index)
i = y.index[(len2 - len1):len2]

y_lstm_m3 = pd.DataFrame(data = y_lstm_m3, index = i)
# Integer position of every sample label, used further down to slice X_lstm_m3
df_index_adj = pd.DataFrame(data = range(len(y_lstm_m3)), index = y_lstm_m3.index)
y_lstm_m3_train = y_lstm_m3.loc[i.intersection(index_train)]
y_lstm_m3_test = y_lstm_m3.loc[i.intersection(index_test)]

# Rolling sums of the target over 3, 6 and 12 rows (incomplete leading windows dropped)
y_lstm_m3_3m = y_lstm_m3.rolling(3).sum().dropna()
y_lstm_m3_3m_train = y_lstm_m3_3m.loc[y_lstm_m3_3m.index.intersection(index_train)]
y_lstm_m3_3m_test = y_lstm_m3_3m.loc[y_lstm_m3_3m.index.intersection(index_test)]

y_lstm_m3_6m = y_lstm_m3.rolling(6).sum().dropna()
y_lstm_m3_6m_train = y_lstm_m3_6m.loc[y_lstm_m3_6m.index.intersection(index_train)]
y_lstm_m3_6m_test = y_lstm_m3_6m.loc[y_lstm_m3_6m.index.intersection(index_test)]

y_lstm_m3_12m = y_lstm_m3.rolling(12).sum().dropna()
y_lstm_m3_12m_train = y_lstm_m3_12m.loc[y_lstm_m3_12m.index.intersection(index_train)]
y_lstm_m3_12m_test = y_lstm_m3_12m.loc[y_lstm_m3_12m.index.intersection(index_test)]

# Converts into array - the input splits are taken by sample position, looked
# up while the target splits still carry their labels
X_lstm_m3 = np.array(X_lstm_m3)
rows_train = df_index_adj.loc[y_lstm_m3_train.index][0]
rows_test = df_index_adj.loc[y_lstm_m3_test.index][0]
X_lstm_m3_train = X_lstm_m3[rows_train, :, :]
X_lstm_m3_test = X_lstm_m3[rows_test, :, :]

y_lstm_m3, y_lstm_m3_train, y_lstm_m3_test = (
    np.array(frame)[:, 0] for frame in (y_lstm_m3, y_lstm_m3_train, y_lstm_m3_test))

# Rolling targets go through scale(); each sample is scaled on its own
y_lstm_m3_3m, y_lstm_m3_3m_train, y_lstm_m3_3m_test = (
    scale(np.array(frame))[:, 0] for frame in (y_lstm_m3_3m, y_lstm_m3_3m_train, y_lstm_m3_3m_test))

y_lstm_m3_6m, y_lstm_m3_6m_train, y_lstm_m3_6m_test = (
    scale(np.array(frame))[:, 0] for frame in (y_lstm_m3_6m, y_lstm_m3_6m_train, y_lstm_m3_6m_test))

y_lstm_m3_12m, y_lstm_m3_12m_train, y_lstm_m3_12m_test = (
    scale(np.array(frame))[:, 0] for frame in (y_lstm_m3_12m, y_lstm_m3_12m_train, y_lstm_m3_12m_test))

###########
# LSTM v4 #
###########

# Data - for every group, stacks the training and test autoencoder samples
# and sorts the result by 'Date'
X_OI_ae = pd.concat([X_OI_train_ae, X_OI_test_ae], axis = 0).sort_values(by = 'Date')
X_LM_ae = pd.concat([X_LM_train_ae, X_LM_test_ae], axis = 0).sort_values(by = 'Date')
X_H_ae = pd.concat([X_H_train_ae, X_H_test_ae], axis = 0).sort_values(by = 'Date')
X_COI_ae = pd.concat([X_COI_train_ae, X_COI_test_ae], axis = 0).sort_values(by = 'Date')
X_MC_ae = pd.concat([X_MC_train_ae, X_MC_test_ae], axis = 0).sort_values(by = 'Date')
X_INTFX_ae = pd.concat([X_INTFX_train_ae, X_INTFX_test_ae], axis = 0).sort_values(by = 'Date')
X_P_ae = pd.concat([X_P_train_ae, X_P_test_ae], axis = 0).sort_values(by = 'Date')
X_S_ae = pd.concat([X_S_train_ae, X_S_test_ae], axis = 0).sort_values(by = 'Date')

# All groups side by side, with the target y as the last column(s)
data_lstm_m4 = np.array(
    pd.concat([X_OI_ae, X_LM_ae, X_H_ae, X_COI_ae, X_MC_ae, X_INTFX_ae, X_P_ae, X_S_ae, y], axis = 1))

# Split data into samples (input, output)
X_lstm_m4, y_lstm_m4 = split_sequence_mult(data_lstm_m4, n_lags_lstm, per_ahead = 1, cum = False)

# Indexation - the samples are labelled with the last len1 labels of y
len1, len2 = len(y_lstm_m4), len(y.index)
i = y.index[(len2 - len1):len2]

y_lstm_m4 = pd.DataFrame(data = y_lstm_m4, index = i)
# Integer position of every sample label, used further down to slice X_lstm_m4
df_index_adj = pd.DataFrame(data = range(len(y_lstm_m4)), index = y_lstm_m4.index)
y_lstm_m4_train = y_lstm_m4.loc[i.intersection(index_train)]
y_lstm_m4_test = y_lstm_m4.loc[i.intersection(index_test)]

# Rolling sums of the target over 3, 6 and 12 rows (incomplete leading windows dropped)
y_lstm_m4_3m = y_lstm_m4.rolling(3).sum().dropna()
y_lstm_m4_3m_train = y_lstm_m4_3m.loc[y_lstm_m4_3m.index.intersection(index_train)]
y_lstm_m4_3m_test = y_lstm_m4_3m.loc[y_lstm_m4_3m.index.intersection(index_test)]

y_lstm_m4_6m = y_lstm_m4.rolling(6).sum().dropna()
y_lstm_m4_6m_train = y_lstm_m4_6m.loc[y_lstm_m4_6m.index.intersection(index_train)]
y_lstm_m4_6m_test = y_lstm_m4_6m.loc[y_lstm_m4_6m.index.intersection(index_test)]

y_lstm_m4_12m = y_lstm_m4.rolling(12).sum().dropna()
y_lstm_m4_12m_train = y_lstm_m4_12m.loc[y_lstm_m4_12m.index.intersection(index_train)]
y_lstm_m4_12m_test = y_lstm_m4_12m.loc[y_lstm_m4_12m.index.intersection(index_test)]

# Reshapes and converts into array - the lag axis is folded into
# (n_seq_conv, 1, n_steps_conv) and the last axis is kept as it is
X_lstm_m4 = np.array(X_lstm_m4)
conv_shape = (X_lstm_m4.shape[0], n_seq_conv, 1, n_steps_conv, X_lstm_m4.shape[2])
X_lstm_m4 = X_lstm_m4.reshape(conv_shape)
rows_train = df_index_adj.loc[y_lstm_m4_train.index][0]
rows_test = df_index_adj.loc[y_lstm_m4_test.index][0]
X_lstm_m4_train = X_lstm_m4[rows_train, :, :, :, :]
X_lstm_m4_test = X_lstm_m4[rows_test, :, :, :, :]

y_lstm_m4, y_lstm_m4_train, y_lstm_m4_test = (
    np.array(frame)[:, 0] for frame in (y_lstm_m4, y_lstm_m4_train, y_lstm_m4_test))

# Rolling targets go through scale(); each sample is scaled on its own
y_lstm_m4_3m, y_lstm_m4_3m_train, y_lstm_m4_3m_test = (
    scale(np.array(frame))[:, 0] for frame in (y_lstm_m4_3m, y_lstm_m4_3m_train, y_lstm_m4_3m_test))

y_lstm_m4_6m, y_lstm_m4_6m_train, y_lstm_m4_6m_test = (
    scale(np.array(frame))[:, 0] for frame in (y_lstm_m4_6m, y_lstm_m4_6m_train, y_lstm_m4_6m_test))

y_lstm_m4_12m, y_lstm_m4_12m_train, y_lstm_m4_12m_test = (
    scale(np.array(frame))[:, 0] for frame in (y_lstm_m4_12m, y_lstm_m4_12m_train, y_lstm_m4_12m_test))

###########
# LSTM v5 #
###########

# Data - for every group, stacks the training and test VAE samples and sorts
# the result by 'Date'
X_OI_vae = pd.concat([X_OI_train_vae, X_OI_test_vae], axis = 0).sort_values(by = 'Date')
X_LM_vae = pd.concat([X_LM_train_vae, X_LM_test_vae], axis = 0).sort_values(by = 'Date')
X_H_vae = pd.concat([X_H_train_vae, X_H_test_vae], axis = 0).sort_values(by = 'Date')
X_COI_vae = pd.concat([X_COI_train_vae, X_COI_test_vae], axis = 0).sort_values(by = 'Date')
X_MC_vae = pd.concat([X_MC_train_vae, X_MC_test_vae], axis = 0).sort_values(by = 'Date')
X_INTFX_vae = pd.concat([X_INTFX_train_vae, X_INTFX_test_vae], axis = 0).sort_values(by = 'Date')
X_P_vae = pd.concat([X_P_train_vae, X_P_test_vae], axis = 0).sort_values(by = 'Date')
X_S_vae = pd.concat([X_S_train_vae, X_S_test_vae], axis = 0).sort_values(by = 'Date')

# All groups side by side, with the target y as the last column(s)
data_lstm_m5 = np.array(
    pd.concat([X_OI_vae, X_LM_vae, X_H_vae, X_COI_vae, X_MC_vae, X_INTFX_vae, X_P_vae, X_S_vae, y], axis = 1))

# Split data into samples (input, output)
X_lstm_m5, y_lstm_m5 = split_sequence_mult(data_lstm_m5, n_lags_lstm, per_ahead = 1, cum = False)

# Indexation - the samples are labelled with the last len1 labels of y
len1, len2 = len(y_lstm_m5), len(y.index)
i = y.index[(len2 - len1):len2]

y_lstm_m5 = pd.DataFrame(data = y_lstm_m5, index = i)
# Integer position of every sample label, used further down to slice X_lstm_m5
df_index_adj = pd.DataFrame(data = range(len(y_lstm_m5)), index = y_lstm_m5.index)
y_lstm_m5_train = y_lstm_m5.loc[i.intersection(index_train)]
y_lstm_m5_test = y_lstm_m5.loc[i.intersection(index_test)]

# Rolling MEANS of the target over 3, 6 and 12 rows (incomplete leading windows dropped).
# NOTE(review): the other LSTM dataset blocks in this cell use rolling sums
# here; confirm whether the mean is intentional for this model.
y_lstm_m5_3m = y_lstm_m5.rolling(3).mean().dropna()
y_lstm_m5_3m_train = y_lstm_m5_3m.loc[y_lstm_m5_3m.index.intersection(index_train)]
y_lstm_m5_3m_test = y_lstm_m5_3m.loc[y_lstm_m5_3m.index.intersection(index_test)]

y_lstm_m5_6m = y_lstm_m5.rolling(6).mean().dropna()
y_lstm_m5_6m_train = y_lstm_m5_6m.loc[y_lstm_m5_6m.index.intersection(index_train)]
y_lstm_m5_6m_test = y_lstm_m5_6m.loc[y_lstm_m5_6m.index.intersection(index_test)]

y_lstm_m5_12m = y_lstm_m5.rolling(12).mean().dropna()
y_lstm_m5_12m_train = y_lstm_m5_12m.loc[y_lstm_m5_12m.index.intersection(index_train)]
y_lstm_m5_12m_test = y_lstm_m5_12m.loc[y_lstm_m5_12m.index.intersection(index_test)]

# Reshapes and converts into array - the lag axis is folded into
# (n_seq_conv, 1, n_steps_conv) and the last axis is kept as it is
X_lstm_m5 = np.array(X_lstm_m5)
conv_shape = (X_lstm_m5.shape[0], n_seq_conv, 1, n_steps_conv, X_lstm_m5.shape[2])
X_lstm_m5 = X_lstm_m5.reshape(conv_shape)
rows_train = df_index_adj.loc[y_lstm_m5_train.index][0]
rows_test = df_index_adj.loc[y_lstm_m5_test.index][0]
X_lstm_m5_train = X_lstm_m5[rows_train, :, :, :, :]
X_lstm_m5_test = X_lstm_m5[rows_test, :, :, :, :]

y_lstm_m5, y_lstm_m5_train, y_lstm_m5_test = (
    np.array(frame)[:, 0] for frame in (y_lstm_m5, y_lstm_m5_train, y_lstm_m5_test))

# Rolling targets go through scale(); each sample is scaled on its own
y_lstm_m5_3m, y_lstm_m5_3m_train, y_lstm_m5_3m_test = (
    scale(np.array(frame))[:, 0] for frame in (y_lstm_m5_3m, y_lstm_m5_3m_train, y_lstm_m5_3m_test))

y_lstm_m5_6m, y_lstm_m5_6m_train, y_lstm_m5_6m_test = (
    scale(np.array(frame))[:, 0] for frame in (y_lstm_m5_6m, y_lstm_m5_6m_train, y_lstm_m5_6m_test))

y_lstm_m5_12m, y_lstm_m5_12m_train, y_lstm_m5_12m_test = (
    scale(np.array(frame))[:, 0] for frame in (y_lstm_m5_12m, y_lstm_m5_12m_train, y_lstm_m5_12m_test))

###########
# LSTM v6 #
###########

# Data - first column of y as a plain array.
# The array is built from this block's own series; it previously converted
# data_lstm_m1, which discarded the line above and made this block depend on
# the LSTM v1 block having run first.
data_lstm_m6 = y.iloc[:,0]
data_lstm_m6 = np.array(data_lstm_m6)

# Split data into samples (input, output)
X_lstm_m6, y_lstm_m6 = split_sequence_uni(data_lstm_m6, n_lags_lstm, per_ahead = 1, cum = False)

# Indexation - the samples are labelled with the last len1 labels of y
len1 = len(y_lstm_m6)
len2 = len(y.index)
i = y.index[(len2-len1):len2]

y_lstm_m6 = pd.DataFrame(data = y_lstm_m6, index = i)
# Integer position of every sample label, used further down to slice X_lstm_m6
df_index_adj = pd.DataFrame(data = range(0,len(y_lstm_m6)), index = y_lstm_m6.index)
y_lstm_m6_train = y_lstm_m6.loc[y_lstm_m6.index.intersection(index_train)]
y_lstm_m6_test = y_lstm_m6.loc[y_lstm_m6.index.intersection(index_test)]

# Rolling sums of the target over 3, 6 and 12 rows (incomplete leading windows dropped)
y_lstm_m6_3m = y_lstm_m6.rolling(3).sum().dropna()
y_lstm_m6_3m_train = y_lstm_m6_3m.loc[y_lstm_m6_3m.index.intersection(index_train)]
y_lstm_m6_3m_test = y_lstm_m6_3m.loc[y_lstm_m6_3m.index.intersection(index_test)]

y_lstm_m6_6m = y_lstm_m6.rolling(6).sum().dropna()
y_lstm_m6_6m_train = y_lstm_m6_6m.loc[y_lstm_m6_6m.index.intersection(index_train)]
y_lstm_m6_6m_test = y_lstm_m6_6m.loc[y_lstm_m6_6m.index.intersection(index_test)]

y_lstm_m6_12m = y_lstm_m6.rolling(12).sum().dropna()
y_lstm_m6_12m_train = y_lstm_m6_12m.loc[y_lstm_m6_12m.index.intersection(index_train)]
y_lstm_m6_12m_test = y_lstm_m6_12m.loc[y_lstm_m6_12m.index.intersection(index_test)]

# Reshapes and converts into array - the lag axis is folded into
# (n_seq_conv, 1, n_steps_conv) with a single trailing feature
X_lstm_m6 = np.array(X_lstm_m6)
X_lstm_m6 = X_lstm_m6.reshape(X_lstm_m6.shape[0], n_seq_conv, 1, n_steps_conv, 1)
# Input splits are taken by sample position, looked up while the target
# splits still carry their labels
X_lstm_m6_train = X_lstm_m6[df_index_adj.loc[y_lstm_m6_train.index][0], :, :, :, :]
X_lstm_m6_test = X_lstm_m6[df_index_adj.loc[y_lstm_m6_test.index][0], :, :, :, :]

y_lstm_m6 = np.array(y_lstm_m6)[:,0]
y_lstm_m6_train = np.array(y_lstm_m6_train)[:,0]
y_lstm_m6_test = np.array(y_lstm_m6_test)[:,0]

# Rolling targets go through scale(); each sample is scaled on its own
y_lstm_m6_3m = scale(np.array(y_lstm_m6_3m))[:,0]
y_lstm_m6_3m_train = scale(np.array(y_lstm_m6_3m_train))[:,0]
y_lstm_m6_3m_test = scale(np.array(y_lstm_m6_3m_test))[:,0]

y_lstm_m6_6m = scale(np.array(y_lstm_m6_6m))[:,0]
y_lstm_m6_6m_train = scale(np.array(y_lstm_m6_6m_train))[:,0]
y_lstm_m6_6m_test = scale(np.array(y_lstm_m6_6m_test))[:,0]

y_lstm_m6_12m = scale(np.array(y_lstm_m6_12m))[:,0]
y_lstm_m6_12m_train = scale(np.array(y_lstm_m6_12m_train))[:,0]
y_lstm_m6_12m_test = scale(np.array(y_lstm_m6_12m_test))[:,0]

In [None]:
#####################################################################################################################
#                                                                                                                   #
# Models                                                                                                            #
#                                                                                                                   #
#####################################################################################################################

#################################
# Storage variables             #
#################################

# Column names - periods ahead
col_names = ['p0', 'p1', 'p2', 'p3', 'p4', 'p5', 'p12', '3m', '6m', '12m']
periods = [0, 1, 2, 3, 4, 5, 12, 2, 5, 11]
models_names = ['LSTM_M1', 'LSTM_M2', 'LSTM_M3', 'ConvLSTM_M1', 'ConvLSTM_M2', 'ConvLSTM_M3', 'MLP', 'MLP2', 'RW', 
                'Ridge', 'Ridge_CV', 'Bayesian_Ridge', 'Lasso', 'Lasso_CV', 'Bayesian_Lasso', 'ENet', 'SVR', 
                'Random_Forest', 'BART', 'BAGGING', 'kNN', 'Huber', 'Theil_Sen', 'Factor', 'GARCH', 'VECM', 'SETAR',
                'MA', 'SARIMA', 'ARFIMA', 'GradBoost', 'AdaBoost', 'BayesRegression']


def _nan_forecast_frame(index):
    """Return a float DataFrame of NaN with one row per label in `index` and one column per entry of `col_names`.

    np.full builds the NaN matrix directly. The previous expression,
    np.zeros(...).fill(np.nan), evaluated to None because ndarray.fill works
    in place and returns nothing, so the frames were built from data=None.
    """
    return pd.DataFrame(data = np.full([len(index), len(col_names)], np.nan),
                        index = index, columns = col_names)


# Matrices that store fitted values for each model (training and test samples):
# df_y_fit_* is indexed by the training labels, df_y_hat_* by the test labels.
df_y_fit_lstm_m1, df_y_hat_lstm_m1 = _nan_forecast_frame(index_train), _nan_forecast_frame(index_test)
df_y_fit_lstm_m2, df_y_hat_lstm_m2 = _nan_forecast_frame(index_train), _nan_forecast_frame(index_test)
df_y_fit_lstm_m3, df_y_hat_lstm_m3 = _nan_forecast_frame(index_train), _nan_forecast_frame(index_test)
df_y_fit_lstm_m4, df_y_hat_lstm_m4 = _nan_forecast_frame(index_train), _nan_forecast_frame(index_test)
df_y_fit_lstm_m5, df_y_hat_lstm_m5 = _nan_forecast_frame(index_train), _nan_forecast_frame(index_test)
df_y_fit_lstm_m6, df_y_hat_lstm_m6 = _nan_forecast_frame(index_train), _nan_forecast_frame(index_test)
df_y_fit_mlp, df_y_hat_mlp = _nan_forecast_frame(index_train), _nan_forecast_frame(index_test)
df_y_fit_mlp2, df_y_hat_mlp2 = _nan_forecast_frame(index_train), _nan_forecast_frame(index_test)
df_y_fit_rw, df_y_hat_rw = _nan_forecast_frame(index_train), _nan_forecast_frame(index_test)
df_y_fit_ridge, df_y_hat_ridge = _nan_forecast_frame(index_train), _nan_forecast_frame(index_test)
df_y_fit_ridge_cv, df_y_hat_ridge_cv = _nan_forecast_frame(index_train), _nan_forecast_frame(index_test)
df_y_fit_bridge, df_y_hat_bridge = _nan_forecast_frame(index_train), _nan_forecast_frame(index_test)
df_y_fit_lasso, df_y_hat_lasso = _nan_forecast_frame(index_train), _nan_forecast_frame(index_test)
df_y_fit_lasso_cv, df_y_hat_lasso_cv = _nan_forecast_frame(index_train), _nan_forecast_frame(index_test)
df_y_fit_blasso, df_y_hat_blasso = _nan_forecast_frame(index_train), _nan_forecast_frame(index_test)
df_y_fit_enet, df_y_hat_enet = _nan_forecast_frame(index_train), _nan_forecast_frame(index_test)
df_y_fit_svr, df_y_hat_svr = _nan_forecast_frame(index_train), _nan_forecast_frame(index_test)
df_y_fit_rf, df_y_hat_rf = _nan_forecast_frame(index_train), _nan_forecast_frame(index_test)
df_y_fit_bart, df_y_hat_bart = _nan_forecast_frame(index_train), _nan_forecast_frame(index_test)
df_y_fit_bagging, df_y_hat_bagging = _nan_forecast_frame(index_train), _nan_forecast_frame(index_test)
df_y_fit_var, df_y_hat_var = _nan_forecast_frame(index_train), _nan_forecast_frame(index_test)
df_y_fit_ma, df_y_hat_ma = _nan_forecast_frame(index_train), _nan_forecast_frame(index_test)
df_y_fit_sarima, df_y_hat_sarima = _nan_forecast_frame(index_train), _nan_forecast_frame(index_test)
df_y_fit_huber, df_y_hat_huber = _nan_forecast_frame(index_train), _nan_forecast_frame(index_test)
df_y_fit_ts, df_y_hat_ts = _nan_forecast_frame(index_train), _nan_forecast_frame(index_test)
df_y_fit_factor, df_y_hat_factor = _nan_forecast_frame(index_train), _nan_forecast_frame(index_test)
df_y_fit_gradboost, df_y_hat_gradboost = _nan_forecast_frame(index_train), _nan_forecast_frame(index_test)
df_y_fit_adaboost, df_y_hat_adaboost = _nan_forecast_frame(index_train), _nan_forecast_frame(index_test)
df_y_fit_garch, df_y_hat_garch = _nan_forecast_frame(index_train), _nan_forecast_frame(index_test)
df_y_fit_vecm, df_y_hat_vecm = _nan_forecast_frame(index_train), _nan_forecast_frame(index_test)
df_y_fit_setar, df_y_hat_setar = _nan_forecast_frame(index_train), _nan_forecast_frame(index_test)
df_y_fit_arfima, df_y_hat_arfima = _nan_forecast_frame(index_train), _nan_forecast_frame(index_test)
df_y_fit_bayesreg, df_y_hat_bayesreg = _nan_forecast_frame(index_train), _nan_forecast_frame(index_test)
df_y_fit_knn, df_y_hat_knn = _nan_forecast_frame(index_train), _nan_forecast_frame(index_test)

# Matrices that save performance metrics (one row per model, one column per
# horizon), initialised with zeros
df_MSE = pd.DataFrame(data = np.zeros([len(models_names), len(col_names)]), index = models_names, columns = col_names)
df_MAE = pd.DataFrame(data = np.zeros([len(models_names), len(col_names)]), index = models_names, columns = col_names)
df_RMSE = pd.DataFrame(data = np.zeros([len(models_names), len(col_names)]), index = models_names, columns = col_names)
df_MAPE = pd.DataFrame(data = np.zeros([len(models_names), len(col_names)]), index = models_names, columns = col_names)
df_CS = pd.DataFrame(data = np.zeros([len(models_names), len(col_names)]), index = models_names, columns = col_names)

#################################
# LSTM with past inflation only #
#################################

# Number of features
n_features = 1

# Define the model

def lstm_m1_gen():
    """Build and compile the two-layer LSTM that takes (n_lags_lstm, n_features) inputs.

    Returns:
        A compiled Sequential model: two LSTM layers of 50 units each (the
        first returns full sequences so the second can consume them),
        followed by a single-unit Dense output. It is compiled with the Adam
        optimizer and MSE loss, and tracks MSE, MAE, RMSE, MAPE and cosine
        similarity.
    """
    stacked_layers = [
        LSTM(50, activation = act_fun,
             return_sequences = True,
             kernel_initializer = tf.keras.initializers.LecunNormal,
             input_shape = (n_lags_lstm, n_features)),
        LSTM(50, activation = act_fun,
             kernel_initializer = tf.keras.initializers.LecunNormal),
        Dense(1),
    ]
    tracked_metrics = ['mse', 'mae', tf.keras.metrics.RootMeanSquaredError(),
                       tf.keras.metrics.MeanAbsolutePercentageError(), 'cosine_similarity']

    model = Sequential(stacked_layers)
    model.compile(optimizer = 'adam', loss = 'mse', metrics = tracked_metrics)
    return model

# Fresh network for the base column 'p0'
lstm_m1 = lstm_m1_gen()

# Fit model (validation_split makes Keras hold out part of the data passed here)
hist_lstm_m1 = lstm_m1.fit(X_lstm_m1_train, y_lstm_m1_train,
                           epochs=num_epochs,
                           validation_split = 0.1,
                           verbose=False)

# Fitted values
y_fit_lstm_m1 = lstm_m1.predict(X_lstm_m1_train)[:,0]
y_hat_lstm_m1 = lstm_m1.predict(X_lstm_m1_test)[:,0]

# Store them in the last rows of column 'p0'. A single positional .iloc write
# replaces the chained df['p0'][...] = ... form, which assigns through an
# intermediate Series and is not guaranteed to reach the DataFrame.
df_y_fit_lstm_m1.iloc[-len(y_lstm_m1_train):, df_y_fit_lstm_m1.columns.get_loc('p0')] = y_fit_lstm_m1
df_y_hat_lstm_m1.iloc[-len(y_lstm_m1_test):, df_y_hat_lstm_m1.columns.get_loc('p0')] = y_hat_lstm_m1

# Plots fitted values x observed values - training sample
y_fit_lstm_m1 = y_fit_lstm_m1.reshape(y_fit_lstm_m1.shape[0])
sns.lineplot(data=[y_lstm_m1_train, y_fit_lstm_m1])
plt.legend(['Observed', 'Fitted'], loc='lower right')
plt.tight_layout()
plt.show()
plt.close()

# Plots fitted values x observed values - test sample
y_hat_lstm_m1 = y_hat_lstm_m1.reshape(y_hat_lstm_m1.shape[0])
sns.lineplot(data=[y_lstm_m1_test, y_hat_lstm_m1])
plt.legend(['Observed', 'Fitted'], loc='lower right')
plt.tight_layout()
plt.show()
plt.close()

# Plots loss function
# The second curve is Keras' 'val_loss', i.e. the loss on the validation
# split, not on the test sample, so it is labelled 'Validation'.
plt.plot(hist_lstm_m1.history['loss'])
plt.plot(hist_lstm_m1.history['val_loss'])
plt.title('LSTM - Loss (MSE)')
plt.ylabel('Loss')
plt.xlabel('Epoch')
plt.legend(['Train', 'Validation'], loc='upper right')
plt.show()
plt.close()

# Saves metrics
# Each table stores, at ('LSTM_M1', 'p0'), the smallest per-epoch value of its
# validation metric.
# NOTE(review): the minimum is also taken for cosine similarity - confirm that
# "lowest" is the intended summary for that metric.
for _metric_table, _history_key in ((df_MSE, 'val_mse'),
                                    (df_MAE, 'val_mae'),
                                    (df_RMSE, 'val_root_mean_squared_error'),
                                    (df_MAPE, 'val_mean_absolute_percentage_error'),
                                    (df_CS, 'val_cosine_similarity')):
    _metric_table.loc['LSTM_M1','p0'] = np.min(hist_lstm_m1.history[_history_key])

# Fits models for every period
k = 0
str_model = 'LSTM_M1'
for p in col_names:

    if k > 0: # starts at p1 because p0 has been already executed

        shift = periods[k]
        lstm_m1 = lstm_m1_gen()

        # Inputs drop their last `shift` samples. The '3m'/'6m'/'12m' columns
        # use their own pre-built targets; every other column uses the base
        # target with its first `shift` values removed.
        X_h_train = X_lstm_m1_train[:-shift,:,:]
        if p == '3m':
            y_h_train = y_lstm_m1_3m_train
        elif p == '6m':
            y_h_train = y_lstm_m1_6m_train
        elif p == '12m':
            y_h_train = y_lstm_m1_12m_train
        else:
            y_h_train = y_lstm_m1_train[shift:]

        hist_lstm_m1 = lstm_m1.fit(X_h_train, y_h_train,
                                   epochs=num_epochs,
                                   validation_split = 0.1,
                                   verbose=False)

        # Positional .iloc writes replace the chained df[p][...] = ... form,
        # which is not guaranteed to modify the DataFrame itself.
        df_y_fit_lstm_m1.iloc[(-len(y_lstm_m1_train)+shift):, df_y_fit_lstm_m1.columns.get_loc(p)] = lstm_m1.predict(X_h_train)[:,0]
        df_y_hat_lstm_m1.iloc[(-len(y_lstm_m1_test)+shift):, df_y_hat_lstm_m1.columns.get_loc(p)] = lstm_m1.predict(X_lstm_m1_test[:-shift,:,:])[:,0]

        # Smallest per-epoch validation value of each tracked metric
        df_MSE.loc[str_model, p] = np.min(hist_lstm_m1.history['val_mse'])
        df_MAE.loc[str_model, p] = np.min(hist_lstm_m1.history['val_mae'])
        df_RMSE.loc[str_model, p] = np.min(hist_lstm_m1.history['val_root_mean_squared_error'])
        df_MAPE.loc[str_model, p] = np.min(hist_lstm_m1.history['val_mean_absolute_percentage_error'])
        df_CS.loc[str_model, p] = np.min(hist_lstm_m1.history['val_cosine_similarity'])

    k = k + 1

#################################
# LSTM with autoencoder         #
#################################

# Number of features
n_features = X_lstm_m2.shape[2]

# Define the model
def lstm_m2_gen(n_features = n_features):
    """Build and compile a fresh, untrained two-layer LSTM regressor.

    Two 50-unit LSTM layers (the first returning sequences) feed a single-unit
    Dense output. The model is compiled with the Nadam optimizer and an MSE
    loss, tracking MSE, MAE, RMSE, MAPE and cosine similarity.

    Parameters
    ----------
    n_features : int, optional
        Size of the last axis of one input sample. The default is captured when
        this function is defined, so later reassignments of the module-level
        ``n_features`` cannot change the networks built here.

    Returns
    -------
    Sequential
        The compiled model. ``act_fun`` and ``n_lags_lstm`` are read from
        module scope at call time.
    """
    model = Sequential()
    model.add(LSTM(50, activation = act_fun,
                   return_sequences=True,
                   kernel_initializer = tf.keras.initializers.LecunNormal,
                   input_shape=(n_lags_lstm, n_features)))
    model.add(LSTM(50, activation = act_fun,
                   kernel_initializer = tf.keras.initializers.LecunNormal))
    model.add(Dense(1))
    model.compile(optimizer='Nadam', loss='mse',
                  metrics = ['mse', 'mae', tf.keras.metrics.RootMeanSquaredError(),
                             tf.keras.metrics.MeanAbsolutePercentageError(), 'cosine_similarity'])
    return model

# Fresh network for the base column 'p0'
lstm_m2 = lstm_m2_gen()

# Fit model (validation_split makes Keras hold out part of the data passed here)
hist_lstm_m2 = lstm_m2.fit(X_lstm_m2_train, y_lstm_m2_train,
                           epochs=num_epochs,
                           validation_split = 0.1,
                           verbose=False)

# Fitted values
y_fit_lstm_m2 = lstm_m2.predict(X_lstm_m2_train)[:,0]
y_hat_lstm_m2 = lstm_m2.predict(X_lstm_m2_test)[:,0]

# Store them in the last rows of column 'p0'. A single positional .iloc write
# replaces the chained df['p0'][...] = ... form, which assigns through an
# intermediate Series and is not guaranteed to reach the DataFrame.
df_y_fit_lstm_m2.iloc[-len(y_lstm_m2_train):, df_y_fit_lstm_m2.columns.get_loc('p0')] = y_fit_lstm_m2
df_y_hat_lstm_m2.iloc[-len(y_lstm_m2_test):, df_y_hat_lstm_m2.columns.get_loc('p0')] = y_hat_lstm_m2

# Plots fitted values x observed values - training sample
y_fit_lstm_m2 = y_fit_lstm_m2.reshape(y_fit_lstm_m2.shape[0])
sns.lineplot(data=[y_lstm_m2_train, y_fit_lstm_m2])
plt.legend(['Observed', 'Fitted'], loc='lower right')
plt.tight_layout()
plt.show()
plt.close()

# Plots fitted values x observed values - test sample
y_hat_lstm_m2 = y_hat_lstm_m2.reshape(y_hat_lstm_m2.shape[0])
sns.lineplot(data=[y_lstm_m2_test, y_hat_lstm_m2])
plt.legend(['Observed', 'Fitted'], loc='lower right')
plt.tight_layout()
plt.show()
plt.close()

# Plot our loss
# The second curve is Keras' 'val_loss', i.e. the loss on the validation
# split, not on the test sample, so it is labelled 'Validation'.
plt.plot(hist_lstm_m2.history['loss'])
plt.plot(hist_lstm_m2.history['val_loss'])
plt.title('Loss (MSE)')
plt.ylabel('Loss')
plt.xlabel('Epoch')
plt.legend(['Train', 'Validation'], loc='upper right')
plt.show()
plt.close()

# Saves performance metrics
# Each table stores, at ('LSTM_M2', 'p0'), the smallest per-epoch value of its
# validation metric.
# NOTE(review): the minimum is also taken for cosine similarity - confirm that
# "lowest" is the intended summary for that metric.
for _metric_table, _history_key in ((df_MSE, 'val_mse'),
                                    (df_MAE, 'val_mae'),
                                    (df_RMSE, 'val_root_mean_squared_error'),
                                    (df_MAPE, 'val_mean_absolute_percentage_error'),
                                    (df_CS, 'val_cosine_similarity')):
    _metric_table.loc['LSTM_M2','p0'] = np.min(hist_lstm_m2.history[_history_key])

# Fits models for every period
k = 0
str_model = 'LSTM_M2'
for p in col_names:

    if k > 0: # starts at p1 because p0 has been already executed

        shift = periods[k]
        lstm_m2 = lstm_m2_gen()

        # Split data into samples (input, output)
        X_lstm_m2, y_lstm_m2 = split_sequence_mult(data_lstm_m2, n_lags_lstm, per_ahead = shift + 1, cum = False)

        # Indexation: label the samples with the last len(y_lstm_m2) entries of y.index
        len1 = len(y_lstm_m2)
        len2 = len(y.index)
        i = y.index[(len2-len1):len2]

        y_lstm_m2 = pd.DataFrame(data = y_lstm_m2, index = i)
        # Maps each index label to its row position in X_lstm_m2
        df_index_adj = pd.DataFrame(data = range(0,len(y_lstm_m2)), index = y_lstm_m2.index)
        y_lstm_m2_train = y_lstm_m2.loc[y_lstm_m2.index.intersection(index_train)]
        y_lstm_m2_test = y_lstm_m2.loc[y_lstm_m2.index.intersection(index_test)]

        # Converts into array
        X_lstm_m2 = np.array(X_lstm_m2)
        X_lstm_m2_train = X_lstm_m2[df_index_adj.loc[y_lstm_m2_train.index][0], :, :]
        X_lstm_m2_test = X_lstm_m2[df_index_adj.loc[y_lstm_m2_test.index][0], :, :]

        y_lstm_m2 = np.array(y_lstm_m2)[:,0]
        y_lstm_m2_train = np.array(y_lstm_m2_train)[:,0]
        y_lstm_m2_test = np.array(y_lstm_m2_test)[:,0]

        # Inputs drop their last `shift` samples. The '3m'/'6m'/'12m' columns
        # use their own pre-built targets; every other column uses the re-split
        # target with its first `shift` values removed.
        # NOTE(review): the target above was built with per_ahead = shift + 1
        # and is offset by `shift` again here - presumably per_ahead already
        # encodes the horizon; confirm the double offset is intended.
        X_h_train = X_lstm_m2_train[:-shift,:,:]
        if p == '3m':
            y_h_train = y_lstm_m2_3m_train
        elif p == '6m':
            y_h_train = y_lstm_m2_6m_train
        elif p == '12m':
            y_h_train = y_lstm_m2_12m_train
        else:
            y_h_train = y_lstm_m2_train[shift:]

        hist_lstm_m2 = lstm_m2.fit(X_h_train, y_h_train,
                                   epochs=num_epochs,
                                   validation_split = 0.1,
                                   verbose=False)

        # Positional .iloc writes replace the chained df[p][...] = ... form,
        # which is not guaranteed to modify the DataFrame itself.
        df_y_fit_lstm_m2.iloc[(-len(y_lstm_m2_train)+shift):, df_y_fit_lstm_m2.columns.get_loc(p)] = lstm_m2.predict(X_h_train)[:,0]
        df_y_hat_lstm_m2.iloc[(-len(y_lstm_m2_test)+shift):, df_y_hat_lstm_m2.columns.get_loc(p)] = lstm_m2.predict(X_lstm_m2_test[:-shift,:,:])[:,0]

        # Smallest per-epoch validation value of each tracked metric
        df_MSE.loc[str_model, p] = np.min(hist_lstm_m2.history['val_mse'])
        df_MAE.loc[str_model, p] = np.min(hist_lstm_m2.history['val_mae'])
        df_RMSE.loc[str_model, p] = np.min(hist_lstm_m2.history['val_root_mean_squared_error'])
        df_MAPE.loc[str_model, p] = np.min(hist_lstm_m2.history['val_mean_absolute_percentage_error'])
        df_CS.loc[str_model, p] = np.min(hist_lstm_m2.history['val_cosine_similarity'])

    k = k + 1

#################################
# LSTM w/o autoencoder          #
#################################

# Number of variables (features)
n_features = X_lstm_m3_train.shape[2]

# Define the model
def lstm_m3_gen(n_features = n_features):
    """Build and compile a fresh, untrained two-layer LSTM regressor.

    Two 50-unit LSTM layers (the first returning sequences) feed a single-unit
    Dense output. The model is compiled with the Nadam optimizer and an MSE
    loss, tracking MSE, MAE, RMSE, MAPE and cosine similarity.

    Parameters
    ----------
    n_features : int, optional
        Size of the last axis of one input sample. The default is captured when
        this function is defined, so later reassignments of the module-level
        ``n_features`` cannot change the networks built here.

    Returns
    -------
    Sequential
        The compiled model. ``act_fun`` and ``n_lags_lstm`` are read from
        module scope at call time.
    """
    model = Sequential()
    model.add(LSTM(50, activation=act_fun,
                   return_sequences=True,
                   kernel_initializer = tf.keras.initializers.LecunNormal,
                   input_shape=(n_lags_lstm, n_features)))
    model.add(LSTM(50, activation=act_fun,
                   kernel_initializer = tf.keras.initializers.LecunNormal))
    model.add(Dense(1))
    model.compile(optimizer='Nadam', loss='mse',
                  metrics = ['mse', 'mae', tf.keras.metrics.RootMeanSquaredError(),
                             tf.keras.metrics.MeanAbsolutePercentageError(), 'cosine_similarity'])
    return model

# Fresh network for the base column 'p0'
lstm_m3 = lstm_m3_gen()

# Fit model (validation_split makes Keras hold out part of the data passed here)
hist_lstm_m3 = lstm_m3.fit(X_lstm_m3_train, y_lstm_m3_train,
                           epochs=num_epochs,
                           validation_split = 0.1,
                           verbose=False)

# Fitted values
y_fit_lstm_m3 = lstm_m3.predict(X_lstm_m3_train)[:,0]
y_hat_lstm_m3 = lstm_m3.predict(X_lstm_m3_test)[:,0]

# Store them in the last rows of column 'p0'. A single positional .iloc write
# replaces the chained df['p0'][...] = ... form, which assigns through an
# intermediate Series and is not guaranteed to reach the DataFrame.
df_y_fit_lstm_m3.iloc[-len(y_lstm_m3_train):, df_y_fit_lstm_m3.columns.get_loc('p0')] = y_fit_lstm_m3
df_y_hat_lstm_m3.iloc[-len(y_lstm_m3_test):, df_y_hat_lstm_m3.columns.get_loc('p0')] = y_hat_lstm_m3

# Plots fitted values x observed values - training sample
y_fit_lstm_m3 = y_fit_lstm_m3.reshape(y_fit_lstm_m3.shape[0])
sns.lineplot(data=[y_lstm_m3_train, y_fit_lstm_m3])
plt.legend(['Observed', 'Fitted'], loc='lower right')
plt.tight_layout()
plt.show()
plt.close()

# Plots fitted values x observed values - test sample
y_hat_lstm_m3 = y_hat_lstm_m3.reshape(y_hat_lstm_m3.shape[0])
sns.lineplot(data=[y_lstm_m3_test, y_hat_lstm_m3])
plt.legend(['Observed', 'Fitted'], loc='lower right')
plt.tight_layout()
plt.show()
plt.close()

# Plot our loss
# The second curve is Keras' 'val_loss', i.e. the loss on the validation
# split, not on the test sample, so it is labelled 'Validation'.
plt.plot(hist_lstm_m3.history['loss'])
plt.plot(hist_lstm_m3.history['val_loss'])
plt.title('Loss (MSE)')
plt.ylabel('Loss')
plt.xlabel('Epoch')
plt.legend(['Train', 'Validation'], loc='upper right')
plt.show()
plt.close()

# Saves performance metrics
# Each table stores, at ('LSTM_M3', 'p0'), the smallest per-epoch value of its
# validation metric.
# NOTE(review): the minimum is also taken for cosine similarity - confirm that
# "lowest" is the intended summary for that metric.
for _metric_table, _history_key in ((df_MSE, 'val_mse'),
                                    (df_MAE, 'val_mae'),
                                    (df_RMSE, 'val_root_mean_squared_error'),
                                    (df_MAPE, 'val_mean_absolute_percentage_error'),
                                    (df_CS, 'val_cosine_similarity')):
    _metric_table.loc['LSTM_M3','p0'] = np.min(hist_lstm_m3.history[_history_key])

# Fits models for every period
k = 0
str_model = 'LSTM_M3'
for p in col_names:

    if k > 0: # starts at p1 because p0 has been already executed

        shift = periods[k]
        lstm_m3 = lstm_m3_gen()

        # Split data into samples (input, output)
        X_lstm_m3, y_lstm_m3 = split_sequence_mult(data_lstm_m3, n_lags_lstm, per_ahead = 1, cum = False)

        # Indexation: label the samples with the last len(y_lstm_m3) entries of y.index
        len1 = len(y_lstm_m3)
        len2 = len(y.index)
        i = y.index[(len2-len1):len2]

        y_lstm_m3 = pd.DataFrame(data = y_lstm_m3, index = i)
        # Maps each index label to its row position in X_lstm_m3
        df_index_adj = pd.DataFrame(data = range(0,len(y_lstm_m3)), index = y_lstm_m3.index)
        y_lstm_m3_train = y_lstm_m3.loc[y_lstm_m3.index.intersection(index_train)]
        y_lstm_m3_test = y_lstm_m3.loc[y_lstm_m3.index.intersection(index_test)]

        # Converts into array
        X_lstm_m3 = np.array(X_lstm_m3)
        X_lstm_m3_train = X_lstm_m3[df_index_adj.loc[y_lstm_m3_train.index][0], :, :]
        X_lstm_m3_test = X_lstm_m3[df_index_adj.loc[y_lstm_m3_test.index][0], :, :]

        # Inputs drop their last `shift` samples. The '3m'/'6m'/'12m' columns
        # use their own pre-built targets; every other column uses the re-split
        # target (kept as a DataFrame here) with its first `shift` rows removed.
        X_h_train = X_lstm_m3_train[:-shift,:,:]
        if p == '3m':
            y_h_train = y_lstm_m3_3m_train
        elif p == '6m':
            y_h_train = y_lstm_m3_6m_train
        elif p == '12m':
            y_h_train = y_lstm_m3_12m_train
        else:
            y_h_train = y_lstm_m3_train[shift:]

        hist_lstm_m3 = lstm_m3.fit(X_h_train, y_h_train,
                                   epochs=num_epochs,
                                   validation_split = 0.1,
                                   verbose=False)

        # Positional .iloc writes replace the chained df[p][...] = ... form,
        # which is not guaranteed to modify the DataFrame itself.
        df_y_fit_lstm_m3.iloc[(-len(y_lstm_m3_train)+shift):, df_y_fit_lstm_m3.columns.get_loc(p)] = lstm_m3.predict(X_h_train)[:,0]
        df_y_hat_lstm_m3.iloc[(-len(y_lstm_m3_test)+shift):, df_y_hat_lstm_m3.columns.get_loc(p)] = lstm_m3.predict(X_lstm_m3_test[:-shift,:,:])[:,0]

        # Smallest per-epoch validation value of each tracked metric
        df_MSE.loc[str_model, p] = np.min(hist_lstm_m3.history['val_mse'])
        df_MAE.loc[str_model, p] = np.min(hist_lstm_m3.history['val_mae'])
        df_RMSE.loc[str_model, p] = np.min(hist_lstm_m3.history['val_root_mean_squared_error'])
        df_MAPE.loc[str_model, p] = np.min(hist_lstm_m3.history['val_mean_absolute_percentage_error'])
        df_CS.loc[str_model, p] = np.min(hist_lstm_m3.history['val_cosine_similarity'])

    k = k + 1

#################################
# ConvLSTM + Autoencoder        #
#################################

# Number of variables (features)
n_features = X_lstm_m4_train.shape[4]

# Define the model
def lstm_m4_gen(n_features = n_features):
    """Build and compile a fresh, untrained ConvLSTM regressor.

    Layer order: two ConvLSTM2D layers (8 filters, 1x4 kernel, 'same' padding,
    dropout 0.0, sequences returned), each followed by batch normalisation;
    Flatten; RepeatVector(1); a 50-unit LSTM returning sequences; two
    time-distributed Dense layers (32 then 16 units); a single-unit Dense
    output. Compiled with the Nadam optimizer and an MSE loss, tracking MSE,
    MAE, RMSE, MAPE and cosine similarity.

    Parameters
    ----------
    n_features : int, optional
        Last entry of the ConvLSTM input shape. The default is captured when
        this function is defined, so later reassignments of the module-level
        ``n_features`` cannot change the networks built here.

    Returns
    -------
    Sequential
        The compiled model. ``act_fun``, ``n_seq_conv`` and ``n_steps_conv``
        are read from module scope at call time.
    """
    model = Sequential()
    model.add(ConvLSTM2D(name='ConvLSTM_Layer1',
                         filters=8,
                         kernel_size=(1,4),
                         activation=act_fun,
                         padding='same',
                         kernel_initializer = tf.keras.initializers.LecunNormal,
                         input_shape=(n_seq_conv, 1, n_steps_conv, n_features),
                         dropout=0.0,
                         return_sequences = True))
    model.add(BatchNormalization(name = 'Batch_Norm_1'))
    model.add(ConvLSTM2D(name='ConvLSTM_Layer2',
                         filters=8,
                         kernel_size=(1,4),
                         activation=act_fun,
                         padding='same',
                         kernel_initializer = tf.keras.initializers.LecunNormal,
                         dropout=0.0,
                         return_sequences = True))
    model.add(BatchNormalization(name = 'Batch_Norm_2'))
    model.add(Flatten(name = 'Flatten_1'))
    model.add(RepeatVector(name = 'Repeat_Vector_1', n = 1))
    model.add(LSTM(name = 'LSTM_Layer_1', units = 50, activation=act_fun,
                   kernel_initializer = tf.keras.initializers.LecunNormal,
                   return_sequences=True))
    model.add(TimeDistributed(name = 'Time_Dist_1',
                              layer = Dense(name = 'Dense_Layer_1', units = 32,
                                            activation = act_fun,
                                            kernel_initializer = tf.keras.initializers.LecunNormal)))
    model.add(TimeDistributed(name = 'Time_Dist_2',
                              layer = Dense(name = 'Dense_Layer_2', units = 16,
                                            activation = act_fun,
                                            kernel_initializer = tf.keras.initializers.LecunNormal)))
    model.add(Dense(name = 'Output', units = 1))
    model.compile(loss='mse', optimizer='Nadam',
                  metrics = ['mse', 'mae', tf.keras.metrics.RootMeanSquaredError(),
                             tf.keras.metrics.MeanAbsolutePercentageError(), 'cosine_similarity'])

    return model

# Fresh network for the base column 'p0'
lstm_m4 = lstm_m4_gen()

# Fit model (validation_split makes Keras hold out part of the data passed here)
hist_lstm_m4 = lstm_m4.fit(X_lstm_m4_train, y_lstm_m4_train,
                           epochs=num_epochs,
                           validation_split = 0.1,
                           verbose=0)

# Fitted values (two [:,0] selections reduce the prediction to one value per sample)
y_fit_lstm_m4 = lstm_m4.predict(X_lstm_m4_train)[:,0][:,0]
y_hat_lstm_m4 = lstm_m4.predict(X_lstm_m4_test)[:,0][:,0]

# Store them in the last rows of column 'p0'. A single positional .iloc write
# replaces the chained df['p0'][...] = ... form, which assigns through an
# intermediate Series and is not guaranteed to reach the DataFrame.
df_y_fit_lstm_m4.iloc[-len(y_lstm_m4_train):, df_y_fit_lstm_m4.columns.get_loc('p0')] = y_fit_lstm_m4
df_y_hat_lstm_m4.iloc[-len(y_lstm_m4_test):, df_y_hat_lstm_m4.columns.get_loc('p0')] = y_hat_lstm_m4

# Plots fitted values x observed values - training sample
y_fit_lstm_m4 = y_fit_lstm_m4.reshape(y_fit_lstm_m4.shape[0])
sns.lineplot(data=[y_lstm_m4_train, y_fit_lstm_m4])
plt.legend(['Observed', 'Fitted'], loc='lower right')
plt.title('Conv + Autoencoder - Training Sample')
plt.tight_layout()
plt.show()
plt.close()

# Plots fitted values x observed values - test sample
y_hat_lstm_m4 = y_hat_lstm_m4.reshape(y_hat_lstm_m4.shape[0])
sns.lineplot(data=[y_lstm_m4_test, y_hat_lstm_m4])
plt.legend(['Observed', 'Fitted'], loc='lower right')
plt.title('Conv + Autoencoder - Test Sample')
plt.tight_layout()
plt.show()
plt.close()

# Plot our loss
# The second curve is Keras' 'val_loss', i.e. the loss on the validation
# split, not on the test sample, so it is labelled 'Validation'.
plt.plot(hist_lstm_m4.history['loss'])
plt.plot(hist_lstm_m4.history['val_loss'])
plt.title('Conv + Autoencoder - Loss (MSE)')
plt.ylabel('Loss')
plt.xlabel('Epoch')
plt.legend(['Train', 'Validation'], loc='upper right')
plt.show()
plt.close()

# Saves performance metrics
# Each table stores, at ('ConvLSTM_M1', 'p0'), the smallest per-epoch value of
# its validation metric.
# NOTE(review): the minimum is also taken for cosine similarity - confirm that
# "lowest" is the intended summary for that metric.
for _metric_table, _history_key in ((df_MSE, 'val_mse'),
                                    (df_MAE, 'val_mae'),
                                    (df_RMSE, 'val_root_mean_squared_error'),
                                    (df_MAPE, 'val_mean_absolute_percentage_error'),
                                    (df_CS, 'val_cosine_similarity')):
    _metric_table.loc['ConvLSTM_M1','p0'] = np.min(hist_lstm_m4.history[_history_key])

# Fits models for every period
k = 0
str_model = 'ConvLSTM_M1'
for p in col_names:

    if k > 0: # starts at p1 because p0 has been already executed

        shift = periods[k]
        lstm_m4 = lstm_m4_gen()

        # Split data into samples (input, output)
        X_lstm_m4, y_lstm_m4 = split_sequence_mult(data_lstm_m4, n_lags_lstm, per_ahead = shift + 1, cum = False)

        # Indexation: label the samples with the last len(y_lstm_m4) entries of y.index
        len1 = len(y_lstm_m4)
        len2 = len(y.index)
        i = y.index[(len2-len1):len2]

        y_lstm_m4 = pd.DataFrame(data = y_lstm_m4, index = i)
        # Maps each index label to its row position in X_lstm_m4
        df_index_adj = pd.DataFrame(data = range(0,len(y_lstm_m4)), index = y_lstm_m4.index)
        y_lstm_m4_train = y_lstm_m4.loc[y_lstm_m4.index.intersection(index_train)]
        y_lstm_m4_test = y_lstm_m4.loc[y_lstm_m4.index.intersection(index_test)]

        # Reshapes and converts into array
        X_lstm_m4 = np.array(X_lstm_m4)
        X_lstm_m4 = X_lstm_m4.reshape(X_lstm_m4.shape[0], n_seq_conv, 1, n_steps_conv, X_lstm_m4.shape[2])
        X_lstm_m4_train = X_lstm_m4[df_index_adj.loc[y_lstm_m4_train.index][0], :, :, :, :]
        X_lstm_m4_test = X_lstm_m4[df_index_adj.loc[y_lstm_m4_test.index][0], :, :, :, :]

        y_lstm_m4 = np.array(y_lstm_m4)[:,0]
        y_lstm_m4_train = np.array(y_lstm_m4_train)[:,0]
        y_lstm_m4_test = np.array(y_lstm_m4_test)[:,0]

        # The '3m'/'6m'/'12m' columns train on the shortened inputs against
        # their own pre-built targets; every other column trains on the full
        # re-split training arrays. Fitted values always use the shortened inputs.
        X_h_train = X_lstm_m4_train[:-shift,:,:,:,:]
        if p == '3m':
            X_h_fit, y_h_fit = X_h_train, y_lstm_m4_3m_train
        elif p == '6m':
            X_h_fit, y_h_fit = X_h_train, y_lstm_m4_6m_train
        elif p == '12m':
            X_h_fit, y_h_fit = X_h_train, y_lstm_m4_12m_train
        else:
            X_h_fit, y_h_fit = X_lstm_m4_train, y_lstm_m4_train

        hist_lstm_m4 = lstm_m4.fit(X_h_fit, y_h_fit,
                                   epochs=num_epochs,
                                   validation_split = 0.1,
                                   verbose=False)

        # Positional .iloc writes replace the chained df[p][...] = ... form,
        # which is not guaranteed to modify the DataFrame itself.
        df_y_fit_lstm_m4.iloc[(-len(y_lstm_m4_train)+shift):, df_y_fit_lstm_m4.columns.get_loc(p)] = lstm_m4.predict(X_h_train)[:,0][:,0]
        df_y_hat_lstm_m4.iloc[(-len(y_lstm_m4_test)+shift):, df_y_hat_lstm_m4.columns.get_loc(p)] = lstm_m4.predict(X_lstm_m4_test[:-shift,:,:,:,:])[:,0][:,0]

        # Smallest per-epoch validation value of each tracked metric
        df_MSE.loc[str_model, p] = np.min(hist_lstm_m4.history['val_mse'])
        df_MAE.loc[str_model, p] = np.min(hist_lstm_m4.history['val_mae'])
        df_RMSE.loc[str_model, p] = np.min(hist_lstm_m4.history['val_root_mean_squared_error'])
        df_MAPE.loc[str_model, p] = np.min(hist_lstm_m4.history['val_mean_absolute_percentage_error'])
        df_CS.loc[str_model, p] = np.min(hist_lstm_m4.history['val_cosine_similarity'])

    k = k + 1

#################################
# ConvLSTM + VAE                #
#################################

# Number of variables (features)
n_features = X_lstm_m5_train.shape[4]

def lstm_m5_gen(n_features = n_features):
    """Build and compile a fresh, untrained ConvLSTM regressor.

    Layer order: two ConvLSTM2D layers (16 filters, 3x3 kernel, 'same' padding,
    dropout 0.2, sequences returned), each followed by batch normalisation;
    MaxPooling3D with a 2x2x2 pool; Flatten; RepeatVector(1); two 100-unit LSTM
    layers (dropout 0.2, sequences returned); two time-distributed Dense layers
    (64 then 32 units); a single-unit Dense output. Compiled with the Nadam
    optimizer and an MSE loss, tracking MSE, MAE, RMSE, MAPE and cosine
    similarity.

    Parameters
    ----------
    n_features : int, optional
        Last entry of the ConvLSTM input shape. The default is captured when
        this function is defined, so later reassignments of the module-level
        ``n_features`` cannot change the networks built here.

    Returns
    -------
    Sequential
        The compiled model. ``act_fun``, ``n_seq_conv`` and ``n_steps_conv``
        are read from module scope at call time.
    """
    model = Sequential()
    model.add(ConvLSTM2D(name='ConvLSTM_Layer1',
                         filters=16,
                         kernel_size=(3,3),
                         activation=act_fun,
                         padding='same',
                         kernel_initializer = tf.keras.initializers.LecunNormal,
                         input_shape=(n_seq_conv, 1, n_steps_conv, n_features),
                         dropout=0.2,
                         return_sequences = True))
    model.add(BatchNormalization(name = 'Batch_Norm_1'))
    model.add(ConvLSTM2D(name='ConvLSTM_Layer2',
                         filters=16,
                         kernel_size=(3,3),
                         activation=act_fun,
                         padding='same',
                         kernel_initializer = tf.keras.initializers.LecunNormal,
                         dropout=0.2,
                         return_sequences = True))
    model.add(BatchNormalization(name = 'Batch_Norm_2'))
    model.add(MaxPooling3D(name = 'Max_Pooling_3D_1', pool_size=(2,2,2), padding = 'same'))
    model.add(Flatten(name = 'Flatten_1'))
    model.add(RepeatVector(name = 'Repeat_Vector_1', n = 1))
    model.add(LSTM(name = 'LSTM_Layer_1', units = 100, activation=act_fun,
                   kernel_initializer = tf.keras.initializers.LecunNormal,
                   dropout = 0.2,
                   return_sequences=True))
    model.add(LSTM(name = 'LSTM_Layer_2', units = 100, activation=act_fun,
                   kernel_initializer = tf.keras.initializers.LecunNormal,
                   dropout = 0.2,
                   return_sequences=True))
    model.add(TimeDistributed(name = 'Time_Dist_1',
                              layer = Dense(name = 'Dense_Layer_1', units = 64,
                                            activation = act_fun,
                                            kernel_initializer = tf.keras.initializers.LecunNormal)))
    model.add(TimeDistributed(name = 'Time_Dist_2',
                              layer = Dense(name = 'Dense_Layer_2', units = 32,
                                            activation = act_fun,
                                            kernel_initializer = tf.keras.initializers.LecunNormal)))
    model.add(Dense(name = 'Output', units = 1))
    model.compile(loss='mse', optimizer='Nadam',
                  metrics = ['mse', 'mae', tf.keras.metrics.RootMeanSquaredError(),
                             tf.keras.metrics.MeanAbsolutePercentageError(), 'cosine_similarity'])
    return model

# Fresh network for the base column 'p0'
lstm_m5 = lstm_m5_gen()

# NOTE(review): this fit uses the full arrays X_lstm_m5 / y_lstm_m5 rather than
# the *_train subsets, so the "test" predictions below may be in-sample -
# confirm this is intended.
hist_lstm_m5 = lstm_m5.fit(X_lstm_m5, y_lstm_m5,
                           epochs=num_epochs,
                           validation_split = 0.1,
                           shuffle=True,
                           verbose=False)

# Fitted values (two [:,0] selections reduce the prediction to one value per sample)
y_fit_lstm_m5 = lstm_m5.predict(X_lstm_m5_train)[:,0][:,0]
y_hat_lstm_m5 = lstm_m5.predict(X_lstm_m5_test)[:,0][:,0]

# Store them in the last rows of column 'p0'. A single positional .iloc write
# replaces the chained df['p0'][...] = ... form, which assigns through an
# intermediate Series and is not guaranteed to reach the DataFrame.
df_y_fit_lstm_m5.iloc[-len(y_lstm_m5_train):, df_y_fit_lstm_m5.columns.get_loc('p0')] = y_fit_lstm_m5
df_y_hat_lstm_m5.iloc[-len(y_lstm_m5_test):, df_y_hat_lstm_m5.columns.get_loc('p0')] = y_hat_lstm_m5

# Rolling-MSE and rolling-volatility figures, saved as PDF.
# plt.rc changes matplotlib's global settings, so LaTeX text rendering and the
# serif font also apply to every figure drawn after this point.
plt.rc('text', usetex = True)
plt.rc('font', family = 'serif')
# NOTE(review): absolute, machine-specific output directory - consider moving
# it to a configurable setting.
str_Dir_Plots = 'C:/Users/alext/Desktop/Dissertação/Alexandre/v4/'

# Observed series without its first 12 rows, next to the model's predictions
# over the full input array (assumes both have the same length - TODO confirm).
y_full = y.iloc[12:]
y_full.index = pd.to_datetime(y_full.index)
y_full_lstm_m5 = pd.DataFrame(lstm_m5.predict(X_lstm_m5)[:,0][:,0])
y_full_lstm_m5.index = pd.to_datetime(y_full.index)
data_y = pd.concat([y_full, y_full_lstm_m5], axis = 1)
# This figure is closed without being shown or saved.
sns.lineplot(data = data_y)
plt.close()

# 12-observation rolling mean of the squared prediction error
y_sq_err = (y_full.iloc[:,0] - y_full_lstm_m5.iloc[:,0])**2
MSE_roll = y_sq_err.rolling(12).mean()
ax = sns.lineplot(data=MSE_roll)
ax.set_xlabel("Date")
ax.set_ylabel("MSE")
plt.savefig(fname = str_Dir_Plots + 'ConvLSTM_MSE_Roll' + '.pdf')
plt.close()

# 12-observation rolling standard deviation of the observed series
vol_roll = y_full.rolling(12).std()
ax = sns.lineplot(data=vol_roll)
ax.set_xlabel("Date")
ax.set_ylabel("Volatility")
ax.get_legend().remove()
plt.savefig(fname = str_Dir_Plots + 'CPI_Vol_Roll' + '.pdf')
# Close the figure once saved; otherwise the next plot is drawn on these axes.
plt.close()

# Plots fitted values x observed values - training sample
y_fit_lstm_m5 = y_fit_lstm_m5.reshape(y_fit_lstm_m5.shape[0])
sns.lineplot(data=[y_lstm_m5_train, y_fit_lstm_m5])
plt.legend(['Observed', 'Fitted'], loc='lower right')
plt.tight_layout()
plt.show()
plt.close()

# Plots fitted values x observed values - test sample
y_hat_lstm_m5 = y_hat_lstm_m5.reshape(y_hat_lstm_m5.shape[0])
sns.lineplot(data=[y_lstm_m5_test, y_hat_lstm_m5])
plt.legend(['Observed', 'Fitted'], loc='lower right')
plt.tight_layout()
plt.show()
plt.close()

# Plot our loss
# The second curve is Keras' 'val_loss', i.e. the loss on the validation
# split, not on the test sample, so it is labelled 'Validation'.
plt.plot(hist_lstm_m5.history['loss'])
plt.plot(hist_lstm_m5.history['val_loss'])
plt.title('ConvLSTM + VAE - Loss (MSE)')
plt.ylabel('Loss')
plt.xlabel('Epoch')
plt.legend(['Train', 'Validation'], loc='upper right')
plt.show()
plt.close()

# Saves performance metrics
# Each table stores, at ('ConvLSTM_M2', 'p0'), the smallest per-epoch value of
# its validation metric.
# NOTE(review): the minimum is also taken for cosine similarity - confirm that
# "lowest" is the intended summary for that metric.
for _metric_table, _history_key in ((df_MSE, 'val_mse'),
                                    (df_MAE, 'val_mae'),
                                    (df_RMSE, 'val_root_mean_squared_error'),
                                    (df_MAPE, 'val_mean_absolute_percentage_error'),
                                    (df_CS, 'val_cosine_similarity')):
    _metric_table.loc['ConvLSTM_M2','p0'] = np.min(hist_lstm_m5.history[_history_key])

# Fits models for every period
k = 0
str_model = 'ConvLSTM_M2'
for p in col_names:

    if k > 0: # starts at p1 because p0 has been already executed

        shift = periods[k]
        lstm_m5 = lstm_m5_gen()

        # Split data into samples (input, output)
        X_lstm_m5, y_lstm_m5 = split_sequence_mult(data_lstm_m5, n_lags_lstm, per_ahead = shift + 1, cum = False)

        # Indexation: label the samples with the last len(y_lstm_m5) entries of y.index
        len1 = len(y_lstm_m5)
        len2 = len(y.index)
        i = y.index[(len2-len1):len2]

        y_lstm_m5 = pd.DataFrame(data = y_lstm_m5, index = i)
        # Maps each index label to its row position in X_lstm_m5
        df_index_adj = pd.DataFrame(data = range(0,len(y_lstm_m5)), index = y_lstm_m5.index)
        y_lstm_m5_train = y_lstm_m5.loc[y_lstm_m5.index.intersection(index_train)]
        y_lstm_m5_test = y_lstm_m5.loc[y_lstm_m5.index.intersection(index_test)]

        # Reshapes and converts into array
        X_lstm_m5 = np.array(X_lstm_m5)
        X_lstm_m5 = X_lstm_m5.reshape(X_lstm_m5.shape[0], n_seq_conv, 1, n_steps_conv, X_lstm_m5.shape[2])
        X_lstm_m5_train = X_lstm_m5[df_index_adj.loc[y_lstm_m5_train.index][0], :, :, :, :]
        X_lstm_m5_test = X_lstm_m5[df_index_adj.loc[y_lstm_m5_test.index][0], :, :, :, :]

        y_lstm_m5 = np.array(y_lstm_m5)[:,0]
        y_lstm_m5_train = np.array(y_lstm_m5_train)[:,0]
        y_lstm_m5_test = np.array(y_lstm_m5_test)[:,0]

        # The '3m'/'6m'/'12m' columns train on the shortened training inputs
        # against their own pre-built targets; every other column trains on
        # the full re-split arrays.
        # NOTE(review): that last case fits on X_lstm_m5 / y_lstm_m5 rather
        # than the *_train subsets, so the test-sample predictions may be
        # in-sample - confirm this is intended.
        X_h_train = X_lstm_m5_train[:-shift,:,:,:,:]
        if p == '3m':
            X_h_fit, y_h_fit = X_h_train, y_lstm_m5_3m_train
        elif p == '6m':
            X_h_fit, y_h_fit = X_h_train, y_lstm_m5_6m_train
        elif p == '12m':
            X_h_fit, y_h_fit = X_h_train, y_lstm_m5_12m_train
        else:
            X_h_fit, y_h_fit = X_lstm_m5, y_lstm_m5

        hist_lstm_m5 = lstm_m5.fit(X_h_fit, y_h_fit,
                                   epochs=num_epochs,
                                   validation_split = 0.1,
                                   verbose=False)

        # Positional .iloc writes replace the chained df[p][...] = ... form,
        # which is not guaranteed to modify the DataFrame itself.
        df_y_fit_lstm_m5.iloc[(-len(y_lstm_m5_train)+shift):, df_y_fit_lstm_m5.columns.get_loc(p)] = lstm_m5.predict(X_h_train)[:,0][:,0]
        df_y_hat_lstm_m5.iloc[(-len(y_lstm_m5_test)+shift):, df_y_hat_lstm_m5.columns.get_loc(p)] = lstm_m5.predict(X_lstm_m5_test[:-shift,:,:,:,:])[:,0][:,0]

        # Smallest per-epoch validation value of each tracked metric
        df_MSE.loc[str_model, p] = np.min(hist_lstm_m5.history['val_mse'])
        df_MAE.loc[str_model, p] = np.min(hist_lstm_m5.history['val_mae'])
        df_RMSE.loc[str_model, p] = np.min(hist_lstm_m5.history['val_root_mean_squared_error'])
        df_MAPE.loc[str_model, p] = np.min(hist_lstm_m5.history['val_mean_absolute_percentage_error'])
        df_CS.loc[str_model, p] = np.min(hist_lstm_m5.history['val_cosine_similarity'])

    k = k + 1

#################################
# ConvLSTM w/ past inflation    #
#################################

# Number of variables (features)
# Read from axis 4 of X_lstm_m6_train; lstm_m6_gen uses it as the last entry
# of the first ConvLSTM2D layer's input_shape.
n_features = X_lstm_m6_train.shape[4]

def lstm_m6_gen():
    lstm_m6 = Sequential()
    lstm_m6.add(ConvLSTM2D(name='ConvLSTM_Layer1',
                           filters=8, 
                           kernel_size=(1,4), 
                           activation=act_fun,
                           padding='same',
                           kernel_initializer = tf.keras.initializers.LecunNormal,
                           input_shape=(n_seq_conv, 1, n_steps_conv, n_features),
                           dropout=0.1,
                           return_sequences = True))
    lstm_m6.add(BatchNormalization(name = 'Batch_Norm_1'))
    lstm_m6.add(ConvLSTM2D(name='ConvLSTM_Layer2',
                           filters=8, 
                           kernel_size=(1,4), 
                           activation=act_fun, 
                           padding='same',
                           kernel_initializer = tf.keras.initializers.LecunNormal,
                           dropout=0.1,
                           return_sequences = True))
    lstm_m6.add(BatchNormalization(name = 'Batch_Norm_2'))
    lstm_m6.add(Flatten(name = 'Flatten_1'))
    lstm_m6.add(RepeatVector(name = 'Repeat_Vector_1', n = 1))
    lstm_m6.add(LSTM(name = 'LSTM_Layer_1', units = 50, activation=act_fun, 
                     kernel_initializer = tf.keras.initializers.LecunNormal, 
                     return_sequences=True))
    lstm_m6.add(TimeDistributed(name = 'Time_Dist_1', 
                                layer = Dense(name = 'Dense_Layer_1', units = 32, 
                                              activation = act_fun, 
                                              kernel_initializer = tf.keras.initializers.LecunNormal)))
    lstm_m6.add(TimeDistributed(name = 'Time_Dist_2', 
                                layer = Dense(name = 'Dense_Layer_2', units = 16, 
                                              activation = act_fun, 
                                              kernel_initializer = tf.keras.initializers.LecunNormal)))
    lstm_m6.add(Dense(name = 'Output', units = 1))
    lstm_m6.compile(loss='mse', optimizer='Nadam',
                    metrics = ['mse', 'mae', tf.keras.metrics.RootMeanSquaredError(), 
                               tf.keras.metrics.MeanAbsolutePercentageError(), 'cosine_similarity'])
    return lstm_m6

str_model = 'ConvLSTM_M3'

# Each results table is paired with the Keras history key it is filled from.
# NOTE(review): np.min is applied to every metric, including cosine similarity,
# so the error metrics report their best epoch while similarity reports its
# lowest one. Kept unchanged so this row stays comparable with the other
# ConvLSTM rows - confirm whether np.max was intended for df_CS.
lstm_m6_metric_tables = ((df_MSE, 'val_mse'),
                         (df_MAE, 'val_mae'),
                         (df_RMSE, 'val_root_mean_squared_error'),
                         (df_MAPE, 'val_mean_absolute_percentage_error'),
                         (df_CS, 'val_cosine_similarity'))

lstm_m6 = lstm_m6_gen()

# Baseline (p0) fit; validation_split=0.1 has Keras hold out 10% of the samples
hist_lstm_m6 = lstm_m6.fit(X_lstm_m6, y_lstm_m6,
                           epochs=num_epochs,
                           validation_split=0.1,
                           verbose=False)

# Fitted values, indexed down to one number per sample (already 1-D, so no
# further reshape is needed before plotting)
y_fit_lstm_m6 = lstm_m6.predict(X_lstm_m6_train)[:, 0, 0]
y_hat_lstm_m6 = lstm_m6.predict(X_lstm_m6_test)[:, 0, 0]

# Write into the tail of the 'p0' column with ONE positional assignment.
# The chained form df['p0'][-n:] = ... assigns through a temporary Series,
# which pandas does not guarantee to propagate back into the frame.
df_y_fit_lstm_m6.iloc[-len(y_lstm_m6_train):, df_y_fit_lstm_m6.columns.get_loc('p0')] = y_fit_lstm_m6
df_y_hat_lstm_m6.iloc[-len(y_lstm_m6_test):, df_y_hat_lstm_m6.columns.get_loc('p0')] = y_hat_lstm_m6

# Plots fitted values x observed values - training sample
sns.lineplot(data=[y_lstm_m6_train, y_fit_lstm_m6])
plt.legend(['Observed', 'Fitted'], loc='lower right')
plt.tight_layout()
plt.show()
plt.close()

# Plots fitted values x observed values - test sample
sns.lineplot(data=[y_lstm_m6_test, y_hat_lstm_m6])
plt.legend(['Observed', 'Fitted'], loc='lower right')
plt.tight_layout()
plt.show()
plt.close()

# Plot our loss
plt.plot(hist_lstm_m6.history['loss'])
plt.plot(hist_lstm_m6.history['val_loss'])
plt.title('ConvLSTM + Past Inflation - Loss (MSE)')
plt.ylabel('Loss')
plt.xlabel('Epoch')
plt.legend(['Train', 'Test'], loc='upper right')
plt.show()
plt.close()

# Saves performance metrics
for metric_table, history_key in lstm_m6_metric_tables:
    metric_table.loc[str_model, 'p0'] = np.min(hist_lstm_m6.history[history_key])

# Rolling-mean horizons have their own pre-computed training targets; every
# other period uses the raw training target moved forward by `shift`.
lstm_m6_horizon_targets = {'3m': y_lstm_m6_3m_train,
                           '6m': y_lstm_m6_6m_train,
                           '12m': y_lstm_m6_12m_train}

# Fits models for every period
for k, p in enumerate(col_names):

    if k == 0:  # p0 has already been fitted above
        continue

    shift = periods[k]
    lstm_m6 = lstm_m6_gen()

    if p in lstm_m6_horizon_targets:
        y_lstm_m6_local = lstm_m6_horizon_targets[p]
    else:
        y_lstm_m6_local = y_lstm_m6_train[shift:]

    # The last `shift` samples have no matching target and are dropped
    hist_lstm_m6 = lstm_m6.fit(X_lstm_m6_train[:-shift,:,:,:,:], y_lstm_m6_local,
                               epochs=num_epochs,
                               validation_split=0.1,
                               verbose=False)

    df_y_fit_lstm_m6.iloc[(-len(y_lstm_m6_train)+shift):, df_y_fit_lstm_m6.columns.get_loc(p)] = \
        lstm_m6.predict(X_lstm_m6_train[:-shift,:,:,:,:])[:, 0, 0]
    df_y_hat_lstm_m6.iloc[(-len(y_lstm_m6_test)+shift):, df_y_hat_lstm_m6.columns.get_loc(p)] = \
        lstm_m6.predict(X_lstm_m6_test[:-shift,:,:,:,:])[:, 0, 0]

    for metric_table, history_key in lstm_m6_metric_tables:
        metric_table.loc[str_model, p] = np.min(hist_lstm_m6.history[history_key])

In [None]:
#################################
# MLP                           #
#################################

# Model
str_model = 'MLP'

# Regressors: the *_pca frames of the eight variable groups at lags L1, L2, L3,
# L4 and L12, placed side by side; rows containing any NaN are dropped.
X_mlp_train = np.array(pd.concat([X_OI_L1_train_pca, X_LM_L1_train_pca, X_H_L1_train_pca, X_COI_L1_train_pca, 
                                  X_MC_L1_train_pca, X_INTFX_L1_train_pca, X_P_L1_train_pca, X_S_L1_train_pca, 
                                  X_OI_L2_train_pca, X_LM_L2_train_pca, X_H_L2_train_pca, X_COI_L2_train_pca, 
                                  X_MC_L2_train_pca, X_INTFX_L2_train_pca, X_P_L2_train_pca, X_S_L2_train_pca, 
                                  X_OI_L3_train_pca, X_LM_L3_train_pca, X_H_L3_train_pca, X_COI_L3_train_pca, 
                                  X_MC_L3_train_pca, X_INTFX_L3_train_pca, X_P_L3_train_pca, X_S_L3_train_pca, 
                                  X_OI_L4_train_pca, X_LM_L4_train_pca, X_H_L4_train_pca, X_COI_L4_train_pca, 
                                  X_MC_L4_train_pca, X_INTFX_L4_train_pca, X_P_L4_train_pca, X_S_L4_train_pca, 
                                  X_OI_L12_train_pca, X_LM_L12_train_pca, X_H_L12_train_pca, X_COI_L12_train_pca, 
                                  X_MC_L12_train_pca, X_INTFX_L12_train_pca, X_P_L12_train_pca, 
                                  X_S_L12_train_pca], axis = 1).dropna())
X_mlp_test = np.array(pd.concat([X_OI_L1_test_pca, X_LM_L1_test_pca, X_H_L1_test_pca, X_COI_L1_test_pca, 
                                 X_MC_L1_test_pca, X_INTFX_L1_test_pca, X_P_L1_test_pca, X_S_L1_test_pca, 
                                 X_OI_L2_test_pca, X_LM_L2_test_pca, X_H_L2_test_pca, X_COI_L2_test_pca, 
                                 X_MC_L2_test_pca, X_INTFX_L2_test_pca, X_P_L2_test_pca, X_S_L2_test_pca, 
                                 X_OI_L3_test_pca, X_LM_L3_test_pca, X_H_L3_test_pca, X_COI_L3_test_pca, 
                                 X_MC_L3_test_pca, X_INTFX_L3_test_pca, X_P_L3_test_pca, X_S_L3_test_pca, 
                                 X_OI_L4_test_pca, X_LM_L4_test_pca, X_H_L4_test_pca, X_COI_L4_test_pca, 
                                 X_MC_L4_test_pca, X_INTFX_L4_test_pca, X_P_L4_test_pca, X_S_L4_test_pca, 
                                 X_OI_L12_test_pca, X_LM_L12_test_pca, X_H_L12_test_pca, X_COI_L12_test_pca, 
                                 X_MC_L12_test_pca, X_INTFX_L12_test_pca, X_P_L12_test_pca, 
                                 X_S_L12_test_pca], axis = 1).dropna())

# Targets: first column of y as a 1-D array, plus scaled 3-, 6- and
# 12-observation rolling means (the leading incomplete windows are dropped)
y_mlp_train_3m = scale(np.array(y_train.rolling(3).mean().dropna())[:,0])
y_mlp_train_6m = scale(np.array(y_train.rolling(6).mean().dropna())[:,0])
y_mlp_train_12m = scale(np.array(y_train.rolling(12).mean().dropna())[:,0])
y_mlp_train = np.array(y_train)[:,0]

y_mlp_test_3m = scale(np.array(y_test.rolling(3).mean().dropna())[:,0])
y_mlp_test_6m = scale(np.array(y_test.rolling(6).mean().dropna())[:,0])
y_mlp_test_12m = scale(np.array(y_test.rolling(12).mean().dropna())[:,0])
y_mlp_test = np.array(y_test)[:,0]

# (train target, test target) for the rolling-mean horizons
mlp_horizon_targets = {'3m': (y_mlp_train_3m, y_mlp_test_3m),
                       '6m': (y_mlp_train_6m, y_mlp_test_6m),
                       '12m': (y_mlp_train_12m, y_mlp_test_12m)}

for k, p in enumerate(col_names):

    shift = periods[k]

    # MLP Model
    # NOTE(review): scikit-learn documents validation_fraction as used only
    # when early_stopping=True, which is not set here - confirm whether a
    # validation hold-out was intended.
    mlp_model = MLPRegressor(hidden_layer_sizes = (200, 200, 200, 200, 200, 200), activation = 'relu', solver = 'adam', 
                             batch_size = 'auto', validation_fraction = share_validation_size,
                             max_iter = 1000, random_state = rnd_state) 

    if p in mlp_horizon_targets:
        # Rolling-mean target: regressors lose their last `shift` rows
        y_train_local, y_test_local = mlp_horizon_targets[p]
        X_train_local = X_mlp_train[:-shift,:]
        X_test_local = X_mlp_test[:-shift,:]
    elif shift == 0:
        # Guarded separately because X[:-0] would be an empty slice
        X_train_local = X_mlp_train
        X_test_local = X_mlp_test
        y_train_local = y_mlp_train
        y_test_local = y_mlp_test
    else:
        # Pair row t of X with y at t + shift
        X_train_local = X_mlp_train[:-shift,:]
        X_test_local = X_mlp_test[:-shift,:]
        y_train_local = y_mlp_train[shift:]
        y_test_local = y_mlp_test[shift:]

    mlp_model_fit = mlp_model.fit(X_train_local, y_train_local)
    y_fit_local = mlp_model.predict(X_train_local)
    y_hat_local = mlp_model.predict(X_test_local)

    # Observed vs. fitted, training sample then test sample
    for observed, predicted, title in ((y_train_local, y_fit_local, 'MLP - Training Sample'),
                                       (y_test_local, y_hat_local, 'MLP - Test Sample')):
        sns.lineplot(data=pd.concat([pd.Series(np.array(observed)),
                                     pd.Series(np.array(predicted))],
                                    axis = 1))
        plt.legend(['Observed', 'Fitted'])
        plt.title(title)
        plt.tight_layout()
        plt.show()
        plt.close()  # release the figure; many are produced in this loop

    # One positional assignment into the tail of column p. The chained form
    # df[p][start:] = ... writes through a temporary Series, which pandas does
    # not guarantee to propagate back into the frame.
    df_y_fit_mlp.iloc[(-len(y_mlp_train)+shift):, df_y_fit_mlp.columns.get_loc(p)] = y_fit_local
    df_y_hat_mlp.iloc[(-len(y_mlp_test)+shift):, df_y_hat_mlp.columns.get_loc(p)] = y_hat_local

    df_MSE.loc[str_model, p] = MSE(y_test_local, y_hat_local)
    df_MAE.loc[str_model, p] = MAE(y_test_local, y_hat_local)
    df_RMSE.loc[str_model, p] = RMSE(y_test_local, y_hat_local)
    df_MAPE.loc[str_model, p] = MAPE(y_test_local, y_hat_local)
    df_CS.loc[str_model, p] = cos_sim(y_test_local, y_hat_local)
    
# Model
str_model = 'MLP2'

# Regressors: the *_pca frames of the eight variable groups at lags L1, L2, L3,
# L4 and L12, placed side by side; rows containing any NaN are dropped.
X_mlp_train = np.array(pd.concat([X_OI_L1_train_pca, X_LM_L1_train_pca, X_H_L1_train_pca, X_COI_L1_train_pca, 
                                  X_MC_L1_train_pca, X_INTFX_L1_train_pca, X_P_L1_train_pca, X_S_L1_train_pca, 
                                  X_OI_L2_train_pca, X_LM_L2_train_pca, X_H_L2_train_pca, X_COI_L2_train_pca, 
                                  X_MC_L2_train_pca, X_INTFX_L2_train_pca, X_P_L2_train_pca, X_S_L2_train_pca, 
                                  X_OI_L3_train_pca, X_LM_L3_train_pca, X_H_L3_train_pca, X_COI_L3_train_pca, 
                                  X_MC_L3_train_pca, X_INTFX_L3_train_pca, X_P_L3_train_pca, X_S_L3_train_pca, 
                                  X_OI_L4_train_pca, X_LM_L4_train_pca, X_H_L4_train_pca, X_COI_L4_train_pca, 
                                  X_MC_L4_train_pca, X_INTFX_L4_train_pca, X_P_L4_train_pca, X_S_L4_train_pca, 
                                  X_OI_L12_train_pca, X_LM_L12_train_pca, X_H_L12_train_pca, X_COI_L12_train_pca, 
                                  X_MC_L12_train_pca, X_INTFX_L12_train_pca, X_P_L12_train_pca, 
                                  X_S_L12_train_pca], axis = 1).dropna())
X_mlp_test = np.array(pd.concat([X_OI_L1_test_pca, X_LM_L1_test_pca, X_H_L1_test_pca, X_COI_L1_test_pca, 
                                 X_MC_L1_test_pca, X_INTFX_L1_test_pca, X_P_L1_test_pca, X_S_L1_test_pca, 
                                 X_OI_L2_test_pca, X_LM_L2_test_pca, X_H_L2_test_pca, X_COI_L2_test_pca, 
                                 X_MC_L2_test_pca, X_INTFX_L2_test_pca, X_P_L2_test_pca, X_S_L2_test_pca, 
                                 X_OI_L3_test_pca, X_LM_L3_test_pca, X_H_L3_test_pca, X_COI_L3_test_pca, 
                                 X_MC_L3_test_pca, X_INTFX_L3_test_pca, X_P_L3_test_pca, X_S_L3_test_pca, 
                                 X_OI_L4_test_pca, X_LM_L4_test_pca, X_H_L4_test_pca, X_COI_L4_test_pca, 
                                 X_MC_L4_test_pca, X_INTFX_L4_test_pca, X_P_L4_test_pca, X_S_L4_test_pca, 
                                 X_OI_L12_test_pca, X_LM_L12_test_pca, X_H_L12_test_pca, X_COI_L12_test_pca, 
                                 X_MC_L12_test_pca, X_INTFX_L12_test_pca, X_P_L12_test_pca, 
                                 X_S_L12_test_pca], axis = 1).dropna())

# Targets: first column of y as a 1-D array, plus scaled 3-, 6- and
# 12-observation rolling means (the leading incomplete windows are dropped)
y_mlp_train_3m = scale(np.array(y_train.rolling(3).mean().dropna())[:,0])
y_mlp_train_6m = scale(np.array(y_train.rolling(6).mean().dropna())[:,0])
y_mlp_train_12m = scale(np.array(y_train.rolling(12).mean().dropna())[:,0])
y_mlp_train = np.array(y_train)[:,0]

y_mlp_test_3m = scale(np.array(y_test.rolling(3).mean().dropna())[:,0])
y_mlp_test_6m = scale(np.array(y_test.rolling(6).mean().dropna())[:,0])
y_mlp_test_12m = scale(np.array(y_test.rolling(12).mean().dropna())[:,0])
y_mlp_test = np.array(y_test)[:,0]

# (train target, test target) for the rolling-mean horizons
mlp2_horizon_targets = {'3m': (y_mlp_train_3m, y_mlp_test_3m),
                        '6m': (y_mlp_train_6m, y_mlp_test_6m),
                        '12m': (y_mlp_train_12m, y_mlp_test_12m)}

for k, p in enumerate(col_names):

    shift = periods[k]

    # MLP Model with a single hidden layer of 200 units. `(200)` is just the
    # integer 200; the one-element tuple states the intent explicitly and
    # builds the same network.
    # NOTE(review): scikit-learn documents validation_fraction as used only
    # when early_stopping=True, which is not set here - confirm whether a
    # validation hold-out was intended.
    mlp_model = MLPRegressor(hidden_layer_sizes = (200,), activation = 'relu', solver = 'adam', 
                             batch_size = 'auto', validation_fraction = share_validation_size,
                             max_iter = 1000, random_state = rnd_state) 

    if p in mlp2_horizon_targets:
        # Rolling-mean target: regressors lose their last `shift` rows
        y_train_local, y_test_local = mlp2_horizon_targets[p]
        X_train_local = X_mlp_train[:-shift,:]
        X_test_local = X_mlp_test[:-shift,:]
    elif shift == 0:
        # Guarded separately because X[:-0] would be an empty slice
        X_train_local = X_mlp_train
        X_test_local = X_mlp_test
        y_train_local = y_mlp_train
        y_test_local = y_mlp_test
    else:
        # Pair row t of X with y at t + shift
        X_train_local = X_mlp_train[:-shift,:]
        X_test_local = X_mlp_test[:-shift,:]
        y_train_local = y_mlp_train[shift:]
        y_test_local = y_mlp_test[shift:]

    # Stored under its own name; the copy-pasted `ridge_model_fit` clobbered
    # the variable that belongs to the Ridge section.
    mlp2_model_fit = mlp_model.fit(X_train_local, y_train_local)
    y_fit_local = mlp_model.predict(X_train_local)
    y_hat_local = mlp_model.predict(X_test_local)

    # Observed vs. fitted, training sample then test sample. Titles say MLP2
    # so these figures can be told apart from the six-layer MLP ones.
    for observed, predicted, title in ((y_train_local, y_fit_local, 'MLP2 - Training Sample'),
                                       (y_test_local, y_hat_local, 'MLP2 - Test Sample')):
        sns.lineplot(data=pd.concat([pd.Series(np.array(observed)),
                                     pd.Series(np.array(predicted))],
                                    axis = 1))
        plt.legend(['Observed', 'Fitted'])
        plt.title(title)
        plt.tight_layout()
        plt.show()
        plt.close()  # release the figure; many are produced in this loop

    # One positional assignment into the tail of column p. The chained form
    # df[p][start:] = ... writes through a temporary Series, which pandas does
    # not guarantee to propagate back into the frame.
    df_y_fit_mlp2.iloc[(-len(y_mlp_train)+shift):, df_y_fit_mlp2.columns.get_loc(p)] = y_fit_local
    df_y_hat_mlp2.iloc[(-len(y_mlp_test)+shift):, df_y_hat_mlp2.columns.get_loc(p)] = y_hat_local

    df_MSE.loc[str_model, p] = MSE(y_test_local, y_hat_local)
    df_MAE.loc[str_model, p] = MAE(y_test_local, y_hat_local)
    df_RMSE.loc[str_model, p] = RMSE(y_test_local, y_hat_local)
    df_MAPE.loc[str_model, p] = MAPE(y_test_local, y_hat_local)
    df_CS.loc[str_model, p] = cos_sim(y_test_local, y_hat_local)

#################################
# Random Walk                   #
#################################

# Model
str_model = 'RW'

# The random-walk "prediction" for each row is the previous observation of y.
# Frames are kept under separate names so X_rw_* / y_rw_* are only ever arrays.
rw_train_lagged = y_train.shift(1).dropna()
rw_train_actual = y_train.loc[rw_train_lagged.index]
X_rw_train = np.array(rw_train_lagged)[:,0]
y_rw_train = np.array(rw_train_actual)[:,0]
y_rw_train_3m = scale(np.array(rw_train_actual.rolling(3).mean().dropna())[:,0])
y_rw_train_6m = scale(np.array(rw_train_actual.rolling(6).mean().dropna())[:,0])
y_rw_train_12m = scale(np.array(rw_train_actual.rolling(12).mean().dropna())[:,0])

rw_test_lagged = y_test.shift(1).dropna()
rw_test_actual = y_test.loc[rw_test_lagged.index]
X_rw_test = np.array(rw_test_lagged)[:,0]
y_rw_test = np.array(rw_test_actual)[:,0]
y_rw_test_3m = scale(np.array(rw_test_actual.rolling(3).mean().dropna())[:,0])
y_rw_test_6m = scale(np.array(rw_test_actual.rolling(6).mean().dropna())[:,0])
y_rw_test_12m = scale(np.array(rw_test_actual.rolling(12).mean().dropna())[:,0])

# (train target, test target) for the rolling-mean horizons
rw_horizon_targets = {'3m': (y_rw_train_3m, y_rw_test_3m),
                      '6m': (y_rw_train_6m, y_rw_test_6m),
                      '12m': (y_rw_train_12m, y_rw_test_12m)}

for k, p in enumerate(col_names):

    shift = periods[k]

    # Guarded separately because X[:-0] would be an empty slice
    if shift == 0:
        y_fit_rw = X_rw_train
        y_hat_rw = X_rw_test
    else:
        y_fit_rw = X_rw_train[:-shift]
        y_hat_rw = X_rw_test[:-shift]

    # Observed series the forecast is compared against: the rolling-mean target
    # for '3m'/'6m'/'12m', otherwise the raw series moved forward by `shift`.
    if p in rw_horizon_targets:
        y_obs_train, y_obs_test = rw_horizon_targets[p]
    else:
        y_obs_train = y_rw_train[shift:]
        y_obs_test = y_rw_test[shift:]

    # One positional assignment into the tail of column p. The chained form
    # df[p][start:] = ... writes through a temporary Series, which pandas does
    # not guarantee to propagate back into the frame.
    df_y_fit_rw.iloc[(-len(y_rw_train)+shift):, df_y_fit_rw.columns.get_loc(p)] = y_fit_rw
    df_y_hat_rw.iloc[(-len(y_rw_test)+shift):, df_y_hat_rw.columns.get_loc(p)] = y_hat_rw

    # Observed vs. fitted, training sample then test sample
    for observed, predicted in ((y_obs_train, y_fit_rw), (y_obs_test, y_hat_rw)):
        sns.lineplot(data = [observed, predicted])
        plt.legend(['Observed', 'Fitted'], loc='lower right')
        plt.tight_layout()
        plt.show()
        plt.close()

    df_MSE.loc[str_model, p] = MSE(y_obs_test, y_hat_rw)
    df_MAE.loc[str_model, p] = MAE(y_obs_test, y_hat_rw)
    df_RMSE.loc[str_model, p] = RMSE(y_obs_test, y_hat_rw)
    df_MAPE.loc[str_model, p] = MAPE(y_obs_test, y_hat_rw)
    df_CS.loc[str_model, p] = cos_sim(y_obs_test, y_hat_rw)

#################################
# Ridge Regression              #
#################################

# Model
str_model = 'Ridge'

# Regressors: lags 1, 2, 3, 4 and 12 side by side, incomplete rows removed
X_ridge_train = pd.concat([X_L1_train, X_L2_train, X_L3_train, X_L4_train, X_L12_train], axis = 1).dropna()
X_ridge_test = pd.concat([X_L1_test, X_L2_test, X_L3_test, X_L4_test, X_L12_test], axis = 1).dropna()

# Targets: first column of y, plus scaled 3-, 6- and 12-observation rolling means
y_ridge_train = y_train.iloc[:,0]
y_ridge_train_3m = scale(y_ridge_train.rolling(3).mean().dropna())
y_ridge_train_6m = scale(y_ridge_train.rolling(6).mean().dropna())
y_ridge_train_12m = scale(y_ridge_train.rolling(12).mean().dropna())
y_ridge_test = y_test.iloc[:,0]
y_ridge_test_3m = scale(y_ridge_test.rolling(3).mean().dropna())
y_ridge_test_6m = scale(y_ridge_test.rolling(6).mean().dropna())
y_ridge_test_12m = scale(y_ridge_test.rolling(12).mean().dropna())

# (train target, test target) for the rolling-mean horizons
ridge_horizon_targets = {'3m': (y_ridge_train_3m, y_ridge_test_3m),
                         '6m': (y_ridge_train_6m, y_ridge_test_6m),
                         '12m': (y_ridge_train_12m, y_ridge_test_12m)}

for k, p in enumerate(col_names):

    shift = periods[k]

    ridge_model = Ridge(alpha = 1.0)

    if p in ridge_horizon_targets:
        # Rolling-mean target: regressors lose their last `shift` rows
        y_train_local, y_test_local = ridge_horizon_targets[p]
        X_train_local = X_ridge_train.iloc[:-shift,:]
        X_test_local = X_ridge_test.iloc[:-shift,:]
    elif shift == 0:
        # Guarded separately because .iloc[:-0] would be an empty slice
        X_train_local = X_ridge_train
        X_test_local = X_ridge_test
        y_train_local = y_ridge_train
        y_test_local = y_ridge_test
    else:
        # Pair row t of X with y at t + shift
        X_train_local = X_ridge_train.iloc[:-shift,:]
        X_test_local = X_ridge_test.iloc[:-shift,:]
        y_train_local = y_ridge_train.iloc[shift:]
        y_test_local = y_ridge_test.iloc[shift:]

    ridge_model_fit = ridge_model.fit(X_train_local, y_train_local)
    y_fit_local = ridge_model.predict(X_train_local)
    y_hat_local = ridge_model.predict(X_test_local)

    # Observed vs. fitted, training sample then test sample
    for observed, predicted, title in ((y_train_local, y_fit_local, 'Ridge Regression - Training Sample'),
                                       (y_test_local, y_hat_local, 'Ridge Regression - Test Sample')):
        sns.lineplot(data=pd.concat([pd.Series(np.array(observed)),
                                     pd.Series(np.array(predicted))],
                                    axis = 1))
        plt.legend(['Observed', 'Fitted'])
        plt.title(title)
        plt.tight_layout()
        plt.show()
        plt.close()  # release the figure; many are produced in this loop

    # One positional assignment into the tail of column p. The chained form
    # df[p][start:] = ... writes through a temporary Series, which pandas does
    # not guarantee to propagate back into the frame.
    df_y_fit_ridge.iloc[(-len(y_ridge_train)+shift):, df_y_fit_ridge.columns.get_loc(p)] = y_fit_local
    df_y_hat_ridge.iloc[(-len(y_ridge_test)+shift):, df_y_hat_ridge.columns.get_loc(p)] = y_hat_local

    df_MSE.loc[str_model, p] = MSE(y_test_local, y_hat_local)
    df_MAE.loc[str_model, p] = MAE(y_test_local, y_hat_local)
    df_RMSE.loc[str_model, p] = RMSE(y_test_local, y_hat_local)
    df_MAPE.loc[str_model, p] = MAPE(y_test_local, y_hat_local)
    df_CS.loc[str_model, p] = cos_sim(y_test_local, y_hat_local)

#################################
# Ridge Regression with CV      #
#################################

str_model = "Ridge_CV"

# Candidate regularisation strengths handed to RidgeCV
ridge_cv_alphas = [1e-7, 1e-6, 1e-5, 1e-4, 1e-3, 1e-2, 1e-1, 1, 10, 100, 1000, 10000]

# (train target, test target) for the rolling-mean horizons; the data set is
# the one prepared for the plain Ridge model.
ridge_cv_horizon_targets = {'3m': (y_ridge_train_3m, y_ridge_test_3m),
                            '6m': (y_ridge_train_6m, y_ridge_test_6m),
                            '12m': (y_ridge_train_12m, y_ridge_test_12m)}

for k, p in enumerate(col_names):

    shift = periods[k]

    ridge_model_cv = RidgeCV(alphas=ridge_cv_alphas)

    if p in ridge_cv_horizon_targets:
        # Rolling-mean target: regressors lose their last `shift` rows
        y_train_local, y_test_local = ridge_cv_horizon_targets[p]
        X_train_local = X_ridge_train.iloc[:-shift,:]
        X_test_local = X_ridge_test.iloc[:-shift,:]
    elif shift == 0:
        # Guarded separately because .iloc[:-0] would be an empty slice
        X_train_local = X_ridge_train
        X_test_local = X_ridge_test
        y_train_local = y_ridge_train
        y_test_local = y_ridge_test
    else:
        # Pair row t of X with y at t + shift
        X_train_local = X_ridge_train.iloc[:-shift,:]
        X_test_local = X_ridge_test.iloc[:-shift,:]
        y_train_local = y_ridge_train.iloc[shift:]
        y_test_local = y_ridge_test.iloc[shift:]

    ridge_model_cv_fit = ridge_model_cv.fit(X_train_local, y_train_local)
    y_fit_local = ridge_model_cv.predict(X_train_local)
    y_hat_local = ridge_model_cv.predict(X_test_local)

    # Observed vs. fitted, training sample then test sample
    for observed, predicted, title in ((y_train_local, y_fit_local, 'Ridge CV Regression - Training Sample'),
                                       (y_test_local, y_hat_local, 'Ridge CV Regression - Test Sample')):
        sns.lineplot(data=pd.concat([pd.Series(np.array(observed)),
                                     pd.Series(np.array(predicted))],
                                    axis = 1))
        plt.legend(['Observed', 'Fitted'])
        plt.title(title)
        plt.tight_layout()
        plt.show()
        plt.close()  # release the figure; many are produced in this loop

    # One positional assignment into the tail of column p. The chained form
    # df[p][start:] = ... writes through a temporary Series, which pandas does
    # not guarantee to propagate back into the frame.
    df_y_fit_ridge_cv.iloc[(-len(y_ridge_train)+shift):, df_y_fit_ridge_cv.columns.get_loc(p)] = y_fit_local
    df_y_hat_ridge_cv.iloc[(-len(y_ridge_test)+shift):, df_y_hat_ridge_cv.columns.get_loc(p)] = y_hat_local

    df_MSE.loc[str_model, p] = MSE(y_test_local, y_hat_local)
    df_MAE.loc[str_model, p] = MAE(y_test_local, y_hat_local)
    df_RMSE.loc[str_model, p] = RMSE(y_test_local, y_hat_local)
    df_MAPE.loc[str_model, p] = MAPE(y_test_local, y_hat_local)
    df_CS.loc[str_model, p] = cos_sim(y_test_local, y_hat_local)

#################################
# Bayesian Ridge                #
#################################

str_model = 'Bayesian_Ridge'

# (train target, test target) for the rolling-mean horizons; the data set is
# the one prepared for the plain Ridge model.
bridge_horizon_targets = {'3m': (y_ridge_train_3m, y_ridge_test_3m),
                          '6m': (y_ridge_train_6m, y_ridge_test_6m),
                          '12m': (y_ridge_train_12m, y_ridge_test_12m)}

for k, p in enumerate(col_names):

    shift = periods[k]

    # NOTE(review): `n_iter` and `normalize` belong to the older scikit-learn
    # API (later releases use `max_iter` and no longer accept `normalize`).
    # Left untouched for the pinned environment - confirm the installed version.
    bridge_model = BayesianRidge(n_iter=1000, tol=1e-05, alpha_1=1e-02, alpha_2=1e-02, lambda_1=1e-02, lambda_2=1e-02, 
                             alpha_init=None, lambda_init=None, 
                             compute_score=False, fit_intercept=True, 
                             normalize=False, copy_X=True, verbose=False)

    if p in bridge_horizon_targets:
        # Rolling-mean target: regressors lose their last `shift` rows
        y_train_local, y_test_local = bridge_horizon_targets[p]
        X_train_local = X_ridge_train.iloc[:-shift,:]
        X_test_local = X_ridge_test.iloc[:-shift,:]
    elif shift == 0:
        # Guarded separately because .iloc[:-0] would be an empty slice
        X_train_local = X_ridge_train
        X_test_local = X_ridge_test
        y_train_local = y_ridge_train
        y_test_local = y_ridge_test
    else:
        # Pair row t of X with y at t + shift
        X_train_local = X_ridge_train.iloc[:-shift,:]
        X_test_local = X_ridge_test.iloc[:-shift,:]
        y_train_local = y_ridge_train.iloc[shift:]
        y_test_local = y_ridge_test.iloc[shift:]

    bridge_model_fit = bridge_model.fit(X_train_local, y_train_local)
    y_fit_local = bridge_model.predict(X_train_local)
    y_hat_local = bridge_model.predict(X_test_local)

    # Observed vs. fitted, training sample then test sample
    for observed, predicted, title in ((y_train_local, y_fit_local, 'Bayesian Ridge - Training Sample'),
                                       (y_test_local, y_hat_local, 'Bayesian Ridge - Test Sample')):
        sns.lineplot(data=pd.concat([pd.Series(np.array(observed)),
                                     pd.Series(np.array(predicted))],
                                    axis = 1))
        plt.legend(['Observed', 'Fitted'])
        plt.title(title)
        plt.tight_layout()
        plt.show()
        plt.close()  # release the figure; many are produced in this loop

    # One positional assignment into the tail of column p. The chained form
    # df[p][start:] = ... writes through a temporary Series, which pandas does
    # not guarantee to propagate back into the frame.
    df_y_fit_bridge.iloc[(-len(y_ridge_train)+shift):, df_y_fit_bridge.columns.get_loc(p)] = y_fit_local
    df_y_hat_bridge.iloc[(-len(y_ridge_test)+shift):, df_y_hat_bridge.columns.get_loc(p)] = y_hat_local

    df_MSE.loc[str_model, p] = MSE(y_test_local, y_hat_local)
    df_MAE.loc[str_model, p] = MAE(y_test_local, y_hat_local)
    df_RMSE.loc[str_model, p] = RMSE(y_test_local, y_hat_local)
    df_MAPE.loc[str_model, p] = MAPE(y_test_local, y_hat_local)
    df_CS.loc[str_model, p] = cos_sim(y_test_local, y_hat_local)

#################################
# LASSO                         #
#################################

str_model = 'Lasso'

# Regressors: lags 1 through 12 side by side, incomplete rows removed
X_lasso_train = pd.concat(
    [X_L1_train, X_L2_train, X_L3_train, X_L4_train, X_L5_train, X_L6_train,
     X_L7_train, X_L8_train, X_L9_train, X_L10_train, X_L11_train, X_L12_train],
    axis = 1,
).dropna()
X_lasso_test = pd.concat(
    [X_L1_test, X_L2_test, X_L3_test, X_L4_test, X_L5_test, X_L6_test,
     X_L7_test, X_L8_test, X_L9_test, X_L10_test, X_L11_test, X_L12_test],
    axis = 1,
).dropna()

# Targets: first column of y for each sample
y_lasso_train = y_train.iloc[:,0]
y_lasso_test = y_test.iloc[:,0]

# Scaled rolling means of the training target over 3, 6 and 12 observations
y_lasso_train_3m = scale(y_lasso_train.rolling(window=3).mean().dropna())
y_lasso_train_6m = scale(y_lasso_train.rolling(window=6).mean().dropna())
y_lasso_train_12m = scale(y_lasso_train.rolling(window=12).mean().dropna())

# Same for the test target
y_lasso_test_3m = scale(y_lasso_test.rolling(window=3).mean().dropna())
y_lasso_test_6m = scale(y_lasso_test.rolling(window=6).mean().dropna())
y_lasso_test_12m = scale(y_lasso_test.rolling(window=12).mean().dropna())

k = 0
for p in col_names:
    
    shift = periods[k]

    lasso_model = Lasso(alpha = 1e-3)
    
    if p == '3m':
        y_train_local = y_lasso_train_3m
        y_test_local = y_lasso_test_3m
        X_train_local = X_lasso_train.iloc[:-shift,:]
        X_test_local = X_lasso_test.iloc[:-shift,:]
        
    elif p == '6m':
        y_train_local = y_lasso_train_6m
        y_test_local = y_lasso_test_6m
        X_train_local = X_lasso_train.iloc[:-shift,:]
        X_test_local = X_lasso_test.iloc[:-shift,:]

    elif p == '12m':
        y_train_local = y_lasso_train_12m
        y_test_local = y_lasso_test_12m
        X_train_local = X_lasso_train.iloc[:-shift,:]
        X_test_local = X_lasso_test.iloc[:-shift,:]
    
    else:
        if shift == 0:
            X_train_local = X_lasso_train
            X_test_local = X_lasso_test
            y_train_local = y_lasso_train
            y_test_local = y_lasso_test
        else:
            X_train_local = X_lasso_train.iloc[:-shift,:]
            X_test_local = X_lasso_test.iloc[:-shift,:]
            y_train_local = y_lasso_train.iloc[shift:]
            y_test_local = y_lasso_test.iloc[shift:]
    
    lasso_model_fit = lasso_model.fit(X_train_local, y_train_local)
    y_fit_local = lasso_model.predict(X_train_local)
    y_hat_local = lasso_model.predict(X_test_local)    
    
    sns.lineplot(data=pd.concat([pd.Series(np.array(y_train_local)),
                                 pd.Series(np.array(y_fit_local))], 
                                 axis = 1))
    plt.legend(['Observed', 'Fitted'])
    plt.title('LASSO Regression - Training Sample')
    plt.tight_layout()
    plt.show()
    
    sns.lineplot(data=pd.concat([pd.Series(np.array(y_test_local)),
                                 pd.Series(np.array(y_hat_local))], 
                                 axis = 1))
    plt.legend(['Observed', 'Fitted'])
    plt.title('LASSO Regression - Test Sample')
    plt.tight_layout()
    plt.show()
    
    df_y_fit_lasso[p][(-len(y_lasso_train)+shift):] = y_fit_local
    df_y_hat_lasso[p][(-len(y_lasso_test)+shift):] = y_hat_local
        
    df_MSE.loc[str_model, p] = MSE(y_test_local, y_hat_local)
    df_MAE.loc[str_model, p] = MAE(y_test_local, y_hat_local)
    df_RMSE.loc[str_model, p] = RMSE(y_test_local, y_hat_local)
    df_MAPE.loc[str_model, p] = MAPE(y_test_local, y_hat_local)
    df_CS.loc[str_model, p] = cos_sim(y_test_local, y_hat_local)
    
    k = k + 1

#################################
# LASSO with CV                 #
#################################
# Lasso whose penalty is chosen by LassoCV from a fixed log-spaced grid, fitted once per
# horizon label in `col_names`. Reuses the X_lasso_* / y_lasso_* data built for the plain Lasso.

str_model = 'Lasso_CV'

# Rolling-mean targets keyed by horizon label. They are used as-is; only X is trimmed for them.
lasso_cv_rolling_targets = {'3m': (y_lasso_train_3m, y_lasso_test_3m),
                            '6m': (y_lasso_train_6m, y_lasso_test_6m),
                            '12m': (y_lasso_train_12m, y_lasso_test_12m)}

for k, p in enumerate(col_names):

    # Number of trailing X rows dropped (and, for the raw target, leading y rows dropped).
    shift = periods[k]

    lasso_cv_model = LassoCV(alphas=[1e-7, 1e-6, 1e-5, 1e-4, 1e-3, 1e-2, 1e-1, 1, 10, 100, 1000, 10000])

    if p in lasso_cv_rolling_targets:
        y_train_local, y_test_local = lasso_cv_rolling_targets[p]
        X_train_local = X_lasso_train.iloc[:-shift,:]
        X_test_local = X_lasso_test.iloc[:-shift,:]
    elif shift == 0:
        # `.iloc[:-0]` would select nothing, so the unshifted case uses the full sample.
        X_train_local = X_lasso_train
        X_test_local = X_lasso_test
        y_train_local = y_lasso_train
        y_test_local = y_lasso_test
    else:
        X_train_local = X_lasso_train.iloc[:-shift,:]
        X_test_local = X_lasso_test.iloc[:-shift,:]
        y_train_local = y_lasso_train.iloc[shift:]
        y_test_local = y_lasso_test.iloc[shift:]

    lasso_cv_model_fit = lasso_cv_model.fit(X_train_local, y_train_local)
    y_fit_local = lasso_cv_model.predict(X_train_local)
    y_hat_local = lasso_cv_model.predict(X_test_local)

    # One observed-vs-fitted figure per sample.
    for sample_label, y_observed, y_model in (('Training', y_train_local, y_fit_local),
                                              ('Test', y_test_local, y_hat_local)):
        sns.lineplot(data=pd.concat([pd.Series(np.array(y_observed)),
                                     pd.Series(np.array(y_model))],
                                     axis = 1))
        plt.legend(['Observed', 'Fitted'])
        plt.title('LASSO CV Regression - ' + sample_label + ' Sample')
        plt.tight_layout()
        plt.show()

    # Write into the trailing rows of column `p` with a single `.iloc` call. Chained
    # `df[p][rows] = ...` assignment is not guaranteed to modify the frame (and never
    # does under pandas Copy-on-Write). Assumes the storage objects are DataFrames.
    df_y_fit_lasso_cv.iloc[(-len(y_lasso_train)+shift):, df_y_fit_lasso_cv.columns.get_loc(p)] = y_fit_local
    df_y_hat_lasso_cv.iloc[(-len(y_lasso_test)+shift):, df_y_hat_lasso_cv.columns.get_loc(p)] = y_hat_local

    # Test-sample accuracy for this model / horizon.
    df_MSE.loc[str_model, p] = MSE(y_test_local, y_hat_local)
    df_MAE.loc[str_model, p] = MAE(y_test_local, y_hat_local)
    df_RMSE.loc[str_model, p] = RMSE(y_test_local, y_hat_local)
    df_MAPE.loc[str_model, p] = MAPE(y_test_local, y_hat_local)
    df_CS.loc[str_model, p] = cos_sim(y_test_local, y_hat_local)

#################################
# Bayesian Lasso                #
#################################
# Bayesian Lasso via R's `blasso` (package monomvn, called through rpy2), fitted once per
# horizon label in `col_names` on the first-lag PCA blocks.

str_model = 'Bayesian_Lasso'
importr('monomvn')

# Regressors: the eight first-lag PCA blocks side by side; rows with any missing value are dropped.
X_blasso_train = pd.concat([X_OI_L1_train_pca, X_LM_L1_train_pca, X_H_L1_train_pca, X_COI_L1_train_pca, 
                                X_MC_L1_train_pca, X_INTFX_L1_train_pca, X_P_L1_train_pca, 
                                X_S_L1_train_pca], axis = 1).dropna()
# Targets: first column of y_train plus its 3/6/12-observation rolling means passed through `scale`.
y_blasso_train = y_train.iloc[:,0]
y_blasso_train_3m = scale(y_blasso_train.rolling(3).mean().dropna())
y_blasso_train_6m = scale(y_blasso_train.rolling(6).mean().dropna())
y_blasso_train_12m = scale(y_blasso_train.rolling(12).mean().dropna())
X_blasso_test = pd.concat([X_OI_L1_test_pca, X_LM_L1_test_pca, X_H_L1_test_pca, X_COI_L1_test_pca, 
                                X_MC_L1_test_pca, X_INTFX_L1_test_pca, X_P_L1_test_pca, 
                                X_S_L1_test_pca], axis = 1).dropna()
y_blasso_test = y_test.iloc[:,0]
y_blasso_test_3m = scale(y_blasso_test.rolling(3).mean().dropna())
y_blasso_test_6m = scale(y_blasso_test.rolling(6).mean().dropna())
y_blasso_test_12m = scale(y_blasso_test.rolling(12).mean().dropna())

# Rolling-mean targets keyed by horizon label. They are used as-is; only X is trimmed for them.
blasso_rolling_targets = {'3m': (y_blasso_train_3m, y_blasso_test_3m),
                          '6m': (y_blasso_train_6m, y_blasso_test_6m),
                          '12m': (y_blasso_train_12m, y_blasso_test_12m)}

for k, p in enumerate(col_names):

    # Number of trailing X rows dropped (and, for the raw target, leading y rows dropped).
    shift = periods[k]

    if p in blasso_rolling_targets:
        y_train_local, y_test_local = blasso_rolling_targets[p]
        X_train_local = X_blasso_train.iloc[:-shift,:]
        X_test_local = X_blasso_test.iloc[:-shift,:]
    elif shift == 0:
        # `.iloc[:-0]` would select nothing, so the unshifted case uses the full sample.
        X_train_local = X_blasso_train
        X_test_local = X_blasso_test
        y_train_local = y_blasso_train
        y_test_local = y_blasso_test
    else:
        X_train_local = X_blasso_train.iloc[:-shift,:]
        X_test_local = X_blasso_test.iloc[:-shift,:]
        y_train_local = y_blasso_train.iloc[shift:]
        y_test_local = y_blasso_test.iloc[shift:]

    blasso_model_fit = r['blasso'](X = X_train_local, y = y_train_local, T = 400)
    # Column-wise average of the 'beta' element gives one coefficient per column.
    # The axis is explicit because np.mean(DataFrame) reduces to a single scalar on pandas >= 2.
    # NOTE(review): presumably rows are MCMC draws and columns are regressors - verify.
    betas = pd.DataFrame(blasso_model_fit.rx2('beta')).mean(axis = 0)
    # NOTE(review): predictions use the averaged slopes only; no intercept term is added - confirm intended.
    y_fit_local = np.dot(betas, X_train_local.T)
    y_hat_local = np.dot(betas, X_test_local.T)

    # One observed-vs-fitted figure per sample.
    for sample_label, y_observed, y_model in (('Training', y_train_local, y_fit_local),
                                              ('Test', y_test_local, y_hat_local)):
        sns.lineplot(data=pd.concat([pd.Series(np.array(y_observed)),
                                     pd.Series(np.array(y_model))],
                                     axis = 1))
        plt.legend(['Observed', 'Fitted'])
        plt.title('Bayesian LASSO - ' + sample_label + ' Sample')
        plt.tight_layout()
        plt.show()

    # Write into the trailing rows of column `p` with a single `.iloc` call. Chained
    # `df[p][rows] = ...` assignment is not guaranteed to modify the frame (and never
    # does under pandas Copy-on-Write). Assumes the storage objects are DataFrames.
    df_y_fit_blasso.iloc[(-len(y_blasso_train)+shift):, df_y_fit_blasso.columns.get_loc(p)] = y_fit_local
    df_y_hat_blasso.iloc[(-len(y_blasso_test)+shift):, df_y_hat_blasso.columns.get_loc(p)] = y_hat_local

    # Test-sample accuracy for this model / horizon.
    df_MSE.loc[str_model, p] = MSE(y_test_local, y_hat_local)
    df_MAE.loc[str_model, p] = MAE(y_test_local, y_hat_local)
    df_RMSE.loc[str_model, p] = RMSE(y_test_local, y_hat_local)
    df_MAPE.loc[str_model, p] = MAPE(y_test_local, y_hat_local)
    df_CS.loc[str_model, p] = cos_sim(y_test_local, y_hat_local)
    
#################################
# Elastic Net with CV           #
#################################
# Elastic net via R's `glmnet` (called through rpy2), fitted once per horizon label in
# `col_names`. Reuses the X_lasso_* / y_lasso_* data built for the plain Lasso.

str_model = 'ENet'

# Note that a good choice of grid for the mixing parameter (`alpha` in glmnet) is often to put
# more values close to 1 (i.e. Lasso) and fewer close to 0 (i.e. Ridge)

importr('glmnet')

# Rolling-mean targets keyed by horizon label. They are used as-is; only X is trimmed for them.
enet_rolling_targets = {'3m': (y_lasso_train_3m, y_lasso_test_3m),
                        '6m': (y_lasso_train_6m, y_lasso_test_6m),
                        '12m': (y_lasso_train_12m, y_lasso_test_12m)}

for k, p in enumerate(col_names):

    # Number of trailing X rows dropped (and, for the raw target, leading y rows dropped).
    shift = periods[k]

    if p in enet_rolling_targets:
        y_train_local, y_test_local = enet_rolling_targets[p]
        X_train_local = X_lasso_train.iloc[:-shift,:]
        X_test_local = X_lasso_test.iloc[:-shift,:]
    elif shift == 0:
        # `.iloc[:-0]` would select nothing, so the unshifted case uses the full sample.
        X_train_local = X_lasso_train
        X_test_local = X_lasso_test
        y_train_local = y_lasso_train
        y_test_local = y_lasso_test
    else:
        X_train_local = X_lasso_train.iloc[:-shift,:]
        X_test_local = X_lasso_test.iloc[:-shift,:]
        y_train_local = y_lasso_train.iloc[shift:]
        y_test_local = y_lasso_test.iloc[shift:]

    # Grid search over the mixing parameter `alpha` and, for each fit, over every column of
    # the prediction matrix (presumably one column per lambda on the fitted path).
    # NOTE(review): the winner is the (alpha, column) pair with the lowest MSE on the TEST
    # sample, so the metrics stored below are optimistically biased and no cross-validation
    # is actually performed. Consider selecting on the training sample (e.g. cv.glmnet).
    mse = np.inf
    enet_model_fit = None  # reset per horizon so a failed search cannot reuse the previous horizon's fit
    for alpha in [.01, .1, .3, .5, .7, .9, .95, .99]:
        enet_model_fit_aux = r['glmnet'](np.array(X_train_local), np.array(y_train_local), alpha = alpha, nlambda = 100)
        mse_aux = np.inf
        y_hat_local = r['predict'](enet_model_fit_aux, newx = np.array(X_test_local))
        for j in range(0, y_hat_local.shape[1]):
            mse_temp = MSE(y_hat_local[:,j], y_test_local)
            if mse_temp < mse_aux:
                j_aux = j
                mse_aux = mse_temp
        if mse_aux < mse:
            mse = mse_aux
            alpha_min = alpha
            j_min = j_aux
            enet_model_fit = enet_model_fit_aux
    if enet_model_fit is None:
        raise ValueError('ENet: no (alpha, lambda) pair produced a finite test MSE for horizon ' + str(p))
    y_fit_local = r['predict'](enet_model_fit, newx = np.array(X_train_local))[:,j_min]
    y_hat_local = r['predict'](enet_model_fit, newx = np.array(X_test_local))[:,j_min]

    # One observed-vs-fitted figure per sample.
    for sample_label, y_observed, y_model in (('Training', y_train_local, y_fit_local),
                                              ('Test', y_test_local, y_hat_local)):
        sns.lineplot(data=pd.concat([pd.Series(np.array(y_observed)),
                                     pd.Series(np.array(y_model))],
                                     axis = 1))
        plt.legend(['Observed', 'Fitted'])
        plt.title('ENet CV Regression - ' + sample_label + ' Sample')
        plt.tight_layout()
        plt.show()

    # Write into the trailing rows of column `p` with a single `.iloc` call. Chained
    # `df[p][rows] = ...` assignment is not guaranteed to modify the frame (and never
    # does under pandas Copy-on-Write). Assumes the storage objects are DataFrames.
    df_y_fit_enet.iloc[(-len(y_lasso_train)+shift):, df_y_fit_enet.columns.get_loc(p)] = y_fit_local
    df_y_hat_enet.iloc[(-len(y_lasso_test)+shift):, df_y_hat_enet.columns.get_loc(p)] = y_hat_local

    # Test-sample accuracy for this model / horizon.
    df_MSE.loc[str_model, p] = MSE(y_test_local, y_hat_local)
    df_MAE.loc[str_model, p] = MAE(y_test_local, y_hat_local)
    df_RMSE.loc[str_model, p] = RMSE(y_test_local, y_hat_local)
    df_MAPE.loc[str_model, p] = MAPE(y_test_local, y_hat_local)
    df_CS.loc[str_model, p] = cos_sim(y_test_local, y_hat_local)

#################################
# Support Vector Regression     #
#################################
# RBF-kernel SVR behind a StandardScaler, fitted once per horizon label in `col_names`.
# Reuses the X_lasso_* / y_lasso_* data built for the plain Lasso.

# C is the regularization parameter. 
# The strength of the regularization is inversely proportional to C.
# Must be strictly positive. The penalty is a squared l2 penalty.

# Epsilon: Epsilon in the epsilon-SVR model. It specifies the epsilon-tube within which
# no penalty is associated in the training loss function with points predicted within 
# a distance epsilon from the actual value.

# Kernels: it seems that 'rbf' and 'linear' are the most appropriate for our data

str_model = 'SVR'

# Rolling-mean targets keyed by horizon label. They are used as-is; only X is trimmed for them.
svr_rolling_targets = {'3m': (y_lasso_train_3m, y_lasso_test_3m),
                       '6m': (y_lasso_train_6m, y_lasso_test_6m),
                       '12m': (y_lasso_train_12m, y_lasso_test_12m)}

for k, p in enumerate(col_names):

    # Number of trailing X rows dropped (and, for the raw target, leading y rows dropped).
    shift = periods[k]

    svr_model = make_pipeline(StandardScaler(), SVR(C=1, epsilon=0.1, gamma = 'scale', kernel = 'rbf'))

    if p in svr_rolling_targets:
        y_train_local, y_test_local = svr_rolling_targets[p]
        X_train_local = X_lasso_train.iloc[:-shift,:]
        X_test_local = X_lasso_test.iloc[:-shift,:]
    elif shift == 0:
        # `.iloc[:-0]` would select nothing, so the unshifted case uses the full sample.
        X_train_local = X_lasso_train
        X_test_local = X_lasso_test
        y_train_local = y_lasso_train
        y_test_local = y_lasso_test
    else:
        X_train_local = X_lasso_train.iloc[:-shift,:]
        X_test_local = X_lasso_test.iloc[:-shift,:]
        y_train_local = y_lasso_train.iloc[shift:]
        y_test_local = y_lasso_test.iloc[shift:]

    svr_model_fit = svr_model.fit(X_train_local, y_train_local)
    y_fit_local = svr_model.predict(X_train_local)
    y_hat_local = svr_model.predict(X_test_local)

    # One observed-vs-fitted figure per sample.
    for sample_label, y_observed, y_model in (('Training', y_train_local, y_fit_local),
                                              ('Test', y_test_local, y_hat_local)):
        sns.lineplot(data=pd.concat([pd.Series(np.array(y_observed)),
                                     pd.Series(np.array(y_model))],
                                     axis = 1))
        plt.legend(['Observed', 'Fitted'])
        plt.title('SVR Regression - ' + sample_label + ' Sample')
        plt.tight_layout()
        plt.show()

    # Write into the trailing rows of column `p` with a single `.iloc` call. Chained
    # `df[p][rows] = ...` assignment is not guaranteed to modify the frame (and never
    # does under pandas Copy-on-Write). Assumes the storage objects are DataFrames.
    df_y_fit_svr.iloc[(-len(y_lasso_train)+shift):, df_y_fit_svr.columns.get_loc(p)] = y_fit_local
    df_y_hat_svr.iloc[(-len(y_lasso_test)+shift):, df_y_hat_svr.columns.get_loc(p)] = y_hat_local

    # Test-sample accuracy for this model / horizon.
    df_MSE.loc[str_model, p] = MSE(y_test_local, y_hat_local)
    df_MAE.loc[str_model, p] = MAE(y_test_local, y_hat_local)
    df_RMSE.loc[str_model, p] = RMSE(y_test_local, y_hat_local)
    df_MAPE.loc[str_model, p] = MAPE(y_test_local, y_hat_local)
    df_CS.loc[str_model, p] = cos_sim(y_test_local, y_hat_local)
    
#################################
# Random Forest                 #
#################################
# Random forest regressor (seeded with `rnd_state`), fitted once per horizon label in
# `col_names` on the PCA blocks for lags 1, 2, 3, 4 and 12.

str_model = 'Random_Forest'

# Data
# Regressors: the PCA blocks of each lag side by side; rows with any missing value are dropped.
X_rf_train = pd.concat([X_OI_L1_train_pca, X_LM_L1_train_pca, X_H_L1_train_pca, X_COI_L1_train_pca, 
                                X_MC_L1_train_pca, X_INTFX_L1_train_pca, X_P_L1_train_pca, X_S_L1_train_pca, 
                                X_OI_L2_train_pca, X_LM_L2_train_pca, X_H_L2_train_pca, X_COI_L2_train_pca, 
                                X_MC_L2_train_pca, X_INTFX_L2_train_pca, X_P_L2_train_pca, X_S_L2_train_pca, 
                                X_OI_L3_train_pca, X_LM_L3_train_pca, X_H_L3_train_pca, X_COI_L3_train_pca, 
                                X_MC_L3_train_pca, X_INTFX_L3_train_pca, X_P_L3_train_pca, X_S_L3_train_pca, 
                                X_OI_L4_train_pca, X_LM_L4_train_pca, X_H_L4_train_pca, X_COI_L4_train_pca, 
                                X_MC_L4_train_pca, X_INTFX_L4_train_pca, X_P_L4_train_pca, X_S_L4_train_pca, 
                                X_OI_L12_train_pca, X_LM_L12_train_pca, X_H_L12_train_pca, X_COI_L12_train_pca, 
                                X_MC_L12_train_pca, X_INTFX_L12_train_pca, 
                                X_P_L12_train_pca, X_S_L12_train_pca], axis = 1)
X_rf_train = X_rf_train.dropna()
# Targets: first column of y_train plus its 3/6/12-observation rolling means passed through `scale`.
y_rf_train = y_train.iloc[:,0]
y_rf_train_3m = scale(y_rf_train.rolling(3).mean().dropna())
y_rf_train_6m = scale(y_rf_train.rolling(6).mean().dropna())
y_rf_train_12m = scale(y_rf_train.rolling(12).mean().dropna())

X_rf_test = pd.concat([X_OI_L1_test_pca, X_LM_L1_test_pca, X_H_L1_test_pca, X_COI_L1_test_pca, 
                                X_MC_L1_test_pca, X_INTFX_L1_test_pca, X_P_L1_test_pca, X_S_L1_test_pca, 
                                X_OI_L2_test_pca, X_LM_L2_test_pca, X_H_L2_test_pca, X_COI_L2_test_pca, 
                                X_MC_L2_test_pca, X_INTFX_L2_test_pca, X_P_L2_test_pca, X_S_L2_test_pca, 
                                X_OI_L3_test_pca, X_LM_L3_test_pca, X_H_L3_test_pca, X_COI_L3_test_pca, 
                                X_MC_L3_test_pca, X_INTFX_L3_test_pca, X_P_L3_test_pca, X_S_L3_test_pca, 
                                X_OI_L4_test_pca, X_LM_L4_test_pca, X_H_L4_test_pca, X_COI_L4_test_pca, 
                                X_MC_L4_test_pca, X_INTFX_L4_test_pca, X_P_L4_test_pca, X_S_L4_test_pca, 
                                X_OI_L12_test_pca, X_LM_L12_test_pca, X_H_L12_test_pca, X_COI_L12_test_pca, 
                                X_MC_L12_test_pca, X_INTFX_L12_test_pca, X_P_L12_test_pca, X_S_L12_test_pca], axis = 1)
X_rf_test = X_rf_test.dropna()
y_rf_test = y_test.iloc[:,0]
y_rf_test_3m = scale(y_rf_test.rolling(3).mean().dropna())
y_rf_test_6m = scale(y_rf_test.rolling(6).mean().dropna())
y_rf_test_12m = scale(y_rf_test.rolling(12).mean().dropna())

# Rolling-mean targets keyed by horizon label. They are used as-is; only X is trimmed for them.
# NOTE(review): presumably periods[k] equals window - 1 for these labels so lengths line up - verify.
rf_rolling_targets = {'3m': (y_rf_train_3m, y_rf_test_3m),
                      '6m': (y_rf_train_6m, y_rf_test_6m),
                      '12m': (y_rf_train_12m, y_rf_test_12m)}

for k, p in enumerate(col_names):

    # Number of trailing X rows dropped (and, for the raw target, leading y rows dropped).
    shift = periods[k]

    rf_model = RandomForestRegressor(n_estimators = 20, 
                           max_depth = None,
                           min_samples_split = 4,
                           random_state = rnd_state)

    if p in rf_rolling_targets:
        y_train_local, y_test_local = rf_rolling_targets[p]
        X_train_local = X_rf_train.iloc[:-shift,:]
        X_test_local = X_rf_test.iloc[:-shift,:]
    elif shift == 0:
        # `.iloc[:-0]` would select nothing, so the unshifted case uses the full sample.
        X_train_local = X_rf_train
        X_test_local = X_rf_test
        y_train_local = y_rf_train
        y_test_local = y_rf_test
    else:
        X_train_local = X_rf_train.iloc[:-shift,:]
        X_test_local = X_rf_test.iloc[:-shift,:]
        y_train_local = y_rf_train.iloc[shift:]
        y_test_local = y_rf_test.iloc[shift:]

    rf_model_fit = rf_model.fit(X_train_local, y_train_local)
    y_fit_local = rf_model.predict(X_train_local)
    y_hat_local = rf_model.predict(X_test_local)

    # One observed-vs-fitted figure per sample.
    for sample_label, y_observed, y_model in (('Training', y_train_local, y_fit_local),
                                              ('Test', y_test_local, y_hat_local)):
        sns.lineplot(data=pd.concat([pd.Series(np.array(y_observed)),
                                     pd.Series(np.array(y_model))],
                                     axis = 1))
        plt.legend(['Observed', 'Fitted'])
        plt.title('RF Regression - ' + sample_label + ' Sample')
        plt.tight_layout()
        plt.show()

    # Write into the trailing rows of column `p` with a single `.iloc` call. Chained
    # `df[p][rows] = ...` assignment is not guaranteed to modify the frame (and never
    # does under pandas Copy-on-Write). Assumes the storage objects are DataFrames.
    df_y_fit_rf.iloc[(-len(y_rf_train)+shift):, df_y_fit_rf.columns.get_loc(p)] = y_fit_local
    df_y_hat_rf.iloc[(-len(y_rf_test)+shift):, df_y_hat_rf.columns.get_loc(p)] = y_hat_local

    # Test-sample accuracy for this model / horizon.
    df_MSE.loc[str_model, p] = MSE(y_test_local, y_hat_local)
    df_MAE.loc[str_model, p] = MAE(y_test_local, y_hat_local)
    df_RMSE.loc[str_model, p] = RMSE(y_test_local, y_hat_local)
    df_MAPE.loc[str_model, p] = MAPE(y_test_local, y_hat_local)
    df_CS.loc[str_model, p] = cos_sim(y_test_local, y_hat_local)
    
#################################
# kNN                           #
#################################
# 25-nearest-neighbours regressor, fitted once per horizon label in `col_names`.
# Reuses the X_rf_* / y_rf_* data built for the Random Forest.

str_model = 'kNN'

# Rolling-mean targets keyed by horizon label. They are used as-is; only X is trimmed for them.
knn_rolling_targets = {'3m': (y_rf_train_3m, y_rf_test_3m),
                       '6m': (y_rf_train_6m, y_rf_test_6m),
                       '12m': (y_rf_train_12m, y_rf_test_12m)}

for k, p in enumerate(col_names):

    # Number of trailing X rows dropped (and, for the raw target, leading y rows dropped).
    shift = periods[k]

    knn_model = KNeighborsRegressor(n_neighbors=25)

    if p in knn_rolling_targets:
        y_train_local, y_test_local = knn_rolling_targets[p]
        X_train_local = X_rf_train.iloc[:-shift,:]
        X_test_local = X_rf_test.iloc[:-shift,:]
    elif shift == 0:
        # `.iloc[:-0]` would select nothing, so the unshifted case uses the full sample.
        X_train_local = X_rf_train
        X_test_local = X_rf_test
        y_train_local = y_rf_train
        y_test_local = y_rf_test
    else:
        X_train_local = X_rf_train.iloc[:-shift,:]
        X_test_local = X_rf_test.iloc[:-shift,:]
        y_train_local = y_rf_train.iloc[shift:]
        y_test_local = y_rf_test.iloc[shift:]

    knn_model_fit = knn_model.fit(X_train_local, y_train_local)
    y_fit_local = knn_model.predict(X_train_local)
    y_hat_local = knn_model.predict(X_test_local)

    # One observed-vs-fitted figure per sample. Titles name this model: the figures were
    # previously labelled 'RF Regression', making them indistinguishable from the forest's.
    for sample_label, y_observed, y_model in (('Training', y_train_local, y_fit_local),
                                              ('Test', y_test_local, y_hat_local)):
        sns.lineplot(data=pd.concat([pd.Series(np.array(y_observed)),
                                     pd.Series(np.array(y_model))],
                                     axis = 1))
        plt.legend(['Observed', 'Fitted'])
        plt.title('kNN Regression - ' + sample_label + ' Sample')
        plt.tight_layout()
        plt.show()

    # Write into the trailing rows of column `p` with a single `.iloc` call. Chained
    # `df[p][rows] = ...` assignment is not guaranteed to modify the frame (and never
    # does under pandas Copy-on-Write). Assumes the storage objects are DataFrames.
    df_y_fit_knn.iloc[(-len(y_rf_train)+shift):, df_y_fit_knn.columns.get_loc(p)] = y_fit_local
    df_y_hat_knn.iloc[(-len(y_rf_test)+shift):, df_y_hat_knn.columns.get_loc(p)] = y_hat_local

    # Test-sample accuracy for this model / horizon.
    df_MSE.loc[str_model, p] = MSE(y_test_local, y_hat_local)
    df_MAE.loc[str_model, p] = MAE(y_test_local, y_hat_local)
    df_RMSE.loc[str_model, p] = RMSE(y_test_local, y_hat_local)
    df_MAPE.loc[str_model, p] = MAPE(y_test_local, y_hat_local)
    df_CS.loc[str_model, p] = cos_sim(y_test_local, y_hat_local)
    
#################################
# BART                          #
#################################
# Bayesian additive regression trees via R's `bart` (package BayesTree, called through
# rpy2), fitted once per horizon label in `col_names`.
# Reuses the X_rf_* / y_rf_* data built for the Random Forest.

str_model = 'BART'

# See also 'BART' package!
importr('BayesTree')

# Rolling-mean targets keyed by horizon label, wrapped in pd.Series before being handed to R.
# They are used as-is; only X is trimmed for them.
bart_rolling_targets = {'3m': (pd.Series(y_rf_train_3m), pd.Series(y_rf_test_3m)),
                        '6m': (pd.Series(y_rf_train_6m), pd.Series(y_rf_test_6m)),
                        '12m': (pd.Series(y_rf_train_12m), pd.Series(y_rf_test_12m))}

for k, p in enumerate(col_names):

    # Number of trailing X rows dropped (and, for the raw target, leading y rows dropped).
    shift = periods[k]

    # Progress marker: prints the horizon label before each fit.
    print(p)

    if p in bart_rolling_targets:
        y_train_local, y_test_local = bart_rolling_targets[p]
        X_train_local = X_rf_train.iloc[:-shift,:]
        X_test_local = X_rf_test.iloc[:-shift,:]
    elif shift == 0:
        # `.iloc[:-0]` would select nothing, so the unshifted case uses the full sample.
        X_train_local = X_rf_train
        X_test_local = X_rf_test
        y_train_local = y_rf_train
        y_test_local = y_rf_test
    else:
        X_train_local = X_rf_train.iloc[:-shift,:]
        X_test_local = X_rf_test.iloc[:-shift,:]
        y_train_local = y_rf_train.iloc[shift:]
        y_test_local = y_rf_test.iloc[shift:]

    # NOTE(review): no R seed is set in this section, so repeated runs presumably differ - confirm.
    rb = r['bart'](X_train_local, y_train_local, X_test_local)

    # Extract the train / test prediction elements by name (call form of rx2, as used for blasso).
    y_fit_local = rb.rx2('yhat.train.mean')
    y_hat_local = rb.rx2('yhat.test.mean')

    # One observed-vs-fitted figure per sample.
    for sample_label, y_observed, y_model in (('Training', y_train_local, y_fit_local),
                                              ('Test', y_test_local, y_hat_local)):
        sns.lineplot(data=pd.concat([pd.Series(np.array(y_observed)),
                                     pd.Series(np.array(y_model))],
                                     axis = 1))
        plt.legend(['Observed', 'Fitted'])
        plt.title('BART - ' + sample_label + ' Sample')
        plt.tight_layout()
        plt.show()

    # Write into the trailing rows of column `p` with a single `.iloc` call. Chained
    # `df[p][rows] = ...` assignment is not guaranteed to modify the frame (and never
    # does under pandas Copy-on-Write). Assumes the storage objects are DataFrames.
    # np.asarray makes the R result a plain array regardless of the active rpy2 conversion.
    df_y_fit_bart.iloc[(-len(y_rf_train)+shift):, df_y_fit_bart.columns.get_loc(p)] = np.asarray(y_fit_local)
    df_y_hat_bart.iloc[(-len(y_rf_test)+shift):, df_y_hat_bart.columns.get_loc(p)] = np.asarray(y_hat_local)

    # Test-sample accuracy for this model / horizon.
    df_MSE.loc[str_model, p] = MSE(y_test_local, y_hat_local)
    df_MAE.loc[str_model, p] = MAE(y_test_local, y_hat_local)
    df_RMSE.loc[str_model, p] = RMSE(y_test_local, y_hat_local)
    df_MAPE.loc[str_model, p] = MAPE(y_test_local, y_hat_local)
    df_CS.loc[str_model, p] = cos_sim(y_test_local, y_hat_local)

#################################
# Bagging                       #
#################################
# Bagging regressor with default settings, fitted once per horizon label in `col_names`.
# Reuses the X_rf_* / y_rf_* data built for the Random Forest.

str_model = 'BAGGING'

# Rolling-mean targets keyed by horizon label. They are used as-is; only X is trimmed for them.
bg_rolling_targets = {'3m': (y_rf_train_3m, y_rf_test_3m),
                      '6m': (y_rf_train_6m, y_rf_test_6m),
                      '12m': (y_rf_train_12m, y_rf_test_12m)}

for k, p in enumerate(col_names):

    # Number of trailing X rows dropped (and, for the raw target, leading y rows dropped).
    shift = periods[k]

    # Seeded like the Random Forest so the bootstrap resampling is reproducible across runs.
    bg_model = BaggingRegressor(random_state = rnd_state)

    if p in bg_rolling_targets:
        y_train_local, y_test_local = bg_rolling_targets[p]
        X_train_local = X_rf_train.iloc[:-shift,:]
        X_test_local = X_rf_test.iloc[:-shift,:]
    elif shift == 0:
        # `.iloc[:-0]` would select nothing, so the unshifted case uses the full sample.
        X_train_local = X_rf_train
        X_test_local = X_rf_test
        y_train_local = y_rf_train
        y_test_local = y_rf_test
    else:
        X_train_local = X_rf_train.iloc[:-shift,:]
        X_test_local = X_rf_test.iloc[:-shift,:]
        y_train_local = y_rf_train.iloc[shift:]
        y_test_local = y_rf_test.iloc[shift:]

    bg_model.fit(X_train_local, y_train_local)

    y_fit_local = bg_model.predict(X_train_local)
    y_hat_local = bg_model.predict(X_test_local)

    # One observed-vs-fitted figure per sample.
    for sample_label, y_observed, y_model in (('Training', y_train_local, y_fit_local),
                                              ('Test', y_test_local, y_hat_local)):
        sns.lineplot(data=pd.concat([pd.Series(np.array(y_observed)),
                                     pd.Series(np.array(y_model))],
                                     axis = 1))
        plt.legend(['Observed', 'Fitted'])
        plt.title('Bagging Regression - ' + sample_label + ' Sample')
        plt.tight_layout()
        plt.show()

    # Write into the trailing rows of column `p` with a single `.iloc` call. Chained
    # `df[p][rows] = ...` assignment is not guaranteed to modify the frame (and never
    # does under pandas Copy-on-Write). Assumes the storage objects are DataFrames.
    df_y_fit_bagging.iloc[(-len(y_rf_train)+shift):, df_y_fit_bagging.columns.get_loc(p)] = y_fit_local
    df_y_hat_bagging.iloc[(-len(y_rf_test)+shift):, df_y_hat_bagging.columns.get_loc(p)] = y_hat_local

    # Test-sample accuracy for this model / horizon.
    df_MSE.loc[str_model, p] = MSE(y_test_local, y_hat_local)
    df_MAE.loc[str_model, p] = MAE(y_test_local, y_hat_local)
    df_RMSE.loc[str_model, p] = RMSE(y_test_local, y_hat_local)
    df_MAPE.loc[str_model, p] = MAPE(y_test_local, y_hat_local)
    df_CS.loc[str_model, p] = cos_sim(y_test_local, y_hat_local)

#################################
# Robust Regression             #
#################################
# Huber regressor with default settings, fitted once per horizon label in `col_names`.
# Reuses the X_lasso_* / y_lasso_* data built for the plain Lasso.

str_model = 'Huber'

# Rolling-mean targets keyed by horizon label. They are used as-is; only X is trimmed for them.
huber_rolling_targets = {'3m': (y_lasso_train_3m, y_lasso_test_3m),
                         '6m': (y_lasso_train_6m, y_lasso_test_6m),
                         '12m': (y_lasso_train_12m, y_lasso_test_12m)}

for k, p in enumerate(col_names):

    # Number of trailing X rows dropped (and, for the raw target, leading y rows dropped).
    shift = periods[k]

    huber_model = HuberRegressor()

    if p in huber_rolling_targets:
        y_train_local, y_test_local = huber_rolling_targets[p]
        X_train_local = X_lasso_train.iloc[:-shift,:]
        X_test_local = X_lasso_test.iloc[:-shift,:]
    elif shift == 0:
        # `.iloc[:-0]` would select nothing, so the unshifted case uses the full sample.
        X_train_local = X_lasso_train
        X_test_local = X_lasso_test
        y_train_local = y_lasso_train
        y_test_local = y_lasso_test
    else:
        X_train_local = X_lasso_train.iloc[:-shift,:]
        X_test_local = X_lasso_test.iloc[:-shift,:]
        y_train_local = y_lasso_train.iloc[shift:]
        y_test_local = y_lasso_test.iloc[shift:]

    huber_model_fit = huber_model.fit(X_train_local, y_train_local)
    y_fit_local = huber_model.predict(X_train_local)
    y_hat_local = huber_model.predict(X_test_local)

    # One observed-vs-fitted figure per sample.
    for sample_label, y_observed, y_model in (('Training', y_train_local, y_fit_local),
                                              ('Test', y_test_local, y_hat_local)):
        sns.lineplot(data=pd.concat([pd.Series(np.array(y_observed)),
                                     pd.Series(np.array(y_model))],
                                     axis = 1))
        plt.legend(['Observed', 'Fitted'])
        plt.title('Huber Regression - ' + sample_label + ' Sample')
        plt.tight_layout()
        plt.show()

    # Write into the trailing rows of column `p` with a single `.iloc` call. Chained
    # `df[p][rows] = ...` assignment is not guaranteed to modify the frame (and never
    # does under pandas Copy-on-Write). Assumes the storage objects are DataFrames.
    df_y_fit_huber.iloc[(-len(y_lasso_train)+shift):, df_y_fit_huber.columns.get_loc(p)] = y_fit_local
    df_y_hat_huber.iloc[(-len(y_lasso_test)+shift):, df_y_hat_huber.columns.get_loc(p)] = y_hat_local

    # Test-sample accuracy for this model / horizon.
    df_MSE.loc[str_model, p] = MSE(y_test_local, y_hat_local)
    df_MAE.loc[str_model, p] = MAE(y_test_local, y_hat_local)
    df_RMSE.loc[str_model, p] = RMSE(y_test_local, y_hat_local)
    df_MAPE.loc[str_model, p] = MAPE(y_test_local, y_hat_local)
    df_CS.loc[str_model, p] = cos_sim(y_test_local, y_hat_local)

#################################
# Theil-Sen Regression          #
#################################
# Theil-Sen regressor, fitted once per horizon label in `col_names`.
# Reuses the X_lasso_* / y_lasso_* data built for the plain Lasso.

str_model = 'Theil_Sen'

# Rolling-mean targets keyed by horizon label. They are used as-is; only X is trimmed for them.
ts_rolling_targets = {'3m': (y_lasso_train_3m, y_lasso_test_3m),
                      '6m': (y_lasso_train_6m, y_lasso_test_6m),
                      '12m': (y_lasso_train_12m, y_lasso_test_12m)}

for k, p in enumerate(col_names):

    # Number of trailing X rows dropped (and, for the raw target, leading y rows dropped).
    shift = periods[k]

    # Seeded like the Random Forest so the estimator's random subsampling is reproducible across runs.
    ts_model = TheilSenRegressor(random_state = rnd_state)

    if p in ts_rolling_targets:
        y_train_local, y_test_local = ts_rolling_targets[p]
        X_train_local = X_lasso_train.iloc[:-shift,:]
        X_test_local = X_lasso_test.iloc[:-shift,:]
    elif shift == 0:
        # `.iloc[:-0]` would select nothing, so the unshifted case uses the full sample.
        X_train_local = X_lasso_train
        X_test_local = X_lasso_test
        y_train_local = y_lasso_train
        y_test_local = y_lasso_test
    else:
        X_train_local = X_lasso_train.iloc[:-shift,:]
        X_test_local = X_lasso_test.iloc[:-shift,:]
        y_train_local = y_lasso_train.iloc[shift:]
        y_test_local = y_lasso_test.iloc[shift:]

    ts_model_fit = ts_model.fit(X_train_local, y_train_local)
    y_fit_local = ts_model.predict(X_train_local)
    y_hat_local = ts_model.predict(X_test_local)

    # One observed-vs-fitted figure per sample.
    for sample_label, y_observed, y_model in (('Training', y_train_local, y_fit_local),
                                              ('Test', y_test_local, y_hat_local)):
        sns.lineplot(data=pd.concat([pd.Series(np.array(y_observed)),
                                     pd.Series(np.array(y_model))],
                                     axis = 1))
        plt.legend(['Observed', 'Fitted'])
        plt.title('Theil-Sen Regression - ' + sample_label + ' Sample')
        plt.tight_layout()
        plt.show()

    # Write into the trailing rows of column `p` with a single `.iloc` call. Chained
    # `df[p][rows] = ...` assignment is not guaranteed to modify the frame (and never
    # does under pandas Copy-on-Write). Assumes the storage objects are DataFrames.
    df_y_fit_ts.iloc[(-len(y_lasso_train)+shift):, df_y_fit_ts.columns.get_loc(p)] = y_fit_local
    df_y_hat_ts.iloc[(-len(y_lasso_test)+shift):, df_y_hat_ts.columns.get_loc(p)] = y_hat_local

    # Test-sample accuracy for this model / horizon.
    df_MSE.loc[str_model, p] = MSE(y_test_local, y_hat_local)
    df_MAE.loc[str_model, p] = MAE(y_test_local, y_hat_local)
    df_RMSE.loc[str_model, p] = RMSE(y_test_local, y_hat_local)
    df_MAPE.loc[str_model, p] = MAPE(y_test_local, y_hat_local)
    df_CS.loc[str_model, p] = cos_sim(y_test_local, y_hat_local)

#################################
# Factor analysis               #
#################################

# Factor model: OLS on the `X_rf_*` regressors, refitted on the subset of
# regressors whose first-pass p-value is below 5%, for every horizon label in
# `col_names` (with the matching entry of `periods` as the row shift).
str_model = 'Factor'

# Pre-built (train, test) targets for the averaged multi-month horizons.
factor_targets = {
    '3m': (y_rf_train_3m, y_rf_test_3m),
    '6m': (y_rf_train_6m, y_rf_test_6m),
    '12m': (y_rf_train_12m, y_rf_test_12m),
}

for k, p in enumerate(col_names):

    shift = periods[k]

    if p in factor_targets:
        # Averaged targets are taken as prepared; only the regressors are trimmed.
        y_train_local, y_test_local = factor_targets[p]
        X_train_local = X_rf_train.iloc[:-shift,:]
        X_test_local = X_rf_test.iloc[:-shift,:]
    elif shift == 0:
        # Contemporaneous case: `.iloc[:-0]` would be empty, so use everything.
        X_train_local = X_rf_train
        X_test_local = X_rf_test
        y_train_local = y_rf_train
        y_test_local = y_rf_test
    else:
        # Regressors at row t are paired with the target at row t + shift.
        X_train_local = X_rf_train.iloc[:-shift,:]
        X_test_local = X_rf_test.iloc[:-shift,:]
        y_train_local = y_rf_train.iloc[shift:]
        y_test_local = y_rf_test.iloc[shift:]

    # First pass: unrestricted OLS on all regressors. The design matrix is
    # passed as-is (no constant column is added here).
    factor_model = sm.OLS(np.array(y_train_local), np.array(X_train_local))
    factor_model_fit = factor_model.fit()

    # Positions of the regressors that are significant at the 5% level.
    # NaN p-values compare False and are therefore excluded.
    significant_cols = np.flatnonzero(np.asarray(factor_model_fit.pvalues) < 0.05)
    if significant_cols.size == 0:
        # An empty design matrix cannot be fitted; keep the unrestricted
        # regressor set rather than aborting the whole horizon loop.
        print('Factor model (' + str(p) + '): no regressor significant at 5%, keeping all regressors')
        significant_cols = np.arange(X_train_local.shape[1])

    X_train_local_adj = X_train_local.iloc[:, significant_cols]
    X_test_local_adj = X_test_local.iloc[:, significant_cols]

    # Second pass: refit on the retained regressors only. Exactly one screening
    # pass is applied; this refit is the model used for every prediction below.
    factor_model = sm.OLS(np.array(y_train_local), np.array(X_train_local_adj))
    factor_model_fit = factor_model.fit()
    y_fit_local = factor_model_fit.predict(np.array(X_train_local_adj))
    y_hat_local = factor_model_fit.predict(np.array(X_test_local_adj))

    # Observed vs. fitted, first in-sample and then out-of-sample.
    for sample_label, observed, predicted in (('Training', y_train_local, y_fit_local),
                                              ('Test', y_test_local, y_hat_local)):
        sns.lineplot(data=pd.concat([pd.Series(np.array(observed)),
                                     pd.Series(np.array(predicted))],
                                     axis = 1))
        plt.legend(['Observed', 'Fitted'])
        plt.title('Factor Model - ' + sample_label + ' Sample')
        plt.tight_layout()
        plt.show()

    # Write the predictions into the trailing rows of column p with a single
    # positional assignment (chained `df[p][...] = ...` is not guaranteed to
    # write through to the frame).
    df_y_fit_factor.iloc[(-len(y_rf_train)+shift):, df_y_fit_factor.columns.get_loc(p)] = y_fit_local
    df_y_hat_factor.iloc[(-len(y_rf_test)+shift):, df_y_hat_factor.columns.get_loc(p)] = y_hat_local

    # Out-of-sample scores for this model / horizon.
    df_MSE.loc[str_model, p] = MSE(y_test_local, y_hat_local)
    df_MAE.loc[str_model, p] = MAE(y_test_local, y_hat_local)
    df_RMSE.loc[str_model, p] = RMSE(y_test_local, y_hat_local)
    df_MAPE.loc[str_model, p] = MAPE(y_test_local, y_hat_local)
    df_CS.loc[str_model, p] = cos_sim(y_test_local, y_hat_local)

#################################
# GARCH                         #
#################################

# GARCH benchmark (rugarch via rpy2).
# A model is fitted to `y`, a path of n_train + n_test points is simulated from
# it, and a rolling window of n_train points is then refitted over that
# simulated path to produce 1-step and averaged multi-step forecasts.
str_model = 'GARCH'

importr('rugarch')

n_train = len(index_train)
n_test = len(index_test)

# Number of forecast steps averaged for each multi-month target. These match
# the rolling(3/6/12) means the forecasts are later compared against.
steps_3m, steps_6m, steps_12m = 3, 6, 12

y_fit_local = np.zeros(n_train+n_test)
y_hat_local = np.zeros(n_test)
y_hat_local_3m = np.zeros(n_test)
y_hat_local_6m = np.zeros(n_test)
y_hat_local_12m = np.zeros(n_test)

# NOTE(review): the simulation below is not seeded from this block, so the
# simulated path presumably differs between runs - confirm whether a seed is
# set on the R side elsewhere.
spec = r['ugarchspec']() # GARCH(1,1)
fit = r['ugarchfit'](spec = spec, data = y)
sim = r['ugarchsim'](fit, n_train + n_test)
y_sim_local = scale(r['fitted'](sim))

for i in range(0,n_test+1):
    print(i)
    y_window = np.array(y_sim_local[i:(i+n_train)])
    try:
        spec = r['ugarchspec']() # GARCH(1,1)
        fit = r['ugarchfit'](spec = spec, data = y_window)
        # Forecast 12 steps ahead; no data argument is needed for a fitted object.
        forc = r['ugarchforecast'](fit, **{'n.ahead': 12})
        y_fit_local[i:(i+n_train)] = r['fitted'](fit)[:,0]
        aux = r['fitted'](forc)[:,0]
        # The last iteration (i == n_test) only refreshes the fitted values.
        if i <= n_test-1:
            y_hat_local[i] = aux[0]
            # Slices are end-exclusive: aux[0:3] averages forecast steps 1-3, etc.
            y_hat_local_3m[i] = np.mean(aux[0:steps_3m])
            y_hat_local_6m[i] = np.mean(aux[0:steps_6m])
            y_hat_local_12m[i] = np.mean(aux[0:steps_12m])
    except Exception as err:
        # Best effort: report the failed window and carry the previous forecast
        # forward. For i == 0 there is no previous value, so the zero
        # initialisation is kept.
        print('GARCH: window', i, 'failed:', err)
        if 0 < i <= n_test-1:
            y_hat_local[i] = y_hat_local[i-1]
            y_hat_local_3m[i] = y_hat_local_3m[i-1]
            y_hat_local_6m[i] = y_hat_local_6m[i-1]
            y_hat_local_12m[i] = y_hat_local_12m[i-1]

# Wrap the results as single-column frames. The averaged forecasts drop their
# last (steps - 1) entries so their length matches rolling(steps).mean().dropna()
# of the observed test series.
y_fit_local = np.array(y_fit_local[0:n_train])
y_fit_local = pd.DataFrame(y_fit_local, columns = ['Train'])
y_hat_local = np.array(y_hat_local)
y_hat_local = pd.DataFrame(y_hat_local, columns = ['Test'])
y_hat_local_3m = np.array(y_hat_local_3m[:-(steps_3m-1)])
y_hat_local_3m = pd.DataFrame(y_hat_local_3m, columns = ['Test'])
y_hat_local_6m = np.array(y_hat_local_6m[:-(steps_6m-1)])
y_hat_local_6m = pd.DataFrame(y_hat_local_6m, columns = ['Test'])
y_hat_local_12m = np.array(y_hat_local_12m[:-(steps_12m-1)])
y_hat_local_12m = pd.DataFrame(y_hat_local_12m, columns = ['Test'])
# The "observed" test series is the tail of the simulated path.
y_true = np.array(y_sim_local[n_train:(n_train+n_test)])
y_true = pd.DataFrame(y_true, columns = ['Observed'])

sns.lineplot(data=pd.concat([y_true, y_hat_local],axis=1))

# Score the GARCH forecasts for every horizon label.
# Rolling-mean window and matching averaged forecast for the multi-month labels.
garch_averaged = {
    '3m': (3, y_hat_local_3m),
    '6m': (6, y_hat_local_6m),
    '12m': (12, y_hat_local_12m),
}

for k, p in enumerate(col_names):

    shift = periods[k]

    print(p)

    if p in garch_averaged:
        # Averaged horizon: rolling means of observed/fitted values against the
        # averaged forecast, each passed through scale().
        window, averaged_forecast = garch_averaged[p]
        y_local = scale(y_true.rolling(window).mean().dropna())
        y_fit = scale(y_fit_local.rolling(window).mean().dropna())
        y_hat = scale(averaged_forecast)
    elif p == 'p0':
        # Unshifted series are used as they are.
        y_local, y_fit, y_hat = y_true, y_fit_local, y_hat_local
    else:
        # Drop the first `shift` observed rows and the last `shift` predictions.
        y_local = y_true[shift:]
        y_fit = y_fit_local[:-shift]
        y_hat = y_hat_local[:-shift]

    # Reduce each single-column object to a flat array of its first column.
    y_local, y_fit, y_hat = (np.array(block)[:,0] for block in (y_local, y_fit, y_hat))

    # Store the predictions in the trailing rows of column p.
    df_y_fit_garch[p][(-n_train+shift):] = y_fit
    df_y_hat_garch[p][(-n_test+shift):] = y_hat

    # Error metrics, written in the order MSE, MAE, RMSE, MAPE, cosine similarity.
    for score_table, score_fn in ((df_MSE, MSE), (df_MAE, MAE), (df_RMSE, RMSE),
                                  (df_MAPE, MAPE), (df_CS, cos_sim2)):
        score_table.loc[str_model, p] = score_fn(y_local, y_hat)

#################################
# VECM                          #
#################################

# VECM benchmark (tsDyn via rpy2).
# A VECM with 12 lags is fitted to six macro series, a bootstrap path is drawn
# from it, and a rolling window of n_train rows is refitted over that path to
# forecast the first column (CPIAUCSL).
str_model = 'VECM'

data_vecm = pd.concat([X['CPIAUCSL'], X['TB3MS'], X['M1SL'], X['OILPRICEx'], X['TWEXAFEGSMTHx'], X['INDPRO']], axis = 1)

importr('tsDyn')
# NOTE(review): this rebinds the module-level name `stats`, which the import
# cell binds to scipy.stats. It is kept because later cells may rely on it.
stats = importr('stats')

n_train = len(index_train)
n_test = len(index_test)

# Number of forecast steps averaged for each multi-month target. These match
# the rolling(3/6/12) means the forecasts are later compared against.
steps_3m, steps_6m, steps_12m = 3, 6, 12

y_fit_local = np.zeros(n_train+n_test)
y_hat_local = np.zeros(n_test)
y_hat_local_3m = np.zeros(n_test)
y_hat_local_6m = np.zeros(n_test)
y_hat_local_12m = np.zeros(n_test)

vecm_fit = r['VECM'](data_vecm, 12)
data_vecm_sim = pd.DataFrame(scale(r['VECM.boot'](vecm_fit)))

for i in range(0,n_test+1):
    print(i)
    data_vecm_window = np.array(data_vecm_sim.iloc[i:(i+n_train),:])
    try:
        vecm_fit = r['VECM'](data_vecm_window, 12)
        aux = stats.predict(object = vecm_fit, **{'n.ahead':12})[:,0]
        # Fitted values are written starting 13 rows into the window.
        y_fit_local[(i+13):(i+n_train)] = r['fitted'](vecm_fit)[:,0]
        # The last iteration (i == n_test) only refreshes the fitted values.
        if i <= n_test-1:
            y_hat_local[i] = aux[0]
            # Slices are end-exclusive: aux[0:3] averages forecast steps 1-3, etc.
            y_hat_local_3m[i] = np.mean(aux[0:steps_3m])
            y_hat_local_6m[i] = np.mean(aux[0:steps_6m])
            y_hat_local_12m[i] = np.mean(aux[0:steps_12m])
    except Exception as err:
        # Best effort: report the failed window and carry the previous forecast
        # forward. For i == 0 there is no previous value, so the zero
        # initialisation is kept.
        print('VECM: window', i, 'failed:', err)
        if 0 < i <= n_test-1:
            y_hat_local[i] = y_hat_local[i-1]
            y_hat_local_3m[i] = y_hat_local_3m[i-1]
            y_hat_local_6m[i] = y_hat_local_6m[i-1]
            y_hat_local_12m[i] = y_hat_local_12m[i-1]

# Wrap the results as single-column frames. The averaged forecasts drop their
# last (steps - 1) entries so their length matches rolling(steps).mean().dropna()
# of the observed test series.
y_fit_local = np.array(y_fit_local[0:n_train])
y_fit_local = pd.DataFrame(y_fit_local, columns = ['Train'])
y_hat_local = np.array(y_hat_local)
y_hat_local = pd.DataFrame(y_hat_local, columns = ['Test'])
y_hat_local_3m = np.array(y_hat_local_3m[:-(steps_3m-1)])
y_hat_local_3m = pd.DataFrame(y_hat_local_3m, columns = ['Test'])
y_hat_local_6m = np.array(y_hat_local_6m[:-(steps_6m-1)])
y_hat_local_6m = pd.DataFrame(y_hat_local_6m, columns = ['Test'])
y_hat_local_12m = np.array(y_hat_local_12m[:-(steps_12m-1)])
y_hat_local_12m = pd.DataFrame(y_hat_local_12m, columns = ['Test'])
# The "observed" test series is the tail of the first bootstrap column.
y_true = np.array(data_vecm_sim.iloc[n_train:(n_train+n_test),0])
y_true = pd.DataFrame(y_true, columns = ['Observed'])

sns.lineplot(data=pd.concat([y_true, y_hat_local],axis=1))

# Score the VECM forecasts for every horizon label.
# Rolling-mean window and matching averaged forecast for the multi-month labels.
vecm_averaged = {
    '3m': (3, y_hat_local_3m),
    '6m': (6, y_hat_local_6m),
    '12m': (12, y_hat_local_12m),
}

for k, p in enumerate(col_names):

    shift = periods[k]

    print(p)

    if p in vecm_averaged:
        # Averaged horizon: rolling means of observed/fitted values against the
        # averaged forecast, each passed through scale().
        window, averaged_forecast = vecm_averaged[p]
        y_local = scale(y_true.rolling(window).mean().dropna())
        y_fit = scale(y_fit_local.rolling(window).mean().dropna())
        y_hat = scale(averaged_forecast)
    elif p == 'p0':
        # Unshifted series are used as they are.
        y_local, y_fit, y_hat = y_true, y_fit_local, y_hat_local
    else:
        # Drop the first `shift` observed rows and the last `shift` predictions.
        y_local = y_true[shift:]
        y_fit = y_fit_local[:-shift]
        y_hat = y_hat_local[:-shift]

    # Reduce each single-column object to a flat array of its first column.
    y_local, y_fit, y_hat = (np.array(block)[:,0] for block in (y_local, y_fit, y_hat))

    # Store the predictions in the trailing rows of column p.
    df_y_fit_vecm[p][(-n_train+shift):] = y_fit
    df_y_hat_vecm[p][(-n_test+shift):] = y_hat

    # Error metrics, written in the order MSE, MAE, RMSE, MAPE, cosine similarity.
    for score_table, score_fn in ((df_MSE, MSE), (df_MAE, MAE), (df_RMSE, RMSE),
                                  (df_MAPE, MAPE), (df_CS, cos_sim2)):
        score_table.loc[str_model, p] = score_fn(y_local, y_hat)

#################################
# SETAR                         #
#################################

# SETAR benchmark (tsDyn via rpy2).
# A SETAR model is fitted to `y`, a bootstrap path is drawn from it, and a
# rolling window of n_train rows is refitted over that path to produce 1-step
# and averaged multi-step forecasts.
str_model = 'SETAR'

data_setar = y

importr('tsDyn')
# NOTE(review): this rebinds the module-level name `stats`, which the import
# cell binds to scipy.stats. It is kept because later cells may rely on it.
stats = importr('stats')

n_train = len(index_train)
n_test = len(index_test)

# Number of forecast steps averaged for each multi-month target. These match
# the rolling(3/6/12) means the forecasts are later compared against.
steps_3m, steps_6m, steps_12m = 3, 6, 12

y_fit_local = np.zeros(n_train+n_test)
y_hat_local = np.zeros(n_test)
y_hat_local_3m = np.zeros(n_test)
y_hat_local_6m = np.zeros(n_test)
y_hat_local_12m = np.zeros(n_test)

setar_fit = r['setar'](x = np.array(data_setar), m = 2, d = 1, nthresh = 1, thDelay = 1, th = 0)
data_setar_sim = r['setar.sim'](setarObject = setar_fit, type = 'boot')
data_setar_sim = pd.DataFrame(scale(data_setar_sim.rx2('serie')))

for i in range(0,n_test+1):
    print(i)
    data_setar_window = np.array(data_setar_sim.iloc[i:(i+n_train),:])
    try:
        setar_fit = r['setar'](x = data_setar_window, m = 2, d = 1, nthresh = 1, thDelay = 1, th = 0)
        aux = stats.predict(object = setar_fit, **{'n.ahead':12})
        # Keep only the non-NaN fitted values; they are written starting two
        # rows into the window.
        fitted_values = r['fitted.values'](setar_fit)
        fitted_values = fitted_values[~np.isnan(fitted_values)]
        y_fit_local[(i+2):(i+n_train)] = fitted_values
        # The last iteration (i == n_test) only refreshes the fitted values.
        if i <= n_test-1:
            y_hat_local[i] = aux[0]
            # Slices are end-exclusive: aux[0:3] averages forecast steps 1-3, etc.
            y_hat_local_3m[i] = np.mean(aux[0:steps_3m])
            y_hat_local_6m[i] = np.mean(aux[0:steps_6m])
            y_hat_local_12m[i] = np.mean(aux[0:steps_12m])
    except Exception as err:
        # Best effort: report the failed window and carry the previous forecast
        # forward. For i == 0 there is no previous value, so the zero
        # initialisation is kept.
        print('SETAR: window', i, 'failed:', err)
        if 0 < i <= n_test-1:
            y_hat_local[i] = y_hat_local[i-1]
            y_hat_local_3m[i] = y_hat_local_3m[i-1]
            y_hat_local_6m[i] = y_hat_local_6m[i-1]
            y_hat_local_12m[i] = y_hat_local_12m[i-1]

# Wrap the results as single-column frames. The averaged forecasts drop their
# last (steps - 1) entries so their length matches rolling(steps).mean().dropna()
# of the observed test series.
y_fit_local = np.array(y_fit_local[0:n_train])
y_fit_local = pd.DataFrame(y_fit_local, columns = ['Train'])
y_hat_local = np.array(y_hat_local)
y_hat_local = pd.DataFrame(y_hat_local, columns = ['Test'])
y_hat_local_3m = np.array(y_hat_local_3m[:-(steps_3m-1)])
y_hat_local_3m = pd.DataFrame(y_hat_local_3m, columns = ['Test'])
y_hat_local_6m = np.array(y_hat_local_6m[:-(steps_6m-1)])
y_hat_local_6m = pd.DataFrame(y_hat_local_6m, columns = ['Test'])
y_hat_local_12m = np.array(y_hat_local_12m[:-(steps_12m-1)])
y_hat_local_12m = pd.DataFrame(y_hat_local_12m, columns = ['Test'])
# The "observed" test series is the tail of the bootstrap path.
y_true = np.array(data_setar_sim[n_train:(n_train+n_test)])
y_true = pd.DataFrame(y_true, columns = ['Observed'])

sns.lineplot(data=pd.concat([y_true, y_hat_local],axis=1))

# Score the SETAR forecasts for every horizon label.
# Rolling-mean window and matching averaged forecast for the multi-month labels.
setar_averaged = {
    '3m': (3, y_hat_local_3m),
    '6m': (6, y_hat_local_6m),
    '12m': (12, y_hat_local_12m),
}

for k, p in enumerate(col_names):

    shift = periods[k]

    print(p)

    if p in setar_averaged:
        # Averaged horizon: rolling means of observed/fitted values against the
        # averaged forecast, each passed through scale().
        window, averaged_forecast = setar_averaged[p]
        y_local = scale(y_true.rolling(window).mean().dropna())
        y_fit = scale(y_fit_local.rolling(window).mean().dropna())
        y_hat = scale(averaged_forecast)
    elif p == 'p0':
        # Unshifted series are used as they are.
        y_local, y_fit, y_hat = y_true, y_fit_local, y_hat_local
    else:
        # Drop the first `shift` observed rows and the last `shift` predictions.
        y_local = y_true[shift:]
        y_fit = y_fit_local[:-shift]
        y_hat = y_hat_local[:-shift]

    # Reduce each single-column object to a flat array of its first column.
    y_local, y_fit, y_hat = (np.array(block)[:,0] for block in (y_local, y_fit, y_hat))

    # Store the predictions in the trailing rows of column p.
    df_y_fit_setar[p][(-n_train+shift):] = y_fit
    df_y_hat_setar[p][(-n_test+shift):] = y_hat

    # Error metrics, written in the order MSE, MAE, RMSE, MAPE, cosine similarity.
    for score_table, score_fn in ((df_MSE, MSE), (df_MAE, MAE), (df_RMSE, RMSE),
                                  (df_MAPE, MAPE), (df_CS, cos_sim2)):
        score_table.loc[str_model, p] = score_fn(y_local, y_hat)

#################################
# AO (2001) Moving Average      #
#################################

# Model
# Moving-average benchmark: the 12-period rolling mean of `y` is used directly
# as the prediction and scored against `y` over the rows where that mean exists.
str_model = 'MA'

# The 12-period rolling mean is the predictor; its index defines the common sample.
X_ma = y.rolling(12).mean().dropna()
ma_index = X_ma.index

# Observed series (raw and 3/6/12-period averages) restricted to that sample,
# each reduced to a flat array of its first column.
# NOTE(review): y_ma_3m, y_ma_6m and y_ma_12m are not referenced in the scoring
# loop below - confirm whether the averaged horizons were meant to use them.
y_ma = np.array(y.loc[ma_index])[:,0]
y_ma_3m = np.array(y.rolling(3).mean().dropna().loc[ma_index])[:,0]
y_ma_6m = np.array(y.rolling(6).mean().dropna().loc[ma_index])[:,0]
y_ma_12m = np.array(y.rolling(12).mean().dropna().loc[ma_index])[:,0]
X_ma = np.array(X_ma)[:,0]

for k, p in enumerate(col_names):

    shift = periods[k]

    # The prediction for row t + shift is the rolling mean at row t;
    # `[:-0]` would be empty, hence the explicit zero-shift case.
    y_fit_local = X_ma if shift == 0 else X_ma[:-shift]
    y_observed = y_ma[shift:]

    sns.lineplot(data = [y_observed, y_fit_local])
    plt.legend(['Observed', 'Fitted'], loc='lower right')
    plt.tight_layout()
    plt.show()
    plt.close()

    # Storing the fitted values is currently disabled:
    # df_y_fit_ma[p][(-len(y_fit_local)):] = y_fit_local

    # Error metrics, written in the order MSE, MAE, RMSE, MAPE, cosine similarity.
    for score_table, score_fn in ((df_MSE, MSE), (df_MAE, MAE), (df_RMSE, RMSE),
                                  (df_MAPE, MAPE), (df_CS, cos_sim)):
        score_table.loc[str_model, p] = score_fn(y_observed, y_fit_local)

#################################
# SARIMA                        #
#################################

# Model
# ARIMA benchmark (R stats/forecast via rpy2), stored under the label 'SARIMA'.
# The model is fitted with order (1,0,1); no seasonal argument is passed here.
# A path is simulated from the fit and a rolling window of n_train rows is
# refitted over it to produce 1-step and averaged multi-step forecasts.
str_model = 'SARIMA'

importr('stats')
importr('forecast')

data_arima = y

n_train = len(index_train)
n_test = len(index_test)

# Number of forecast steps averaged for each multi-month target. These match
# the rolling(3/6/12) means the forecasts are later compared against.
steps_3m, steps_6m, steps_12m = 3, 6, 12

y_fit_local = np.zeros(n_train+n_test)
y_hat_local = np.zeros(n_test)
y_hat_local_3m = np.zeros(n_test)
y_hat_local_6m = np.zeros(n_test)
y_hat_local_12m = np.zeros(n_test)

arima_fit = r['arima'](x = data_arima, order = FloatVector([1,0,1]))
data_arima_sim = pd.DataFrame(r['simulate'](object = arima_fit))

for i in range(0,n_test+1):
    print(i)
    data_arima_window = np.array(data_arima_sim.iloc[i:(i+n_train),:])
    try:
        arima_fit = r['arima'](x = data_arima_window, order = FloatVector([1,0,1]))
        aux = r['predict'](arima_fit, **{'n.ahead':13})
        aux = aux.rx2('pred')
        # Keep only the non-NaN fitted values of this window.
        fitted_values = r['fitted'](arima_fit)[:,0]
        fitted_values = fitted_values[~np.isnan(fitted_values)]
        y_fit_local[i:(i+n_train)] = fitted_values
        # The last iteration (i == n_test) only refreshes the fitted values.
        if i <= n_test-1:
            y_hat_local[i] = aux[0]
            # Slices are end-exclusive: aux[0:3] averages forecast steps 1-3, etc.
            y_hat_local_3m[i] = np.mean(aux[0:steps_3m])
            y_hat_local_6m[i] = np.mean(aux[0:steps_6m])
            y_hat_local_12m[i] = np.mean(aux[0:steps_12m])
    except Exception as err:
        # Best effort: report the failed window and carry the previous forecast
        # forward. For i == 0 there is no previous value, so the zero
        # initialisation is kept.
        print('SARIMA: window', i, 'failed:', err)
        if 0 < i <= n_test-1:
            y_hat_local[i] = y_hat_local[i-1]
            y_hat_local_3m[i] = y_hat_local_3m[i-1]
            y_hat_local_6m[i] = y_hat_local_6m[i-1]
            y_hat_local_12m[i] = y_hat_local_12m[i-1]

# Wrap the results as single-column frames. The averaged forecasts drop their
# last (steps - 1) entries so their length matches rolling(steps).mean().dropna()
# of the observed test series.
y_fit_local = np.array(y_fit_local[0:n_train])
y_fit_local = pd.DataFrame(y_fit_local, columns = ['Train'])
y_hat_local = np.array(y_hat_local)
y_hat_local = pd.DataFrame(y_hat_local, columns = ['Test'])
y_hat_local_3m = np.array(y_hat_local_3m[:-(steps_3m-1)])
y_hat_local_3m = pd.DataFrame(y_hat_local_3m, columns = ['Test'])
y_hat_local_6m = np.array(y_hat_local_6m[:-(steps_6m-1)])
y_hat_local_6m = pd.DataFrame(y_hat_local_6m, columns = ['Test'])
y_hat_local_12m = np.array(y_hat_local_12m[:-(steps_12m-1)])
y_hat_local_12m = pd.DataFrame(y_hat_local_12m, columns = ['Test'])
# The "observed" test series is the tail of the simulated path.
y_true = np.array(data_arima_sim[n_train:(n_train+n_test)])
y_true = pd.DataFrame(y_true, columns = ['Observed'])

sns.lineplot(data=pd.concat([y_true, y_hat_local],axis=1))

# Score the SARIMA forecasts for every horizon label.
# Rolling-mean window and matching averaged forecast for the multi-month labels.
sarima_averaged = {
    '3m': (3, y_hat_local_3m),
    '6m': (6, y_hat_local_6m),
    '12m': (12, y_hat_local_12m),
}

for k, p in enumerate(col_names):

    shift = periods[k]

    print(p)

    if p in sarima_averaged:
        # Averaged horizon: rolling means of observed/fitted values against the
        # averaged forecast, each passed through scale().
        window, averaged_forecast = sarima_averaged[p]
        y_local = scale(y_true.rolling(window).mean().dropna())
        y_fit = scale(y_fit_local.rolling(window).mean().dropna())
        y_hat = scale(averaged_forecast)
    elif p == 'p0':
        # Unshifted series are used as they are.
        y_local, y_fit, y_hat = y_true, y_fit_local, y_hat_local
    else:
        # Drop the first `shift` observed rows and the last `shift` predictions.
        y_local = y_true[shift:]
        y_fit = y_fit_local[:-shift]
        y_hat = y_hat_local[:-shift]

    # Reduce each single-column object to a flat array of its first column.
    y_local, y_fit, y_hat = (np.array(block)[:,0] for block in (y_local, y_fit, y_hat))

    # Store the predictions in the trailing rows of column p.
    df_y_fit_sarima[p][(-n_train+shift):] = y_fit
    df_y_hat_sarima[p][(-n_test+shift):] = y_hat

    # Error metrics, written in the order MSE, MAE, RMSE, MAPE, cosine similarity.
    for score_table, score_fn in ((df_MSE, MSE), (df_MAE, MAE), (df_RMSE, RMSE),
                                  (df_MAPE, MAPE), (df_CS, cos_sim2)):
        score_table.loc[str_model, p] = score_fn(y_local, y_hat)

#################################
# ARFIMA                        #
#################################

# Model
# ARFIMA benchmark (R forecast package via rpy2).
# A model is fitted to `y`, a path is simulated from it, and a rolling window
# of n_train rows is refitted over that path to produce 1-step and averaged
# multi-step forecasts.
str_model = 'ARFIMA'

importr('stats')
importr('forecast')

data_arfima = y

n_train = len(index_train)
n_test = len(index_test)

# Number of forecast steps averaged for each multi-month target. These match
# the rolling(3/6/12) means the forecasts are later compared against.
steps_3m, steps_6m, steps_12m = 3, 6, 12

y_fit_local = np.zeros(n_train+n_test)
y_hat_local = np.zeros(n_test)
y_hat_local_3m = np.zeros(n_test)
y_hat_local_6m = np.zeros(n_test)
y_hat_local_12m = np.zeros(n_test)

arfima_fit = r['arfima'](FloatVector(np.array(data_arfima)))
data_arfima_sim = pd.DataFrame(r['simulate'](object = arfima_fit))

for i in range(0,n_test+1):
    print(i)
    data_arfima_window = np.array(data_arfima_sim.iloc[i:(i+n_train),:])
    try:
        arfima_fit = r['arfima'](FloatVector(data_arfima_window))
        aux = r['forecast'](arfima_fit, h=12)
        aux = aux.rx2('mean')
        # Keep only the non-NaN fitted values of this window.
        fitted_values = arfima_fit.rx2('fitted')
        fitted_values = fitted_values[~np.isnan(fitted_values)]
        y_fit_local[i:(i+n_train)] = fitted_values
        # The last iteration (i == n_test) only refreshes the fitted values.
        if i <= n_test-1:
            y_hat_local[i] = aux[0]
            # Slices are end-exclusive: aux[0:3] averages forecast steps 1-3, etc.
            y_hat_local_3m[i] = np.mean(aux[0:steps_3m])
            y_hat_local_6m[i] = np.mean(aux[0:steps_6m])
            y_hat_local_12m[i] = np.mean(aux[0:steps_12m])
    except Exception as err:
        # Best effort: report the failed window and carry the previous forecast
        # forward. For i == 0 there is no previous value, so the zero
        # initialisation is kept.
        print('ARFIMA: window', i, 'failed:', err)
        if 0 < i <= n_test-1:
            y_hat_local[i] = y_hat_local[i-1]
            y_hat_local_3m[i] = y_hat_local_3m[i-1]
            y_hat_local_6m[i] = y_hat_local_6m[i-1]
            y_hat_local_12m[i] = y_hat_local_12m[i-1]

# Wrap the results as single-column frames. The averaged forecasts drop their
# last (steps - 1) entries so their length matches rolling(steps).mean().dropna()
# of the observed test series.
y_fit_local = np.array(y_fit_local[0:n_train])
y_fit_local = pd.DataFrame(y_fit_local, columns = ['Train'])
y_hat_local = np.array(y_hat_local)
y_hat_local = pd.DataFrame(y_hat_local, columns = ['Test'])
y_hat_local_3m = np.array(y_hat_local_3m[:-(steps_3m-1)])
y_hat_local_3m = pd.DataFrame(y_hat_local_3m, columns = ['Test'])
y_hat_local_6m = np.array(y_hat_local_6m[:-(steps_6m-1)])
y_hat_local_6m = pd.DataFrame(y_hat_local_6m, columns = ['Test'])
y_hat_local_12m = np.array(y_hat_local_12m[:-(steps_12m-1)])
y_hat_local_12m = pd.DataFrame(y_hat_local_12m, columns = ['Test'])
# The "observed" test series is the tail of the simulated path.
y_true = np.array(data_arfima_sim[n_train:(n_train+n_test)])
y_true = pd.DataFrame(y_true, columns = ['Observed'])

sns.lineplot(data=pd.concat([y_true, y_hat_local],axis=1))

# Score the ARFIMA forecasts for every horizon label.
# Rolling-mean window and matching averaged forecast for the multi-month labels.
arfima_averaged = {
    '3m': (3, y_hat_local_3m),
    '6m': (6, y_hat_local_6m),
    '12m': (12, y_hat_local_12m),
}

for k, p in enumerate(col_names):

    shift = periods[k]

    print(p)

    if p in arfima_averaged:
        # Averaged horizon: rolling means of observed/fitted values against the
        # averaged forecast, each passed through scale().
        window, averaged_forecast = arfima_averaged[p]
        y_local = scale(y_true.rolling(window).mean().dropna())
        y_fit = scale(y_fit_local.rolling(window).mean().dropna())
        y_hat = scale(averaged_forecast)
    elif p == 'p0':
        # Unshifted series are used as they are.
        y_local, y_fit, y_hat = y_true, y_fit_local, y_hat_local
    else:
        # Drop the first `shift` observed rows and the last `shift` predictions.
        y_local = y_true[shift:]
        y_fit = y_fit_local[:-shift]
        y_hat = y_hat_local[:-shift]

    # Reduce each single-column object to a flat array of its first column.
    y_local, y_fit, y_hat = (np.array(block)[:,0] for block in (y_local, y_fit, y_hat))

    # Store the predictions in the trailing rows of column p.
    df_y_fit_arfima[p][(-n_train+shift):] = y_fit
    df_y_hat_arfima[p][(-n_test+shift):] = y_hat

    # Error metrics, written in the order MSE, MAE, RMSE, MAPE, cosine similarity.
    for score_table, score_fn in ((df_MSE, MSE), (df_MAE, MAE), (df_RMSE, RMSE),
                                  (df_MAPE, MAPE), (df_CS, cos_sim2)):
        score_table.loc[str_model, p] = score_fn(y_local, y_hat)
   
#################################
# Gradient Boosting             #
################################# 

# Gradient boosting on the Lasso-selected regressors, fitted and scored once
# per horizon label in `col_names` (with the matching entry of `periods` as
# the row shift between regressors and target).
str_model = 'GradBoost'

# Pre-built (train, test) targets for the averaged multi-month horizons.
gradboost_targets = {
    '3m': (y_lasso_train_3m, y_lasso_test_3m),
    '6m': (y_lasso_train_6m, y_lasso_test_6m),
    '12m': (y_lasso_train_12m, y_lasso_test_12m),
}

for k, p in enumerate(col_names):

    shift = periods[k]

    gradboost_model = GradientBoostingRegressor(random_state = rnd_state)

    if p in gradboost_targets:
        # Averaged targets are taken as prepared; only the regressors are trimmed.
        y_train_local, y_test_local = gradboost_targets[p]
        X_train_local = X_lasso_train.iloc[:-shift,:]
        X_test_local = X_lasso_test.iloc[:-shift,:]
    elif shift == 0:
        # Contemporaneous case: no trimming on either side.
        X_train_local, X_test_local = X_lasso_train, X_lasso_test
        y_train_local, y_test_local = y_lasso_train, y_lasso_test
    else:
        # Regressors at row t are paired with the target at row t + shift.
        X_train_local = X_lasso_train.iloc[:-shift,:]
        X_test_local = X_lasso_test.iloc[:-shift,:]
        y_train_local = y_lasso_train.iloc[shift:]
        y_test_local = y_lasso_test.iloc[shift:]

    gradboost_model_fit = gradboost_model.fit(X_train_local, y_train_local)
    y_fit_local = gradboost_model.predict(X_train_local)
    y_hat_local = gradboost_model.predict(X_test_local)

    # Observed vs. fitted, first in-sample and then out-of-sample.
    for sample_label, observed, predicted in (('Training', y_train_local, y_fit_local),
                                              ('Test', y_test_local, y_hat_local)):
        sns.lineplot(data=pd.concat([pd.Series(np.array(observed)),
                                     pd.Series(np.array(predicted))],
                                     axis = 1))
        plt.legend(['Observed', 'Fitted'])
        plt.title('GradBoost Regression - ' + sample_label + ' Sample')
        plt.tight_layout()
        plt.show()

    # Store the predictions in the trailing rows of column p.
    df_y_fit_gradboost[p][(-len(y_lasso_train)+shift):] = y_fit_local
    df_y_hat_gradboost[p][(-len(y_lasso_test)+shift):] = y_hat_local

    # Out-of-sample scores, written in the order MSE, MAE, RMSE, MAPE, cosine similarity.
    for score_table, score_fn in ((df_MSE, MSE), (df_MAE, MAE), (df_RMSE, RMSE),
                                  (df_MAPE, MAPE), (df_CS, cos_sim)):
        score_table.loc[str_model, p] = score_fn(y_test_local, y_hat_local)

#################################
# AdaBoost                      #
#################################
   
# AdaBoost on the Lasso-selected regressors, fitted and scored once per horizon
# label in `col_names` (with the matching entry of `periods` as the row shift
# between regressors and target).
str_model = 'AdaBoost'

# Pre-built (train, test) targets for the averaged multi-month horizons.
adaboost_targets = {
    '3m': (y_lasso_train_3m, y_lasso_test_3m),
    '6m': (y_lasso_train_6m, y_lasso_test_6m),
    '12m': (y_lasso_train_12m, y_lasso_test_12m),
}

for k, p in enumerate(col_names):

    shift = periods[k]

    adaboost_model = AdaBoostRegressor(random_state = rnd_state)

    if p in adaboost_targets:
        # Averaged targets are taken as prepared; only the regressors are trimmed.
        y_train_local, y_test_local = adaboost_targets[p]
        X_train_local = X_lasso_train.iloc[:-shift,:]
        X_test_local = X_lasso_test.iloc[:-shift,:]
    elif shift == 0:
        # Contemporaneous case: `.iloc[:-0]` would be empty, so use everything.
        X_train_local = X_lasso_train
        X_test_local = X_lasso_test
        y_train_local = y_lasso_train
        y_test_local = y_lasso_test
    else:
        # Regressors at row t are paired with the target at row t + shift.
        X_train_local = X_lasso_train.iloc[:-shift,:]
        X_test_local = X_lasso_test.iloc[:-shift,:]
        y_train_local = y_lasso_train.iloc[shift:]
        y_test_local = y_lasso_test.iloc[shift:]

    adaboost_model_fit = adaboost_model.fit(X_train_local, y_train_local)
    y_fit_local = adaboost_model.predict(X_train_local)
    y_hat_local = adaboost_model.predict(X_test_local)

    # Observed vs. fitted, first in-sample and then out-of-sample. The titles
    # name this model, so the figures can be told apart from the gradient
    # boosting ones.
    for sample_label, observed, predicted in (('Training', y_train_local, y_fit_local),
                                              ('Test', y_test_local, y_hat_local)):
        sns.lineplot(data=pd.concat([pd.Series(np.array(observed)),
                                     pd.Series(np.array(predicted))],
                                     axis = 1))
        plt.legend(['Observed', 'Fitted'])
        plt.title('AdaBoost Regression - ' + sample_label + ' Sample')
        plt.tight_layout()
        plt.show()

    # Write the predictions into the trailing rows of column p with a single
    # positional assignment (chained `df[p][...] = ...` is not guaranteed to
    # write through to the frame).
    df_y_fit_adaboost.iloc[(-len(y_lasso_train)+shift):, df_y_fit_adaboost.columns.get_loc(p)] = y_fit_local
    df_y_hat_adaboost.iloc[(-len(y_lasso_test)+shift):, df_y_hat_adaboost.columns.get_loc(p)] = y_hat_local

    # Out-of-sample scores for this model / horizon.
    df_MSE.loc[str_model, p] = MSE(y_test_local, y_hat_local)
    df_MAE.loc[str_model, p] = MAE(y_test_local, y_hat_local)
    df_RMSE.loc[str_model, p] = RMSE(y_test_local, y_hat_local)
    df_MAPE.loc[str_model, p] = MAPE(y_test_local, y_hat_local)
    df_CS.loc[str_model, p] = cos_sim(y_test_local, y_hat_local)

#################################
# Bayesian ARDR                 #
#################################

# Bayesian ARD regression on the `X_rf_*` regressors, fitted and scored once
# per horizon label in `col_names` (with the matching entry of `periods` as
# the row shift between regressors and target).
str_model = 'BayesRegression'

# Pre-built (train, test) targets for the averaged multi-month horizons.
bayesreg_targets = {
    '3m': (y_rf_train_3m, y_rf_test_3m),
    '6m': (y_rf_train_6m, y_rf_test_6m),
    '12m': (y_rf_train_12m, y_rf_test_12m),
}

for k, p in enumerate(col_names):

    shift = periods[k]

    bayesreg_model = ARDRegression()

    if p in bayesreg_targets:
        # Averaged targets are taken as prepared; only the regressors are trimmed.
        y_train_local, y_test_local = bayesreg_targets[p]
        X_train_local = X_rf_train.iloc[:-shift,:]
        X_test_local = X_rf_test.iloc[:-shift,:]
    elif shift == 0:
        # Contemporaneous case: no trimming on either side.
        X_train_local, X_test_local = X_rf_train, X_rf_test
        y_train_local, y_test_local = y_rf_train, y_rf_test
    else:
        # Regressors at row t are paired with the target at row t + shift.
        X_train_local = X_rf_train.iloc[:-shift,:]
        X_test_local = X_rf_test.iloc[:-shift,:]
        y_train_local = y_rf_train.iloc[shift:]
        y_test_local = y_rf_test.iloc[shift:]

    bayesreg_model_fit = bayesreg_model.fit(X_train_local, y_train_local)
    y_fit_local = bayesreg_model.predict(X_train_local)
    y_hat_local = bayesreg_model.predict(X_test_local)

    # Observed vs. fitted, first in-sample and then out-of-sample.
    for sample_label, observed, predicted in (('Training', y_train_local, y_fit_local),
                                              ('Test', y_test_local, y_hat_local)):
        sns.lineplot(data=pd.concat([pd.Series(np.array(observed)),
                                     pd.Series(np.array(predicted))],
                                     axis = 1))
        plt.legend(['Observed', 'Fitted'])
        plt.title('Bayes Regression - ' + sample_label + ' Sample')
        plt.tight_layout()
        plt.show()

    # Store the predictions in the trailing rows of column p.
    df_y_fit_bayesreg[p][(-len(y_rf_train)+shift):] = y_fit_local
    df_y_hat_bayesreg[p][(-len(y_rf_test)+shift):] = y_hat_local

    # Out-of-sample scores, written in the order MSE, MAE, RMSE, MAPE, cosine similarity.
    for score_table, score_fn in ((df_MSE, MSE), (df_MAE, MAE), (df_RMSE, RMSE),
                                  (df_MAPE, MAPE), (df_CS, cos_sim)):
        score_table.loc[str_model, p] = score_fn(y_test_local, y_hat_local)

In [None]:
#####################################################################################################################
#                                                                                                                   #
# Out-of-Sample Performance Comparison                                                                              #
#                                                                                                                   #
#####################################################################################################################

# Writes the metric tables and every model's y_fit / y_hat frame to CSV.
# Each frame goes to: str_Dir_Results + str(rnd_state) + ' <tag>.csv'

def _save_result_csv(frame, tag):
    """Write `frame` to the results directory using its own `to_csv` method.

    The target path is built by plain string concatenation of the notebook-level
    `str_Dir_Results` and `rnd_state` with a space, `tag` and the '.csv' suffix.
    """
    frame.to_csv(path_or_buf = str_Dir_Results + str(rnd_state) + ' ' + tag + '.csv')


# Out-of-sample metric tables
_save_result_csv(df_MSE, 'MSE')
_save_result_csv(df_MAE, 'MAE')
_save_result_csv(df_RMSE, 'RMSE')
_save_result_csv(df_MAPE, 'MAPE')
_save_result_csv(df_CS, 'CS')

# Per-model frames, saved in pairs: y_fit_* first, then y_hat_*
_save_result_csv(df_y_fit_lstm_m1, 'y_fit_lstm_m1')
_save_result_csv(df_y_hat_lstm_m1, 'y_hat_lstm_m1')

_save_result_csv(df_y_fit_lstm_m2, 'y_fit_lstm_m2')
_save_result_csv(df_y_hat_lstm_m2, 'y_hat_lstm_m2')

_save_result_csv(df_y_fit_lstm_m3, 'y_fit_lstm_m3')
_save_result_csv(df_y_hat_lstm_m3, 'y_hat_lstm_m3')

_save_result_csv(df_y_fit_lstm_m4, 'y_fit_lstm_m4')
_save_result_csv(df_y_hat_lstm_m4, 'y_hat_lstm_m4')

_save_result_csv(df_y_fit_lstm_m5, 'y_fit_lstm_m5')
_save_result_csv(df_y_hat_lstm_m5, 'y_hat_lstm_m5')

_save_result_csv(df_y_fit_lstm_m6, 'y_fit_lstm_m6')
_save_result_csv(df_y_hat_lstm_m6, 'y_hat_lstm_m6')

_save_result_csv(df_y_fit_mlp, 'y_fit_mlp')
_save_result_csv(df_y_hat_mlp, 'y_hat_mlp')

_save_result_csv(df_y_fit_mlp2, 'y_fit_mlp2')
_save_result_csv(df_y_hat_mlp2, 'y_hat_mlp2')

_save_result_csv(df_y_fit_rw, 'y_fit_rw')
_save_result_csv(df_y_hat_rw, 'y_hat_rw')

_save_result_csv(df_y_fit_ridge, 'y_fit_ridge')
_save_result_csv(df_y_hat_ridge, 'y_hat_ridge')

_save_result_csv(df_y_fit_ridge_cv, 'y_fit_ridge_cv')
_save_result_csv(df_y_hat_ridge_cv, 'y_hat_ridge_cv')

_save_result_csv(df_y_fit_bridge, 'y_fit_bridge')
_save_result_csv(df_y_hat_bridge, 'y_hat_bridge')

_save_result_csv(df_y_fit_lasso, 'y_fit_lasso')
_save_result_csv(df_y_hat_lasso, 'y_hat_lasso')

_save_result_csv(df_y_fit_lasso_cv, 'y_fit_lasso_cv')
_save_result_csv(df_y_hat_lasso_cv, 'y_hat_lasso_cv')

_save_result_csv(df_y_fit_blasso, 'y_fit_blasso')
_save_result_csv(df_y_hat_blasso, 'y_hat_blasso')

_save_result_csv(df_y_fit_enet, 'y_fit_enet')
_save_result_csv(df_y_hat_enet, 'y_hat_enet')

_save_result_csv(df_y_fit_svr, 'y_fit_svr')
_save_result_csv(df_y_hat_svr, 'y_hat_svr')

_save_result_csv(df_y_fit_rf, 'y_fit_rf')
_save_result_csv(df_y_hat_rf, 'y_hat_rf')

_save_result_csv(df_y_fit_bart, 'y_fit_bart')
_save_result_csv(df_y_hat_bart, 'y_hat_bart')

_save_result_csv(df_y_fit_bagging, 'y_fit_bagging')
_save_result_csv(df_y_hat_bagging, 'y_hat_bagging')

_save_result_csv(df_y_fit_ma, 'y_fit_ma')
_save_result_csv(df_y_hat_ma, 'y_hat_ma')

_save_result_csv(df_y_fit_sarima, 'y_fit_sarima')
_save_result_csv(df_y_hat_sarima, 'y_hat_sarima')

_save_result_csv(df_y_fit_huber, 'y_fit_huber')
_save_result_csv(df_y_hat_huber, 'y_hat_huber')

_save_result_csv(df_y_fit_ts, 'y_fit_ts')
_save_result_csv(df_y_hat_ts, 'y_hat_ts')

_save_result_csv(df_y_fit_factor, 'y_fit_factor')
_save_result_csv(df_y_hat_factor, 'y_hat_factor')

_save_result_csv(df_y_fit_gradboost, 'y_fit_gradboost')
_save_result_csv(df_y_hat_gradboost, 'y_hat_gradboost')

_save_result_csv(df_y_fit_adaboost, 'y_fit_adaboost')
_save_result_csv(df_y_hat_adaboost, 'y_hat_adaboost')

_save_result_csv(df_y_fit_arfima, 'y_fit_arfima')
_save_result_csv(df_y_hat_arfima, 'y_hat_arfima')

_save_result_csv(df_y_fit_garch, 'y_fit_garch')
_save_result_csv(df_y_hat_garch, 'y_hat_garch')

_save_result_csv(df_y_fit_setar, 'y_fit_setar')
_save_result_csv(df_y_hat_setar, 'y_hat_setar')

_save_result_csv(df_y_fit_bayesreg, 'y_fit_bayesreg')
_save_result_csv(df_y_hat_bayesreg, 'y_hat_bayesreg')

_save_result_csv(df_y_fit_knn, 'y_fit_knn')
_save_result_csv(df_y_hat_knn, 'y_hat_knn')