In [None]:
# R Path - required by rpy2 prior to importing libraries
# Raw string: the Windows path contains a backslash, and "\R" is not a valid
# escape sequence (it only works by accident and emits a warning on newer Pythons).
import os
os.environ["R_HOME"] = r"D:\R-4.0.2"

# Basics
import numpy as np
import pandas as pd
import scipy as sp
from scipy import stats
from scipy.stats import jarque_bera
from datetime import datetime
import xarray as xr

# Data
from pandas_datareader.data import DataReader

# R
import rpy2.robjects as robjects
from rpy2.robjects.packages import importr
from rpy2.robjects import r, DataFrame, FloatVector
from rpy2.robjects import pandas2ri

# Kalman Filter
from pykalman import KalmanFilter

# Wavelets
import pywt

# Machine Learning - tensorflow, keras, and sklearn
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from keras import backend as K
from keras import losses
from keras.models import Sequential, Model
from keras.layers import Input, Dense, LSTM, ConvLSTM2D, Flatten, BatchNormalization, Lambda 
from keras.layers import MaxPooling2D, MaxPooling3D, Conv3D, RepeatVector, TimeDistributed
from sklearn.neural_network import MLPRegressor

# SKLearn Models
from sklearn.linear_model import Ridge, RidgeCV
from sklearn.linear_model import Lasso, LassoCV
from sklearn.linear_model import ElasticNetCV
from sklearn.linear_model import BayesianRidge
from sklearn.ensemble import RandomForestRegressor
from sklearn.ensemble import BaggingRegressor
from sklearn.tree import export_graphviz
from sklearn.model_selection import train_test_split
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import RepeatedKFold
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import scale 
from sklearn.preprocessing import MinMaxScaler
from sklearn.decomposition import PCA
from sklearn.svm import SVR
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.datasets import make_regression

# Statsmodels
import statsmodels.api as sm
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
from statsmodels.tsa.stattools import adfuller 
from statsmodels.tsa.statespace.sarimax import SARIMAX 
from statsmodels.stats.diagnostic import acorr_ljungbox 
from statsmodels.stats.diagnostic import het_arch
from statsmodels.tsa.api import VAR
from statsmodels.tsa.vector_ar.svar_model import SVAR
from statsmodels.tsa.vector_ar.vecm import coint_johansen
from statsmodels.tsa.vector_ar.vecm import VECM
from statsmodels.tsa.vector_ar.vecm import select_coint_rank
from statsmodels.tsa.vector_ar.output import VARSummary

# Univariate GARCH
from arch import arch_model

# Plots
from matplotlib import pyplot as plt
from matplotlib import dates as md
import seaborn as sns
from matplotlib.ticker import PercentFormatter

# Misc
import pydot
import warnings


In [None]:
#####################################################################################################################
#                                                                                                                   #
# Constants and Parameters                                                                                          #
#                                                                                                                   #
#####################################################################################################################

# Folders and worksheet names
# The folders are absolute, machine-specific paths. Each one ends with '/' because the
# file names below are appended to them by plain string concatenation (no os.path.join).
str_Dir_Plan_FRED = 'D:/Estudos/2 Mestrado/Insper/Dissertação/Dados/'
str_Dir_Plan_Data = 'C:/Users/alext/Desktop/Inflation Temp/'
str_Dir_Plan_PC = 'D:/Estudos/2 Mestrado/Insper/Dissertação/Dados/PC/'
# File names are stored without extension; '.csv' is added where the files are read.
str_Nome_Plan_FRED_MD = 'FRED_MD_2020_04'
str_Nome_Plan_FRED_QD = 'FRED_QD_2020_04'
str_Nome_Plan_FRED_MD_Desc = 'Data_Description_MD'
str_Nome_Plan_FRED_QD_Desc = 'Data_Description_QD'

# How to display plots
# IPython magic: only valid inside a Jupyter/IPython session, not in a plain .py script.
%matplotlib inline 
plt.rcParams['figure.dpi'] = 200 # Plot resolution (dpi)

# Required to convert datatypes from Python to R and vice-versa
pandas2ri.activate()

# Remove warnings
# This silences every Python warning for the rest of the session, including
# deprecation and numerical warnings raised by the libraries used below.
warnings.filterwarnings('ignore')

# Color style (plots)
sns.set(color_codes = True)

# Statistical significance for hypothesis testing
# Using 1% due to the high number of tests carried out
alfa = 0.01

# Test size (share of observations used to build the test sample)
share_test_size = 0.20

# Validation sample size (share of observations used to build the validation sample)
share_validation_size = 0.20

# Number of lags considered when splitting the data - see LSTM models
n_lags_lstm = 4

# Number of lags considered when splitting the data - see ConvLSTM models
n_lags_conv = 4

# Number of sequences into which sample are broken when fitting ConvLSTM
# Note: n_lags = n_seq * n_steps
n_seq_conv = 1

# Size of each sequence into which sample are broken when fitting ConvLSTM
# Note: n_lags = n_seq * n_steps
# int() truncates, so the identity above only holds when n_lags_conv is a multiple of n_seq_conv.
n_steps_conv = int(n_lags_conv / n_seq_conv)

# Activation function
# NOTE(review): deep_ae() hard-codes "selu" instead of reading this constant - confirm
# whether the two are meant to stay in sync.
act_fun = 'selu'


In [None]:
#####################################################################################################################
#                                                                                                                   #
# Auxiliary Functions                                                                                               #
#                                                                                                                   #
#####################################################################################################################

# Build (window, target) samples from a univariate sequence
def split_sequence_uni(sequence, n_steps, per_ahead, cum = False):
    """Slice a 1-D sequence into rolling input windows and their targets.

    # Arguments
        sequence: indexable/sliceable sequence of observations
        n_steps: number of consecutive observations in each input window
        per_ahead: forecast horizon, counted from the end of the window
        cum: if False, the target is the single observation `per_ahead` steps
            after the window; otherwise it is the sum of the `per_ahead`
            observations that follow the window
    # Returns
        (X, y): arrays with the windows and the matching targets; both are
        empty when the sequence is too short to produce a sample
    """
    n_obs = len(sequence)
    # Last window start whose target still falls inside the sequence
    last_start = n_obs - n_steps - per_ahead
    windows, targets = [], []
    for start in range(min(n_obs, last_start + 1)):
        stop = start + n_steps
        if cum == False:
            target = sequence[stop + per_ahead - 1]
        else:
            target = np.sum(sequence[stop:(stop + per_ahead)])
        windows.append(sequence[start:stop])
        targets.append(target)
    return np.array(windows), np.array(targets)

# Build (window, target) samples from a multivariate sequence
def split_sequence_mult(sequences, n_steps, per_ahead, cum = False):
    """Slice a 2-D array into rolling input windows and their targets.

    The last column of `sequences` is the target series; all other columns
    are the inputs.

    # Arguments
        sequences: 2-D array, one row per observation
        n_steps: number of consecutive rows in each input window
        per_ahead: forecast horizon, counted from the end of the window
        cum: if False, the target is the last-column value `per_ahead` rows
            after the window; otherwise it is the sum of the last-column
            values over the `per_ahead` rows that follow the window
    # Returns
        (X, y): arrays with the windows (all columns but the last) and the
        matching targets
    """
    n_obs = len(sequences)
    # Last window start whose target still falls inside the data
    last_start = n_obs - n_steps - per_ahead
    windows, targets = [], []
    for start in range(min(n_obs, last_start + 1)):
        stop = start + n_steps
        if cum == False:
            target = sequences[stop + per_ahead - 1, -1]
        else:
            target = np.sum(sequences[stop:(stop + per_ahead), -1])
        windows.append(sequences[start:stop, :-1])
        targets.append(target)
    return np.array(windows), np.array(targets)

# Kalman filter regression
# If EM = True, then EM algorithm is used for estimation
# delta is related to the variance of the betas. Delta -> 1 makes betas more volatile, which may lead to overfitting.
# However, delta -> 0 may increase the MSE.

def KFReg(X, y, delta, obs_cov, init_mean, init_cov, EM = False):
    """Time-varying-coefficient regression of y on X via a Kalman filter.

    The state vector holds one coefficient per column of X, and each row of X
    is used as that period's observation matrix.

    # Arguments
        X: 2-D array of regressors (observations x features)
        y: observations of the dependent variable
        delta: sets the transition covariance to delta / (1 - delta) * I;
            only used when EM is False (delta = 1 divides by zero)
        obs_cov: observation covariance; only used when EM is False
        init_mean: initial state mean
        init_cov: initial state covariance
        EM: if False, filter with the fixed parameters above and an identity
            transition matrix; if True, let the EM algorithm estimate the
            unspecified parameters before filtering (delta and obs_cov are
            not passed to the filter in that case)
    # Returns
        (state_means, state_covs, kf): filtered coefficient means, their
        covariances and the KalmanFilter object
    """
    n_features = X.shape[1]
    # One (1 x n_features) observation matrix per period
    obs_mat = X[:, np.newaxis, :]
    if EM == False:
        trans_cov = (delta/(1 - delta))*np.eye(n_features)
        kf = KalmanFilter(n_dim_obs = 1, n_dim_state = n_features,
                          initial_state_mean = init_mean,
                          initial_state_covariance = init_cov,
                          transition_matrices = np.eye(n_features),
                          observation_matrices = obs_mat,
                          observation_covariance = obs_cov,
                          transition_covariance = trans_cov)
        state_means, state_covs = kf.filter(y)
    else:
        kf = KalmanFilter(n_dim_obs = 1, n_dim_state = n_features,
                          initial_state_mean = init_mean,
                          initial_state_covariance = init_cov,
                          observation_matrices = obs_mat)
        # EM must stay inside this branch: running it unconditionally would
        # re-estimate the parameters and discard the delta-based result above.
        state_means, state_covs = kf.em(y).filter(y)
    return state_means, state_covs, kf

# Mean Absolute Error
def MAE(y_obs, y_hat):
    """Return the mean of the absolute differences between y_obs and y_hat."""
    abs_err = np.abs(y_obs - y_hat)
    return np.mean(abs_err)

# Mean Squared Error
def MSE(y_obs, y_hat):
    """Return the mean of the squared differences between y_obs and y_hat."""
    resid = y_obs - y_hat
    return np.mean(resid**2)

# Root Mean Squared Error
def RMSE(y_obs, y_hat):
    """Return the square root of MSE(y_obs, y_hat)."""
    mean_sq_err = MSE(y_obs, y_hat)
    return np.sqrt(mean_sq_err)

# Mean Absolute Percentage Error
def MAPE(y_obs, y_hat):
    """Return the mean of |y_obs - y_hat| / |y_obs| (as a fraction, not in %).

    The absolute value is taken of the whole ratio so that negative observed
    values contribute a positive error instead of cancelling positive ones.
    Observations equal to zero are not treated specially and lead to inf/nan.
    """
    return np.mean(np.abs((y_obs - y_hat)/y_obs))

# Cosine similarity
def cos_sim(y_obs, y_hat):
    """Return the dot product of y_obs and y_hat divided by the product of their norms."""
    inner = np.dot(y_obs, y_hat)
    norm_prod = np.linalg.norm(y_obs)*np.linalg.norm(y_hat)
    return inner/norm_prod

# Coefficient of determination
def R2(y_obs, y_hat):
    """Return 1 - (sum of squared residuals) / (sum of squared deviations of y_obs from its mean)."""
    resid_ss = np.sum((y_obs - y_hat)**2)
    total_ss = np.sum((y_obs - np.mean(y_obs))**2)
    ratio = resid_ss/total_ss
    return (1 - ratio)

# Variational autoencoder - latent sampling layer
# Reparameterization trick: rather than sampling z from Q(z|X) directly,
# draw epsilon ~ N(0, I) and compute z = z_mean + sqrt(var) * epsilon
def sampling(args):
    """Draw a latent vector with the reparameterization trick.
    # Arguments
        args (tensor): pair (mean, log of variance) of Q(z|X)
    # Returns
        z (tensor): sampled latent vector
    """
    mu, log_variance = args
    # Noise has one row per sample in the batch and one column per latent dimension
    noise_shape = (K.shape(mu)[0], K.int_shape(mu)[1])
    # exp(0.5 * log(var)) is the standard deviation
    std_dev = K.exp(0.5 * log_variance)
    # random_normal defaults to mean = 0 and std = 1.0
    return mu + std_dev * K.random_normal(shape=noise_shape)

# Variational autoencoder
# As in the Keras tutorial, we define a custom loss function:
# NOTE(review): z_mean and z_log_var are not parameters of this function. In the code
# visible here they only exist as local variables inside vae(), so calling this function
# would raise NameError unless globals with those names are defined elsewhere - confirm.
# vae() does not call it: it builds its own loss tensor and registers it with add_loss().
def vae_loss(x, x_decoded_mean):
    """Return binary cross-entropy reconstruction loss plus the KL term.

    # Arguments
        x: original inputs
        x_decoded_mean: reconstructed inputs
    # Returns
        xent_loss + kl_loss
    """
    xent_loss = losses.binary_crossentropy(x, x_decoded_mean)
    # KL term, averaged (not summed) over the last axis
    kl_loss = - 0.5 * K.mean(1 + z_log_var - K.square(z_mean) - K.exp(z_log_var), axis=-1)
    return xent_loss + kl_loss

# Variational autoencoder
def vae(X_train, X_test, intermediate_dim, latent_dim, batch_size, epochs, verbose, plot_name):
    """Fit a dense variational autoencoder and encode the train and test sets.

    Architecture: input -> Dense(intermediate_dim, relu) -> (z_mean, z_log_var)
    -> sampled z -> Dense(intermediate_dim, relu) -> Dense(original_dim, sigmoid).

    # Arguments
        X_train, X_test: DataFrames (their `.index` labels the encoded output)
        intermediate_dim: units of the hidden layer in encoder and decoder
        latent_dim: size of the latent space (number of output columns)
        batch_size, epochs, verbose: forwarded to `fit`
        plot_name: stem of the CSV file names written to disk
    # Returns
        (X_train_encoded_vae, X_test_encoded_vae, vae model)
    # Side effects
        Prints both model summaries and the correlation matrix of the train
        encodings, and writes two CSV files named from the module-level
        `str_Dir_Plan_PC` and `rnd_state` (both must exist before the call).
    # Notes
        - The encodings are the *sampled* z (third encoder output), so they
          include random noise and differ between calls.
          NOTE(review): consider z_mean (first output) for deterministic features.
        - NOTE(review): sigmoid outputs with binary cross-entropy presume inputs
          scaled to [0, 1] - confirm how X_train is scaled by the callers.
    """
    original_dim = X_train.shape[1]
    latent_cols = ["PC_vae" + str(i) for i in range(latent_dim)]

    # Encoder: maps inputs to the parameters of the latent distribution
    inputs = Input(shape=(original_dim,), name='encoder_input')
    x = Dense(intermediate_dim, activation='relu', name='intermediate_encoding')(inputs)
    z_mean = Dense(latent_dim, name='z_mean')(x)
    z_log_var = Dense(latent_dim, name='z_log_var')(x)

    # Reparameterization trick pushes the sampling out as an input
    z = Lambda(sampling, name='z')([z_mean, z_log_var])
    encoder = Model(inputs=inputs, outputs=[z_mean, z_log_var, z], name='encoder')
    encoder.summary()

    # Decoder: maps a latent vector back to the input space
    latent_inputs = Input(shape=(latent_dim,), name='z_sampling')
    x = Dense(intermediate_dim, activation='relu', name='intermediate_decoding')(latent_inputs)
    outputs = Dense(original_dim, activation='sigmoid')(x)
    decoder = Model(latent_inputs, outputs, name='decoder')
    decoder.summary()

    # Full model: decoder applied to the sampled z
    outputs = decoder(encoder(inputs)[2])
    vae_model = Model(inputs, outputs, name='vae_mlp')

    # Loss = reconstruction (scaled by the number of inputs) + KL divergence
    reconstruction_loss = losses.binary_crossentropy(inputs, outputs) * original_dim
    kl_loss = -0.5 * K.sum(1 + z_log_var - K.square(z_mean) - K.exp(z_log_var), axis=-1)
    vae_model.add_loss(K.mean(reconstruction_loss + kl_loss))

    # The loss is attached through add_loss, hence loss=None
    vae_model.compile(optimizer='rmsprop', loss=None)

    # Train in eager mode; always switch it back off, even if fit() raises,
    # so a failed run does not leave the whole session in eager mode.
    tf.config.run_functions_eagerly(True)
    try:
        vae_model.fit(X_train, X_train,
                      shuffle=True,
                      epochs=epochs,
                      batch_size=batch_size,
                      validation_split=0.1,
                      verbose=verbose)
    finally:
        tf.config.run_functions_eagerly(False)

    def _encode(frame):
        # Sampled latent vector (third encoder output), labeled with the frame's index
        return pd.DataFrame(data = encoder.predict(frame)[2],
                            index = frame.index,
                            columns = latent_cols)

    X_train_encoded_vae = _encode(X_train)

    # Correlation matrix of the encoded training data
    print(X_train_encoded_vae.corr())

    X_test_encoded_vae = _encode(X_test)

    X_train_encoded_vae.to_csv(str_Dir_Plan_PC + str(rnd_state) + ' ' + plot_name + '_train.csv')
    X_test_encoded_vae.to_csv(str_Dir_Plan_PC + str(rnd_state) + ' ' + plot_name + '_test.csv')

    return X_train_encoded_vae, X_test_encoded_vae, vae_model

# Deep (two-layer) autoencoder
def deep_ae(X_train, X_test, intermediate_dim, latent_dim, batch_size, epochs, verbose, plot_name):
    """Fit a dense autoencoder and encode the train and test sets.

    Architecture: input -> Dense(intermediate_dim) -> Dense(latent_dim)
    -> Dense(intermediate_dim) -> Dense(input_dim), all with "selu"
    activations, trained with Adam on the mean squared reconstruction error.

    # Arguments
        X_train, X_test: DataFrames (their `.index` labels the encoded output)
        intermediate_dim: units of the intermediate encoding/decoding layers
        latent_dim: units of the bottleneck layer (number of output columns)
        batch_size, epochs, verbose: forwarded to `fit`
        plot_name: stem of the CSV file names written to disk
    # Returns
        (X_train_encoded_ae, X_test_encoded_ae)
    # Side effects
        Prints the model summary and writes two CSV files named from the
        module-level `str_Dir_Plan_PC` and `rnd_state` (both must exist
        before the call).
    """
    # Number of time series
    input_dim = X_train.shape[1]
    latent_cols = ["PC_ae" + str(i) for i in range(latent_dim)]

    # Autoencoder architecture
    input_img = Input(shape=(input_dim,), name = 'encoder_input')
    encoded_partial = Dense(intermediate_dim, activation = "selu", name = 'intermediate_encoding')(input_img)
    encoded = Dense(latent_dim, activation="selu", name = 'encoding_layer')(encoded_partial)
    decoded_partial = Dense(intermediate_dim, activation="selu", name = 'intermediate_decoding')(encoded)
    decoded = Dense(input_dim, activation="selu", name = 'decoding_layer')(decoded_partial)
    autoencoder = Model(input_img, decoded)
    autoencoder.compile(optimizer='adam', loss='mse')
    # summary() prints by itself and returns None, so it must not be wrapped in print()
    autoencoder.summary()

    # Fits the autoencoder (inputs are also the targets)
    autoencoder.fit(X_train, X_train,
                    epochs=epochs,
                    batch_size=batch_size,
                    shuffle=True,
                    validation_split=0.1,
                    verbose=verbose)

    # The encoder reuses the trained layers up to the bottleneck
    encoder = Model(input_img, encoded)

    def _encode(frame):
        # Bottleneck activations, labeled with the frame's index
        return pd.DataFrame(data = encoder.predict(frame),
                            index = frame.index,
                            columns = latent_cols)

    X_train_encoded_ae = _encode(X_train)
    X_test_encoded_ae = _encode(X_test)

    X_train_encoded_ae.to_csv(str_Dir_Plan_PC + str(rnd_state) + ' ' + plot_name + '_train.csv')
    X_test_encoded_ae.to_csv(str_Dir_Plan_PC + str(rnd_state) + ' ' + plot_name + '_test.csv')

    return X_train_encoded_ae, X_test_encoded_ae

# Principal component decomposition
def pca_decomp(X_train, X_test, threshold, plot_name):
    """Project train and test data on the leading principal components.

    The number of components is the smallest one whose cumulative explained
    variance ratio exceeds `threshold`; if no prefix exceeds it, all
    components are kept. PCA is fitted on X_train only and then applied to
    both sets.

    # Arguments
        X_train, X_test: DataFrames (their `.index` labels the output)
        threshold: share of explained variance to exceed (e.g. 0.9)
        plot_name: stem of the CSV file names written to disk
    # Returns
        (X_train_pca, X_test_pca) with columns PC0, PC1, ...
    # Side effects
        Writes two CSV files named from the module-level `str_Dir_Plan_PC`
        and `rnd_state` (both must exist before the call).
    """
    # Runs PCA for the maximum number of components possible, which is bounded
    # by both the number of series and the number of observations
    n_max = min(X_train.shape)
    pca = PCA(n_components = n_max, svd_solver = 'full')
    pca.fit(X_train)

    # Selects the number of PCs required for explained variance > threshold
    total_var = 0
    n_comp = 0
    for var in pca.explained_variance_ratio_:
        total_var += var
        n_comp += 1
        if total_var > threshold:
            break

    # Runs PCA for the number of components selected
    pca = PCA(n_components = n_comp, svd_solver = 'full')
    pca.fit(X_train)
    pc_cols = ["PC" + str(i) for i in range(n_comp)]

    # Applies the transformation fitted on the training data to both sets
    X_train_pca = pd.DataFrame(data = pca.transform(X_train),
                               index = X_train.index,
                               columns = pc_cols)
    X_test_pca = pd.DataFrame(data = pca.transform(X_test),
                              index = X_test.index,
                              columns = pc_cols)

    # Saves results
    X_train_pca.to_csv(str_Dir_Plan_PC + str(rnd_state) + ' ' + plot_name + '_train.csv')
    X_test_pca.to_csv(str_Dir_Plan_PC + str(rnd_state) + ' ' + plot_name + '_test.csv')

    return X_train_pca, X_test_pca


In [None]:
#####################################################################################################################
#                                                                                                                   #
# Data                                                                                                              #
#                                                                                                                   #
#####################################################################################################################

# Downloads the 'USREC' series from FRED through pandas_datareader (requires network access)
usrec = DataReader('USREC', 'fred', start=datetime(1947, 1, 1), end=datetime(2019, 4, 1))

# Monthly database: semicolon-separated CSV. The first column is parsed as dates and used
# as the index; the 'Date' column is then dropped.
# NOTE(review): this presumes the first column is the one named 'Date' - confirm against the file.
df_FRED_MD = pd.read_csv(filepath_or_buffer = str_Dir_Plan_FRED + str_Nome_Plan_FRED_MD + '.csv', sep = ';')
df_FRED_MD.index = pd.to_datetime(df_FRED_MD.iloc[:,0])
df_FRED_MD = df_FRED_MD.drop(columns = 'Date')
# Bare expressions such as .head()/.tail() are only rendered by Jupyter when they are
# the last expression of a cell.
df_FRED_MD.head()

df_FRED_MD.tail()

# Quarterly database: same layout and treatment as the monthly one
df_FRED_QD = pd.read_csv(filepath_or_buffer = str_Dir_Plan_FRED + str_Nome_Plan_FRED_QD + '.csv', sep = ';')
df_FRED_QD.index = pd.to_datetime(df_FRED_QD.iloc[:,0])
df_FRED_QD = df_FRED_QD.drop(columns = 'Date')
df_FRED_QD.head()

df_FRED_QD.tail()

# Description of the monthly series; its 'Index' column is dropped.
# The code further down reads this table by column position (0 = group, 2 = transformation, 3 = ticker).
df_FRED_Desc_MD = pd.read_csv(filepath_or_buffer = str_Dir_Plan_FRED + str_Nome_Plan_FRED_MD_Desc + '.csv', sep = ';')
df_FRED_Desc_MD = df_FRED_Desc_MD.drop(columns = 'Index')
df_FRED_Desc_MD.head()

df_FRED_Desc_MD.tail()

# Description of the quarterly series; its 'Index' column is dropped
df_FRED_Desc_QD = pd.read_csv(filepath_or_buffer = str_Dir_Plan_FRED + str_Nome_Plan_FRED_QD_Desc + '.csv', sep = ';')
df_FRED_Desc_QD = df_FRED_Desc_QD.drop(columns = 'Index')
df_FRED_Desc_QD.head()

df_FRED_Desc_QD.tail()

# Summary statistics of the raw (untransformed) databases
df_FRED_MD.describe()
df_FRED_QD.describe()

# Data transformation according to McCracken and Ng (2016)
# Monthly database

# Number of series in the monthly database (kept as a global: it is read again further down)
qty_series = df_FRED_MD.shape[1]
df_FRED_MD_t = df_FRED_MD.copy()

# Transformation label (exactly as written in the description file) -> function applied
# to the series in levels. Labels not listed here leave the series unchanged.
dict_transf = {
    "First difference of log": lambda s: np.log(s).diff(),
    "First difference": lambda s: s.diff(),
    "Log": lambda s: np.log(s),
    "Second difference of log": lambda s: np.log(s).diff().diff(),
    "Second difference": lambda s: s.diff().diff(),
    "First difference of (ratio - 1)": lambda s: s.pct_change().diff(),
}

# Iterates over every row of the description table (not over the number of data columns):
# the two counts need not match, and tickers missing from the data are skipped anyway.
# Description columns, by position: 2 = transformation label, 3 = ticker.
for i in range(df_FRED_Desc_MD.shape[0]):
    str_transf = df_FRED_Desc_MD.iloc[i,2]
    str_ticker = df_FRED_Desc_MD.iloc[i,3]
    if str_ticker in df_FRED_MD_t.columns and str_transf in dict_transf:
        # Always transforms the raw series, so a ticker listed twice is not transformed twice
        df_FRED_MD_t[str_ticker] = dict_transf[str_transf](df_FRED_MD[str_ticker])

df_FRED_MD_t = df_FRED_MD_t.iloc[2:,:] # Removes the first 2 rows due to differencing
df_FRED_MD_t.head()

# Normalization (mean = 0 and std = 1)
# NOTE(review): scale() is applied to the whole sample here, i.e. before any train/test
# split visible in this part of the notebook - if the split happens later, the test
# observations influence the mean/std used for the training data (look-ahead). Confirm.
df_FRED_MD_t_norm = pd.DataFrame(data = scale(df_FRED_MD_t), 
                                 index = df_FRED_MD_t.index, 
                                 columns = df_FRED_MD_t.columns)
df_FRED_MD_t_norm.head()

# Data preparation using normalized data

# Exclude nan from the transformed time series
# dropna() removes every row (date) in which at least one series is missing
df_FRED_MD_t_norm_ex_nan = df_FRED_MD_t_norm.dropna()

# Dataframe containing inflation time series only
# CPIAUCSL - Consumer Price Index for All Urban Consumers: All Items
df_cpi_t_norm = df_FRED_MD_t_norm_ex_nan["CPIAUCSL"]

# Dataframe excluding inflation time series
df_FRED_MD_t_norm_ex_nan_inf = df_FRED_MD_t_norm_ex_nan.drop(columns = ["CPIAUCSL","CPIAPPSL","CPITRNSL",
                                        "CPIMEDSL","CUSR0000SAC","CUSR0000SAD","CUSR0000SAS","CPIULFSL"])

# Dataframes containing shifted time series
# shift(k) moves the values k rows down, so the row dated t holds the values of k rows earlier;
# the first k rows become NaN and are removed by dropna().
# NOTE(review): the shift is positional - it equals a k-month lag only if the index has
# no gaps after the dropna() above. Confirm.
df_FRED_MD_t_norm_ex_nan_L1 = df_FRED_MD_t_norm_ex_nan.shift(1).dropna()
df_FRED_MD_t_norm_ex_nan_L2 = df_FRED_MD_t_norm_ex_nan.shift(2).dropna()
df_FRED_MD_t_norm_ex_nan_L3 = df_FRED_MD_t_norm_ex_nan.shift(3).dropna()
df_FRED_MD_t_norm_ex_nan_L4 = df_FRED_MD_t_norm_ex_nan.shift(4).dropna()
df_FRED_MD_t_norm_ex_nan_L5 = df_FRED_MD_t_norm_ex_nan.shift(5).dropna()
df_FRED_MD_t_norm_ex_nan_L6 = df_FRED_MD_t_norm_ex_nan.shift(6).dropna()
df_FRED_MD_t_norm_ex_nan_L7 = df_FRED_MD_t_norm_ex_nan.shift(7).dropna()
df_FRED_MD_t_norm_ex_nan_L8 = df_FRED_MD_t_norm_ex_nan.shift(8).dropna()
df_FRED_MD_t_norm_ex_nan_L9 = df_FRED_MD_t_norm_ex_nan.shift(9).dropna()
df_FRED_MD_t_norm_ex_nan_L10 = df_FRED_MD_t_norm_ex_nan.shift(10).dropna()
df_FRED_MD_t_norm_ex_nan_L11 = df_FRED_MD_t_norm_ex_nan.shift(11).dropna()
df_FRED_MD_t_norm_ex_nan_L12 = df_FRED_MD_t_norm_ex_nan.shift(12).dropna()

# Dataframes containing shifted time series - excluding inflation time series
df_FRED_MD_t_norm_ex_nan_inf_L1 = df_FRED_MD_t_norm_ex_nan_inf.shift(1).dropna()
df_FRED_MD_t_norm_ex_nan_inf_L2 = df_FRED_MD_t_norm_ex_nan_inf.shift(2).dropna()
df_FRED_MD_t_norm_ex_nan_inf_L3 = df_FRED_MD_t_norm_ex_nan_inf.shift(3).dropna()
df_FRED_MD_t_norm_ex_nan_inf_L4 = df_FRED_MD_t_norm_ex_nan_inf.shift(4).dropna()
df_FRED_MD_t_norm_ex_nan_inf_L5 = df_FRED_MD_t_norm_ex_nan_inf.shift(5).dropna()
df_FRED_MD_t_norm_ex_nan_inf_L6 = df_FRED_MD_t_norm_ex_nan_inf.shift(6).dropna()
df_FRED_MD_t_norm_ex_nan_inf_L7 = df_FRED_MD_t_norm_ex_nan_inf.shift(7).dropna()
df_FRED_MD_t_norm_ex_nan_inf_L8 = df_FRED_MD_t_norm_ex_nan_inf.shift(8).dropna()
df_FRED_MD_t_norm_ex_nan_inf_L9 = df_FRED_MD_t_norm_ex_nan_inf.shift(9).dropna()
df_FRED_MD_t_norm_ex_nan_inf_L10 = df_FRED_MD_t_norm_ex_nan_inf.shift(10).dropna()
df_FRED_MD_t_norm_ex_nan_inf_L11 = df_FRED_MD_t_norm_ex_nan_inf.shift(11).dropna()
df_FRED_MD_t_norm_ex_nan_inf_L12 = df_FRED_MD_t_norm_ex_nan_inf.shift(12).dropna()

# Grouping series according to their classification

# Group label (exactly as written in the description file) -> tickers of that group that
# are present in the cleaned database, in description-file order
dict_group_tickers = {
    "Output and Income": [],
    "Labor Market": [],
    "Housing": [],
    "Consumption, Orders and Inventories": [],
    "Money and Credit": [],
    "Interest and Exchange Rates": [],
    "Prices": [],
    "Stock Market": [],
}

# Iterates over every row of the description table (not over the number of data columns):
# the two counts need not match, and tickers missing from the data are skipped anyway.
# Description columns, by position: 0 = group label, 3 = ticker.
for i in range(df_FRED_Desc_MD.shape[0]):
    str_group = df_FRED_Desc_MD.iloc[i,0]
    str_ticker = df_FRED_Desc_MD.iloc[i,3]
    if str_group in dict_group_tickers and str_ticker in df_FRED_MD_t_norm_ex_nan.columns:
        dict_group_tickers[str_group].append(str_ticker)

# One column selection per group (instead of growing each frame with pd.concat inside the
# loop). Every frame keeps the full date index, even when a group has no series.

# Output and income
df_FRED_MD_t_norm_ex_nan_OI = df_FRED_MD_t_norm_ex_nan.loc[:, dict_group_tickers["Output and Income"]]

# Labor market
df_FRED_MD_t_norm_ex_nan_LM = df_FRED_MD_t_norm_ex_nan.loc[:, dict_group_tickers["Labor Market"]]

# Housing
df_FRED_MD_t_norm_ex_nan_H = df_FRED_MD_t_norm_ex_nan.loc[:, dict_group_tickers["Housing"]]

# Consumption, Orders, and Inventories
df_FRED_MD_t_norm_ex_nan_COI = df_FRED_MD_t_norm_ex_nan.loc[:, dict_group_tickers["Consumption, Orders and Inventories"]]

# Money and Credit
df_FRED_MD_t_norm_ex_nan_MC = df_FRED_MD_t_norm_ex_nan.loc[:, dict_group_tickers["Money and Credit"]]

# Interest and Exchange Rates
df_FRED_MD_t_norm_ex_nan_INTFX = df_FRED_MD_t_norm_ex_nan.loc[:, dict_group_tickers["Interest and Exchange Rates"]]

# Prices
df_FRED_MD_t_norm_ex_nan_P = df_FRED_MD_t_norm_ex_nan.loc[:, dict_group_tickers["Prices"]]

# Stock Market
df_FRED_MD_t_norm_ex_nan_S = df_FRED_MD_t_norm_ex_nan.loc[:, dict_group_tickers["Stock Market"]]

# Lagged versions of each group of series.
# shift(k) moves the values k rows down; the first k rows become NaN and are removed by dropna().
# Suffixes: OI = Output and Income, LM = Labor Market, H = Housing,
# COI = Consumption, Orders and Inventories, MC = Money and Credit,
# INTFX = Interest and Exchange Rates, P = Prices, S = Stock Market.

# Lag 1
df_FRED_MD_t_norm_ex_nan_OI_L1 = df_FRED_MD_t_norm_ex_nan_OI.shift(1).dropna()
df_FRED_MD_t_norm_ex_nan_LM_L1 = df_FRED_MD_t_norm_ex_nan_LM.shift(1).dropna()
df_FRED_MD_t_norm_ex_nan_H_L1 = df_FRED_MD_t_norm_ex_nan_H.shift(1).dropna()
df_FRED_MD_t_norm_ex_nan_COI_L1 = df_FRED_MD_t_norm_ex_nan_COI.shift(1).dropna()
df_FRED_MD_t_norm_ex_nan_MC_L1 = df_FRED_MD_t_norm_ex_nan_MC.shift(1).dropna()
df_FRED_MD_t_norm_ex_nan_INTFX_L1 = df_FRED_MD_t_norm_ex_nan_INTFX.shift(1).dropna()
df_FRED_MD_t_norm_ex_nan_P_L1 = df_FRED_MD_t_norm_ex_nan_P.shift(1).dropna()
df_FRED_MD_t_norm_ex_nan_S_L1 = df_FRED_MD_t_norm_ex_nan_S.shift(1).dropna()

# Lag 2
df_FRED_MD_t_norm_ex_nan_OI_L2 = df_FRED_MD_t_norm_ex_nan_OI.shift(2).dropna()
df_FRED_MD_t_norm_ex_nan_LM_L2 = df_FRED_MD_t_norm_ex_nan_LM.shift(2).dropna()
df_FRED_MD_t_norm_ex_nan_H_L2 = df_FRED_MD_t_norm_ex_nan_H.shift(2).dropna()
df_FRED_MD_t_norm_ex_nan_COI_L2 = df_FRED_MD_t_norm_ex_nan_COI.shift(2).dropna()
df_FRED_MD_t_norm_ex_nan_MC_L2 = df_FRED_MD_t_norm_ex_nan_MC.shift(2).dropna()
df_FRED_MD_t_norm_ex_nan_INTFX_L2 = df_FRED_MD_t_norm_ex_nan_INTFX.shift(2).dropna()
df_FRED_MD_t_norm_ex_nan_P_L2 = df_FRED_MD_t_norm_ex_nan_P.shift(2).dropna()
df_FRED_MD_t_norm_ex_nan_S_L2 = df_FRED_MD_t_norm_ex_nan_S.shift(2).dropna()

# Lag 3
df_FRED_MD_t_norm_ex_nan_OI_L3 = df_FRED_MD_t_norm_ex_nan_OI.shift(3).dropna()
df_FRED_MD_t_norm_ex_nan_LM_L3 = df_FRED_MD_t_norm_ex_nan_LM.shift(3).dropna()
df_FRED_MD_t_norm_ex_nan_H_L3 = df_FRED_MD_t_norm_ex_nan_H.shift(3).dropna()
df_FRED_MD_t_norm_ex_nan_COI_L3 = df_FRED_MD_t_norm_ex_nan_COI.shift(3).dropna()
df_FRED_MD_t_norm_ex_nan_MC_L3 = df_FRED_MD_t_norm_ex_nan_MC.shift(3).dropna()
df_FRED_MD_t_norm_ex_nan_INTFX_L3 = df_FRED_MD_t_norm_ex_nan_INTFX.shift(3).dropna()
df_FRED_MD_t_norm_ex_nan_P_L3 = df_FRED_MD_t_norm_ex_nan_P.shift(3).dropna()
df_FRED_MD_t_norm_ex_nan_S_L3 = df_FRED_MD_t_norm_ex_nan_S.shift(3).dropna()

# Lag 4
df_FRED_MD_t_norm_ex_nan_OI_L4 = df_FRED_MD_t_norm_ex_nan_OI.shift(4).dropna()
df_FRED_MD_t_norm_ex_nan_LM_L4 = df_FRED_MD_t_norm_ex_nan_LM.shift(4).dropna()
df_FRED_MD_t_norm_ex_nan_H_L4 = df_FRED_MD_t_norm_ex_nan_H.shift(4).dropna()
df_FRED_MD_t_norm_ex_nan_COI_L4 = df_FRED_MD_t_norm_ex_nan_COI.shift(4).dropna()
df_FRED_MD_t_norm_ex_nan_MC_L4 = df_FRED_MD_t_norm_ex_nan_MC.shift(4).dropna()
df_FRED_MD_t_norm_ex_nan_INTFX_L4 = df_FRED_MD_t_norm_ex_nan_INTFX.shift(4).dropna()
df_FRED_MD_t_norm_ex_nan_P_L4 = df_FRED_MD_t_norm_ex_nan_P.shift(4).dropna()
df_FRED_MD_t_norm_ex_nan_S_L4 = df_FRED_MD_t_norm_ex_nan_S.shift(4).dropna()

# Lag 12
df_FRED_MD_t_norm_ex_nan_OI_L12 = df_FRED_MD_t_norm_ex_nan_OI.shift(12).dropna()
df_FRED_MD_t_norm_ex_nan_LM_L12 = df_FRED_MD_t_norm_ex_nan_LM.shift(12).dropna()
df_FRED_MD_t_norm_ex_nan_H_L12 = df_FRED_MD_t_norm_ex_nan_H.shift(12).dropna()
df_FRED_MD_t_norm_ex_nan_COI_L12 = df_FRED_MD_t_norm_ex_nan_COI.shift(12).dropna()
df_FRED_MD_t_norm_ex_nan_MC_L12 = df_FRED_MD_t_norm_ex_nan_MC.shift(12).dropna()
df_FRED_MD_t_norm_ex_nan_INTFX_L12 = df_FRED_MD_t_norm_ex_nan_INTFX.shift(12).dropna()
df_FRED_MD_t_norm_ex_nan_P_L12 = df_FRED_MD_t_norm_ex_nan_P.shift(12).dropna()
df_FRED_MD_t_norm_ex_nan_S_L12 = df_FRED_MD_t_norm_ex_nan_S.shift(12).dropna()

# Index adjustment: the index of the 12-period lag of the full database is the
# reference that every other frame is restricted to.
index_ref = df_FRED_MD_t_norm_ex_nan_L12.index

# Inflation (first entry) and the full database (current values, lags 1-12),
# each restricted to the rows labelled by index_ref.
(
    df_cpi_t_norm,
    df_FRED_MD_t_norm_ex_nan,
    df_FRED_MD_t_norm_ex_nan_L1,
    df_FRED_MD_t_norm_ex_nan_L2,
    df_FRED_MD_t_norm_ex_nan_L3,
    df_FRED_MD_t_norm_ex_nan_L4,
    df_FRED_MD_t_norm_ex_nan_L5,
    df_FRED_MD_t_norm_ex_nan_L6,
    df_FRED_MD_t_norm_ex_nan_L7,
    df_FRED_MD_t_norm_ex_nan_L8,
    df_FRED_MD_t_norm_ex_nan_L9,
    df_FRED_MD_t_norm_ex_nan_L10,
    df_FRED_MD_t_norm_ex_nan_L11,
    df_FRED_MD_t_norm_ex_nan_L12,
) = (
    unaligned_frame.loc[index_ref]
    for unaligned_frame in (
        df_cpi_t_norm,
        df_FRED_MD_t_norm_ex_nan,
        df_FRED_MD_t_norm_ex_nan_L1,
        df_FRED_MD_t_norm_ex_nan_L2,
        df_FRED_MD_t_norm_ex_nan_L3,
        df_FRED_MD_t_norm_ex_nan_L4,
        df_FRED_MD_t_norm_ex_nan_L5,
        df_FRED_MD_t_norm_ex_nan_L6,
        df_FRED_MD_t_norm_ex_nan_L7,
        df_FRED_MD_t_norm_ex_nan_L8,
        df_FRED_MD_t_norm_ex_nan_L9,
        df_FRED_MD_t_norm_ex_nan_L10,
        df_FRED_MD_t_norm_ex_nan_L11,
        df_FRED_MD_t_norm_ex_nan_L12,
    )
)

# Output and Income: current values and lags 1-4 and 12, restricted to index_ref
(
    df_FRED_MD_t_norm_ex_nan_OI,
    df_FRED_MD_t_norm_ex_nan_OI_L1,
    df_FRED_MD_t_norm_ex_nan_OI_L2,
    df_FRED_MD_t_norm_ex_nan_OI_L3,
    df_FRED_MD_t_norm_ex_nan_OI_L4,
    df_FRED_MD_t_norm_ex_nan_OI_L12,
) = (
    oi_frame.loc[index_ref]
    for oi_frame in (
        df_FRED_MD_t_norm_ex_nan_OI,
        df_FRED_MD_t_norm_ex_nan_OI_L1,
        df_FRED_MD_t_norm_ex_nan_OI_L2,
        df_FRED_MD_t_norm_ex_nan_OI_L3,
        df_FRED_MD_t_norm_ex_nan_OI_L4,
        df_FRED_MD_t_norm_ex_nan_OI_L12,
    )
)

# Labor market: current values and lags 1-4 and 12, restricted to index_ref
(
    df_FRED_MD_t_norm_ex_nan_LM,
    df_FRED_MD_t_norm_ex_nan_LM_L1,
    df_FRED_MD_t_norm_ex_nan_LM_L2,
    df_FRED_MD_t_norm_ex_nan_LM_L3,
    df_FRED_MD_t_norm_ex_nan_LM_L4,
    df_FRED_MD_t_norm_ex_nan_LM_L12,
) = (
    lm_frame.loc[index_ref]
    for lm_frame in (
        df_FRED_MD_t_norm_ex_nan_LM,
        df_FRED_MD_t_norm_ex_nan_LM_L1,
        df_FRED_MD_t_norm_ex_nan_LM_L2,
        df_FRED_MD_t_norm_ex_nan_LM_L3,
        df_FRED_MD_t_norm_ex_nan_LM_L4,
        df_FRED_MD_t_norm_ex_nan_LM_L12,
    )
)

# Housing: current values and lags 1-4 and 12, restricted to index_ref
(
    df_FRED_MD_t_norm_ex_nan_H,
    df_FRED_MD_t_norm_ex_nan_H_L1,
    df_FRED_MD_t_norm_ex_nan_H_L2,
    df_FRED_MD_t_norm_ex_nan_H_L3,
    df_FRED_MD_t_norm_ex_nan_H_L4,
    df_FRED_MD_t_norm_ex_nan_H_L12,
) = (
    housing_frame.loc[index_ref]
    for housing_frame in (
        df_FRED_MD_t_norm_ex_nan_H,
        df_FRED_MD_t_norm_ex_nan_H_L1,
        df_FRED_MD_t_norm_ex_nan_H_L2,
        df_FRED_MD_t_norm_ex_nan_H_L3,
        df_FRED_MD_t_norm_ex_nan_H_L4,
        df_FRED_MD_t_norm_ex_nan_H_L12,
    )
)

# Consumption, Orders, and Inventories: current values and lags 1-4 and 12, restricted to index_ref
(
    df_FRED_MD_t_norm_ex_nan_COI,
    df_FRED_MD_t_norm_ex_nan_COI_L1,
    df_FRED_MD_t_norm_ex_nan_COI_L2,
    df_FRED_MD_t_norm_ex_nan_COI_L3,
    df_FRED_MD_t_norm_ex_nan_COI_L4,
    df_FRED_MD_t_norm_ex_nan_COI_L12,
) = (
    coi_frame.loc[index_ref]
    for coi_frame in (
        df_FRED_MD_t_norm_ex_nan_COI,
        df_FRED_MD_t_norm_ex_nan_COI_L1,
        df_FRED_MD_t_norm_ex_nan_COI_L2,
        df_FRED_MD_t_norm_ex_nan_COI_L3,
        df_FRED_MD_t_norm_ex_nan_COI_L4,
        df_FRED_MD_t_norm_ex_nan_COI_L12,
    )
)

# Money and Credit: current values and lags 1-4 and 12, restricted to index_ref
(
    df_FRED_MD_t_norm_ex_nan_MC,
    df_FRED_MD_t_norm_ex_nan_MC_L1,
    df_FRED_MD_t_norm_ex_nan_MC_L2,
    df_FRED_MD_t_norm_ex_nan_MC_L3,
    df_FRED_MD_t_norm_ex_nan_MC_L4,
    df_FRED_MD_t_norm_ex_nan_MC_L12,
) = (
    mc_frame.loc[index_ref]
    for mc_frame in (
        df_FRED_MD_t_norm_ex_nan_MC,
        df_FRED_MD_t_norm_ex_nan_MC_L1,
        df_FRED_MD_t_norm_ex_nan_MC_L2,
        df_FRED_MD_t_norm_ex_nan_MC_L3,
        df_FRED_MD_t_norm_ex_nan_MC_L4,
        df_FRED_MD_t_norm_ex_nan_MC_L12,
    )
)

# Interest and Exchange Rates: current values and lags 1-4 and 12, restricted to index_ref
(
    df_FRED_MD_t_norm_ex_nan_INTFX,
    df_FRED_MD_t_norm_ex_nan_INTFX_L1,
    df_FRED_MD_t_norm_ex_nan_INTFX_L2,
    df_FRED_MD_t_norm_ex_nan_INTFX_L3,
    df_FRED_MD_t_norm_ex_nan_INTFX_L4,
    df_FRED_MD_t_norm_ex_nan_INTFX_L12,
) = (
    intfx_frame.loc[index_ref]
    for intfx_frame in (
        df_FRED_MD_t_norm_ex_nan_INTFX,
        df_FRED_MD_t_norm_ex_nan_INTFX_L1,
        df_FRED_MD_t_norm_ex_nan_INTFX_L2,
        df_FRED_MD_t_norm_ex_nan_INTFX_L3,
        df_FRED_MD_t_norm_ex_nan_INTFX_L4,
        df_FRED_MD_t_norm_ex_nan_INTFX_L12,
    )
)

# Prices: current values and lags 1-4 and 12, restricted to index_ref
(
    df_FRED_MD_t_norm_ex_nan_P,
    df_FRED_MD_t_norm_ex_nan_P_L1,
    df_FRED_MD_t_norm_ex_nan_P_L2,
    df_FRED_MD_t_norm_ex_nan_P_L3,
    df_FRED_MD_t_norm_ex_nan_P_L4,
    df_FRED_MD_t_norm_ex_nan_P_L12,
) = (
    price_frame.loc[index_ref]
    for price_frame in (
        df_FRED_MD_t_norm_ex_nan_P,
        df_FRED_MD_t_norm_ex_nan_P_L1,
        df_FRED_MD_t_norm_ex_nan_P_L2,
        df_FRED_MD_t_norm_ex_nan_P_L3,
        df_FRED_MD_t_norm_ex_nan_P_L4,
        df_FRED_MD_t_norm_ex_nan_P_L12,
    )
)

# Stock Market: current values and lags 1-4 and 12, restricted to index_ref
(
    df_FRED_MD_t_norm_ex_nan_S,
    df_FRED_MD_t_norm_ex_nan_S_L1,
    df_FRED_MD_t_norm_ex_nan_S_L2,
    df_FRED_MD_t_norm_ex_nan_S_L3,
    df_FRED_MD_t_norm_ex_nan_S_L4,
    df_FRED_MD_t_norm_ex_nan_S_L12,
) = (
    stock_frame.loc[index_ref]
    for stock_frame in (
        df_FRED_MD_t_norm_ex_nan_S,
        df_FRED_MD_t_norm_ex_nan_S_L1,
        df_FRED_MD_t_norm_ex_nan_S_L2,
        df_FRED_MD_t_norm_ex_nan_S_L3,
        df_FRED_MD_t_norm_ex_nan_S_L4,
        df_FRED_MD_t_norm_ex_nan_S_L12,
    )
)


In [None]:
#####################################################################################################################
#                                                                                                                   #
# Training and Test Samples                                                                                         #
#                                                                                                                   #
#####################################################################################################################

# Complete database: short aliases for the target (y), the current-period
# regressors (X) and their lags 1-12 (X_L1 ... X_L12). These are plain name
# bindings; no data is copied.

y = df_cpi_t_norm
X = df_FRED_MD_t_norm_ex_nan
(
    X_L1, X_L2, X_L3, X_L4, X_L5, X_L6,
    X_L7, X_L8, X_L9, X_L10, X_L11, X_L12,
) = (
    df_FRED_MD_t_norm_ex_nan_L1,
    df_FRED_MD_t_norm_ex_nan_L2,
    df_FRED_MD_t_norm_ex_nan_L3,
    df_FRED_MD_t_norm_ex_nan_L4,
    df_FRED_MD_t_norm_ex_nan_L5,
    df_FRED_MD_t_norm_ex_nan_L6,
    df_FRED_MD_t_norm_ex_nan_L7,
    df_FRED_MD_t_norm_ex_nan_L8,
    df_FRED_MD_t_norm_ex_nan_L9,
    df_FRED_MD_t_norm_ex_nan_L10,
    df_FRED_MD_t_norm_ex_nan_L11,
    df_FRED_MD_t_norm_ex_nan_L12,
)

# Indices for splitting samples

for rnd_state in range(0,100):
    
    # Partition the reference index into train and test labels for this iteration.
    # random_state = rnd_state ties the partition to the loop counter, so each of the
    # iterations gets its own reproducible split. share_test_size is not defined in
    # this section of the notebook.
    # NOTE(review): train_test_split shuffles by default; if index_ref is chronological,
    # train and test rows are interleaved in time, which matters with lagged
    # regressors - confirm this is intended.
    index_train, index_test = train_test_split(index_ref, test_size = share_test_size, random_state = rnd_state)
    
    # Save raw database: one CSV per unsplit frame. Every file name is the output
    # directory prefix, this iteration's rnd_state, a space, and the frame's label.

    _raw_prefix = str_Dir_Plan_PC + str(rnd_state) + ' '
    for _raw_label, _raw_frame in (
        ('X', X),
        ('X_L1', X_L1),
        ('X_L2', X_L2),
        ('X_L3', X_L3),
        ('X_L4', X_L4),
        ('X_L5', X_L5),
        ('X_L6', X_L6),
        ('X_L7', X_L7),
        ('X_L8', X_L8),
        ('X_L9', X_L9),
        ('X_L10', X_L10),
        ('X_L11', X_L11),
        ('X_L12', X_L12),
        ('y', y),
    ):
        _raw_frame.to_csv(_raw_prefix + _raw_label + '.csv')
    
    # Split samples: the same train / test labels select rows from the target and
    # from every full-database frame (current values and lags 1-12).

    _unsplit_frames = (
        y, X,
        X_L1, X_L2, X_L3, X_L4, X_L5, X_L6,
        X_L7, X_L8, X_L9, X_L10, X_L11, X_L12,
    )
    (
        y_train, X_train,
        X_L1_train, X_L2_train, X_L3_train, X_L4_train, X_L5_train, X_L6_train,
        X_L7_train, X_L8_train, X_L9_train, X_L10_train, X_L11_train, X_L12_train,
    ) = (unsplit.loc[index_train] for unsplit in _unsplit_frames)
    (
        y_test, X_test,
        X_L1_test, X_L2_test, X_L3_test, X_L4_test, X_L5_test, X_L6_test,
        X_L7_test, X_L8_test, X_L9_test, X_L10_test, X_L11_test, X_L12_test,
    ) = (unsplit.loc[index_test] for unsplit in _unsplit_frames)
       
    # Normalization
    #
    # Every train/test pair is standardized with the column means and standard
    # deviations of the TRAINING part only. Standardizing the test part with its
    # own moments (a separate scale() call per sample) puts train and test on
    # different affine transforms and lets test-set statistics shape the
    # evaluation data.
    # NOTE(review): X_train / X_test (current-period regressors) are not scaled
    # here, exactly as before - confirm that is intended.

    def _standardize_with_train_moments(train_part, test_part, keep_columns = True):
        """Standardize a train/test pair using the train part's mean and std.

        train_part, test_part: pandas objects with the same columns; a 1-D
            Series is treated as a single column.
        keep_columns: when False the results get default integer column
            labels (used for the target, which was stored that way before).

        Returns a (train, test) tuple of DataFrames that keep the original
        row indices.
        """
        train_values = np.asarray(train_part)
        test_values = np.asarray(test_part)
        if train_values.ndim == 1:
            # StandardScaler needs 2-D input
            train_values = train_values.reshape(-1, 1)
            test_values = test_values.reshape(-1, 1)
        scaler = StandardScaler().fit(train_values)
        column_labels = train_part.columns if keep_columns else None
        scaled_train = pd.DataFrame(data = scaler.transform(train_values),
                                    index = train_part.index, columns = column_labels)
        scaled_test = pd.DataFrame(data = scaler.transform(test_values),
                                   index = test_part.index, columns = column_labels)
        return scaled_train, scaled_test

    y_train, y_test = _standardize_with_train_moments(y_train, y_test, keep_columns = False)

    X_L1_train, X_L1_test = _standardize_with_train_moments(X_L1_train, X_L1_test)
    X_L2_train, X_L2_test = _standardize_with_train_moments(X_L2_train, X_L2_test)
    X_L3_train, X_L3_test = _standardize_with_train_moments(X_L3_train, X_L3_test)
    X_L4_train, X_L4_test = _standardize_with_train_moments(X_L4_train, X_L4_test)
    X_L5_train, X_L5_test = _standardize_with_train_moments(X_L5_train, X_L5_test)
    X_L6_train, X_L6_test = _standardize_with_train_moments(X_L6_train, X_L6_test)
    X_L7_train, X_L7_test = _standardize_with_train_moments(X_L7_train, X_L7_test)
    X_L8_train, X_L8_test = _standardize_with_train_moments(X_L8_train, X_L8_test)
    X_L9_train, X_L9_test = _standardize_with_train_moments(X_L9_train, X_L9_test)
    X_L10_train, X_L10_test = _standardize_with_train_moments(X_L10_train, X_L10_test)
    X_L11_train, X_L11_test = _standardize_with_train_moments(X_L11_train, X_L11_test)
    X_L12_train, X_L12_test = _standardize_with_train_moments(X_L12_train, X_L12_test)
    
    # Save the split samples, one CSV per train / test part. By this point y and
    # the lagged frames have been standardized, so only X_train / X_test are
    # written as they came out of the split. File names follow the pattern
    # "<rnd_state> <label>_train.csv" / "<rnd_state> <label>_test.csv".

    _split_prefix = str_Dir_Plan_PC + str(rnd_state) + ' '
    for _split_label, _train_part, _test_part in (
        ('y', y_train, y_test),
        ('X', X_train, X_test),
        ('X_L1', X_L1_train, X_L1_test),
        ('X_L2', X_L2_train, X_L2_test),
        ('X_L3', X_L3_train, X_L3_test),
        ('X_L4', X_L4_train, X_L4_test),
        ('X_L5', X_L5_train, X_L5_test),
        ('X_L6', X_L6_train, X_L6_test),
        ('X_L7', X_L7_train, X_L7_test),
        ('X_L8', X_L8_train, X_L8_test),
        ('X_L9', X_L9_train, X_L9_test),
        ('X_L10', X_L10_train, X_L10_test),
        ('X_L11', X_L11_train, X_L11_test),
        ('X_L12', X_L12_train, X_L12_test),
    ):
        _train_part.to_csv(_split_prefix + _split_label + '_train.csv')
        _test_part.to_csv(_split_prefix + _split_label + '_test.csv')
    
    
    # Grouped series: short aliases for the eight thematic groups
    # (OI, LM, H, COI, MC, INTFX, P, S), for the current period and for
    # lags 1, 2, 3, 4 and 12. Plain name bindings; none depends on rnd_state.

    X_OI, X_LM, X_H, X_COI, X_MC, X_INTFX, X_P, X_S = (
        df_FRED_MD_t_norm_ex_nan_OI,
        df_FRED_MD_t_norm_ex_nan_LM,
        df_FRED_MD_t_norm_ex_nan_H,
        df_FRED_MD_t_norm_ex_nan_COI,
        df_FRED_MD_t_norm_ex_nan_MC,
        df_FRED_MD_t_norm_ex_nan_INTFX,
        df_FRED_MD_t_norm_ex_nan_P,
        df_FRED_MD_t_norm_ex_nan_S,
    )

    X_OI_L1, X_LM_L1, X_H_L1, X_COI_L1, X_MC_L1, X_INTFX_L1, X_P_L1, X_S_L1 = (
        df_FRED_MD_t_norm_ex_nan_OI_L1,
        df_FRED_MD_t_norm_ex_nan_LM_L1,
        df_FRED_MD_t_norm_ex_nan_H_L1,
        df_FRED_MD_t_norm_ex_nan_COI_L1,
        df_FRED_MD_t_norm_ex_nan_MC_L1,
        df_FRED_MD_t_norm_ex_nan_INTFX_L1,
        df_FRED_MD_t_norm_ex_nan_P_L1,
        df_FRED_MD_t_norm_ex_nan_S_L1,
    )

    X_OI_L2, X_LM_L2, X_H_L2, X_COI_L2, X_MC_L2, X_INTFX_L2, X_P_L2, X_S_L2 = (
        df_FRED_MD_t_norm_ex_nan_OI_L2,
        df_FRED_MD_t_norm_ex_nan_LM_L2,
        df_FRED_MD_t_norm_ex_nan_H_L2,
        df_FRED_MD_t_norm_ex_nan_COI_L2,
        df_FRED_MD_t_norm_ex_nan_MC_L2,
        df_FRED_MD_t_norm_ex_nan_INTFX_L2,
        df_FRED_MD_t_norm_ex_nan_P_L2,
        df_FRED_MD_t_norm_ex_nan_S_L2,
    )

    X_OI_L3, X_LM_L3, X_H_L3, X_COI_L3, X_MC_L3, X_INTFX_L3, X_P_L3, X_S_L3 = (
        df_FRED_MD_t_norm_ex_nan_OI_L3,
        df_FRED_MD_t_norm_ex_nan_LM_L3,
        df_FRED_MD_t_norm_ex_nan_H_L3,
        df_FRED_MD_t_norm_ex_nan_COI_L3,
        df_FRED_MD_t_norm_ex_nan_MC_L3,
        df_FRED_MD_t_norm_ex_nan_INTFX_L3,
        df_FRED_MD_t_norm_ex_nan_P_L3,
        df_FRED_MD_t_norm_ex_nan_S_L3,
    )

    X_OI_L4, X_LM_L4, X_H_L4, X_COI_L4, X_MC_L4, X_INTFX_L4, X_P_L4, X_S_L4 = (
        df_FRED_MD_t_norm_ex_nan_OI_L4,
        df_FRED_MD_t_norm_ex_nan_LM_L4,
        df_FRED_MD_t_norm_ex_nan_H_L4,
        df_FRED_MD_t_norm_ex_nan_COI_L4,
        df_FRED_MD_t_norm_ex_nan_MC_L4,
        df_FRED_MD_t_norm_ex_nan_INTFX_L4,
        df_FRED_MD_t_norm_ex_nan_P_L4,
        df_FRED_MD_t_norm_ex_nan_S_L4,
    )

    X_OI_L12, X_LM_L12, X_H_L12, X_COI_L12, X_MC_L12, X_INTFX_L12, X_P_L12, X_S_L12 = (
        df_FRED_MD_t_norm_ex_nan_OI_L12,
        df_FRED_MD_t_norm_ex_nan_LM_L12,
        df_FRED_MD_t_norm_ex_nan_H_L12,
        df_FRED_MD_t_norm_ex_nan_COI_L12,
        df_FRED_MD_t_norm_ex_nan_MC_L12,
        df_FRED_MD_t_norm_ex_nan_INTFX_L12,
        df_FRED_MD_t_norm_ex_nan_P_L12,
        df_FRED_MD_t_norm_ex_nan_S_L12,
    )
    
    # Split grouped series: for the current period and each lag, select the train
    # rows and then the test rows of all eight groups with the labels drawn for
    # this iteration.

    _groups_current = (X_OI, X_LM, X_H, X_COI, X_MC, X_INTFX, X_P, X_S)
    (X_OI_train, X_LM_train, X_H_train, X_COI_train,
     X_MC_train, X_INTFX_train, X_P_train, X_S_train) = (
        grouped.loc[index_train] for grouped in _groups_current)
    (X_OI_test, X_LM_test, X_H_test, X_COI_test,
     X_MC_test, X_INTFX_test, X_P_test, X_S_test) = (
        grouped.loc[index_test] for grouped in _groups_current)

    _groups_lag1 = (X_OI_L1, X_LM_L1, X_H_L1, X_COI_L1, X_MC_L1, X_INTFX_L1, X_P_L1, X_S_L1)
    (X_OI_L1_train, X_LM_L1_train, X_H_L1_train, X_COI_L1_train,
     X_MC_L1_train, X_INTFX_L1_train, X_P_L1_train, X_S_L1_train) = (
        grouped.loc[index_train] for grouped in _groups_lag1)
    (X_OI_L1_test, X_LM_L1_test, X_H_L1_test, X_COI_L1_test,
     X_MC_L1_test, X_INTFX_L1_test, X_P_L1_test, X_S_L1_test) = (
        grouped.loc[index_test] for grouped in _groups_lag1)

    _groups_lag2 = (X_OI_L2, X_LM_L2, X_H_L2, X_COI_L2, X_MC_L2, X_INTFX_L2, X_P_L2, X_S_L2)
    (X_OI_L2_train, X_LM_L2_train, X_H_L2_train, X_COI_L2_train,
     X_MC_L2_train, X_INTFX_L2_train, X_P_L2_train, X_S_L2_train) = (
        grouped.loc[index_train] for grouped in _groups_lag2)
    (X_OI_L2_test, X_LM_L2_test, X_H_L2_test, X_COI_L2_test,
     X_MC_L2_test, X_INTFX_L2_test, X_P_L2_test, X_S_L2_test) = (
        grouped.loc[index_test] for grouped in _groups_lag2)

    _groups_lag3 = (X_OI_L3, X_LM_L3, X_H_L3, X_COI_L3, X_MC_L3, X_INTFX_L3, X_P_L3, X_S_L3)
    (X_OI_L3_train, X_LM_L3_train, X_H_L3_train, X_COI_L3_train,
     X_MC_L3_train, X_INTFX_L3_train, X_P_L3_train, X_S_L3_train) = (
        grouped.loc[index_train] for grouped in _groups_lag3)
    (X_OI_L3_test, X_LM_L3_test, X_H_L3_test, X_COI_L3_test,
     X_MC_L3_test, X_INTFX_L3_test, X_P_L3_test, X_S_L3_test) = (
        grouped.loc[index_test] for grouped in _groups_lag3)

    _groups_lag4 = (X_OI_L4, X_LM_L4, X_H_L4, X_COI_L4, X_MC_L4, X_INTFX_L4, X_P_L4, X_S_L4)
    (X_OI_L4_train, X_LM_L4_train, X_H_L4_train, X_COI_L4_train,
     X_MC_L4_train, X_INTFX_L4_train, X_P_L4_train, X_S_L4_train) = (
        grouped.loc[index_train] for grouped in _groups_lag4)
    (X_OI_L4_test, X_LM_L4_test, X_H_L4_test, X_COI_L4_test,
     X_MC_L4_test, X_INTFX_L4_test, X_P_L4_test, X_S_L4_test) = (
        grouped.loc[index_test] for grouped in _groups_lag4)

    _groups_lag12 = (X_OI_L12, X_LM_L12, X_H_L12, X_COI_L12, X_MC_L12, X_INTFX_L12, X_P_L12, X_S_L12)
    (X_OI_L12_train, X_LM_L12_train, X_H_L12_train, X_COI_L12_train,
     X_MC_L12_train, X_INTFX_L12_train, X_P_L12_train, X_S_L12_train) = (
        grouped.loc[index_train] for grouped in _groups_lag12)
    (X_OI_L12_test, X_LM_L12_test, X_H_L12_test, X_COI_L12_test,
     X_MC_L12_test, X_INTFX_L12_test, X_P_L12_test, X_S_L12_test) = (
        grouped.loc[index_test] for grouped in _groups_lag12)
    
    # Normalization of the grouped series
    #
    # Every train/test pair is standardized with the column means and standard
    # deviations of the TRAINING part only. Standardizing the test part with its
    # own moments (a separate scale() call per sample) puts train and test on
    # different affine transforms and lets test-set statistics shape the
    # evaluation data.

    def _standardize_group_split(train_part, test_part):
        """Standardize a grouped train/test pair using the train part's mean and std.

        train_part, test_part: DataFrames with the same columns.

        Returns a (train, test) tuple of DataFrames that keep the original
        row indices and column labels.
        """
        # Plain arrays are passed to the scaler; labels are re-attached below.
        scaler = StandardScaler().fit(train_part.to_numpy())
        scaled_train = pd.DataFrame(data = scaler.transform(train_part.to_numpy()),
                                    index = train_part.index, columns = train_part.columns)
        scaled_test = pd.DataFrame(data = scaler.transform(test_part.to_numpy()),
                                   index = test_part.index, columns = test_part.columns)
        return scaled_train, scaled_test

    # Current period
    X_OI_train, X_OI_test = _standardize_group_split(X_OI_train, X_OI_test)
    X_LM_train, X_LM_test = _standardize_group_split(X_LM_train, X_LM_test)
    X_H_train, X_H_test = _standardize_group_split(X_H_train, X_H_test)
    X_COI_train, X_COI_test = _standardize_group_split(X_COI_train, X_COI_test)
    X_MC_train, X_MC_test = _standardize_group_split(X_MC_train, X_MC_test)
    X_INTFX_train, X_INTFX_test = _standardize_group_split(X_INTFX_train, X_INTFX_test)
    X_P_train, X_P_test = _standardize_group_split(X_P_train, X_P_test)
    X_S_train, X_S_test = _standardize_group_split(X_S_train, X_S_test)

    # Lag 1
    X_OI_L1_train, X_OI_L1_test = _standardize_group_split(X_OI_L1_train, X_OI_L1_test)
    X_LM_L1_train, X_LM_L1_test = _standardize_group_split(X_LM_L1_train, X_LM_L1_test)
    X_H_L1_train, X_H_L1_test = _standardize_group_split(X_H_L1_train, X_H_L1_test)
    X_COI_L1_train, X_COI_L1_test = _standardize_group_split(X_COI_L1_train, X_COI_L1_test)
    X_MC_L1_train, X_MC_L1_test = _standardize_group_split(X_MC_L1_train, X_MC_L1_test)
    X_INTFX_L1_train, X_INTFX_L1_test = _standardize_group_split(X_INTFX_L1_train, X_INTFX_L1_test)
    X_P_L1_train, X_P_L1_test = _standardize_group_split(X_P_L1_train, X_P_L1_test)
    X_S_L1_train, X_S_L1_test = _standardize_group_split(X_S_L1_train, X_S_L1_test)

    # Lag 2
    X_OI_L2_train, X_OI_L2_test = _standardize_group_split(X_OI_L2_train, X_OI_L2_test)
    X_LM_L2_train, X_LM_L2_test = _standardize_group_split(X_LM_L2_train, X_LM_L2_test)
    X_H_L2_train, X_H_L2_test = _standardize_group_split(X_H_L2_train, X_H_L2_test)
    X_COI_L2_train, X_COI_L2_test = _standardize_group_split(X_COI_L2_train, X_COI_L2_test)
    X_MC_L2_train, X_MC_L2_test = _standardize_group_split(X_MC_L2_train, X_MC_L2_test)
    X_INTFX_L2_train, X_INTFX_L2_test = _standardize_group_split(X_INTFX_L2_train, X_INTFX_L2_test)
    X_P_L2_train, X_P_L2_test = _standardize_group_split(X_P_L2_train, X_P_L2_test)
    X_S_L2_train, X_S_L2_test = _standardize_group_split(X_S_L2_train, X_S_L2_test)

    # Lag 3
    X_OI_L3_train, X_OI_L3_test = _standardize_group_split(X_OI_L3_train, X_OI_L3_test)
    X_LM_L3_train, X_LM_L3_test = _standardize_group_split(X_LM_L3_train, X_LM_L3_test)
    X_H_L3_train, X_H_L3_test = _standardize_group_split(X_H_L3_train, X_H_L3_test)
    X_COI_L3_train, X_COI_L3_test = _standardize_group_split(X_COI_L3_train, X_COI_L3_test)
    X_MC_L3_train, X_MC_L3_test = _standardize_group_split(X_MC_L3_train, X_MC_L3_test)
    X_INTFX_L3_train, X_INTFX_L3_test = _standardize_group_split(X_INTFX_L3_train, X_INTFX_L3_test)
    X_P_L3_train, X_P_L3_test = _standardize_group_split(X_P_L3_train, X_P_L3_test)
    X_S_L3_train, X_S_L3_test = _standardize_group_split(X_S_L3_train, X_S_L3_test)

    # Lag 4
    X_OI_L4_train, X_OI_L4_test = _standardize_group_split(X_OI_L4_train, X_OI_L4_test)
    X_LM_L4_train, X_LM_L4_test = _standardize_group_split(X_LM_L4_train, X_LM_L4_test)
    X_H_L4_train, X_H_L4_test = _standardize_group_split(X_H_L4_train, X_H_L4_test)
    X_COI_L4_train, X_COI_L4_test = _standardize_group_split(X_COI_L4_train, X_COI_L4_test)
    X_MC_L4_train, X_MC_L4_test = _standardize_group_split(X_MC_L4_train, X_MC_L4_test)
    X_INTFX_L4_train, X_INTFX_L4_test = _standardize_group_split(X_INTFX_L4_train, X_INTFX_L4_test)
    X_P_L4_train, X_P_L4_test = _standardize_group_split(X_P_L4_train, X_P_L4_test)
    X_S_L4_train, X_S_L4_test = _standardize_group_split(X_S_L4_train, X_S_L4_test)

    # Lag 12
    X_OI_L12_train, X_OI_L12_test = _standardize_group_split(X_OI_L12_train, X_OI_L12_test)
    X_LM_L12_train, X_LM_L12_test = _standardize_group_split(X_LM_L12_train, X_LM_L12_test)
    X_H_L12_train, X_H_L12_test = _standardize_group_split(X_H_L12_train, X_H_L12_test)
    X_COI_L12_train, X_COI_L12_test = _standardize_group_split(X_COI_L12_train, X_COI_L12_test)
    X_MC_L12_train, X_MC_L12_test = _standardize_group_split(X_MC_L12_train, X_MC_L12_test)
    X_INTFX_L12_train, X_INTFX_L12_test = _standardize_group_split(X_INTFX_L12_train, X_INTFX_L12_test)
    X_P_L12_train, X_P_L12_test = _standardize_group_split(X_P_L12_train, X_P_L12_test)
    X_S_L12_train, X_S_L12_test = _standardize_group_split(X_S_L12_train, X_S_L12_test)

    #####################################################################################################################
    #                                                                                                                   #
    # Denoising and Compression                                                                                         #
    #                                                                                                                   #
    #####################################################################################################################
    
    X_train_pca, X_test_pca = pca_decomp(X_train, X_test, threshold = 0.9, plot_name = 'X_full_pca')
    X_OI_train_pca, X_OI_test_pca = pca_decomp(X_OI_train, X_OI_test, 
                                                     threshold = 0.9, plot_name = 'X_full_OI_pca')
    X_LM_train_pca, X_LM_test_pca = pca_decomp(X_LM_train, X_LM_test, 
                                                     threshold = 0.9, plot_name = 'X_full_LM_pca')
    X_H_train_pca, X_H_test_pca = pca_decomp(X_H_train, X_H_test, 
                                                   threshold = 0.9, plot_name = 'X_full_H_pca')
    X_COI_train_pca, X_COI_test_pca = pca_decomp(X_COI_train, X_COI_test, 
                                                       threshold = 0.9, plot_name = 'X_full_COI_pca')
    X_MC_train_pca, X_MC_test_pca = pca_decomp(X_MC_train, X_MC_test, 
                                                     threshold = 0.9, plot_name = 'X_full_MC_pca')
    X_INTFX_train_pca, X_INTFX_test_pca = pca_decomp(X_INTFX_train, X_INTFX_test, 
                                                           threshold = 0.9, plot_name = 'X_full_INTFX_pca')
    X_P_train_pca, X_P_test_pca = pca_decomp(X_P_train, X_P_test, 
                                                   threshold = 0.9, plot_name = 'X_full_P_pca')
    X_S_train_pca, X_S_test_pca = pca_decomp(X_S_train, X_S_test, 
                                                   threshold = 0.9, plot_name = 'X_full_S_pca')
    
    X_L1_train_pca, X_L1_test_pca = pca_decomp(X_L1_train, X_L1_test, threshold = 0.9, plot_name = 'X_full_L1_pca')
    X_OI_L1_train_pca, X_OI_L1_test_pca = pca_decomp(X_OI_L1_train, X_OI_L1_test, 
                                                     threshold = 0.9, plot_name = 'X_full_OI_L1_pca')
    X_LM_L1_train_pca, X_LM_L1_test_pca = pca_decomp(X_LM_L1_train, X_LM_L1_test, 
                                                     threshold = 0.9, plot_name = 'X_full_LM_L1_pca')
    X_H_L1_train_pca, X_H_L1_test_pca = pca_decomp(X_H_L1_train, X_H_L1_test, 
                                                   threshold = 0.9, plot_name = 'X_full_H_L1_pca')
    X_COI_L1_train_pca, X_COI_L1_test_pca = pca_decomp(X_COI_L1_train, X_COI_L1_test, 
                                                       threshold = 0.9, plot_name = 'X_full_COI_L1_pca')
    X_MC_L1_train_pca, X_MC_L1_test_pca = pca_decomp(X_MC_L1_train, X_MC_L1_test, 
                                                     threshold = 0.9, plot_name = 'X_full_MC_L1_pca')
    X_INTFX_L1_train_pca, X_INTFX_L1_test_pca = pca_decomp(X_INTFX_L1_train, X_INTFX_L1_test, 
                                                           threshold = 0.9, plot_name = 'X_full_INTFX_L1_pca')
    X_P_L1_train_pca, X_P_L1_test_pca = pca_decomp(X_P_L1_train, X_P_L1_test, 
                                                   threshold = 0.9, plot_name = 'X_full_P_L1_pca')
    X_S_L1_train_pca, X_S_L1_test_pca = pca_decomp(X_S_L1_train, X_S_L1_test, 
                                                   threshold = 0.9, plot_name = 'X_full_S_L1_pca')
    
    X_L2_train_pca, X_L2_test_pca = pca_decomp(X_L2_train, X_L2_test, threshold = 0.9, plot_name = 'X_full_L2_pca')
    X_OI_L2_train_pca, X_OI_L2_test_pca = pca_decomp(X_OI_L2_train, X_OI_L2_test, 
                                                     threshold = 0.9, plot_name = 'X_full_OI_L2_pca')
    X_LM_L2_train_pca, X_LM_L2_test_pca = pca_decomp(X_LM_L2_train, X_LM_L2_test, 
                                                     threshold = 0.9, plot_name = 'X_full_LM_L2_pca')
    X_H_L2_train_pca, X_H_L2_test_pca = pca_decomp(X_H_L2_train, X_H_L2_test, 
                                                   threshold = 0.9, plot_name = 'X_full_H_L2_pca')
    X_COI_L2_train_pca, X_COI_L2_test_pca = pca_decomp(X_COI_L2_train, X_COI_L2_test, 
                                                       threshold = 0.9, plot_name = 'X_full_COI_L2_pca')
    X_MC_L2_train_pca, X_MC_L2_test_pca = pca_decomp(X_MC_L2_train, X_MC_L2_test, 
                                                     threshold = 0.9, plot_name = 'X_full_MC_L2_pca')
    X_INTFX_L2_train_pca, X_INTFX_L2_test_pca = pca_decomp(X_INTFX_L2_train, X_INTFX_L2_test, 
                                                           threshold = 0.9, plot_name = 'X_full_INTFX_L2_pca')
    X_P_L2_train_pca, X_P_L2_test_pca = pca_decomp(X_P_L2_train, X_P_L2_test, 
                                                   threshold = 0.9, plot_name = 'X_full_P_L2_pca')
    X_S_L2_train_pca, X_S_L2_test_pca = pca_decomp(X_S_L2_train, X_S_L2_test, 
                                                   threshold = 0.9, plot_name = 'X_full_S_L2_pca')
    
    X_L3_train_pca, X_L3_test_pca = pca_decomp(X_L3_train, X_L3_test, threshold = 0.9, plot_name = 'X_full_L3_pca')
    X_OI_L3_train_pca, X_OI_L3_test_pca = pca_decomp(X_OI_L3_train, X_OI_L3_test, 
                                                     threshold = 0.9, plot_name = 'X_full_OI_L3_pca')
    X_LM_L3_train_pca, X_LM_L3_test_pca = pca_decomp(X_LM_L3_train, X_LM_L3_test, 
                                                     threshold = 0.9, plot_name = 'X_full_LM_L3_pca')
    X_H_L3_train_pca, X_H_L3_test_pca = pca_decomp(X_H_L3_train, X_H_L3_test, 
                                                   threshold = 0.9, plot_name = 'X_full_H_L3_pca')
    X_COI_L3_train_pca, X_COI_L3_test_pca = pca_decomp(X_COI_L3_train, X_COI_L3_test, 
                                                       threshold = 0.9, plot_name = 'X_full_COI_L3_pca')
    X_MC_L3_train_pca, X_MC_L3_test_pca = pca_decomp(X_MC_L3_train, X_MC_L3_test, 
                                                     threshold = 0.9, plot_name = 'X_full_MC_L3_pca')
    X_INTFX_L3_train_pca, X_INTFX_L3_test_pca = pca_decomp(X_INTFX_L3_train, X_INTFX_L3_test, 
                                                           threshold = 0.9, plot_name = 'X_full_INTFX_L3_pca')
    X_P_L3_train_pca, X_P_L3_test_pca = pca_decomp(X_P_L3_train, X_P_L3_test, 
                                                   threshold = 0.9, plot_name = 'X_full_P_L3_pca')
    X_S_L3_train_pca, X_S_L3_test_pca = pca_decomp(X_S_L3_train, X_S_L3_test, 
                                                   threshold = 0.9, plot_name = 'X_full_S_L3_pca')
    
    X_L4_train_pca, X_L4_test_pca = pca_decomp(X_L4_train, X_L4_test, threshold = 0.9, plot_name = 'X_full_L4_pca')
    X_OI_L4_train_pca, X_OI_L4_test_pca = pca_decomp(X_OI_L4_train, X_OI_L4_test, 
                                                     threshold = 0.9, plot_name = 'X_full_OI_L4_pca')
    X_LM_L4_train_pca, X_LM_L4_test_pca = pca_decomp(X_LM_L4_train, X_LM_L4_test, 
                                                     threshold = 0.9, plot_name = 'X_full_LM_L4_pca')
    X_H_L4_train_pca, X_H_L4_test_pca = pca_decomp(X_H_L4_train, X_H_L4_test, 
                                                   threshold = 0.9, plot_name = 'X_full_H_L4_pca')
    X_COI_L4_train_pca, X_COI_L4_test_pca = pca_decomp(X_COI_L4_train, X_COI_L4_test, 
                                                       threshold = 0.9, plot_name = 'X_full_COI_L4_pca')
    X_MC_L4_train_pca, X_MC_L4_test_pca = pca_decomp(X_MC_L4_train, X_MC_L4_test, 
                                                     threshold = 0.9, plot_name = 'X_full_MC_L4_pca')
    X_INTFX_L4_train_pca, X_INTFX_L4_test_pca = pca_decomp(X_INTFX_L4_train, X_INTFX_L4_test, 
                                                           threshold = 0.9, plot_name = 'X_full_INTFX_L4_pca')
    X_P_L4_train_pca, X_P_L4_test_pca = pca_decomp(X_P_L4_train, X_P_L4_test, 
                                                   threshold = 0.9, plot_name = 'X_full_P_L4_pca')
    X_S_L4_train_pca, X_S_L4_test_pca = pca_decomp(X_S_L4_train, X_S_L4_test, 
                                                   threshold = 0.9, plot_name = 'X_full_S_L4_pca')
    
    X_L12_train_pca, X_L12_test_pca = pca_decomp(X_L12_train, X_L12_test, threshold = 0.9, plot_name = 'X_full_L12_pca')
    X_OI_L12_train_pca, X_OI_L12_test_pca = pca_decomp(X_OI_L12_train, X_OI_L12_test, 
                                                       threshold = 0.9, plot_name = 'X_full_OI_L12_pca')
    X_LM_L12_train_pca, X_LM_L12_test_pca = pca_decomp(X_LM_L12_train, X_LM_L12_test, 
                                                       threshold = 0.9, plot_name = 'X_full_LM_L12_pca')
    X_H_L12_train_pca, X_H_L12_test_pca = pca_decomp(X_H_L12_train, X_H_L12_test, 
                                                     threshold = 0.9, plot_name = 'X_full_H_L12_pca')
    X_COI_L12_train_pca, X_COI_L12_test_pca = pca_decomp(X_COI_L12_train, X_COI_L12_test, 
                                                         threshold = 0.9, plot_name = 'X_full_COI_L12_pca')
    X_MC_L12_train_pca, X_MC_L12_test_pca = pca_decomp(X_MC_L12_train, X_MC_L12_test, 
                                                       threshold = 0.9, plot_name = 'X_full_MC_L12_pca')
    X_INTFX_L12_train_pca, X_INTFX_L12_test_pca = pca_decomp(X_INTFX_L12_train, X_INTFX_L12_test, 
                                                             threshold = 0.9, plot_name = 'X_full_INTFX_L12_pca')
    X_P_L12_train_pca, X_P_L12_test_pca = pca_decomp(X_P_L12_train, X_P_L12_test, 
                                                     threshold = 0.9, plot_name = 'X_full_P_L12_pca')
    X_S_L12_train_pca, X_S_L12_test_pca = pca_decomp(X_S_L12_train, X_S_L12_test, 
                                                     threshold = 0.9, plot_name = 'X_full_S_L12_pca')
    
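    # Reads the column counts of the original training sets (n, n_OI, ...) and of the pca_decomp outputs
    # (n_pca, n_OI_pca, ...); both are used below to set intermediate_dim and latent_dim for deep_ae and vae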
    n = X_train.shape[1]
    n_OI = X_OI_train.shape[1]
    n_LM = X_LM_train.shape[1]
    n_H = X_H_train.shape[1]
    n_COI = X_COI_train.shape[1]
    n_MC = X_MC_train.shape[1]
    n_INTFX = X_INTFX_train.shape[1]
    n_P = X_P_train.shape[1]
    n_S = X_S_train.shape[1]
    
    n_pca = X_train_pca.shape[1]
    n_OI_pca = X_OI_train_pca.shape[1]
    n_LM_pca = X_LM_train_pca.shape[1]
    n_H_pca = X_H_train_pca.shape[1]
    n_COI_pca = X_COI_train_pca.shape[1]
    n_MC_pca = X_MC_train_pca.shape[1]
    n_INTFX_pca = X_INTFX_train_pca.shape[1]
    n_P_pca = X_P_train_pca.shape[1]
    n_S_pca = X_S_train_pca.shape[1]
    
    X_train_ae, X_test_ae = deep_ae(X_train, X_test, 
                                    intermediate_dim = int((n + n_pca)/2), latent_dim = n_pca, 
                                    batch_size = 16, epochs = 100, 
                                    verbose = False, plot_name = 'X_full_ae')
    
    X_OI_train_ae, X_OI_test_ae = deep_ae(X_OI_train, X_OI_test, 
                                    intermediate_dim = int((n_OI + n_OI_pca)/2), latent_dim = n_OI_pca, 
                                    batch_size = 16, epochs = 100, 
                                    verbose = False, plot_name = 'X_OI_ae')
    
    X_LM_train_ae, X_LM_test_ae = deep_ae(X_LM_train, X_LM_test, 
                                    intermediate_dim = int((n_LM + n_LM_pca)/2), latent_dim = n_LM_pca,  
                                    batch_size = 16, epochs = 100, 
                                    verbose = False, plot_name = 'X_LM_ae')
    
    X_H_train_ae, X_H_test_ae = deep_ae(X_H_train, X_H_test, 
                                    intermediate_dim = int((n_H + n_H_pca)/2), latent_dim = n_H_pca, 
                                    batch_size = 16, epochs = 100, 
                                    verbose = False, plot_name = 'X_H_ae')
    
    X_COI_train_ae, X_COI_test_ae = deep_ae(X_COI_train, X_COI_test, 
                                    intermediate_dim = int((n_COI + n_COI_pca)/2), latent_dim = n_COI_pca, 
                                    batch_size = 16, epochs = 100, 
                                    verbose = False, plot_name = 'X_COI_ae')
    
    X_MC_train_ae, X_MC_test_ae = deep_ae(X_MC_train, X_MC_test, 
                                    intermediate_dim = int((n_MC + n_MC_pca)/2), latent_dim = n_MC_pca, 
                                    batch_size = 16, epochs = 100, 
                                    verbose = False, plot_name = 'X_MC_ae')
    
    X_INTFX_train_ae, X_INTFX_test_ae = deep_ae(X_INTFX_train, X_INTFX_test, 
                                    intermediate_dim = int((n_INTFX + n_INTFX_pca)/2), latent_dim = n_INTFX_pca, 
                                    batch_size = 16, epochs = 100, 
                                    verbose = False, plot_name = 'X_INTFX_ae')
    
    X_P_train_ae, X_P_test_ae = deep_ae(X_P_train, X_P_test, 
                                    intermediate_dim = int((n_P + n_P_pca)/2), latent_dim = n_P_pca, 
                                    batch_size = 16, epochs = 100, 
                                    verbose = False, plot_name = 'X_P_ae')
    
    X_S_train_ae, X_S_test_ae = deep_ae(X_S_train, X_S_test, 
                                    intermediate_dim = int((n_S + n_S_pca)/2), latent_dim = n_S_pca, 
                                    batch_size = 16, epochs = 100, 
                                    verbose = False, plot_name = 'X_S_ae')
    
    X_train_vae, X_test_vae = vae(X_train, X_test, 
                                    intermediate_dim = int((n + n_pca)/2), latent_dim = n_pca, 
                                    batch_size = 16, epochs = 100, 
                                    verbose = False, plot_name = 'X_full_vae')
    
    X_OI_train_vae, X_OI_test_vae = vae(X_OI_train, X_OI_test, 
                                    intermediate_dim = int((n_OI + n_OI_pca)/2), latent_dim = n_OI_pca, 
                                    batch_size = 16, epochs = 100, 
                                    verbose = False, plot_name = 'X_OI_vae')
    
    X_LM_train_vae, X_LM_test_vae = vae(X_LM_train, X_LM_test, 
                                    intermediate_dim = int((n_LM + n_LM_pca)/2), latent_dim = n_LM_pca,  
                                    batch_size = 16, epochs = 100, 
                                    verbose = False, plot_name = 'X_LM_vae')
    
    X_H_train_vae, X_H_test_vae = vae(X_H_train, X_H_test, 
                                    intermediate_dim = int((n_H + n_H_pca)/2), latent_dim = n_H_pca, 
                                    batch_size = 16, epochs = 100, 
                                    verbose = False, plot_name = 'X_H_vae')
    
    X_COI_train_vae, X_COI_test_vae = vae(X_COI_train, X_COI_test, 
                                    intermediate_dim = int((n_COI + n_COI_pca)/2), latent_dim = n_COI_pca, 
                                    batch_size = 16, epochs = 100, 
                                    verbose = False, plot_name = 'X_COI_vae')
    
    X_MC_train_vae, X_MC_test_vae = vae(X_MC_train, X_MC_test, 
                                    intermediate_dim = int((n_MC + n_MC_pca)/2), latent_dim = n_MC_pca, 
                                    batch_size = 16, epochs = 100, 
                                    verbose = False, plot_name = 'X_MC_vae')
    
    X_INTFX_train_vae, X_INTFX_test_vae = vae(X_INTFX_train, X_INTFX_test, 
                                    intermediate_dim = int((n_INTFX + n_INTFX_pca)/2), latent_dim = n_INTFX_pca, 
                                    batch_size = 16, epochs = 100, 
                                    verbose = False, plot_name = 'X_INTFX_vae')
    
    X_P_train_vae, X_P_test_vae = vae(X_P_train, X_P_test, 
                                    intermediate_dim = int((n_P + n_P_pca)/2), latent_dim = n_P_pca, 
                                    batch_size = 16, epochs = 100, 
                                    verbose = False, plot_name = 'X_P_vae')
    
    X_S_train_vae, X_S_test_vae = vae(X_S_train, X_S_test, 
                                    intermediate_dim = int((n_S + n_S_pca)/2), latent_dim = n_S_pca, 
                                    batch_size = 16, epochs = 100, 
                                    verbose = False, plot_name = 'X_S_vae')
    
    