In [1]:
import numpy
import pandas
import matplotlib.pyplot as plt
import ESN_Modules as ESN
from scipy import linalg
from sklearn.metrics import mean_squared_error
from sklearn.metrics import mean_absolute_percentage_error
from sklearn.preprocessing import PowerTransformer

In [2]:
asset = "WEGE"

# TAMANHO DE JANELA DE ENTRADA (INPUT)
len_window_input = 20
# TAMANHO DE JANELA DE SAÍDA (OUTPUT)
len_window_output = 1

# ESPARSIDADE DA MATRIZ, FLOAT, NO CONJUNTO [0, 1]
sparsities = [0.75]
# RAIO ESPECTRAL DESEJADO, FLOAT, NO CONJUNTO (0, INF)
spectral_radius = [0.3]
# TAXA DE VAZAMENTO ESPECTRAL, FLOAT, NO CONJUNTO (0, 1]
leaking_rates = [0.75]
# ESCALA DOS DADOS DE ENTRADA, FLOAT, NO CONJUNTO (0, INF)
input_scalings = [0.5]

In [3]:
pt = PowerTransformer()
data = ESN.reshape(numpy.loadtxt(asset + "Data.txt", delimiter = ","))
pt.fit(data)
ndata = pt.transform(data)
numpy.savetxt(asset + "TrainData.txt", ndata, delimiter = ",")
#print(pt.inverse_transform(ndata))

In [4]:
def train_parameters(X, train_length, transiente, N, K, scale, radius, sparsity, leak):
    numpy.random.seed(X)
    # PESOS DE ENTRADA
    W_input = ESN.input_weights(N = N, K = K, input_scaling = scale)
    # PESOS DE RESERVATORIO
    W_reservoir = ESN.reservoir_weights(N = N, spectral_radius = radius, sparsity = sparsity)

    # MATRIZ DE ESTADOS (TRAINNING)
    state_matrix = ESN.harvesting_states(state_matrix = ESN.create_states(dim = 1 + K + N, patterns = train_length - transiente), 
                                     input_patterns = train_dataset, 
                                     win = W_input, 
                                     wres = W_reservoir, 
                                     initial_state = numpy.zeros((N, 1)),
                                     leaking = leak, 
                                     transiente = transiente)

    # PESOS DE SAÍDA
    W_output = ESN.ridge_regression(states = state_matrix, target = train_target)
    # ERRO DE TREINO
    return(numpy.dot(W_output, state_matrix))

def test_parameters(X, train_dataset, test_dataset, train_length, test_length, transiente, N, K, scale, radius, sparsity, leak):
    numpy.random.seed(X)
    # PESOS DE ENTRADA
    W_input = ESN.input_weights(N = N, K = K, input_scaling = scale)
    # PESOS DE RESERVATORIO
    W_reservoir = ESN.reservoir_weights(N = N, spectral_radius = radius, sparsity = sparsity)
    
    # MATRIZ DE ESTADOS (TRAINNING)
    state_matrix = ESN.harvesting_states(state_matrix = ESN.create_states(dim = 1 + K + N, patterns = train_length - transiente), 
                                         input_patterns = train_dataset, 
                                         win = W_input, 
                                         wres = W_reservoir, 
                                         initial_state = numpy.zeros((N, 1)),
                                         leaking = leak, 
                                         transiente = transiente)

    # PESOS DE SAÍDA
    W_output = ESN.ridge_regression(states = state_matrix, target = train_target)
    
    # O ESTADO INICIAL É O ÚLTIMO ESTADO DO TREINAMENTO
    initial_state = ESN.reshape(state_matrix[K + 1::,-1])

    # MATRIX DE ESTADOS (TESTING)
    state_matrix = ESN.harvesting_states(state_matrix = ESN.create_states(dim = 1 + K + N, patterns = test_length), 
                                         input_patterns = test_dataset, 
                                         win = W_input, 
                                         wres = W_reservoir, 
                                         initial_state = initial_state,
                                         leaking = leak, 
                                         transiente = 0)
    # ERRO DE PREDIÇÃO
    return(numpy.dot(W_output, state_matrix))

def eval_trend(x, y):
    signs = numpy.sign(x) * numpy.sign(y)
    signs, counts = numpy.unique(signs, return_counts = True)
    try:
        return([counts[1]/(counts[1] + counts[0])])
    except IndexError:
        return([1.0])

In [5]:
# PASSOS AHEAD QUE DESEJAMOS PREVER
steps = 1

# NÚMERO DE HIPERPARÂMETROS A SEREM TESTADOS POR CATEGORIA
spam = 10

# NÚMERO DE NEURÔNIOS NA CAMADA OCULTA, INT, N > 0
neurons = numpy.linspace(start = 20, stop = 500, num = 25).astype(int)

# NÚMERO DE RODADAS
rounds = 10

In [6]:
file_name = asset + "TrainData.txt"

# OUTPUT ESPERADO
output_dataset, L = ESN.set_window2(name = file_name, window_len = len_window_output, begin = len_window_input)

# INPUT DO SISTEMA
input_dataset, K = ESN.set_window2(name = file_name, window_len = len_window_input, begin = 0, steps = steps * len_window_output)

dim = input_dataset.shape[0]

# TAMANHO DA JANELA DE TRAINNING
train_length = int(0.7*dim)
# TAMANHO DA JANELA DE TESTING
test_length = dim - train_length - steps

# TRANSIENTE, VALORES A SEREM DESCARTADOS
transiente = int(train_length*0.1)

# DATASET DE TREINO -> [(X0), (X1), (X2), ..., (Xjanela_treino-1)]
train_dataset = input_dataset[0 : train_length, :]

# DATASET DE TREINO (TARGET) -> [(Ysteps+transiente), (Ysteps+transiente+1), ..., (Ysteps+janela_treino-1)]
train_target = output_dataset[None, steps + transiente - 1 : train_length + steps - 1, :]

# DATASET DE TESTE -> [(Xjanela_treino), (Xjanela_treino+1), (Xjanela_treino+2), ..., (Xjanela_treino+janela_teste-1)]
test_dataset = input_dataset[train_length : train_length + test_length, :]

# DATASET DE TESTE (TARGET) -> [(Ysteps+janela_treino+janela_teste), ..., (Ysteps+janela_treino+janela_teste-1)]
test_target = output_dataset[train_length + steps - 1: train_length + test_length, :]

eval_model = test_target.T

In [7]:
trainning_data = pandas.read_csv("ParametrosTreinoReal_" + asset + ".txt")
trainning_data = trainning_data[(trainning_data.LEAK == leaking_rates[0]) & (trainning_data.SCALE == input_scalings[0]) & (trainning_data.SPARSITY == sparsities[0]) & (trainning_data.RADIUS == spectral_radius[0])]
trainning_data

Unnamed: 0,ROUND,WINDOW,NEURONS,LEAK,SPARSITY,SCALE,RADIUS,MSE,RMSE,MAPE,MSE(X),RMSE(X),MAPE(X),MSE(Y),RMSE(Y),MAPE(Y),MSE(Z),RMSE(Z),MAPE(Z),FRAMEWORK
7721,0.0,20.0,20.0,0.75,0.75,0.5,0.3,0.949522,0.682991,1.977635,0.95777,0.978657,2.454823,0.851853,0.922959,2.245472,1.021805,1.010844,1.711016,51926.0
17721,1.0,20.0,20.0,0.75,0.75,0.5,0.3,0.951832,0.682642,1.977661,0.959274,0.979425,2.547175,0.852245,0.923171,2.277889,1.025464,1.012652,1.618361,60794.0
27721,2.0,20.0,20.0,0.75,0.75,0.5,0.3,0.9455,0.681629,2.008085,0.95332,0.976381,2.514265,0.846274,0.919931,2.300246,1.019084,1.009497,1.696204,164022.0
37721,3.0,20.0,20.0,0.75,0.75,0.5,0.3,0.950111,0.682206,1.974965,0.956437,0.977976,2.493937,0.85123,0.922621,2.253756,1.022977,1.011423,1.680885,86852.0
47721,4.0,20.0,20.0,0.75,0.75,0.5,0.3,0.949145,0.682177,1.986095,0.955461,0.977477,2.536929,0.849874,0.921886,2.263112,1.021933,1.010907,1.668977,162268.0
57721,5.0,20.0,20.0,0.75,0.75,0.5,0.3,0.94799,0.682704,1.989922,0.956432,0.977973,2.502396,0.849469,0.921667,2.224479,1.020537,1.010217,1.731085,33027.0
67721,6.0,20.0,20.0,0.75,0.75,0.5,0.3,0.947633,0.683298,2.035506,0.954014,0.976736,2.608002,0.849607,0.921741,2.253003,1.01865,1.009282,1.794716,64152.0
77721,7.0,20.0,20.0,0.75,0.75,0.5,0.3,0.949884,0.683294,1.997075,0.956176,0.977843,2.546702,0.851646,0.922847,2.264272,1.022221,1.011049,1.706076,112784.0
87721,8.0,20.0,20.0,0.75,0.75,0.5,0.3,0.94793,0.683412,1.99583,0.955527,0.977511,2.474274,0.850795,0.922385,2.262913,1.019647,1.009776,1.745135,97533.0
97721,9.0,20.0,20.0,0.75,0.75,0.5,0.3,0.949536,0.682928,1.995803,0.956748,0.978135,2.509826,0.851376,0.9227,2.255524,1.021507,1.010697,1.732286,109832.0


In [13]:
# DEFINE UMA SEMENTE DO GERADOR PSEUDO-ALEATORIO
numpy.random.seed(69)

cols = ["ROUND", "WINDOW", "NEURONS", "LEAK", "SPARSITY", "SCALE", "RADIUS", "MSE", "RMSE", "MAPE", 
        "MSE(X)", "RMSE(X)", "MAPE(X)", "MSE(Y)", "RMSE(Y)", "MAPE(Y)", "MSE(Z)", "RMSE(Z)", "MAPE(Z)",
        "HITS", "FRAMEWORK"]

sample = len(neurons) * rounds
architectures = trainning_data["FRAMEWORK"].astype(int).tolist()
# CORRIGIR DEPOIS
simulation = pandas.DataFrame(numpy.nan, columns = cols, 
                              index = numpy.linspace(start = 0, stop = sample - 1, num = sample).astype(int))
i = 0

for rnd in range(rounds):
    print(rnd)
    for N in neurons:
        for leak in leaking_rates:
            for sparsity in sparsities:
                for scale in input_scalings:
                    for radius in spectral_radius:
                        predict = test_parameters(
                            X = architectures[rnd], 
                            train_dataset = train_dataset, 
                            test_dataset = test_dataset, 
                            train_length = train_length, 
                            test_length = test_length, 
                            transiente = transiente, 
                            N = N, 
                            K = K, 
                            scale = scale, 
                            radius = radius, 
                            sparsity = sparsity, 
                            leak = leak
                        )
                        simulation.iloc[i,:] = [rnd, len_window_input, N, leak, sparsity, scale, radius] + ESN.model_metrics(eval_model, predict) + eval_trend(x = predict.T, y = eval_model) + [architectures[rnd]]
                        i += 1

0
1
2
3
4
5
6
7
8
9


In [14]:
simulation

Unnamed: 0,ROUND,WINDOW,NEURONS,LEAK,SPARSITY,SCALE,RADIUS,MSE,RMSE,MAPE,...,RMSE(X),MAPE(X),MSE(Y),RMSE(Y),MAPE(Y),MSE(Z),RMSE(Z),MAPE(Z),HITS,FRAMEWORK
0,0.0,20.0,20.0,0.75,0.75,0.5,0.3,0.843281,0.662978,1.995595,...,0.928110,2.525412,0.760204,0.871897,2.333169,0.882731,0.939538,1.697859,0.561503,51926.0
1,0.0,20.0,40.0,0.75,0.75,0.5,0.3,0.854585,0.668415,2.122884,...,0.931771,2.713312,0.773103,0.879263,2.459847,0.894297,0.945673,1.810993,0.553268,51926.0
2,0.0,20.0,60.0,0.75,0.75,0.5,0.3,0.857094,0.672113,2.213570,...,0.933097,2.906959,0.772933,0.879166,2.531407,0.900403,0.948895,1.864810,0.561503,51926.0
3,0.0,20.0,80.0,0.75,0.75,0.5,0.3,0.868332,0.676070,2.318136,...,0.940726,2.970293,0.780270,0.883329,2.640850,0.913108,0.955567,2.023417,0.566135,51926.0
4,0.0,20.0,100.0,0.75,0.75,0.5,0.3,0.868822,0.677011,2.447939,...,0.940086,3.251501,0.782976,0.884860,2.687582,0.909993,0.953936,2.147910,0.564076,51926.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
245,9.0,20.0,420.0,0.75,0.75,0.5,0.3,0.941218,0.718411,3.326344,...,0.974557,4.721986,0.844379,0.918901,3.281185,0.995575,0.997785,3.100231,0.530108,109832.0
246,9.0,20.0,440.0,0.75,0.75,0.5,0.3,0.950450,0.721106,3.374342,...,0.978511,4.822435,0.851968,0.923021,3.344026,1.004568,1.002281,3.129463,0.542975,109832.0
247,9.0,20.0,460.0,0.75,0.75,0.5,0.3,0.949890,0.723698,3.413275,...,0.977332,4.856616,0.854058,0.924152,3.373082,1.001456,1.000728,3.185062,0.551724,109832.0
248,9.0,20.0,480.0,0.75,0.75,0.5,0.3,0.951947,0.727739,3.584759,...,0.975950,5.118096,0.860867,0.927829,3.480342,1.001794,1.000897,3.410635,0.538857,109832.0


In [15]:
reduced = simulation[simulation.NEURONS == 20.0]
best_model = reduced[(reduced.RMSE == numpy.min(reduced.RMSE))]
worst_model = reduced[(reduced.RMSE == numpy.max(reduced.RMSE))]

best_model.index = [0]
worst_model.index = [0]

predict_best_model = train_parameters(
    X = best_model.loc[0,"FRAMEWORK"].astype(int), 
    train_length = train_length, 
    transiente = transiente, 
    N = best_model.loc[0,"NEURONS"].astype(int), 
    K = K, 
    scale = best_model.loc[0,"SCALE"], 
    radius = best_model.loc[0,"RADIUS"], 
    sparsity = best_model.loc[0,"SPARSITY"], 
    leak = best_model.loc[0,"LEAK"]
)

predict_worst_model = train_parameters(
    X = worst_model.loc[0,"FRAMEWORK"].astype(int), 
    train_length = train_length, 
    transiente = transiente, 
    N = worst_model.loc[0,"NEURONS"].astype(int), 
    K = K, 
    scale = worst_model.loc[0,"SCALE"], 
    radius = worst_model.loc[0,"RADIUS"], 
    sparsity = worst_model.loc[0,"SPARSITY"], 
    leak = worst_model.loc[0,"LEAK"]
)

numpy.savetxt("BestPredictor" + asset + ".txt", (train_target[0].T - predict_best_model).T, delimiter = ",")
numpy.savetxt("WorstPredictor" + asset + ".txt", (train_target[0].T - predict_worst_model).T, delimiter = ",")

In [16]:
ESN.model_metrics(eval_model, output_dataset[train_length + steps - 1 - steps: train_length + test_length - steps, :].T)[0:3]

[1.873095712927529, 1.0269395391777096, 5.99352180161738]

In [21]:
eval_trend(x = output_dataset[train_length + steps - 1 - steps: train_length + test_length - steps, :], y = eval_model)

[0.4781266083376222]

In [22]:
simulation.to_csv("ParametrosValidacaoReal_" + asset + ".txt", index = False)