In [1]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
import sys
from sklearn import metrics # for the evaluation
from settings import CORR_GROUP, SEED1, SEED2, SEED3
from keras.callbacks import EarlyStopping
import tensorflow as tf

In [2]:

def unique_cols(df):
    a = df.to_numpy() # df.values (pandas<0.24)
    return (a[0] == a).all(0)


In [3]:

def create_supervised_dataset(df, target, feats, n_in=1, n_out=1):
    cols, names = list(), list()
    n_vars = len(feats)
    # input sequence (t-n, ... t-1)
    for i in range(n_in, 0, -1):
        cols.append(df[feats].shift(i))
        names += [('var%d(t-%d)' % (j+1, i)) for j in range(n_vars)]
    # forecast sequence (t, t+1, ... t+n)
    for i in range(0, n_out):
        cols.append(df[target].shift(-i))
        if i == 0:
            names += [('var%d(t)' % (j+1)) for j in range(1)]
        else:
            names += [('var%d(t+%d)' % (j+1, i)) for j in range(1)]
    # put it all together
    agg = pd.concat(cols, axis=1)
    agg.columns = names
    agg.dropna(inplace=True)
    return agg.values

In [4]:
def write_results(model_desc, res):
    with open('./results/rmse_results2.csv', 'a') as writer:
        writer.write(model_desc+","+",".join([f'{num:.3f}' for num in res])+'\n')

In [5]:
df = pd.read_csv("data/mongo_data.csv", index_col='ts')
df = df.drop('Unnamed: 0', 1)
df.index = pd.to_datetime(df.index)

df = df.loc[:,np.invert(unique_cols(df))]

# Average window
df_2 = df.groupby(np.arange(len(df))//60).mean()

scaler = MinMaxScaler()
d = scaler.fit_transform(df_2)
scaled_df = pd.DataFrame(d, columns=df_2.columns, index=df_2.index)

callback = EarlyStopping(monitor='val_loss', patience=5)
history_window = 15
prediction_window = 1

model = None

  df = df.drop('Unnamed: 0', 1)


In [11]:
import importlib
import AttentionBiLSTM
import BiLSTM
importlib.reload(AttentionBiLSTM)


rmse_res = []
for k in CORR_GROUP:
    values = create_supervised_dataset(scaled_df, k, CORR_GROUP[k], n_in=history_window, n_out=prediction_window)
    len_values = values.shape[0]
    # split into train and test sets 
    n_train_seconds = int(0.7*len_values) #70% dos valores
    n_cv_seconds =  int(0.9*len_values) #20% dos valores
    train = values[:n_train_seconds, :]
    cv = values[n_train_seconds:n_cv_seconds, :]

    # split into input and outputs
    train_X, train_y = train[:, :-1], train[:, -1:]
    cv_X, cv_y = cv[:, :-1], cv[:, -1:]
    # reshape input to be 3D [samples, timesteps, features]
    train_X = train_X.reshape((train_X.shape[0], 1, train_X.shape[1]))
    cv_X = cv_X.reshape((cv_X.shape[0], 1, cv_X.shape[1]))
    model = AttentionBiLSTM.create_model(30, 0.10, train_X.shape[2])    
    results = []
    for s in [SEED1, SEED2, SEED3]:
        tf.keras.utils.set_random_seed(s)
        model.fit(train_X, train_y, epochs=10, batch_size=72, validation_data=(cv_X, cv_y), shuffle=False, callbacks=[callback])

        #Test for the day after
        n_test_seconds =  int(0.1*len_values) #10% dos valores
        test = values[-n_test_seconds:, :]

        test_X, test_y = test[:, :-1], test[:, -1:]
        test_X = test_X.reshape((test_X.shape[0], 1, test_X.shape[1]))
        # make a prediction
        yhat = model.predict(test_X)
        results.append(np.sqrt(metrics.mean_squared_error(test_y, yhat)))
    rmse_res.append(sum(results)/3)

here
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10

KeyboardInterrupt: 

In [9]:
print(rmse_res)

[]


In [None]:
a = [0.11500319364914091, 0.030943999997235447, 0.08280160130150306, 0.05204123461519639, 0.22946918508541425, 0.06302188405063386]