In [1]:
import pandas as pd
import tensorflow as tf

import autokeras as ak

from settings import INPUT_FILE, CORR_GROUP, OUTPUT_FILE
import numpy as np
from sklearn.preprocessing import MinMaxScaler
import logging

logging.basicConfig(format='%(asctime)s %(message)s', filename='autokeras.log', level=logging.DEBUG)
logging.info('Started training')



In [2]:
def unique_cols(df):
    a = df.to_numpy() # df.values (pandas<0.24)
    return (a[0] == a).all(0)

def create_supervised_dataset(df, target, feats, n_in=1, n_out=1):
    cols, names = list(), list()
    n_vars = len(feats)
    # input sequence (t-n, ... t-1)
    for i in range(n_in, 0, -1):
        cols.append(df[feats].shift(i))
        names += [('var%d(t-%d)' % (j+1, i)) for j in range(n_vars)]
    # forecast sequence (t, t+1, ... t+n)
    for i in range(0, n_out):
        cols.append(df[target].shift(-i))
        if i == 0:
            names += [('var%d(t)' % (j+1)) for j in range(1)]
        else:
            names += [('var%d(t+%d)' % (j+1, i)) for j in range(1)]
    # put it all together
    agg = pd.concat(cols, axis=1)
    agg.columns = names
    agg.dropna(inplace=True)
    return agg.values

In [3]:
df = pd.read_csv(INPUT_FILE, index_col='ts')
#df = df.drop('Unnamed: 0', 1)
df.index = pd.to_datetime(df.index)

df_2 = df.loc[:,np.invert(unique_cols(df))]
# Average window
# df_2 = df.groupby(np.arange(len(df))//60).mean()

scaler = MinMaxScaler()
d = scaler.fit_transform(df_2)
scaled_df = pd.DataFrame(d, columns=df_2.columns, index=df_2.index)
results = []
train_split = int(0.7*scaled_df.shape[0])
val_split = int(0.9*scaled_df.shape[0])
predict_from = 1
predict_until = 1
lookback = 15

In [4]:
for k in CORR_GROUP:
    data = create_supervised_dataset(scaled_df, k, CORR_GROUP[k], n_in=lookback, n_out=1)
    data_train = data[:train_split]
    data_cv = data[train_split:val_split]
    
    data_x_train, data_y_train = data_train[:, :-1], data_train[:, -1]
    print(data_x_train.shape, data_y_train.shape)

    data_x_val, data_y_val = data_cv[:, :-1], data_cv[:, -1]
    print(data_x_val.shape, data_y_val.shape)
    
    data_test = data[val_split:int(1*scaled_df.shape[0])]
    data_x_test, data_y_test = data_test[:, :-1], data_test[:, -1]
    data_x_test = data_x_test.reshape(-1, lookback, len(CORR_GROUP[k]))

    clf = ak.TimeseriesForecaster(
        lookback=lookback,
        predict_from=predict_from,
        predict_until=predict_until,
        max_trials=3,
        project_name=f'autokeras_ml/{k}_forecaster',
        objective="mean_squared_error",
        overwrite=True
    )

    # Train the TimeSeriesForecaster with train data
    clf.fit(
        x=data_x_train,
        y=data_y_train,
        validation_data=(data_x_val, data_y_val),
        epochs=10,
        batch_size=64
    )

    model = clf.export_model()
    print(model.summary())
    model.save(f'models/{k}_autokeras.h5')
    # Evaluate the best model with testing data.
    print(model.evaluate(data_x_test, data_y_test)[0])
    break


Trial 3 Complete [00h 01m 53s]
mean_squared_error: 0.014870687387883663

Best mean_squared_error So Far: 0.010276983492076397
Total elapsed time: 00h 06m 03s
INFO:tensorflow:Oracle triggered exit
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
INFO:tensorflow:Assets written to: ./autokeras_ml/P_SUM_forecaster/best_model/assets
Model: "model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_1 (InputLayer)        [(None, 15, 285)]         0         
                                                                 
 lstm (LSTM)                 (None, 15, 285)           650940    
                                                                 
 lstm_1 (LSTM)               (None, 285)               650940    
                                                                 
 regression_head_1 (Dense)   (None, 1)                 286       

ValueError: in user code:

    File "/home/poliveira/Electricity-Forecasting/lib/python3.8/site-packages/keras/engine/training.py", line 1557, in test_function  *
        return step_function(self, iterator)
    File "/home/poliveira/Electricity-Forecasting/lib/python3.8/site-packages/keras/engine/training.py", line 1546, in step_function  **
        outputs = model.distribute_strategy.run(run_step, args=(data,))
    File "/home/poliveira/Electricity-Forecasting/lib/python3.8/site-packages/keras/engine/training.py", line 1535, in run_step  **
        outputs = model.test_step(data)
    File "/home/poliveira/Electricity-Forecasting/lib/python3.8/site-packages/keras/engine/training.py", line 1499, in test_step
        y_pred = self(x, training=False)
    File "/home/poliveira/Electricity-Forecasting/lib/python3.8/site-packages/keras/utils/traceback_utils.py", line 67, in error_handler
        raise e.with_traceback(filtered_tb) from None
    File "/home/poliveira/Electricity-Forecasting/lib/python3.8/site-packages/keras/engine/input_spec.py", line 264, in assert_input_compatibility
        raise ValueError(f'Input {input_index} of layer "{layer_name}" is '

    ValueError: Input 0 of layer "model" is incompatible with the layer: expected shape=(None, 15, 285), found shape=(None, 15, 19)


In [11]:
k='P_SUM'
train_split = int(0.7*scaled_df.shape[0])
val_split = int(0.9*scaled_df.shape[0])
data_train = scaled_df[:train_split]
data_cv = scaled_df[train_split:val_split]
data_x = data_train[CORR_GROUP[k]]
data_y = data_train[k]

data_x_val = data_cv[CORR_GROUP[k]]
data_y_val = data_cv[k]


(27687, 19) (27687,)
(7911, 19) (7911,)


In [9]:
27687/7911

3.4998103905953735