In [5]:

import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
import sys
import os
from sklearn import metrics # for the evaluation

In [4]:
# Scratch check: the odd numbers below 2*n, largest first.
n = 3
[2 * k - 1 for k in range(n, 0, -1)]

[5, 3, 1]

In [27]:
# Model

from keras.models import Sequential
from keras.layers import Dense
from keras.layers import GRU
from keras.layers import Flatten

# create network
def create_model(n_stack, features, timesteps=1, dense_units=20, gru_units=10):
    """Build and compile a stacked Dense -> GRU regression network.

    The network is ``n_stack`` repetitions of a Dense layer followed by a GRU
    layer, topped with a single-unit Dense output. It is compiled with MAE
    loss and the Adam optimizer.

    Parameters
    ----------
    n_stack : int
        Number of Dense+GRU pairs. Values below 1 are treated as 1, which is
        what the original loop structure produced.
    features : int
        Size of the last axis of the input.
    timesteps : int, default 1
        Length of the time axis of the input.
    dense_units : int, default 20
        Width of every hidden Dense layer.
    gru_units : int, default 10
        Width of every GRU layer.

    Returns
    -------
    A compiled ``Sequential`` model taking ``(batch, timesteps, features)``
    input and producing ``(batch, 1)`` output.
    """
    n_pairs = max(1, n_stack)
    model = Sequential()
    for level in range(n_pairs):
        if level == 0:
            # Sequential only honours input_shape on the first layer, so it
            # has to be declared here rather than on the GRU that follows.
            model.add(Dense(dense_units, input_shape=(timesteps, features)))
        else:
            model.add(Dense(dense_units))
        # Intermediate GRUs must hand a full sequence to the next pair; the
        # last one returns only its final state so the output is 2-D and
        # lines up with (batch, 1) targets.
        is_last = level == n_pairs - 1
        model.add(GRU(gru_units, return_sequences=not is_last))
    model.add(Dense(1))
    model.compile(loss='mae', optimizer='adam')

    return model

In [7]:
def unique_cols(df):
    """Flag the columns of ``df`` whose every entry equals the first row's entry.

    Parameters
    ----------
    df : pandas.DataFrame

    Returns
    -------
    numpy.ndarray
        Boolean array with one element per column; ``True`` where the column
        is constant. A frame with zero rows yields all ``True``.

    Notes
    -----
    The comparison uses ``==``, so a column made only of NaN is reported as
    not constant (NaN never compares equal to itself).
    """
    a = df.to_numpy() # df.values (pandas<0.24)
    # a[:1] keeps the row axis, so it broadcasts exactly like a[0] for
    # non-empty data but does not raise IndexError when there are no rows.
    return (a == a[:1]).all(axis=0)

In [8]:
# Feature groups for the per-target models: each key is a column to predict and
# its list holds the input columns used as features for that target (the
# training loop passes `k` and `corr_group[k]` to create_supervised_dataset).
# NOTE(review): the name suggests the groups come from a correlation analysis
# of the columns -- confirm.
corr_group = {
    'P_SUM': # Variable to predict
        ['S_SUM', # Sum of apparent power S1, S2, S3
        'S_L3', # Apparent power S3
        'S_L2', # Apparent power S2
        'S_L1', # Apparent power S1
        'C_phi_L1', # Fundamental power CosPhi factor L1
        'C_phi_L2', # Fundamental power CosPhi factor L2
        'P_SUM', # Sum of powers P1, P2, P3
        'P_L1', # Real power 1
        'P_L2', # Real power 2
        'P_L3', # Real power 3
        'Q_SUM', # Sum of fundamental reactive power
        'Q_L1', # Fundamental reactive power Q1
        'Q_L2', # Fundamental reactive power Q2
        'Q_L3', # Fundamental reactive power Q3
        'I_L1', # Current L1
        'I_L2', # Current L2
        'I_L3'], # Current L3
    'U_L1_N':
        ['U_L1_L2', # Voltage L1_L2
        'U_L3_L1', # Voltage L3_L1
        'U_L3_N', # Voltage L3_N
        'U_L2_L3', # Voltage L2_L3
        'U_L2_N', # Voltage L2_N
        'U_L1_N'], # Voltage L1_N
    'I_SUM': 
        ['I_SUM'], # Current sum
    'F': 
        ['F'], # Measured frequency
    'RealE_SUM':
        ['RealEc_SUM', # Sum of consumed energy
        'RealEc_L1', # Real energy consumed L1
        'RealEc_L2', # Real energy consumed L2
        'RealEc_L3', # Real energy consumed L3
        'RealE_SUM', # Sum of real energy
        'RealE_L2', # Real energy L2
        'RealE_L3', # Real energy L3
        'RealE_L1', # Real energy L1
        'AE_SUM', # Apparent energy sum
        'AE_L1', # Apparent energy L1
        'AE_L2', # Apparent energy L2
        'AE_L3', # Apparent energy L3
        'ReacE_L1'], # Reactive energy L1
    'C_phi_L3': 
        ['C_phi_L3'] # Fundamental power CosPhi factor L3
}

In [9]:
def create_supervised_dataset(df, target, feats, n_in=1, n_out=1):
    """Turn a time-indexed frame into a lagged supervised-learning matrix.

    Each output row holds, left to right, the ``feats`` columns shifted by
    ``n_in``, ``n_in - 1``, ..., 1 rows, followed by the ``target`` column
    shifted by 0, -1, ..., ``-(n_out - 1)`` rows. Rows that contain any NaN
    (including those introduced by the shifts) are removed.

    Parameters
    ----------
    df : pandas.DataFrame
        Source data, one row per time step.
    target : column label
        Column to forecast.
    feats : list of column labels
        Columns used as lagged inputs.
    n_in : int, default 1
        Number of past steps to include.
    n_out : int, default 1
        Number of target steps to include, starting at the current one.

    Returns
    -------
    numpy.ndarray
        Array with ``n_in * len(feats) + n_out`` columns.
    """
    feature_count = len(feats)
    lags = range(n_in, 0, -1)
    steps = range(0, n_out)

    # input sequence (t-n, ... t-1)
    lagged_inputs = [df[feats].shift(lag) for lag in lags]
    input_labels = [
        'var%d(t-%d)' % (idx + 1, lag)
        for lag in lags
        for idx in range(feature_count)
    ]

    # forecast sequence (t, t+1, ... t+n) of the single target column
    shifted_targets = [df[target].shift(-step) for step in steps]
    target_labels = [
        ('var%d(t)' % 1) if step == 0 else ('var%d(t+%d)' % (1, step))
        for step in steps
    ]

    # put it all together
    combined = pd.concat(lagged_inputs + shifted_targets, axis=1)
    combined.columns = input_labels + target_labels
    return combined.dropna().values


In [10]:
# Load the raw measurements, using the 'ts' column as the index.
df = pd.read_csv("data/mongo_data.csv", index_col='ts')
# Use the `columns` keyword: passing the axis positionally to DataFrame.drop
# was deprecated and is rejected by pandas 2.x.
df = df.drop(columns='Unnamed: 0')
df.index = pd.to_datetime(df.index)

# Keep only the columns whose value actually varies.
df = df.loc[:, ~unique_cols(df)]

# Average window: mean over consecutive, non-overlapping chunks of rows.
# The result is indexed by chunk number, not by timestamp.
# NOTE(review): presumably one-minute means if the samples are 1 s apart -- confirm.
rows_per_window = 60
df_2 = df.groupby(np.arange(len(df)) // rows_per_window).mean()
print(df_2.shape)
df_2.head()

  df = df.drop('Unnamed: 0', 1)


(32146, 39)


Unnamed: 0,S_SUM,U_L1_L2,I_SUM,F,U_L3_L1,C_phi_L1,U_L3_N,RealEc_L1,RealEc_L2,RealEc_L3,...,S_L3,Q_L1,S_L2,U_L2_N,S_L1,U_L1_N,I_L1,RealEc_SUM,I_L2,I_L3
0,146344.076833,412.533667,1.055,50.025667,410.852,0.984667,237.659333,25878590.0,25174770.0,25036660.0,...,48007.9055,7927.5035,48788.740833,238.315667,49547.431,237.497,208.6315,76090130.0,204.727167,202.014167
1,146255.934333,413.033667,1.063167,50.016833,411.3265,0.985333,237.906,25880380.0,25176530.0,25038390.0,...,48109.608833,7822.9535,48646.116333,238.624667,49500.208667,237.713667,208.246,76095400.0,203.868167,202.225167
2,146212.799833,412.486667,1.038,50.017,411.097,0.984167,237.7675,25881340.0,25177480.0,25039320.0,...,48004.779667,8016.505,48883.147167,238.300167,49324.874167,237.612667,207.5935,76098250.0,205.135833,201.903333
3,146342.066167,412.242667,1.021333,50.006833,410.818333,0.9835,237.794167,25882310.0,25178440.0,25040250.0,...,47692.223333,8381.72,49245.222667,238.209667,49404.619167,237.478667,208.042333,76101110.0,206.738167,200.579
4,145752.608167,411.028833,1.0735,50.009833,409.514167,0.985333,236.878167,25883270.0,25179400.0,25041180.0,...,47807.012667,7659.559833,48889.0295,237.5005,49056.565833,236.737833,207.226333,76103960.0,205.8515,201.828


In [11]:
# --- Experiment settings ----------------------------------------------------
history_window = 15      # past steps given to the model (n_in)
prediction_window = 1    # target steps produced per sample (n_out)
# NOTE(review): model_cells and dense_cells are not referenced by any cell
# visible here -- confirm whether create_model was meant to receive them.
model_cells = 10
dense_cells = 5

# Test RMSE values, appended to by the training loop.
rmse_res = []

# --- Scaling ----------------------------------------------------------------
# Min-max scale every column of the averaged frame, keeping labels and index.
# NOTE(review): the scaler is fitted on all rows, including those later used
# for validation and testing.
scaler = MinMaxScaler()
d = scaler.fit_transform(df_2)
scaled_df = pd.DataFrame(data=d, index=df_2.index, columns=df_2.columns)

In [28]:
# Train and evaluate one model per target column in corr_group.
# Start from an empty list so re-running this cell does not pile duplicate
# scores on top of the previous run's.
rmse_res = []
for k in corr_group:
    # Each row: history_window lagged copies of the group's features,
    # followed by the target value(s).
    values = create_supervised_dataset(scaled_df, k, corr_group[k], n_in=history_window, n_out=prediction_window)
    len_values = values.shape[0]

    # Chronological split: train = first 70% of the rows, cv = the next 20%
    # (up to the 90% mark), test = everything after that.
    n_train_seconds = int(0.7*len_values)  # end of the training rows
    n_cv_seconds = int(0.9*len_values)  # end of the validation rows
    train = values[:n_train_seconds, :]
    cv = values[n_train_seconds:n_cv_seconds, :]

    # split into inputs (all but the last column) and output (last column)
    train_X, train_y = train[:, :-1], train[:, -1:]
    cv_X, cv_y = cv[:, :-1], cv[:, -1:]
    # reshape from [samples, features] into [samples, 1, features]:
    # a single timestep whose feature vector holds the whole lagged history
    train_X = train_X.reshape((train_X.shape[0], 1, train_X.shape[1]))
    cv_X = cv_X.reshape((cv_X.shape[0], 1, cv_X.shape[1]))
    model = create_model(5, train_X.shape[2])
    history = model.fit(train_X, train_y, epochs=100, batch_size=72, validation_data=(cv_X, cv_y), shuffle=False)

    # Held-out test rows: the tail after the validation block. Slicing from
    # n_cv_seconds (rather than values[-n:]) leaves no unused row between cv
    # and test and cannot turn into "the whole array" when n would be 0.
    n_test_seconds = len_values - n_cv_seconds
    test = values[n_cv_seconds:, :]

    test_X, test_y = test[:, :-1], test[:, -1:]
    test_X = test_X.reshape((test_X.shape[0], 1, test_X.shape[1]))
    # make a prediction; force it to the target's 2-D shape because a model
    # that returns sequences yields [samples, 1, 1], which
    # metrics.mean_squared_error refuses
    yhat = model.predict(test_X).reshape(test_y.shape)
    rmse_res.append(np.sqrt(metrics.mean_squared_error(test_y, yhat)))

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100

KeyboardInterrupt: 