In [1]:
import time
import json
import math
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import backend as K
from tensorflow.keras import Input, Model
from tensorflow.keras.layers import Reshape, Conv1D, Subtract, Activation, Flatten, Lambda, Add, Multiply, Bidirectional, Dense, BatchNormalization, SpatialDropout1D, LSTM
from tensorflow.keras.losses import MeanSquaredError, MeanAbsoluteError, BinaryCrossentropy
from tensorflow.keras.metrics import Accuracy
from tensorflow.keras.callbacks import ModelCheckpoint
from tensorflow.keras.utils import plot_model


import sys
sys.path.append("../")
from config import APP_CONFIG, JACK_KELLY_INPUT_CONFIG, DEFAULT_STEP_SIZE, FORWARD_FILLING_WINDOW

from __models__ import *
from util import *

In [2]:
# disable tensor INFO
import logging
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '1'  # filter tensorflow INFO
logging.getLogger('tensorflow').setLevel(logging.FATAL)

In [3]:
# create weigth folder
model_folder = './benchmark/'
if not os.path.exists(model_folder):
    os.makedirs(model_folder)

In [None]:
# training s2s
def __training_s2s__(y_name='app_power', model_type='reg'):
    epochs = 50
    list_models = [JKDAE, JKLSTM, S2S, BiTCNResidual, FullyConvolutionalNetwork]
    list_devices = ["GPU:0", "GPU:1", "GPU:2", "GPU:3"]
    lst_x_names=['agg_power']
    lst_y_names=[y_name]
    off_on_ratio=4
    split_ratio = (0.9,0.05,0.05)
    real_data_only=True

    for appliance in APP_CONFIG.keys():
        # load data
        [X], [Y] = load_data(appliance, lst_x_names, lst_y_names, real_data_only=real_data_only, off_on_ratio=off_on_ratio)
        scaler = STDScaler(X)
        X = scaler.transfer(X)
        if model_type == 'reg':
            scaler_ = STDScaler(Y)
            Y = scaler_.transfer(Y)

        train_x, val_x, test_x = split_data(X, split_ratio)
        train_y, val_y, test_y = split_data(Y, split_ratio)

        train_generator = S2SDataGenerator(train_x, train_y)
        val_generator = S2SDataGenerator(val_x, val_y)
        test_generator = S2SDataGenerator(test_x, test_y)

        for model_prototype in list_models:
            with tf.distribute.MirroredStrategy(list_devices).scope():
                sequence_length = JACK_KELLY_INPUT_CONFIG[appliance]//DEFAULT_STEP_SIZE
                model_class = model_prototype(sequence_length, sequence_length, appliance, model_type, epochs)

                print(model_class.name)
                model_class.train(train_generator, val_generator)

In [None]:
# __training_s2s__(y_name='activate', model_type='clf')

In [None]:
class S2SDataGenerator_beta(keras.utils.Sequence):
    def __init__(self, X, Y, batch_size=32, shuffle=True):
        self.X, self.Y= X, Y
        self.batch_size = batch_size
        self.shuffle = shuffle
        self.indexes = np.arange(self.X[0].shape[0])
        self.on_epoch_end()
    
    def __len__(self):
        'Denotes the number of batches per epoch'
        return self.X[0].shape[0] // self.batch_size

    def __getitem__(self, index):
        'Generate one batch of data'
        selected = self.indexes[index*self.batch_size:(index+1)*self.batch_size]
        return (
            [ var[selected] for var in self.X],
            [ var[selected] for var in self.Y],
        )

    def on_epoch_end(self):
        'Updates indexes after each epoch'
        np.random.shuffle(self.indexes)

In [None]:
epochs = 50
list_models = [JKDAE, JKLSTM, S2S, BiTCNResidual, FullyConvolutionalNetwork]
list_devices = ["GPU:0", "GPU:1", "GPU:2", "GPU:3"]
x_name=['agg_power']
y_name=['app_power', 'std_3']
off_on_ratio=4
split_ratio = (0.9,0.05,0.05)
real_data_only=True

for appliance in APP_CONFIG.keys():
    if appliance == 'fridge':
        continue
    # load data
    [X], [Y, std] = load_data(appliance, x_name, y_name, real_data_only=real_data_only, off_on_ratio=off_on_ratio)

    scaler = STDScaler(X)
    X = scaler.transfer(X)
    scaler_ = STDScaler(Y)
    Y = scaler_.transfer(Y)
    std = std / scaler_.std

    train_x, val_x, test_x = split_data(X, split_ratio)
    train_y, val_y, test_y = split_data(Y, split_ratio)
    train_std, val_std, test_std = split_data(Y, split_ratio)
    
    train_generator = S2SDataGenerator_beta([train_x], [train_y, train_std])
    val_generator = S2SDataGenerator_beta([val_x], [val_y, val_std])
    test_generator = S2SDataGenerator_beta([test_x], [test_y, test_std])
                
    for model_prototype in list_models:
        with tf.distribute.MirroredStrategy(list_devices).scope():
            print('******** {}-{} ********'.format(appliance, model_prototype))
            sequence_length = JACK_KELLY_INPUT_CONFIG[appliance]//DEFAULT_STEP_SIZE
            model_class = STD(sequence_length, sequence_length, appliance, 'reg_std', epochs, model_prototype)
            model_class.train(train_generator, val_generator)

In [4]:
def my_loss_fn(neigh_step=1, coef=0.25):
    def __(y_true, y_pred):
        paddings = tf.constant([[0, 0,], [neigh_step, neigh_step]])
        y_pred_padding = tf.pad(y_pred, paddings, "CONSTANT")
        smooth_difference = tf.square(
            y_pred_padding[:, neigh_step:-neigh_step] - y_pred_padding[:, :-neigh_step*2]
        ) + tf.square(
            y_pred_padding[:, neigh_step:-neigh_step] - y_pred_padding[:, neigh_step*2:]
        )

        smooth_loss = tf.math.log(smooth_difference + 1) * tf.cast(
            smooth_difference > tf.reshape(tf.math.reduce_std(smooth_difference, axis=1)**2, [-1,1]), tf.float32)

        mse_loss = tf.square(y_true - y_pred)
        return tf.reduce_mean(mse_loss, axis=-1) + coef * tf.reduce_mean(smooth_loss, axis=-1)
    return __

In [6]:
model_type = 'reg_std_loss'
epochs = 50
list_models = [JKDAE, JKLSTM, S2S, BiTCNResidual, FullyConvolutionalNetwork]
list_devices = ["GPU:0", "GPU:1", "GPU:2", "GPU:3"]
lst_x_names=['agg_power']
lst_y_names=['app_power']
off_on_ratio=4
split_ratio = (0.9,0.05,0.05)
real_data_only=True

for appliance in APP_CONFIG.keys():
    # load data
    [X], [Y] = load_data(appliance, lst_x_names, lst_y_names, real_data_only=real_data_only, off_on_ratio=off_on_ratio)
    scaler = STDScaler(X)
    X = scaler.transfer(X)
    if model_type == 'reg_std_loss':
        scaler_ = STDScaler(Y)
        Y = scaler_.transfer(Y)

    train_x, val_x, test_x = split_data(X, split_ratio)
    train_y, val_y, test_y = split_data(Y, split_ratio)

    train_generator = S2SDataGenerator(train_x, train_y)
    val_generator = S2SDataGenerator(val_x, val_y)
    test_generator = S2SDataGenerator(test_x, test_y)

    for model_prototype in list_models:
        with tf.distribute.MirroredStrategy(list_devices).scope():
            sequence_length = JACK_KELLY_INPUT_CONFIG[appliance]//DEFAULT_STEP_SIZE
            model_class = model_prototype(sequence_length, sequence_length, appliance, model_type, epochs)
            model_class.recompile_model('adam', my_loss_fn(), ['mae'])
            
            print(model_class.name)
            model_class.train(train_generator, val_generator)

Loading Data for kettle
Time elapse: 25.16s
kettle_jkdae_128to128_reg_std_loss
Train for 6589 steps, validate for 366 steps
Epoch 1/50
   1/6589 [..............................] - ETA: 41:57

TypeError: Input 'y' of 'Sub' Op has type float32 that does not match type int64 of argument 'x'.

In [10]:
Y

array([[1, 1, 1, ..., 1, 1, 1],
       [1, 1, 1, ..., 1, 1, 1],
       [1, 1, 1, ..., 1, 1, 1],
       ...,
       [1, 1, 1, ..., 1, 1, 0],
       [1, 1, 1, ..., 1, 1, 1],
       [0, 0, 0, ..., 0, 0, 0]])

In [11]:
lst_y_names

['app_power']

In [12]:
[X], [Y] = load_data(appliance, lst_x_names, lst_y_names, real_data_only=real_data_only, off_on_ratio=off_on_ratioc

Loading Data for kettle
Time elapse: 24.59s


In [15]:
[X], [Y] = load_data(appliance, lst_x_names, ['app_power'], real_data_only=real_data_only, off_on_ratio=off_on_ratio)

Loading Data for kettle
Time elapse: 25.01s


In [17]:
load_data

<function util.load_data(appliance, lst_x_names, lst_y_names, real_data_only=True, off_on_ratio=1, random_seed=2021)>

In [18]:
def load_data(appliance, lst_x_names, lst_y_names, 
              real_data_only=True, off_on_ratio=1, random_seed=RANDOM_SEED):
    print('Loading Data for {}'.format(appliance))
    st = time.time()
    df = pd.read_csv('..//data/dataset/{}_beta.csv'.format(appliance), 
                    dtype={'s2q_agg_power': str})

    # get real data only
    if real_data_only:
        df = df[df['house_id'] != -1]
        
    # get on & off data
    df_on = df[df['contain_activation'] == 1][lst_x_names + lst_y_names]
    df_off = df[df['contain_activation'] == 0].sample(
        frac=off_on_ratio, 
        random_state=random_seed, 
        replace= True if off_on_ratio > 1 else False
    )[lst_x_names + lst_y_names]
    
    # concat on and off, then shuffle
    df = pd.concat([df_on, df_off]).sample(frac=1, random_state=random_seed)
    
    X = [ np.array([ json.loads(l) for l in df[x_name]]) for x_name in lst_x_names]
    Y = [ np.array([ json.loads(l) for l in df[y_name]]) for y_name in lst_y_names]
    
    print('Time elapse: {:.2f}s'.format(time.time()-st))
    return X, Y

In [19]:
df = pd.read_csv('..//data/dataset/{}_beta.csv'.format(appliance), 
                    dtype={'s2q_agg_power': str})