In [1]:
import sys
import os
from pathlib import Path

# Make the modules that live one directory above the notebook importable.
sys.path.append(str(Path.cwd().parent))

# Project root: two directory levels above the notebook's working directory.
root = Path.cwd().parent.parent

In [2]:
from loader import Power, Weather
from constant import FeatureType
from keras.layers import LSTM, RepeatVector, TimeDistributed, Dense
from tensorflow.python.keras.optimizer_v2.rmsprop import RMSProp
from keras.models import Sequential
from keras.callbacks import EarlyStopping, ModelCheckpoint
from tensorflow.keras.models import load_model
from sklearn.metrics import mean_squared_error,r2_score
from datetime import datetime, timedelta

import json
import argparse
import numpy as np
import pandas as pd
import tensorflow as tf

### Setting

In [3]:
# No command-line arguments are parsed inside the notebook: the parser only
# provides an empty namespace that the settings below are attached to.
parser = argparse.ArgumentParser()
args = parser.parse_args([])

vars(args).update(
    # ====== Path ====== #
    root=root,

    # ====== Model ====== #
    frame_in=72,          # number of input time steps fed to the encoder LSTM
    frame_out=24,         # number of output time steps produced by the decoder
    batch_size=64,
    learning_rate=0.001,
    patience=30,          # early-stopping patience, in epochs
    shuffle=True,

    # ====== Data ====== #
    # NOTE(review): these are presumably consumed by loader.Power / loader.Weather.
    years=[2017, 2018, 2019],
    region="Jindo",
    station=192,
    ratio=[0.6, 0.2, 0.2],
)

# ====== Features ====== #
# The order matters: the experiment loop requests the first 1, 2, ..., N
# features through weather.get_data(n).
features = [
    FeatureType.SUNSHINE,
    FeatureType.GROUND_TEMPERATURE,
    FeatureType.HUMIDITY,
    FeatureType.WIND_SPEED,
    FeatureType.WIND_DIRECTION,
    FeatureType.TEMPERATURE,
    FeatureType.VISIBILITY,
    FeatureType.PRECIPITATION,
    FeatureType.STEAM_PRESSURE,
    FeatureType.DEW_POINT_TEMPERATURE,
    FeatureType.ATMOSPHERIC_PRESSURE,
]

### Model

In [4]:
def encoder_decoder(args, dataset, budget):
    """Build, train and save an LSTM encoder-decoder model.

    Args:
        args: settings namespace. Reads ``learning_rate``, ``frame_in``,
            ``feature_len``, ``frame_out``, ``name``, ``patience``,
            ``batch_size`` and ``shuffle``.
        dataset: mapping with ``'train'`` and ``'val'`` entries, each an
            ``(X, y)`` pair passed straight to ``model.fit``.
        budget: maximum number of training epochs.

    Returns:
        The trained Keras model. The same model is written to
        ``<root>/models/<args.name>/model.h5``; improving checkpoints are
        written next to it while training runs.
    """
    print("train")

    X_train, y_train = dataset['train']
    X_val, y_val = dataset['val']

    with tf.device('/GPU:0'):
        model = Sequential()
        optimizer = RMSProp(learning_rate=args.learning_rate)

        # Encoder: compress the input window into a single state vector.
        model.add(LSTM(256, input_shape=(args.frame_in, args.feature_len)))
        # Decoder: repeat that vector once per output step and unroll it.
        model.add(RepeatVector(args.frame_out))
        model.add(LSTM(256, return_sequences=True))
        model.add(TimeDistributed(Dense(256, activation='relu')))
        model.add(TimeDistributed(Dense(1)))
        model.compile(loss='mse', optimizer=optimizer)

        print("model name: %s" % args.name)

        model_root_path = os.path.join(root, 'models', args.name)
        Path(model_root_path).mkdir(parents=True, exist_ok=True)
        checkpoint_path = os.path.join(model_root_path, 'model-{epoch:03d}-{val_loss:03f}.h5')
        model_path = os.path.join(model_root_path, 'model.h5')

        # restore_best_weights: without it, the weights saved to model.h5 (and
        # returned) are those from `patience` epochs after the best validation
        # loss rather than the best ones seen when early stopping triggers.
        callback = EarlyStopping(monitor='val_loss', patience=args.patience,
                                 restore_best_weights=True)
        checkpoint = ModelCheckpoint(checkpoint_path, verbose=1, monitor='val_loss', save_best_only=True)

        model.fit(X_train, y_train, batch_size=args.batch_size, epochs=budget,
                  validation_data=(X_val, y_val), callbacks=[callback, checkpoint], shuffle=args.shuffle)
        model.save(model_path)

        return model
    
def predict(partition, model, scaler):
    """Evaluate ``model`` on the test split and report RMSE / normalised RMSE.

    Targets and predictions are flattened to one column and mapped back to
    the original scale with ``scaler.inverse_transform``. Samples whose true
    value is exactly zero are excluded from the error metrics.

    Args:
        partition: mapping whose ``'test'`` entry is an ``(X, y)`` pair; ``y``
            is indexed as a 3-D array.
        model: object exposing ``predict(X)``.
        scaler: object exposing ``inverse_transform(array)``.

    Returns:
        ``(y_test, y_pred, nrmse)``: the inverse-scaled 2-D targets and
        predictions (zeros included) and the RMSE divided by the range of the
        non-zero targets.
    """
    X_test, y_test = partition['test']
    y_test = y_test.reshape((y_test.shape[0] * y_test.shape[1], y_test.shape[2]))
    y_test = scaler.inverse_transform(y_test)

    y_pred = model.predict(X_test)
    y_pred = y_pred.reshape((-1, 1))
    y_pred = scaler.inverse_transform(y_pred)

    print(y_test.shape, y_pred.shape)

    # Use a boolean mask rather than np.delete(np.where(...)): np.where on a
    # 2-D array also yields the column indices (all 0 here), which made
    # np.delete drop sample 0 as well, even when it was non-zero.
    nonzero = y_test.ravel() != 0
    y_test_adjusted = y_test.ravel()[nonzero]
    y_pred_adjusted = y_pred.ravel()[nonzero]

    rmse = np.sqrt(mean_squared_error(y_test_adjusted, y_pred_adjusted))
    max_min = np.max(y_test_adjusted) - np.min(y_test_adjusted)
    nrmse = rmse / max_min

    print('rmse:', rmse)
    print('nrmse:', nrmse)

    return y_test, y_pred, nrmse

def save(y_pred_list, y_test, test_start, test_end, name='result.csv'):
    """Write the predictions of every model and the true values to a CSV file.

    The file is written to ``<root>/results/<name>`` with one column per
    model (``'1th model'``, ``'2th model'``, ...), a ``'y_test'`` column and
    an hourly ``'time'`` index.

    Args:
        y_pred_list: sequence of prediction arrays, one per model; each is
            reshaped to 1-D using its first dimension. The list itself is not
            modified.
        y_test: array of true values, reshaped the same way.
        test_start: start of the test period (datetime-like).
        test_end: end of the test period (datetime-like).
        name: file name of the CSV inside the results directory.
    """
    # Iterate over the predictions actually passed in, not over the global
    # feature list, and leave the caller's arrays untouched.
    columns = {}
    for position, y_pred in enumerate(y_pred_list, start=1):
        columns['%dth model' % position] = y_pred.reshape((y_pred.shape[0])).tolist()
    y_test = y_test.reshape((y_test.shape[0]))
    columns['y_test'] = y_test.tolist()
    df = pd.DataFrame(columns)

    # The first three days of the test period carry no prediction.
    # NOTE(review): presumably this equals the input window
    # (args.frame_in = 72 hourly steps) - confirm against the loader.
    # A timedelta frequency avoids the deprecated 'H' alias.
    full_idx = pd.date_range(start=test_start + timedelta(days=3), end=test_end,
                             freq=timedelta(hours=1))
    full_idx = full_idx[:y_test.shape[0]]
    df['time'] = full_idx
    df = df.set_index('time')

    result_path = os.path.join(root, 'results')
    # Create the output directory on first use, as is done for the models.
    Path(result_path).mkdir(parents=True, exist_ok=True)
    result_name = os.path.join(result_path, name)
    df.to_csv(result_name)

### Experiment

In [5]:
# Evaluate one model per feature-set size (1 .. len(features)) and store all
# predictions side by side.
power = Power(args)
weather = Weather(args, features)

power_data = power.get_data()
scaler = power_data['scaler']

y_pred_list = []
nrmse_list = []

for i in range(len(features)):
    n_features = i + 1
    # NOTE(review): presumably returns the first `n_features` entries of
    # `features` - confirm against loader.Weather.
    weather_data = weather.get_data(n_features)
    args.feature_len = n_features
    args.name = 'normal_hp_model_%d' % n_features

    # Each split pairs the weather inputs with the power targets.
    dataset = {split: [weather_data[split], power_data[split]]
               for split in ('train', 'val', 'test')}

    # Reuse a previously trained model when one exists; otherwise train it,
    # so the cell also runs on a checkout without saved models.
    model_path = os.path.join(root, 'models', args.name, 'model.h5')
    if os.path.exists(model_path):
        model = load_model(model_path)
    else:
        model = encoder_decoder(args, dataset, budget=256)

    y_test, y_pred, nrmse = predict(dataset, model, scaler)
    y_pred_list.append(y_pred)
    nrmse_list.append(nrmse)

# y_test comes from the last iteration; every iteration uses the same
# power_data['test'] targets.
save(y_pred_list, y_test, power.test_start, power.test_end, name='normal_setting.csv')

train start date: 2017-01-01 00:00:00
train end date: 2018-10-19 23:00:00
val start date: 2018-10-20 00:00:00
val end date: 2019-05-26 23:00:00
test start date: 2019-05-27 00:00:00
test end date: 2019-12-31 23:00:00
train start date: 2017-01-01 00:00:00
train end date: 2018-10-19 23:00:00
val start date: 2018-10-20 00:00:00
val end date: 2019-05-26 23:00:00
test start date: 2019-05-27 00:00:00
test end date: 2019-12-31 23:00:00
0 missing dates
0 value(s) are not zero
0 missing dates
0 value(s) are not zero
0 missing dates
0 value(s) are not zero
(15768,)
(5256,)
(5256,)
missing dates: []
missing dates: ['2018-01-13 09:00', '2018-01-13 10:00', '2018-01-13 11:00', '2018-01-13 12:00', '2018-01-13 13:00']
missing dates: []
(5184, 1) (5184, 1)
rmse: 48.71345076573177
nrmse: 0.1586757353932631
missing dates: []
missing dates: ['2018-01-13 09:00', '2018-01-13 10:00', '2018-01-13 11:00', '2018-01-13 12:00', '2018-01-13 13:00']
missing dates: []
(5184, 1) (5184, 1)
rmse: 55.28152928440092
nrmse

In [6]:
# Normalised RMSE of each evaluated model, in loop order: entry k belongs to
# the model named 'normal_hp_model_<k+1>'.
nrmse_list

[0.1586757353932631,
 0.18007012796221797,
 0.17241647899619988,
 0.16342813839006395,
 0.1903091257038708,
 0.18494658873979072,
 0.15662439245246407,
 0.16073085156310288,
 0.17277546451744494,
 0.18365650736664915,
 0.17399083774075524]