In [1]:
import numpy as np 
import pandas as pd 
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
import seaborn as sns
import tensorflow as tf
import csv
import os
import datetime
import time
import scipy.stats as stats
from sklearn.preprocessing import MinMaxScaler, RobustScaler, PowerTransformer, QuantileTransformer
from sklearn.model_selection import train_test_split
from scipy.stats import norm

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Flatten, Conv1D, LSTM, Bidirectional, MaxPooling1D
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
from keras.layers.wrappers import TimeDistributed
import keras
import h5py

from keras.utils.vis_utils import plot_model
import pydot
import pydotplus
import graphviz

from sklearn.metrics import r2_score as r2
from sklearn.metrics import mean_absolute_error as MAE
from sklearn.metrics import mean_squared_error as MSE

In [5]:
Data_4Y_Edited = pd.read_csv('./House1_Ch1_Combined_Data_New_13-16_Outlier_Edited.csv', encoding='cp949')
Data_4Y_Edited.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2102400 entries, 0 to 2102399
Data columns (total 8 columns):
 #   Column           Dtype  
---  ------           -----  
 0   Unnamed: 0       int64  
 1   Year-Month-Date  object 
 2   Time             object 
 3   Holiday          int64  
 4   Watt             float64
 5   Temp('C)         float64
 6   Humidity(%)      float64
 7   CPI              float64
dtypes: float64(4), int64(2), object(2)
memory usage: 128.3+ MB


In [6]:
Data_4Y_Edited.sort_index(ascending=False).reset_index(drop=True)
from sklearn.preprocessing import PowerTransformer, QuantileTransformer, StandardScaler
scaler=MinMaxScaler()
Scaler_Name=str(scaler)[:-2]

#scale_cols = ["Holiday", "Watt", "Temp('C)", "Humidity(%)", "CPI"]
Data_4Y_Edited.insert(2, "Watt2", Data_4Y_Edited["Watt"])
scale_cols = ["Watt", "Watt2"]
Data_4Y_scaled = scaler.fit_transform(Data_4Y_Edited[scale_cols])
Data_4Y_scaled = pd.DataFrame(Data_4Y_scaled)
Data_4Y_scaled.columns = scale_cols

#Data_4Y_scaled=Data_4Y_scaled[["Watt", "Watt2"]]
Data_4Y_scaled.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2102400 entries, 0 to 2102399
Data columns (total 2 columns):
 #   Column  Dtype  
---  ------  -----  
 0   Watt    float64
 1   Watt2   float64
dtypes: float64(2)
memory usage: 32.1 MB


In [7]:
scaler1=MinMaxScaler()
Data_4Y_Edited["Watt"]=scaler1.fit_transform(Data_4Y_Edited["Watt"].values.reshape(-1,1))

In [8]:
TEST_SIZE = 1*60*24*365 #1년치를 테스트셋으로 사용
WINDOW_SIZE = 5 #얼마 동안의 과거 기반의 데이터로 다음값을 예측할 것인지 설정. 
BATCH_SIZE = 64
VAL_DAYS = 525600

In [9]:
learning = Data_4Y_scaled[:-TEST_SIZE] # 학습에 쓰이는 전체 데이터
VAL_RATIO=VAL_DAYS/len(learning)
VAL_SIZE = int(len(learning)*VAL_RATIO)
TRAIN = learning[:-VAL_SIZE]
VAL = learning[-VAL_SIZE:]
TEST = Data_4Y_scaled[-TEST_SIZE:]
print(len(TRAIN), len(VAL), len(TEST))

1051200 525600 525600


In [10]:
def windowed_dataset(series, window_size, batch_size, shuffle):
    series = tf.expand_dims(series, axis=-1)
    ds = tf.data.Dataset.from_tensor_slices(series)
    ds = ds.window(window_size + 1, shift=1, drop_remainder=True)
    ds = ds.flat_map(lambda w: w.batch(window_size + 1))
    if shuffle:
        ds = ds.shuffle(1000)
    ds = ds.map(lambda w: (w[:-1], w[-1]))
    return ds.batch(batch_size).prefetch(1)

In [11]:
train_data = windowed_dataset(TRAIN, WINDOW_SIZE, BATCH_SIZE, False) 
valid_data = windowed_dataset(VAL, WINDOW_SIZE, BATCH_SIZE, False)
test_data = windowed_dataset(TEST, WINDOW_SIZE, BATCH_SIZE, False)

In [14]:
LeakyReLU=tf.keras.layers.LeakyReLU(alpha=0.3)

model = Sequential([
    Conv1D(filters=8, kernel_size=4, padding="causal", activation=LeakyReLU, input_shape=[WINDOW_SIZE, TRAIN.shape[1]]),
    MaxPooling1D(pool_size=2, strides=1, padding="same"),
    Conv1D(filters=8, kernel_size=4, padding="causal", activation=LeakyReLU),
    MaxPooling1D(pool_size=2, strides=1, padding="same"),
    Bidirectional(LSTM(4, activation=LeakyReLU, return_sequences=True), merge_mode="sum"),
    TimeDistributed(Dense(8)),
    Bidirectional(LSTM(4, activation=LeakyReLU, return_sequences=True), merge_mode="sum"),
    TimeDistributed(Dense(4)),
    Bidirectional(LSTM(2, activation=LeakyReLU, return_sequences=False), merge_mode="sum"),
    Dense(1)
                    ])


Nadam = tf.keras.optimizers.Nadam(learning_rate=0.0003)
model.compile(loss='mean_absolute_error', optimizer=Nadam)
print('\n'*4)
model.summary()






Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv1d (Conv1D)             (None, 5, 8)              72        
                                                                 
 max_pooling1d (MaxPooling1D  (None, 5, 8)             0         
 )                                                               
                                                                 
 conv1d_1 (Conv1D)           (None, 5, 8)              264       
                                                                 
 max_pooling1d_1 (MaxPooling  (None, 5, 8)             0         
 1D)                                                             
                                                                 
 bidirectional (Bidirectiona  (None, 5, 4)             416       
 l)                                                              
                                                   

In [15]:
filename = "./Data/MinMax_LReLU.h5"
model.load_weights(filename)
pred = model.predict(test_data)
pred.shape

(525595, 1)

In [17]:
Inversed_pred = scaler1.inverse_transform(np.array(pred).reshape(-1,1))
Inversed_true = scaler1.inverse_transform(np.array(TEST['Watt'][:-WINDOW_SIZE]).reshape(-1,1))

In [16]:
def mae(y_true, predictions):
    y_true, predictions = np.array(y_true), np.array(predictions)
    return np.mean(np.abs(y_true - predictions))

def smape(y_test, y_pred):
    y_test, y_pred = np.array(y_test), np.array(y_pred)
    return np.mean((np.abs(y_test-y_pred))/(np.abs(y_test)+np.abs(y_pred)))*100/2

In [18]:
#true = TEST['Watt2'][:-WINDOW_SIZE].to_numpy()
#true = true.reshape(-1,1)

RMSE = np.sqrt(MSE(Inversed_true, Inversed_pred))

In [20]:
print('r2 >> %.4f' %r2(Inversed_true, Inversed_pred)) # 1에 가까워야 좋음
print('MAE >> %.4f' %mae(Inversed_true, Inversed_pred)) # 0에 가까워야 좋음
print('RMSE >> %.4f' %RMSE) # 0에 가까워야 좋음
print('SMAPE >> %.4f' %smape(Inversed_true, Inversed_pred)) # 0에 가까워야 좋음

r2 >> -0.3940
MAE >> 260.3021
RMSE >> 637.8751
SMAPE >> 9.3031
