In [12]:
import numpy as np
import pandas as pd
import tensorflow as tf
import keras
from keras.models import Sequential
from keras.layers import Dense, LSTM, Dropout
from sklearn.preprocessing import MinMaxScaler
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
%matplotlib inline

In [3]:
solar_data = pd.read_csv('train.csv',encoding='utf-8')
solar_data.drop(['Day', 'Hour','Minute'], axis='columns', inplace=True)

In [4]:
def create_dataset(solar_data, index, input_days=7, output_days=2):
    dataX, dataY = [], []
    for i in range(0,48*7):
        dataX += list(np.array(solar_data.loc[index+i].tolist()))
    for i in range(48*7,48*(7+2)):
        dataY += solar_data.loc[[0+i],['TARGET']].values.tolist()
    return np.array(dataX), np.array(dataY)

In [5]:
# 데이터셋 생성
input_data, output_data = [], []
last_index = 3*365*48-48*(7+2)
index_list = list(range(0,last_index+1,48))
for i in index_list:
    X, Y = create_dataset(solar_data,i)
    input_data.append(X)
    output_data.append(Y)

In [22]:
# 데이터셋 분배
x_train, x_test, y_train, y_test = train_test_split(np.array(input_data), np.array(output_data), test_size = 0.1)
x_train, x_val, y_train, y_val = train_test_split(x_train, y_train, test_size=0.1)

x_train = np.reshape(x_train, (x_train.shape[0], x_train.shape[1], 1))
x_val = np.reshape(x_val, (x_val.shape[0], x_val.shape[1], 1))
x_test = np.reshape(x_test, (x_test.shape[0], x_test.shape[1], 1))

In [23]:
class CustomHistory(keras.callbacks.Callback):
    def init(self):
        self.train_loss = []
        self.val_loss = []
        
    def on_epoch_end(self, batch, logs={}):
        self.train_loss.append(logs.get('loss'))
        self.val_loss.append(logs.get('val_loss'))

In [24]:
model = Sequential()
for i in range(2):
    model.add(LSTM(32, batch_input_shape=(1, 2016, 1), stateful=True, return_sequences=True))
    model.add(Dropout(0.3))
model.add(LSTM(32, batch_input_shape=(1, 2016, 1), stateful=True))
model.add(Dropout(0.3))
model.add(Dense(1))

In [25]:
# 모델 학습과정 설정
model.compile(loss='mean_squared_error', optimizer='adam')

In [26]:
model.summary()

Model: "sequential_4"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm_7 (LSTM)                (1, 2016, 32)             4352      
_________________________________________________________________
dropout_6 (Dropout)          (1, 2016, 32)             0         
_________________________________________________________________
lstm_8 (LSTM)                (1, 2016, 32)             8320      
_________________________________________________________________
dropout_7 (Dropout)          (1, 2016, 32)             0         
_________________________________________________________________
lstm_9 (LSTM)                (1, 32)                   8320      
_________________________________________________________________
dropout_8 (Dropout)          (1, 32)                   0         
_________________________________________________________________
dense_2 (Dense)              (1, 1)                   

In [None]:
# 모델 학습
custom_hist = CustomHistory()
custom_hist.init()

for i in range(200):
    model.fit(x_train, y_train, epochs=1, batch_size=1, shuffle=False, callbacks=[custom_hist], validation_data=(x_val, y_val))
    model.reset_states()

 36/880 [>.............................] - ETA: 23:38 - loss: 245.4428

In [None]:
# 학습과정 확인
plt.plot(hist.history['loss'])
plt.plot(hist.history['val_loss'])
plt.ylabel('loss')
plt.xlabel('epoch')
plt.legend(['train', 'val'], loc='upper left')
plt.show()

In [152]:
# 모델 평가
trainScore = model.evaluate(X_train, y_train, verbose=0)
print('Train Score: ', trainScore)
valScore = model.evaluate(X_val, y_val, verbose=0)
print('Validataion Score: ', valScore)
testScore = model.evaluate(X_test, y_test, verbose=0)
print('Test Score: ', testScore)

Train Score:  313.8495788574219
Validataion Score:  319.575927734375
Test Score:  315.566162109375


In [None]:
# 모델 사용하기
look_ahead = 100
xhat = x_test[0]
predictions = np.zeros((look_ahead,1))
for i in range(look_ahead):
    prediction = model.predict(np.array([xhat]), batch_size=1)
    predictions[i] = prediction
    xhat = np.vstack([xhat[1:],prediction])
    
plt.figure(figsize=(12,5))
plt.plot(np.arange(look_ahead),predictions,'r',label="prediction")
plt.plot(np.arange(look_ahead),y_test[:look_ahead],label="test function")
plt.legend()
plt.show()