In [4]:
import os
import numpy  as np
import pandas as pd
import matplotlib.pyplot as plt
import tensorflow               as tf
import tensorflow.keras.backend as K
from tensorflow                 import keras
from sklearn.model_selection    import train_test_split
# Set TensorFlow's global random seed (777) so repeated runs start from the same random state.
tf.random.set_seed(777)

# 학습/검증 데이터 (Raw data) 준비

In [8]:
# Load the hourly supply table; the file is read with the CP949 encoding.
total = pd.read_csv(
    '한국가스공사_시간별 공급량_20181231.csv',
    encoding='cp949',
)

In [9]:
# Preview the first five rows of the raw table.
total.head(n=5)

Unnamed: 0,연월일,시간,구분,공급량
0,2013-01-01,1,A,2497.129
1,2013-01-01,2,A,2363.265
2,2013-01-01,3,A,2258.505
3,2013-01-01,4,A,2243.969
4,2013-01-01,5,A,2344.105


In [10]:
# Inspect the raw category column before it is encoded.
total.loc[:, '구분']

0         A
1         A
2         A
3         A
4         A
         ..
368083    H
368084    H
368085    H
368086    H
368087    H
Name: 구분, Length: 368088, dtype: object

In [11]:
# List the distinct category labels in order of first appearance.
pd.unique(total['구분'])

array(['A', 'B', 'C', 'D', 'E', 'G', 'H'], dtype=object)

In [12]:
# Encode the category labels as consecutive integers, numbered in order of
# first appearance in the column.
d_map = {label: code for code, label in enumerate(total['구분'].unique())}
total['구분'] = total['구분'].map(d_map)

In [13]:
# Confirm that the category column now holds the integer codes.
total.loc[:, '구분']

0         0
1         0
2         0
3         0
4         0
         ..
368083    6
368084    6
368085    6
368086    6
368087    6
Name: 구분, Length: 368088, dtype: int64

In [14]:
# Convert the date column to datetime so the `.dt` accessors can be used on it.
total['연월일'] = total['연월일'].pipe(pd.to_datetime)

In [15]:
# Split the date into year / month / day / weekday columns
# (pandas encodes weekday as Monday=0 ... Sunday=6).
for part in ('year', 'month', 'day', 'weekday'):
    total[part] = getattr(total['연월일'].dt, part)

In [16]:
# Calendar years assigned to each split: 2013-2017 for training, 2018 for validation.
train_years = list(range(2013, 2018))
val_years = [2018]

In [17]:
# Year-based split: rows whose `year` is in `train_years` form the training
# set, rows whose `year` is in `val_years` form the validation set.
train = total.loc[total['year'].isin(train_years)]
val = total.loc[total['year'].isin(val_years)]

In [23]:
# Input columns fed to the model; the target is the supply column '공급량'.
features = ['구분', 'month', 'day', 'weekday', '시간']


def _features_and_target(frame):
    """Return (feature matrix, target vector) as NumPy arrays for one split."""
    return np.array(frame[features]), np.array(frame['공급량'])


train_x, train_y = _features_and_target(train)
val_x, val_y = _features_and_target(val)

In [24]:
# (number of training rows, number of features)
np.shape(train_x)

(306768, 5)

In [26]:
# Append a trailing channel axis of size 1 so each sample is
# (n_features, 1), the 3-D layout passed to the Conv1D input layer.
train_x_reshape = np.expand_dims(train_x, axis=-1)
val_x_reshape = np.expand_dims(val_x, axis=-1)

In [27]:
# Shapes of the training / validation inputs and targets.
tuple(arr.shape for arr in (train_x_reshape, train_y, val_x_reshape, val_y))

((306768, 5, 1), (306768,), (61320, 5, 1), (61320,))

In [46]:
# Look at one reshaped training sample (index 24).
train_x_reshape[24, :, :]

array([[0],
       [1],
       [2],
       [2],
       [1]], dtype=int64)

## 1D-CNN (Convolutional Neural Network)

#### CNN 학습 중 일정 epoch 마다 정확도(Accuracy) 표시 함수 생성

In [28]:
PrintAccPerEpochs = 200  # report the training accuracy once every this many epochs


class AccuracyPerEpoch(keras.callbacks.Callback):
    """Keras callback that prints the training accuracy every ``PrintAccPerEpochs`` epochs.

    The value is read from the ``"accuracy"`` entry of the epoch logs, so the
    model must be compiled with an ``accuracy`` metric for anything to be
    printed. When that entry is missing (or no logs are passed) the callback
    stays silent instead of raising.
    """

    def on_epoch_end(self, epoch, logs=None):
        """Print the accuracy when ``epoch`` is a multiple of ``PrintAccPerEpochs``.

        Args:
            epoch: Epoch index passed by Keras (the first epoch is 0, so the
                first report is labelled "[0 Epochs]").
            logs: Metric dictionary for the finished epoch; may be ``None``.
        """
        if epoch % PrintAccPerEpochs != 0:
            return

        # `logs` may be None or lack the key; skip the report rather than
        # aborting a long training run with a TypeError / KeyError.
        accuracy = (logs or {}).get("accuracy")
        if accuracy is None:
            return

        print("[{} Epochs] Accuracy : {:.2f}% ".format(epoch, accuracy*100))

#### hyperparameter 설정

In [29]:
# Training length and the hyper-parameter grid explored by the search.
Epoch = 2000                     # epochs per grid-search case

Kenel_S = [3, 4, 5, 10, 20]      # Conv1D kernel sizes to try
Dense_N = [6, 12, 18, 21]        # widths of the hidden Dense layer to try
Learn_R = [0.001]                # Adam learning rates to try

# The number of cases is the product of the three list lengths.
print('Number of case : %d' % np.prod([len(axis) for axis in (Kenel_S, Dense_N, Learn_R)]))

Number of case : 20


In [30]:
# Aliases consumed by the grid-search cell. Note that the "Test*" names are
# bound to the validation split.
TrainData, TrainLabl = train_x_reshape, train_y
TestData, TestLabl = val_x_reshape, val_y

In [32]:
# ---------------------------------------------------------------------------
# Grid search: kernel size x hidden Dense width x learning rate.
#
# The target (`공급량`) is a continuous quantity, so the network is trained as a
# regressor: linear output unit, MSE loss, MAE as the reported metric.
# (A single-unit softmax always outputs 1.0, so the previous softmax /
# categorical-crossentropy / accuracy setup could not learn anything.)
#
# NOTE(review): with `Epoch` epochs per case on the full training set this
# search is very long; consider an EarlyStopping callback or a smaller `Epoch`.
# TODO: persist each model / history and the result table. The earlier save
# calls were commented out and referenced an undefined `SG_D`.
# ---------------------------------------------------------------------------


class _MetricPerEpoch(keras.callbacks.Callback):
    """Print one logged training metric every ``every`` epochs."""

    def __init__(self, metric, every):
        super().__init__()
        self.metric = metric
        self.every = every

    def on_epoch_end(self, epoch, logs=None):
        # Keras passes a zero-based epoch index; `logs` may be None.
        value = (logs or {}).get(self.metric)
        if epoch % self.every == 0 and value is not None:
            print("[{} Epochs] {} : {:.3f}".format(epoch, self.metric, value))


def CNN_model(input_data, kernel_size=None, dense_units=None, learning_rate=None):
    """Build and compile the 1D-CNN regressor.

    Architecture: three Conv1D(4/8/16 filters, 'same' padding, ReLU) layers,
    each followed by MaxPooling1D(2), then Flatten, a ReLU Dense layer and a
    single linear output unit.

    Args:
        input_data: Array shaped (samples, steps, channels); only
            ``shape[1:]`` is used, to declare the input shape.
        kernel_size: Kernel size shared by all Conv1D layers.
        dense_units: Number of units in the hidden Dense layer.
        learning_rate: Learning rate for the Adam optimizer.

        Each of the three hyper-parameters falls back to the module-level
        ``kernelSize`` / ``DenseNeuron`` / ``learningRate`` when omitted, so
        the former ``CNN_model(input_data)`` call style keeps working.

    Returns:
        A compiled ``keras.Sequential`` model (loss: MSE, metric: ``mae``).
    """
    if kernel_size is None:
        kernel_size = kernelSize
    if dense_units is None:
        dense_units = DenseNeuron
    if learning_rate is None:
        learning_rate = learningRate

    model = keras.Sequential()
    model.add(keras.layers.Conv1D(4, kernel_size=kernel_size, strides=1, padding='same', activation='relu',
                                  input_shape=(input_data.shape[1], input_data.shape[2])))  # Convolution layer 1
    model.add(keras.layers.MaxPooling1D(pool_size=2, strides=2, padding='same'))            # Pooling layer 1
    model.add(keras.layers.Conv1D(8, kernel_size=kernel_size, strides=1, padding='same',
                                  activation='relu'))                                       # Convolution layer 2
    model.add(keras.layers.MaxPooling1D(pool_size=2, strides=2, padding='same'))            # Pooling layer 2
    model.add(keras.layers.Conv1D(16, kernel_size=kernel_size, strides=1, padding='same',
                                  activation='relu'))                                       # Convolution layer 3
    model.add(keras.layers.MaxPooling1D(pool_size=2, strides=2, padding='same'))            # Pooling layer 3
    model.add(keras.layers.Flatten())
    model.add(keras.layers.Dense(units=dense_units, activation='relu'))
    model.add(keras.layers.Dense(units=1, activation='linear'))                             # Regression output

    model.compile(optimizer=keras.optimizers.Adam(learning_rate=learning_rate),
                  loss=keras.losses.MeanSquaredError(),
                  metrics=[keras.metrics.MeanAbsoluteError(name='mae')])
    return model


# One row per case: [case index, kernel size, dense units, learning rate, validation MAE]
Tr_result_temp = np.zeros((len(Kenel_S)*len(Dense_N)*len(Learn_R), 5))
cnt = 0

for kernelSize in Kenel_S:
    for DenseNeuron in Dense_N:
        for learningRate in Learn_R:

            ################ Build the CNN ################

            # Drop the previous case's graph state, then reseed BEFORE the
            # model is built so weight initialisation is identical per case.
            K.clear_session()
            tf.random.set_seed(777)

            model = CNN_model(TrainData, kernelSize, DenseNeuron, learningRate)
            model.summary()

            print('\n\n[Grid search] case No.%d'%(cnt+1))
            print('Kernel size of each conv. layer : %d'%(kernelSize))
            print('Num of neurons in Dense_H layer : %d'%(DenseNeuron))
            print('Learning rate : %.4f'%(learningRate))

            ################ Train the CNN ################

            print("\n↓↓↓↓↓ Start CNN training ↓↓↓↓↓\n")

            History = model.fit(TrainData, TrainLabl, epochs=Epoch, verbose=0,
                                validation_data=(TestData, TestLabl),
                                callbacks=[_MetricPerEpoch('mae', PrintAccPerEpochs)])

            # Score the trained model on the held-out (validation) data.
            Loss, ValMAE = model.evaluate(TestData, TestLabl, verbose=0)
            print("\n[Final Epochs] Validation MAE : %.3f"%(ValMAE))
            print('\n\n#####################################################\n\n')

            ################ Store the result ################

            Tr_result_temp[cnt] = [cnt, kernelSize, DenseNeuron, learningRate, ValMAE]

            cnt += 1

Tr_result_temp_pd = pd.DataFrame(Tr_result_temp, columns=['Case', 'Kernel_S',
                                                          'Dense_N', 'Learn_R', 'Val_MAE'])
            

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv1d_3 (Conv1D)           (None, 5, 4)              16        
                                                                 
 max_pooling1d_3 (MaxPooling  (None, 3, 4)             0         
 1D)                                                             
                                                                 
 conv1d_4 (Conv1D)           (None, 3, 8)              104       
                                                                 
 max_pooling1d_4 (MaxPooling  (None, 2, 8)             0         
 1D)                                                             
                                                                 
 conv1d_5 (Conv1D)           (None, 2, 16)             400       
                                                                 
 max_pooling1d_5 (MaxPooling  (None, 1, 16)           

KeyboardInterrupt: 