<a href="https://colab.research.google.com/github/KangHwan-Cha/MyStudy/blob/main/TensorProject/Category5B__NewProject_HEPC-training.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

### Import

In [92]:
import urllib
import os
import zipfile
import pandas as pd

In [93]:
import tensorflow as tf
from tensorflow.keras.layers import Conv1D, Dense, LSTM, Bidirectional
from tensorflow.keras.models import Sequential
from tensorflow.keras.callbacks import ModelCheckpoint

In [94]:
def download_and_extract_data():
    url = 'https://storage.googleapis.com/download.tensorflow.org/data/certificate/household_power.zip'
    urllib.request.urlretrieve(url, 'household_power.zip')
    with zipfile.ZipFile('household_power.zip', 'r') as zip_ref:
        zip_ref.extractall()

In [95]:
download_and_extract_data()

In [96]:
df = pd.read_csv('household_power_consumption.csv', sep=',', infer_datetime_format=True, index_col='datetime', header=0)
df.head(5)

Unnamed: 0_level_0,Global_active_power,Global_reactive_power,Voltage,Global_intensity,Sub_metering_1,Sub_metering_2,Sub_metering_3
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2006-12-16 17:24:00,4.216,0.418,234.84,18.4,0.0,1.0,17.0
2006-12-16 17:25:00,5.36,0.436,233.63,23.0,0.0,1.0,16.0
2006-12-16 17:26:00,5.374,0.498,233.29,23.0,0.0,2.0,17.0
2006-12-16 17:27:00,5.388,0.502,233.74,23.0,0.0,1.0,17.0
2006-12-16 17:28:00,3.666,0.528,235.68,15.8,0.0,1.0,17.0


### Data Nomalization

In [97]:
def normalize_series(data, min, max):
    data = data - min
    data = data / max
    return data

In [98]:
# FEATURES에 데이터프레임의 Column 개수 대입
N_FEATURES = len(df.columns)

# 데이터프레임을 numpy array으로 가져와 data에 대입
data = df.values

# 데이터 정규화
data = normalize_series(data, data.min(axis=0), data.max(axis=0))
data

array([[0.43377912, 0.47826087, 0.04036551, ..., 0.        , 0.01282051,
        0.85      ],
       [0.55716135, 0.49885584, 0.0355582 , ..., 0.        , 0.01282051,
        0.8       ],
       [0.55867127, 0.56979405, 0.03420739, ..., 0.        , 0.02564103,
        0.85      ],
       ...,
       [0.03710095, 0.        , 0.05983313, ..., 0.        , 0.        ,
        0.        ],
       [0.03559103, 0.        , 0.06515693, ..., 0.        , 0.        ,
        0.        ],
       [0.03774806, 0.        , 0.06730234, ..., 0.        , 0.01282051,
        0.        ]])

In [99]:
pd.DataFrame(data).describe()

Unnamed: 0,0,1,2,3,4,5,6
count,86400.0,86400.0,86400.0,86400.0,86400.0,86400.0,86400.0
mean,0.156411,0.147141,0.064697,0.152278,0.01695,0.024085,0.375711
std,0.14404,0.134578,0.0139,0.139343,0.086787,0.097022,0.433595
min,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,0.021786,0.0,0.055344,0.024752,0.0,0.0,0.0
50%,0.131795,0.132723,0.065713,0.123762,0.0,0.0,0.0
75%,0.239431,0.224256,0.074652,0.227723,0.0,0.012821,0.85
max,0.979077,1.0,0.10735,0.980198,1.0,1.0,1.0


In [100]:
split_time = int(len(data) * 0.8)
split_time

69120

In [101]:
x_train = data[:split_time]
x_valid = data[split_time:]

### Windowed Dataset 생성

In [102]:
def windowed_dataset(series, batch_size, n_past=24, n_future=24, shift=1):
    ds = tf.data.Dataset.from_tensor_slices(series)
    ds = ds.window(size=(n_past + n_future), shift = shift, drop_remainder = True)
    ds = ds.flat_map(lambda w: w.batch(n_past + n_future))
    ds = ds.shuffle(len(series))
    ds = ds.map(
        lambda w: (w[:n_past], w[n_past:])   # w[:n_past] => x / w[n_past:] => y
    )
    return ds.batch(batch_size).prefetch(1)

In [103]:
# 다음 4개의 옵션은 주어 집니다.
BATCH_SIZE = 32 # 변경 가능하나 더 올리는 것은 비추 (내리는 것은 가능하나 시간 오래 걸림)
N_PAST = 24 # 변경 불가.
N_FUTURE = 24 # 변경 불가.
SHIFT = 1 # 변경 불가.

In [104]:
train_set = windowed_dataset(series=x_train, 
                             batch_size=BATCH_SIZE,
                             n_past=N_PAST, 
                             n_future=N_FUTURE,
                             shift=SHIFT)

valid_set = windowed_dataset(series=x_valid, 
                             batch_size=BATCH_SIZE,
                             n_past=N_PAST, 
                             n_future=N_FUTURE,
                             shift=SHIFT)

In [105]:
train_set

<PrefetchDataset element_spec=(TensorSpec(shape=(None, None, 7), dtype=tf.float64, name=None), TensorSpec(shape=(None, None, 7), dtype=tf.float64, name=None))>

In [106]:
model = Sequential([
    Conv1D(filters=32,
           kernel_size=3,
           padding='causal',
           activation='relu',
           input_shape=[N_PAST, 7]),
    Bidirectional(LSTM(32, return_sequences=True)),
    Dense(32, activation='relu'),
    Dense(16, activation='relu'),
    Dense(N_FEATURES)
])

In [107]:
model.summary()

Model: "sequential_4"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv1d_5 (Conv1D)           (None, 24, 32)            704       
                                                                 
 bidirectional_4 (Bidirectio  (None, 24, 64)           16640     
 nal)                                                            
                                                                 
 dense_15 (Dense)            (None, 24, 32)            2080      
                                                                 
 dense_16 (Dense)            (None, 24, 16)            528       
                                                                 
 dense_17 (Dense)            (None, 24, 7)             119       
                                                                 
Total params: 20,071
Trainable params: 20,071
Non-trainable params: 0
__________________________________________________

In [108]:
checkpoint_path = 'my_checkpoint.ckpt'
checkpoint = ModelCheckpoint(filepath=checkpoint_path,
                             save_weights_only=True,
                             save_best_only=True,
                             monitor='val_loss',
                             verbose=1
                             )

In [109]:
optimizer = tf.keras.optimizers.Adam(learning_rate=0.00005)
model.compile(loss='mae',
              optimizer=optimizer,
              metrics=['mae']
              )


In [110]:
model.fit(train_set,
          validation_data=(valid_set),
          epochs=20,
          callbacks=[checkpoint])

Epoch 1/20
   2154/Unknown - 32s 10ms/step - loss: 0.0750 - mae: 0.0750
Epoch 1: val_loss improved from inf to 0.05563, saving model to my_checkpoint.ckpt
Epoch 2/20
Epoch 2: val_loss improved from 0.05563 to 0.05075, saving model to my_checkpoint.ckpt
Epoch 3/20
Epoch 3: val_loss improved from 0.05075 to 0.04881, saving model to my_checkpoint.ckpt
Epoch 4/20
Epoch 4: val_loss improved from 0.04881 to 0.04783, saving model to my_checkpoint.ckpt
Epoch 5/20
Epoch 5: val_loss improved from 0.04783 to 0.04653, saving model to my_checkpoint.ckpt
Epoch 6/20
Epoch 6: val_loss improved from 0.04653 to 0.04523, saving model to my_checkpoint.ckpt
Epoch 7/20
Epoch 7: val_loss improved from 0.04523 to 0.04427, saving model to my_checkpoint.ckpt
Epoch 8/20
Epoch 8: val_loss improved from 0.04427 to 0.04395, saving model to my_checkpoint.ckpt
Epoch 9/20
Epoch 9: val_loss improved from 0.04395 to 0.04355, saving model to my_checkpoint.ckpt
Epoch 10/20
Epoch 10: val_loss improved from 0.04355 to 0.043

<keras.callbacks.History at 0x7f79ee169340>

In [111]:
model.load_weights(checkpoint_path)

<tensorflow.python.training.tracking.util.CheckpointLoadStatus at 0x7f7967b70640>

In [112]:
model.evaluate(valid_set)



[0.041994281113147736, 0.04199425131082535]