<a href="https://colab.research.google.com/github/HARASEON/Deep_Learning_tensorflow/blob/main/Day5_Power.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Power
* Data특징:
  * 컬럼: 7개,
  * total: 86400 ea
  * 60일 데이터
* 학습방식:
  * 24개 데이터를 읽고 다음 24개 데이터를 예측한다. (window = 24, horizon = 24)
  * 바꿔보자
* val_loss < 0.05


In [None]:
import tensorflow as tf
# Display the TensorFlow version currently available in the runtime
# (shown as the cell output because it is the last expression).
tf.__version__

'2.12.0'

In [None]:
# Pin TensorFlow to 2.10.0 for this notebook.
# `-y` answers pip's confirmation prompt so the cell does not block under "Run all";
# `%pip` (rather than `!pip`) installs into the environment of the running kernel.
# If TensorFlow was already imported in this session, restart the runtime afterwards
# so the pinned version is the one that gets loaded.
%pip uninstall -y tensorflow
%pip install tensorflow==2.10.0

[0mLooking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting tensorflow==2.10.0
  Downloading tensorflow-2.10.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (578.1 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m578.1/578.1 MB[0m [31m2.6 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting tensorflow-estimator<2.11,>=2.10.0
  Downloading tensorflow_estimator-2.10.0-py2.py3-none-any.whl (438 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m438.7/438.7 kB[0m [31m39.3 MB/s[0m eta [36m0:00:00[0m
Collecting tensorboard<2.11,>=2.10
  Downloading tensorboard-2.10.1-py3-none-any.whl (5.9 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m5.9/5.9 MB[0m [31m91.6 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting keras<2.11,>=2.10.0
  Downloading keras-2.10.0-py2.py3-none-any.whl (1.7 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.7/1.7 MB[0m [31m74.8 MB/s

In [None]:
import urllib
import os
import zipfile
import pandas as pd
import tensorflow as tf
from tensorflow.keras.layers import Dense, Conv1D, LSTM, Bidirectional, Dropout, BatchNormalization
from tensorflow.keras.models import Sequential
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping


def download_and_extract_data():
    """Download the household power archive and unpack it in the working directory.

    The zip file is fetched from the TensorFlow certificate data bucket, saved
    as ``household_power.zip`` in the current directory, and every member of
    the archive is extracted next to it.
    """
    # A bare `import urllib` does not guarantee that the `request` submodule is
    # loaded; import it explicitly so this function does not rely on some other
    # package having imported it first.
    import urllib.request

    url = 'https://storage.googleapis.com/download.tensorflow.org/data/certificate/household_power.zip'
    urllib.request.urlretrieve(url, 'household_power.zip')
    with zipfile.ZipFile('household_power.zip', 'r') as zip_ref:
        zip_ref.extractall()
        
def normalize_series(data, min, max):
    """Shift ``data`` by ``min`` and then divide the result by ``max``.

    Computes ``(data - min) / max``. The divisor is ``max`` itself, not the
    range ``max - min``.

    Args:
        data: Value(s) to rescale; anything supporting ``-`` and ``/``.
        min: Offset subtracted from ``data``.
        max: Divisor applied after the shift.

    Returns:
        The shifted and scaled value(s).
    """
    return (data - min) / max

def windowed_dataset(series, batch_size, n_past=24, n_future=24, shift=1):
    """Build a batched ``tf.data`` pipeline of (past, future) window pairs.

    Each example is a run of ``n_past + n_future`` consecutive entries of
    ``series``: the first ``n_past`` entries form the input and the rest form
    the target. A new window starts every ``shift`` entries and incomplete
    trailing windows are dropped. Windows are shuffled before being split.

    Args:
        series: Sequence sliced along its first axis into dataset elements.
        batch_size: Number of (input, target) pairs per batch.
        n_past: Length of the input part of each window.
        n_future: Length of the target part of each window.
        shift: Offset between the starts of consecutive windows.

    Returns:
        A ``tf.data.Dataset`` of batched ``(input, target)`` tuples.
    """
    window_len = n_past + n_future

    def split_window(window):
        # Leading n_past entries are the features, the remainder the labels.
        return window[:n_past], window[n_past:]

    dataset = tf.data.Dataset.from_tensor_slices(series)
    dataset = dataset.window(size=window_len, shift=shift, drop_remainder=True)
    # Each window is itself a dataset; collapse it into a single tensor.
    dataset = dataset.flat_map(lambda sub: sub.batch(window_len))
    dataset = dataset.shuffle(len(series))
    dataset = dataset.map(split_window)
    dataset = dataset.batch(batch_size)
    return dataset.prefetch(1)


def solution_model():
    """Train the household-power forecaster and return it with its best weights.

    Steps, in order:
      1. Download and extract the dataset, then load
         ``household_power_consumption.csv`` indexed by its ``datetime`` column.
      2. Normalize every column with ``normalize_series`` using column-wise
         min/max taken over the whole series, then split it 50/50 in row order
         into a training half and a validation half.
      3. Build windowed datasets (24 steps in, 24 steps out, shift 1).
      4. Fit a Conv1D + stacked bidirectional LSTM network with Huber loss,
         checkpointing the weights with the lowest ``val_mae`` and stopping
         after 10 epochs without improvement.

    Returns:
        The fitted Keras model with the best checkpointed weights loaded.
    """
    download_and_extract_data()

    df = pd.read_csv('household_power_consumption.csv', sep=',', infer_datetime_format=True, index_col='datetime', header=0)

    # Number of input/output channels is taken from the data, not hard-coded.
    N_FEATURES = len(df.columns)

    data = df.values
    split_time = int(len(data) * 0.5)
    # Normalization statistics are computed on the full series, before the split.
    data = normalize_series(data, data.min(axis=0), data.max(axis=0))

    x_train = data[:split_time]
    x_valid = data[split_time:]

    # DO NOT CHANGE THIS CODE
    tf.keras.backend.clear_session()
    tf.random.set_seed(42)

    BATCH_SIZE = 32
    N_PAST = 24
    N_FUTURE = 24
    SHIFT = 1

    train_set = windowed_dataset(series=x_train,
                                 batch_size=BATCH_SIZE,
                                 n_past=N_PAST,
                                 n_future=N_FUTURE,
                                 shift=SHIFT)

    valid_set = windowed_dataset(series=x_valid,
                                 batch_size=BATCH_SIZE,
                                 n_past=N_PAST,
                                 n_future=N_FUTURE,
                                 shift=SHIFT)

    # Every layer keeps the time axis, so the network emits one N_FEATURES
    # vector per input step. NOTE: this lines up with the targets only because
    # N_PAST == N_FUTURE.
    model = tf.keras.models.Sequential([
        Conv1D(filters=64,
               kernel_size=5,
               padding="causal",
               activation="relu",
               # Use the measured feature count instead of a literal 7 so the
               # input layer always agrees with the data and the output layer.
               input_shape=[N_PAST, N_FEATURES],
               ),
        Bidirectional(LSTM(32, return_sequences=True)),
        Bidirectional(LSTM(32, return_sequences=True)),
        Dense(64, activation="relu"),
        Dropout(0.5),
        Dense(32, activation="relu"),
        Dense(N_FEATURES)
    ])

    # Keep only the weights of the epoch with the lowest validation MAE.
    checkpoint_path = 'model/my_checkpoint.ckpt'
    checkpoint = ModelCheckpoint(checkpoint_path,
                                 save_weights_only=True,
                                 save_best_only=True,
                                 monitor='val_mae',
                                 verbose=1,
                                 )
    early_stopping = EarlyStopping(monitor='val_mae', patience=10)

    model.compile(loss='huber',
                  optimizer='adam',
                  metrics=["mae"]
                  )

    model.fit(train_set,
              validation_data=valid_set,
              epochs=100,
              callbacks=[checkpoint, early_stopping],
              )
    # Training may end on a worse epoch; restore the best checkpoint before returning.
    model.load_weights(checkpoint_path)
    return model

# Train the model and export it when this cell/script is executed directly.
if __name__ == '__main__':
    model = solution_model()
    # Save the trained model (best checkpoint weights already loaded) as an .h5 file.
    model.save("power-1.h5")
    # Colab-only convenience: download the saved file to the local machine.
    # Do not copy the two lines below into the real test submission.
    from google.colab import files
    files.download('power-1.h5')

Epoch 1/100
   1349/Unknown - 97s 60ms/step - loss: 0.0079 - mae: 0.0661
Epoch 1: val_mae improved from inf to 0.05851, saving model to model/my_checkpoint.ckpt
Epoch 2/100
Epoch 2: val_mae improved from 0.05851 to 0.05296, saving model to model/my_checkpoint.ckpt
Epoch 3/100
Epoch 3: val_mae did not improve from 0.05296
Epoch 4/100
Epoch 4: val_mae improved from 0.05296 to 0.04887, saving model to model/my_checkpoint.ckpt
Epoch 5/100
Epoch 5: val_mae did not improve from 0.04887
Epoch 6/100
Epoch 6: val_mae did not improve from 0.04887
Epoch 7/100
Epoch 7: val_mae improved from 0.04887 to 0.04872, saving model to model/my_checkpoint.ckpt
Epoch 8/100
Epoch 8: val_mae did not improve from 0.04872
Epoch 9/100
Epoch 9: val_mae did not improve from 0.04872
Epoch 10/100
Epoch 10: val_mae did not improve from 0.04872
Epoch 11/100
Epoch 11: val_mae did not improve from 0.04872
Epoch 12/100
Epoch 12: val_mae did not improve from 0.04872
Epoch 13/100
Epoch 13: val_mae did not improve from 0.048

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [None]:
# Load the TensorBoard notebook extension and open it on the ./tboard log directory.
# NOTE(review): the training cells visible in this notebook register only the
# ModelCheckpoint and EarlyStopping callbacks, so nothing here appears to write
# logs to ./tboard. Confirm a TensorBoard callback exists elsewhere, or add one.
%load_ext tensorboard
%tensorboard --logdir ./tboard

# ChatGPT

* Prompt: code RNN  and transfer using tensorflow  learning with  url = 'https://storage.googleapis.com/download.tensorflow.org/data/certificate/household_power.zip'  use modelcheckpoint and  early stopping and save the best model  as 'power.h5'. Modify the code to make its val_loss under 0.05.
  
* Response: To improve the model's performance, here are some modifications that can be made:

1. Increase the number of filters in the Conv1D layer to capture more complex features.
2. Reduce the number of LSTM units in each layer to prevent overfitting.
3. Add BatchNormalization after each layer to stabilize the learning process.
4. Use the Adam optimizer with a learning rate scheduler to improve convergence speed.
5. Increase the number of epochs to allow the model to learn more from the data.
Here's the modified code below.

* 코드 설명:
1. Data downloading and preprocessing: The code downloads a zip file containing a CSV file with household power consumption data, which is then read and preprocessed into a pandas dataframe. The data is split into training and validation sets and normalized to have values between 0 and 1.

2. Windowed dataset creation: The code defines a function to create windowed datasets from time-series data. The function takes in a series of data, the batch size, the number of past and future time steps to include in each window, and the shift between consecutive windows. The function returns a TensorFlow dataset with the appropriate windowed data format for use in training a machine learning model.

3. Machine learning model: The code defines a machine learning model using the TensorFlow Keras API. The model includes a convolutional layer followed by recurrent (bidirectional LSTM) layers, as well as batch normalization and dropout layers. The model is compiled with the Huber loss function and the Adam optimizer, and mean absolute error (MAE) is tracked as a metric.

4. Model training: The code trains the machine learning model on the training dataset, with early stopping and model checkpointing to monitor progress and save the best weights. The model is also trained with a learning rate scheduler that decreases the learning rate after a certain number of epochs.

5. Model saving: The code saves the trained model as an .h5 file and downloads it to the local machine. The download step is not necessary for running the code but is included for convenience in a Colab environment.

In [None]:
import urllib
import os
import zipfile
import pandas as pd
import tensorflow as tf
from tensorflow.keras.layers import Dense, Conv1D, LSTM, Bidirectional, Dropout, BatchNormalization
from tensorflow.keras.models import Sequential
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping


def download_and_extract_data():
    """Download the household power archive and unpack it in the working directory.

    The zip file is fetched from the TensorFlow certificate data bucket, saved
    as ``household_power.zip`` in the current directory, and every member of
    the archive is extracted next to it.
    """
    # A bare `import urllib` does not guarantee that the `request` submodule is
    # loaded; import it explicitly so this function does not rely on some other
    # package having imported it first.
    import urllib.request

    url = 'https://storage.googleapis.com/download.tensorflow.org/data/certificate/household_power.zip'
    urllib.request.urlretrieve(url, 'household_power.zip')
    with zipfile.ZipFile('household_power.zip', 'r') as zip_ref:
        zip_ref.extractall()
        
def normalize_series(data, min, max):
    """Rescale ``data`` as ``(data - min) / max``.

    The offset ``min`` is removed first and the shifted values are then
    divided by ``max`` (the divisor is ``max``, not ``max - min``).

    Args:
        data: Value(s) to rescale; anything supporting ``-`` and ``/``.
        min: Offset subtracted from ``data``.
        max: Divisor applied after the shift.

    Returns:
        The shifted and scaled value(s).
    """
    shifted = data - min
    return shifted / max

def windowed_dataset(series, batch_size, n_past=24, n_future=24, shift=1):
    """Turn ``series`` into shuffled, batched (past, future) training pairs.

    Sliding windows of ``n_past + n_future`` consecutive entries are cut from
    ``series`` every ``shift`` entries (incomplete trailing windows are
    dropped). After shuffling, each window is split into its first ``n_past``
    entries (input) and the remaining entries (target).

    Args:
        series: Sequence sliced along its first axis into dataset elements.
        batch_size: Number of (input, target) pairs per batch.
        n_past: Length of the input part of each window.
        n_future: Length of the target part of each window.
        shift: Offset between the starts of consecutive windows.

    Returns:
        A ``tf.data.Dataset`` of batched ``(input, target)`` tuples.
    """
    total_steps = n_past + n_future

    def to_pair(window):
        # Leading n_past entries are the features, the remainder the labels.
        return window[:n_past], window[n_past:]

    pipeline = tf.data.Dataset.from_tensor_slices(series)
    pipeline = pipeline.window(size=total_steps, shift=shift, drop_remainder=True)
    # Each window is itself a dataset; collapse it into a single tensor.
    pipeline = pipeline.flat_map(lambda chunk: chunk.batch(total_steps))
    pipeline = pipeline.shuffle(len(series))
    pipeline = pipeline.map(to_pair)
    pipeline = pipeline.batch(batch_size)
    return pipeline.prefetch(1)


def solution_model():
    """Train the batch-normalized household-power forecaster and return it.

    Steps, in order:
      1. Download and extract the dataset, then load
         ``household_power_consumption.csv`` indexed by its ``datetime`` column.
      2. Normalize every column with ``normalize_series`` using column-wise
         min/max taken over the whole series, then split it 50/50 in row order
         into a training half and a validation half.
      3. Build windowed datasets (24 steps in, 24 steps out, shift 1).
      4. Fit a Conv1D + stacked bidirectional LSTM network (with batch
         normalization between layers) using Huber loss, a learning-rate
         schedule, best-``val_mae`` checkpointing and early stopping after
         10 epochs without improvement.

    Returns:
        The fitted Keras model with the best checkpointed weights loaded.
    """
    # Local import: only the learning-rate schedule below needs it.
    import math

    download_and_extract_data()

    df = pd.read_csv('household_power_consumption.csv', sep=',', infer_datetime_format=True, index_col='datetime', header=0)

    # Number of input/output channels is taken from the data, not hard-coded.
    N_FEATURES = len(df.columns)

    data = df.values
    split_time = int(len(data) * 0.5)
    # Normalization statistics are computed on the full series, before the split.
    data = normalize_series(data, data.min(axis=0), data.max(axis=0))

    x_train = data[:split_time]
    x_valid = data[split_time:]

    # DO NOT CHANGE THIS CODE
    tf.keras.backend.clear_session()
    tf.random.set_seed(42)

    BATCH_SIZE = 32
    N_PAST = 24
    N_FUTURE = 24
    SHIFT = 1

    train_set = windowed_dataset(series=x_train,
                                 batch_size=BATCH_SIZE,
                                 n_past=N_PAST,
                                 n_future=N_FUTURE,
                                 shift=SHIFT)

    valid_set = windowed_dataset(series=x_valid,
                                 batch_size=BATCH_SIZE,
                                 n_past=N_PAST,
                                 n_future=N_FUTURE,
                                 shift=SHIFT)

    # Every layer keeps the time axis, so the network emits one N_FEATURES
    # vector per input step. NOTE: this lines up with the targets only because
    # N_PAST == N_FUTURE.
    model = tf.keras.models.Sequential([
        Conv1D(filters=128,
               kernel_size=5,
               padding="causal",
               activation="relu",
               input_shape=[N_PAST, N_FEATURES],
               ),
        BatchNormalization(),
        Bidirectional(LSTM(16, return_sequences=True)),
        BatchNormalization(),
        Bidirectional(LSTM(16, return_sequences=True)),
        BatchNormalization(),
        Dense(32, activation="relu"),
        Dropout(0.5),
        Dense(N_FEATURES)
    ])

    # Learning rate scheduler: hold the rate for the first 10 epochs, then
    # decay it by a factor of exp(-0.1) each epoch.
    def scheduler(epoch, lr):
        if epoch < 10:
            return lr
        # math.exp keeps the result a plain Python float.
        return lr * math.exp(-0.1)

    # The schedule was previously defined but never handed to `fit`, so it had
    # no effect; register it as a callback so it is actually applied.
    lr_schedule = tf.keras.callbacks.LearningRateScheduler(scheduler)

    # Keep only the weights of the epoch with the lowest validation MAE.
    checkpoint_path = 'model/my_checkpoint.ckpt'
    checkpoint = ModelCheckpoint(checkpoint_path,
                                 save_weights_only=True,
                                 save_best_only=True,
                                 monitor='val_mae',
                                 verbose=1,
                                 )
    early_stopping = EarlyStopping(monitor='val_mae', patience=10)

    model.compile(loss='huber',
                  optimizer='adam',
                  metrics=["mae"]
                  )

    model.fit(train_set,
              validation_data=valid_set,
              epochs=100,
              callbacks=[checkpoint, early_stopping, lr_schedule],
              )
    # Training may end on a worse epoch; restore the best checkpoint before returning.
    model.load_weights(checkpoint_path)
    return model

# Train the model and export it when this cell/script is executed directly.
if __name__ == '__main__':
    model = solution_model()
    # Save the trained model (best checkpoint weights already loaded) as an .h5 file.
    model.save("power-1.h5")
    # Colab-only convenience: download the saved file to the local machine.
    # Do not copy the two lines below into the real test submission.
    from google.colab import files
    files.download('power-1.h5')


Epoch 1/100
   1348/Unknown - 83s 47ms/step - loss: 0.0286 - mae: 0.1551
Epoch 1: val_mae improved from inf to 0.13409, saving model to model/my_checkpoint.ckpt
Epoch 2/100
Epoch 2: val_mae did not improve from 0.13409
Epoch 3/100
Epoch 3: val_mae improved from 0.13409 to 0.12875, saving model to model/my_checkpoint.ckpt
Epoch 4/100
Epoch 4: val_mae did not improve from 0.12875
Epoch 5/100
Epoch 5: val_mae did not improve from 0.12875
Epoch 6/100
Epoch 6: val_mae did not improve from 0.12875
Epoch 7/100
Epoch 7: val_mae did not improve from 0.12875
Epoch 8/100
Epoch 8: val_mae did not improve from 0.12875
Epoch 9/100
Epoch 9: val_mae did not improve from 0.12875
Epoch 10/100
Epoch 10: val_mae did not improve from 0.12875
Epoch 11/100
Epoch 11: val_mae did not improve from 0.12875
Epoch 12/100
Epoch 12: val_mae did not improve from 0.12875
Epoch 13/100
Epoch 13: val_mae did not improve from 0.12875


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>