In [1]:
# ============================================================================================
# PROBLEM C5
#
# Build and train a neural network to predict time-indexed variables of
# the multivariate household electric power consumption time series dataset.
# Using a window of the past 24 observations of the 7 variables, the model
# should be trained to predict the next 24 observations of the 7 variables.
# Use MAE as the metric of your neural network model.
# We provided code for normalizing the data. Please do not change the code.
# Do not use lambda layers in your model.
#
# The dataset used in this problem is downloaded from https://archive.ics.uci.edu/dataset/235/individual+household+electric+power+consumption
#
# Desired MAE < 0.1 on the normalized dataset.
# ============================================================================================

In [1]:
import urllib
import os
import zipfile
import pandas as pd
import tensorflow as tf

2024-02-17 14:53:07.571425: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [2]:
def download_and_extract_data():
    """Download the household power archive and unpack it into the working directory.

    Saves the archive as ``household_power.zip`` in the current directory and
    extracts all of its members there. Takes no arguments and returns nothing.
    """
    # `import urllib` on its own does not load the `urllib.request` submodule;
    # import it explicitly so this function does not depend on some other
    # library having imported it first.
    import urllib.request

    url = 'https://raw.githubusercontent.com/dicodingacademy/dicoding_dataset/main/household_power.zip'
    archive_path = 'household_power.zip'
    urllib.request.urlretrieve(url, archive_path)
    with zipfile.ZipFile(archive_path, 'r') as zip_ref:
        zip_ref.extractall()


# This function normalizes the dataset using min max scaling.
# DO NOT CHANGE THIS CODE
def normalize_series(data, min, max):
    """Shift ``data`` by ``min`` and then divide the result by ``max``.

    Args:
        data: Values to scale.
        min: Offset subtracted from ``data``.
        max: Divisor applied after the subtraction.

    Returns:
        ``(data - min) / max``.
    """
    # NOTE(review): the divisor is `max`, not `max - min`, so this equals
    # textbook min-max scaling only when `min` is 0. The code is left untouched
    # because it is marked DO NOT CHANGE.
    data = data - min
    data = data / max
    return data

In [5]:
# Download the dataset archive and extract it into the working directory.
download_and_extract_data()
# Read the dataset from the CSV, using its 'datetime' column as the index.
# `infer_datetime_format` was dropped: it is deprecated in pandas 2.x (presumably
# the source of the warning this cell printed) and it has no effect unless
# `parse_dates` is enabled, which it is not here.
df = pd.read_csv('household_power_consumption.csv', sep=',',
                 index_col='datetime', header=0)

  df = pd.read_csv('household_power_consumption.csv', sep=',',


In [13]:
# Number of columns in the loaded frame.
N_FEATURES = len(df.columns)

In [6]:
# Normalization: scale every column with its own minimum and maximum.
raw_values = df.values
split_time = int(len(raw_values) * 0.5)
column_min = raw_values.min(axis=0)
column_max = raw_values.max(axis=0)
data = normalize_series(raw_values, column_min, column_max)

In [7]:
# Rows before `split_time` form the training set; the rest form the validation set.
x_train, x_valid = data[:split_time], data[split_time:]

In [8]:
# DO NOT CHANGE THIS
BATCH_SIZE = 32  # Number of windows grouped into each batch.
N_PAST = 24 # Number of past time steps based on which future observations should be predicted
N_FUTURE = 24  # Number of future time steps which are to be predicted.
SHIFT = 1  # By how many positions the window slides to create a new window of observations.

In [9]:
def windowed_dataset(series, batch_size=BATCH_SIZE, n_past=24, n_future=24, shift=1):
    """Turn a series into batched (past, future) window pairs.

    Args:
        series: Sequence of observations, sliced along its first axis.
        batch_size: Number of window pairs per batch.
        n_past: Leading time steps of each window used as the model input.
        n_future: Trailing time steps of each window used as the label.
        shift: Offset between the starts of consecutive windows.

    Returns:
        A prefetched ``tf.data.Dataset`` yielding ``(past, future)`` batches.
    """
    total_steps = n_past + n_future

    def _window_to_tensor(window):
        # Each window is itself a dataset; batching it once yields one tensor.
        return window.batch(total_steps)

    def _split_past_future(window):
        # The first `n_past` steps are the input, the remainder is the label.
        return window[:n_past], window[n_past:]

    windows = (
        tf.data.Dataset.from_tensor_slices(series)
        .window(total_steps, shift=shift, drop_remainder=True)
        .flat_map(_window_to_tensor)
    )
    pairs = windows.map(_split_past_future)
    return pairs.batch(batch_size=batch_size).prefetch(tf.data.AUTOTUNE)

In [10]:
# Create the windowed training and validation datasets.
# The window constants are passed explicitly so that BATCH_SIZE, N_PAST, N_FUTURE
# and SHIFT actually control the windows instead of the literal defaults that
# windowed_dataset happens to duplicate.
train_set = windowed_dataset(x_train, batch_size=BATCH_SIZE, n_past=N_PAST,
                             n_future=N_FUTURE, shift=SHIFT)
valid_set = windowed_dataset(x_valid, batch_size=BATCH_SIZE, n_past=N_PAST,
                             n_future=N_FUTURE, shift=SHIFT)

2024-02-17 14:53:29.751326: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:981] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2024-02-17 14:53:29.984770: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:981] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2024-02-17 14:53:29.984825: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:981] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2024-02-17 14:53:29.987885: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:981] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2024-02-17 14:53:29.987979: I tensorflow/compile

In [11]:
class thecustomcallbacks(tf.keras.callbacks.Callback):
    """Stop training once the MAE target has been reached.

    Training stops at the end of an epoch when the training "MAE" is below
    ``threshold`` and, if a "val_MAE" entry is logged, that value is below
    ``threshold`` as well.

    Args:
        threshold: MAE value that must be undercut before stopping (default 0.1).
    """

    def __init__(self, threshold=0.1):
        super().__init__()
        self.threshold = threshold

    def on_epoch_end(self, epoch, logs=None):
        """Check the logged MAE values and request a stop when the target is met."""
        # `logs` defaults to None; subscripting it would raise a TypeError.
        if not logs:
            return
        train_mae = logs["MAE"]
        # Only present when validation data is passed to fit(); requiring it too
        # avoids stopping on the training metric alone.
        val_mae = logs.get("val_MAE")
        target_met = train_mae < self.threshold and (
            val_mae is None or val_mae < self.threshold
        )
        if target_met:
            self.model.stop_training = True

In [14]:
def get_model():
    """Build and compile the stacked-LSTM forecasting model.

    The network takes windows of shape ``(N_PAST, N_FEATURES)`` and, because both
    LSTM layers return full sequences, emits ``N_FEATURES`` values for every
    input time step.

    Returns:
        A compiled ``tf.keras`` Sequential model (Adam optimizer, MSE loss,
        "MAE" metric).
    """
    model = tf.keras.models.Sequential([
        tf.keras.layers.LSTM(input_shape=(N_PAST, N_FEATURES), units=64, return_sequences=True),
        tf.keras.layers.Dropout(0.2),
        tf.keras.layers.LSTM(units=128, return_sequences=True),
        tf.keras.layers.Dropout(0.3),
        tf.keras.layers.Dense(N_FEATURES),
    ])

    # `metrics` is documented as a list; the name "MAE" is kept verbatim because
    # the early-stopping callback reads the log entry with exactly that key.
    model.compile(
        optimizer="adam",
        loss=tf.keras.losses.MeanSquaredError(),
        metrics=["MAE"],
    )

    return model

In [15]:
model = get_model()
# `callbacks` is documented as a list of callback instances. The returned History
# is bound to a name so the cell does not end by echoing its repr.
history = model.fit(
    train_set,
    validation_data=valid_set,
    callbacks=[thecustomcallbacks()],
    epochs=100,
)

Epoch 1/100


2024-02-17 14:54:15.902600: I tensorflow/compiler/xla/stream_executor/cuda/cuda_dnn.cc:432] Loaded cuDNN version 8600
2024-02-17 14:54:16.042955: I tensorflow/compiler/xla/stream_executor/cuda/cuda_blas.cc:606] TensorFloat-32 will be used for the matrix multiplication. This will only be logged once.
2024-02-17 14:54:16.086060: I tensorflow/compiler/xla/service/service.cc:168] XLA service 0x7f5f7c0366c0 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
2024-02-17 14:54:16.086114: I tensorflow/compiler/xla/service/service.cc:176]   StreamExecutor device (0): NVIDIA GeForce RTX 3060 Laptop GPU, Compute Capability 8.6
2024-02-17 14:54:16.142167: I tensorflow/compiler/mlir/tensorflow/utils/dump_mlir_util.cc:255] disabling MLIR crash reproducer, set env var `MLIR_CRASH_REPRODUCER_DIRECTORY` to enable.
2024-02-17 14:54:16.465608: I tensorflow/tsl/platform/default/subprocess.cc:304] Start cannot spawn child process: No such file or directory
2024-02-17 14:

   1349/Unknown - 30s 18ms/step - loss: 0.0239 - MAE: 0.0863

2024-02-17 14:54:42.133295: I tensorflow/core/framework/local_rendezvous.cc:405] Local rendezvous recv item cancelled. Key hash: 11443001950356000277
2024-02-17 14:54:42.133372: I tensorflow/core/framework/local_rendezvous.cc:405] Local rendezvous recv item cancelled. Key hash: 1636632731279744914




2024-02-17 14:54:55.688261: I tensorflow/core/framework/local_rendezvous.cc:405] Local rendezvous recv item cancelled. Key hash: 11443001950356000277


<keras.src.callbacks.History at 0x7f6050982c70>

# Answer!

In [17]:
# ============================================================================================
# PROBLEM C5
#
# Build and train a neural network to predict time-indexed variables of
# the multivariate household electric power consumption time series dataset.
# Using a window of the past 24 observations of the 7 variables, the model
# should be trained to predict the next 24 observations of the 7 variables.
# Use MAE as the metric of your neural network model.
# We provided code for normalizing the data. Please do not change the code.
# Do not use lambda layers in your model.
#
# The dataset used in this problem is downloaded from https://archive.ics.uci.edu/dataset/235/individual+household+electric+power+consumption
#
# Desired MAE < 0.1 on the normalized dataset.
# ============================================================================================

import urllib
import os
import zipfile
import pandas as pd
import tensorflow as tf

# This function downloads and extracts the dataset to the directory that contains this file.
# DO NOT CHANGE THIS CODE
# (unless you need to change the URL)
def download_and_extract_data():
    """Download ``household_power.zip`` and extract it into the current directory."""
    url = 'https://raw.githubusercontent.com/dicodingacademy/dicoding_dataset/main/household_power.zip'
    # NOTE(review): only `import urllib` appears at the top of this cell, which
    # does not by itself load `urllib.request`; this presumably works because
    # another imported library loads that submodule — confirm.
    urllib.request.urlretrieve(url, 'household_power.zip')
    with zipfile.ZipFile('household_power.zip', 'r') as zip_ref:
        # No target path is given, so members are extracted into the working directory.
        zip_ref.extractall()


# This function normalizes the dataset using min max scaling.
# DO NOT CHANGE THIS CODE
def normalize_series(data, min, max):
    """Shift ``data`` by ``min`` and then divide the result by ``max``.

    Args:
        data: Values to scale.
        min: Offset subtracted from ``data``.
        max: Divisor applied after the subtraction.

    Returns:
        ``(data - min) / max``.
    """
    # NOTE(review): the divisor is `max`, not `max - min`, so this equals
    # textbook min-max scaling only when `min` is 0. The code is left untouched
    # because it is marked DO NOT CHANGE.
    data = data - min
    data = data / max
    return data

# COMPLETE THE CODE IN THE FOLLOWING FUNCTION.
def windowed_dataset(series, batch_size=32, n_past=24, n_future=24, shift=1):
    """Turn a series into batched (past, future) window pairs.

    Args:
        series: Sequence of observations, sliced along its first axis.
        batch_size: Number of window pairs per batch. The default is the literal
            32 because the BATCH_SIZE constant is local to solution_C5 and is not
            defined when this ``def`` statement is evaluated.
        n_past: Leading time steps of each window used as the model input.
        n_future: Trailing time steps of each window used as the label.
        shift: Offset between the starts of consecutive windows.

    Returns:
        A prefetched ``tf.data.Dataset`` yielding ``(past, future)`` batches.
    """
    window_size = n_past + n_future

    # Turn the series into a dataset with one element per time step.
    ds = tf.data.Dataset.from_tensor_slices(series)

    # Group consecutive steps into full-length windows; incomplete tails are dropped.
    ds = ds.window(window_size, shift=shift, drop_remainder=True)

    # Each window is a nested dataset; batch it once to obtain a single tensor.
    ds = ds.flat_map(lambda window: window.batch(window_size))

    # Split every window into the input (past) part and the label (future) part.
    ds = ds.map(lambda window: (window[:n_past], window[n_past:]))

    # Batch the pairs and prefetch so input preparation overlaps with training.
    ds = ds.batch(batch_size=batch_size).prefetch(tf.data.AUTOTUNE)

    return ds

# COMPLETE THE CODE IN THE FOLLOWING FUNCTION.
def solution_C5():
    """Download the data, build the windowed datasets, and train the forecaster.

    Returns:
        The trained ``tf.keras`` model. Training runs for at most 100 epochs and
        stops earlier once the MAE target of 0.1 is met.
    """
    # Downloads and extracts the dataset to the directory that contains this file.
    download_and_extract_data()
    # Reads the dataset from the csv.
    # `infer_datetime_format` was dropped: it is deprecated in pandas 2.x and has
    # no effect unless `parse_dates` is enabled, which it is not here.
    df = pd.read_csv('household_power_consumption.csv', sep=',',
                     index_col='datetime', header=0)

    # Number of features in the dataset. We use all features as predictors to
    # predict all features at future time steps.
    N_FEATURES = df.shape[1]

    # Normalizes the data
    # DO NOT CHANGE THIS
    data = df.values
    split_time = int(len(data) * 0.5)
    data = normalize_series(data, data.min(axis=0), data.max(axis=0))

    # Splits the data into training and validation sets.
    x_train = data[:split_time]
    x_valid = data[split_time:]

    # DO NOT CHANGE THIS
    BATCH_SIZE = 32  
    N_PAST = 24 # Number of past time steps based on which future observations should be predicted
    N_FUTURE = 24  # Number of future time steps which are to be predicted.
    SHIFT = 1  # By how many positions the window slides to create a new window of observations.

    # Code to create windowed train and validation datasets.
    # The constants above are passed explicitly so they actually control the
    # windows instead of relying on windowed_dataset's own default values.
    train_set = windowed_dataset(x_train, batch_size=BATCH_SIZE, n_past=N_PAST,
                                 n_future=N_FUTURE, shift=SHIFT)
    valid_set = windowed_dataset(x_valid, batch_size=BATCH_SIZE, n_past=N_PAST,
                                 n_future=N_FUTURE, shift=SHIFT)

    class thecustomcallbacks(tf.keras.callbacks.Callback):
        """Stop training once training MAE and, if logged, validation MAE are below 0.1."""

        def __init__(self, threshold=0.1):
            super().__init__()
            self.threshold = threshold

        def on_epoch_end(self, epoch, logs=None):
            # `logs` defaults to None; subscripting it would raise a TypeError.
            if not logs:
                return
            train_mae = logs["MAE"]
            # Only present when validation data is passed to fit(); requiring it
            # too avoids stopping on the training metric alone.
            val_mae = logs.get("val_MAE")
            target_met = train_mae < self.threshold and (
                val_mae is None or val_mae < self.threshold
            )
            if target_met:
                self.model.stop_training = True

    def get_model():
        """Build and compile the stacked-LSTM model for (N_PAST, N_FEATURES) windows."""
        model = tf.keras.models.Sequential([
            tf.keras.layers.LSTM(input_shape=(N_PAST, N_FEATURES), units=64, return_sequences=True),
            tf.keras.layers.Dropout(0.2),
            tf.keras.layers.LSTM(units=128, return_sequences=True),
            tf.keras.layers.Dropout(0.3),
            tf.keras.layers.Dense(N_FEATURES),
        ])

        # `metrics` is documented as a list; the name "MAE" is kept verbatim
        # because the callback above reads the log entry with exactly that key.
        model.compile(
            optimizer="adam",
            loss=tf.keras.losses.MeanSquaredError(),
            metrics=["MAE"],
        )

        return model

    model = get_model()
    # `callbacks` is documented as a list of callback instances.
    model.fit(
        train_set,
        validation_data=valid_set,
        callbacks=[thecustomcallbacks()],
        epochs=100,
    )

    return model

# The code below is to save your model as a .h5 file.
# It will be saved automatically in your Submission folder.
if __name__ == '__main__':
    # DO NOT CHANGE THIS CODE
    # Train the model with solution_C5() and write it to Model/model_C5.h5.
    model = solution_C5()
    model.save("Model/model_C5.h5")

  df = pd.read_csv('household_power_consumption.csv', sep=',',


Epoch 1/100
   1349/Unknown - 26s 17ms/step - loss: 0.0239 - MAE: 0.0860

2024-02-17 14:58:01.628080: I tensorflow/core/framework/local_rendezvous.cc:409] Local rendezvous send item cancelled. Key hash: 2938959074362778788
2024-02-17 14:58:01.628150: I tensorflow/core/framework/local_rendezvous.cc:409] Local rendezvous send item cancelled. Key hash: 11127073154597915652
2024-02-17 14:58:01.628181: I tensorflow/core/framework/local_rendezvous.cc:405] Local rendezvous recv item cancelled. Key hash: 11443001950356000277
2024-02-17 14:58:01.628220: I tensorflow/core/framework/local_rendezvous.cc:409] Local rendezvous send item cancelled. Key hash: 4625381010506968607
2024-02-17 14:58:01.628251: I tensorflow/core/framework/local_rendezvous.cc:405] Local rendezvous recv item cancelled. Key hash: 1636632731279744914




  saving_api.save_model(
