In [None]:
# ==============================================================================
#
# TIME SERIES QUESTION
#
# Build and train a neural network to predict the time-indexed variable of
# the univariate US diesel prices (On-Highway, All Types) for the period
# 1994 - 2021.
# Using a window of the past 10 observations of 1 feature, train the model
# to predict the next 10 observations of that feature.
#
# ==============================================================================
#
# ABOUT THE DATASET
#
# Original Source:
# https://www.eia.gov/dnav/pet/pet_pri_gnd_dcus_nus_w.htm#
#
# For the purpose of the examination we have used the Diesel (On - Highway) -
# All Types time series data for the period of 1994 - 2021 from the
# aforementioned link. The dataset has 1 time indexed feature.
# We have provided a cleaned version of the data.
#
# ==============================================================================


import pandas as pd
import tensorflow as tf
import urllib

from tensorflow.keras.layers import Dense, Conv1D, LSTM, Bidirectional
from tensorflow.keras.models import Sequential
from tensorflow.keras.callbacks import ModelCheckpoint

# Fetch the cleaned weekly diesel price CSV into the working directory; it is
# read back from disk by solution_model().
# `urllib.request` is imported explicitly: a bare `import urllib` does not load
# the `request` submodule, so the call below would otherwise only work when
# some other import happened to load it first.
import urllib.request

url = 'https://www.dropbox.com/s/eduk281didil1km/Weekly_U.S.Diesel_Retail_Prices.csv?dl=1'
urllib.request.urlretrieve(url, 'Weekly_U.S.Diesel_Retail_Prices.csv')

# This function normalizes the dataset using min max scaling.
# DO NOT CHANGE THIS CODE
def normalize_series(data, min, max):
    """Shift ``data`` by ``min`` and then divide the result by ``max``.

    Args:
        data: Values to scale; must support ``-`` and ``/`` with ``min`` and
            ``max`` (the caller in this file passes a NumPy array).
        min: Offset subtracted from ``data`` (shadows the builtin, but is part
            of the frozen signature).
        max: Divisor applied after the shift (shadows the builtin, but is part
            of the frozen signature).

    Returns:
        ``(data - min) / max``.
    """
    # NOTE(review): textbook min-max scaling divides by (max - min); this code
    # divides by max, so the largest input maps to (max - min) / max rather
    # than 1. Left as-is because the block is marked "DO NOT CHANGE".
    data = data - min
    data = data / max
    return data

def windowed_dataset(series, batch_size, n_past=10, n_future=10, shift=1):
    """Turn a series into batched (past window, future window) pairs.

    Args:
        series: Values sliced along their first axis into dataset elements.
        batch_size: Number of (past, future) pairs per emitted batch.
        n_past: Length of the input part of each window.
        n_future: Length of the target part of each window.
        shift: Offset between the starts of consecutive windows.

    Returns:
        A ``tf.data.Dataset`` yielding batches of ``(past, future)`` pairs,
        with one batch prefetched.
    """
    window_len = n_past + n_future

    def _split_window(window):
        # First n_past steps are the model input, the rest are the target.
        return window[:n_past], window[n_past:]

    dataset = tf.data.Dataset.from_tensor_slices(series)
    # Incomplete trailing windows are dropped so every window has window_len steps.
    dataset = dataset.window(size=window_len, shift=shift, drop_remainder=True)
    # Each window is itself a dataset; batching it by its full length
    # collapses it into a single tensor.
    dataset = dataset.flat_map(lambda sub: sub.batch(window_len))
    dataset = dataset.map(_split_window)
    dataset = dataset.batch(batch_size)
    return dataset.prefetch(1)

def solution_model():
    """Train the diesel-price forecasting network and return it.

    Reads the CSV that the module downloads at import time, scales it with
    ``normalize_series``, splits it 80/20 in time order into training and
    validation series, and fits a Conv1D + stacked bidirectional LSTM network
    on windows of ``N_PAST`` steps to predict the following ``N_FUTURE`` steps.

    Returns:
        The compiled Keras model, with the weights from the best checkpoint
        (as judged by ``val_mae``) loaded back in after training.
    """
    # DO NOT CHANGE THIS CODE
    # Reads the dataset.
    df = pd.read_csv('Weekly_U.S.Diesel_Retail_Prices.csv',
                     infer_datetime_format=True, index_col='Week of', header=0)

    # Number of features in the dataset. We use all features as predictors to
    # predict all features of future time steps.
    N_FEATURES = len(df.columns)

    # Normalizes the data
    data = df.values
    data = normalize_series(data, data.min(axis=0), data.max(axis=0))

    # Splits the data into training and validation sets, keeping time order:
    # the first 80% of rows train the model, the remainder validates it.
    SPLIT_TIME = int(len(data) * 0.8)
    x_train = data[:SPLIT_TIME]
    x_valid = data[SPLIT_TIME:]

    # Reset Keras global state and fix the seed before building the model.
    tf.keras.backend.clear_session()
    tf.random.set_seed(42)

    BATCH_SIZE = 32  # ADVISED NOT TO CHANGE THIS

    N_PAST = 10  # DO NOT CHANGE THIS

    N_FUTURE = 10  # DO NOT CHANGE THIS

    SHIFT = 1  # DO NOT CHANGE THIS

    train_set = windowed_dataset(series=x_train, batch_size=BATCH_SIZE,
                                 n_past=N_PAST, n_future=N_FUTURE,
                                 shift=SHIFT)
    valid_set = windowed_dataset(series=x_valid, batch_size=BATCH_SIZE,
                                 n_past=N_PAST, n_future=N_FUTURE,
                                 shift=SHIFT)

    # Code to define your model.
    # Every recurrent layer returns full sequences, so the network emits one
    # N_FEATURES-wide vector per input step; this lines up with the targets
    # only because N_PAST == N_FUTURE.
    model = Sequential([
        # The input width follows N_FEATURES so it stays consistent with the
        # output layer instead of being hard-coded to a single feature.
        Conv1D(filters=32, kernel_size=5, padding='causal', activation='relu',
               input_shape=[N_PAST, N_FEATURES]),
        Bidirectional(LSTM(32, return_sequences=True)),
        Bidirectional(LSTM(32, return_sequences=True)),
        Dense(32, activation='relu'),
        Dense(16, activation='relu'),
        Dense(N_FEATURES),
    ])

    # Create the checkpoint callback: only weights are saved, and only when
    # the monitored validation MAE improves.
    checkpoint_path = 'model/my_checkpoint.ckpt'
    checkpoint = ModelCheckpoint(filepath=checkpoint_path,
                                 save_weights_only=True,
                                 save_best_only=True,
                                 monitor='val_mae',
                                 verbose=1)

    # Code to train and compile the model
    optimizer = tf.keras.optimizers.Adam(0.0001)
    model.compile(
        optimizer=optimizer, loss=tf.keras.losses.Huber(), metrics=['mae']
    )
    model.fit(
        train_set,
        validation_data=valid_set,
        epochs=100,
        callbacks=[checkpoint]
    )

    # Restore the best checkpointed weights rather than keeping the weights
    # from the final epoch.
    model.load_weights(checkpoint_path)

    return model

if __name__ == '__main__':
    # Train the model and write it to "model.h5" in the working directory.
    model = solution_model()
    model.save("model.h5")



Epoch 1/100

Epoch 00001: val_mae improved from inf to 0.32533, saving model to model/my_checkpoint.ckpt
Epoch 2/100

Epoch 00002: val_mae improved from 0.32533 to 0.25625, saving model to model/my_checkpoint.ckpt
Epoch 3/100

Epoch 00003: val_mae improved from 0.25625 to 0.15567, saving model to model/my_checkpoint.ckpt
Epoch 4/100

Epoch 00004: val_mae improved from 0.15567 to 0.05648, saving model to model/my_checkpoint.ckpt
Epoch 5/100

Epoch 00005: val_mae improved from 0.05648 to 0.04121, saving model to model/my_checkpoint.ckpt
Epoch 6/100

Epoch 00006: val_mae improved from 0.04121 to 0.03871, saving model to model/my_checkpoint.ckpt
Epoch 7/100

Epoch 00007: val_mae improved from 0.03871 to 0.03530, saving model to model/my_checkpoint.ckpt
Epoch 8/100

Epoch 00008: val_mae improved from 0.03530 to 0.03278, saving model to model/my_checkpoint.ckpt
Epoch 9/100

Epoch 00009: val_mae improved from 0.03278 to 0.03104, saving model to model/my_checkpoint.ckpt
Epoch 10/100

Epoch 000