In [None]:
import os
import numpy as np
import matplotlib as mpl
import matplotlib.pyplot as plt
import pandas as pd
import tensorflow as tf
import seaborn as sns
from tensorflow.keras.layers import LSTM, Flatten, TimeDistributed, Conv1D, MaxPooling1D
from tensorflow.keras import Model, Sequential
import json

# Notebook-wide matplotlib defaults: 8x6 inch figures, grid lines off.
mpl.rcParams.update({
    'figure.figsize': (8, 6),
    'axes.grid': False,
})

In [None]:
# Install the pinned GPU build of TensorFlow (2.0.0-rc1); -q keeps pip quiet.
# NOTE(review): tensorflow is already imported in the first cell, so on a fresh
# runtime this pin presumably only takes effect after a runtime restart —
# confirm, and consider moving this cell above the imports.
!pip install -q tensorflow-gpu==2.0.0-rc1

[K     |████████████████████████████████| 380.5MB 74kB/s 
[K     |████████████████████████████████| 501kB 41.3MB/s 
[K     |████████████████████████████████| 4.3MB 29.7MB/s 
[?25h

In [None]:
# Mount Google Drive at /content/drive (prompts for an authorization code).
# The CSV, checkpoints and JSON logs used below all live under this mount.
from google.colab import drive
drive.mount('/content/drive')

Go to this URL in a browser: https://accounts.google.com/o/oauth2/auth?client_id=947318989803-6bn6qk8qdgf4n4g3pfee6491hc0brc4i.apps.googleusercontent.com&redirect_uri=urn%3Aietf%3Awg%3Aoauth%3A2.0%3Aoob&scope=email%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdocs.test%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdrive%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdrive.photos.readonly%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fpeopleapi.readonly&response_type=code

Enter your authorization code:
··········
Mounted at /content/drive


In [None]:
# Show which TensorFlow version the kernel actually loaded.
tf.__version__

'2.0.0-rc1'

In [None]:
# Root folder on the mounted Drive: input CSV, checkpoints and JSON logs live here.
# The path is relative, so it resolves against the current working directory.
main_path = 'drive/My Drive/Colab Notebooks/competition'

# NOTE(review): the frame is named train_df but is read from
# 'processed_test_data.csv' — confirm this is the intended file.
# Gaps are forward-filled (each NaN takes the previous row's value in the same
# column); NaNs at the very start of a column have nothing to copy and remain.
# `.ffill()` replaces `fillna(method='ffill')`, which is deprecated in pandas.
train_df = pd.read_csv(os.path.join(main_path, 'processed_test_data.csv')).ffill()

In [None]:
# Every column name of train_df except the first one.
# (presumably the first column is an index/timestamp — TODO confirm)
cols = train_df.columns.to_list()[1:]

In [None]:
# Fix TensorFlow's global random seed so runs are more repeatable.
tf.random.set_seed(13)

In [None]:
def multivariate_data(dataset, target, start_index, end_index, history_size,
                      target_size, step, single_step=False):
  """Cut a time series into (history window, future label) samples.

  For every position ``i`` in ``[start_index + history_size, end_index)`` one
  sample is produced:

  * data:  ``dataset[range(i - history_size, i, step)]``
  * label: ``target[i + target_size]`` when ``single_step`` is true,
    otherwise the window ``target[i:i + target_size]``.

  Args:
    dataset: numpy array of observations, indexed along its first axis.
    target: numpy array the labels are read from, indexed like ``dataset``.
    start_index: first row that may appear in a history window.
    end_index: exclusive upper bound for ``i``. ``None`` means
      ``len(dataset) - target_size``. A value that reaches past the last
      position with a complete label is clamped to that position.
    history_size: number of rows spanned by each history window.
    target_size: label offset (single-step) or label window length.
    step: stride used when sampling rows inside the history window.
    single_step: pick one future value instead of a window of values.

  Returns:
    ``(data, labels)`` as numpy arrays with one entry per sample.
  """
  start_index = start_index + history_size
  if end_index is None:
    end_index = len(dataset) - target_size
  else:
    # Without this clamp an oversized end_index yields truncated (ragged)
    # label windows, or an IndexError in single-step mode. The multi-step
    # slice target[i:i + target_size] is still complete at
    # i == len(target) - target_size, hence the extra +1 in that mode.
    last_valid_end = len(target) - target_size + (0 if single_step else 1)
    end_index = min(end_index, last_valid_end)

  data = []
  labels = []
  for i in range(start_index, end_index):
    indices = range(i - history_size, i, step)
    data.append(dataset[indices])
    labels.append(target[i + target_size] if single_step
                  else target[i:i + target_size])

  return np.array(data), np.array(labels)

In [None]:
def datasets(data, frame=None):
    """Return one column, min-max scaled to [0, 1], as an ``(n, 1)`` array.

    Args:
        data: name of the column to extract.
        frame: DataFrame to read the column from. Defaults to the
            notebook-level ``train_df``.

    Returns:
        2-D numpy array with a single column. Rows where the column is NaN
        are dropped before scaling. A constant column maps to all zeros
        instead of NaN.
    """
    source = train_df if frame is None else frame
    values = source[[data]].dropna().values

    data_min = values.min(axis=0)
    data_range = values.max(axis=0) - data_min
    # A constant column has zero range; dividing by it would turn every value
    # into NaN. Dividing by 1 instead leaves (values - data_min) == 0.
    data_range = np.where(data_range == 0, 1, data_range)

    return (values - data_min) / data_range

In [None]:
# ---------------------------------------------------------------------------
# Train one CNN-LSTM forecaster per column (columns 140..149 of `cols`).
# Hyper-parameters are identical for every column, so they are set once here.
# ---------------------------------------------------------------------------
BATCH_SIZE = 256
BUFFER_SIZE = 10000         # shuffle buffer size for the training pipeline
past_history = 168          # rows spanned by each input window
future_target = 24          # number of future steps the model predicts
STEP = 1                    # stride inside the input window

n_features = 1
n_seq = 2                   # sub-sequences each window is split into
n_steps = 84                # time steps per sub-sequence

EVALUATION_INTERVAL = 200   # training batches per epoch
EPOCHS = 40

# Each window is reshaped to (n_seq, n_steps, n_features) below; fail early
# with a clear message if the sizes do not line up.
assert n_seq * n_steps == len(range(0, past_history, STEP)), \
    'n_seq * n_steps must equal the number of rows in one history window'

# Create the output folder up front so a missing directory cannot make the
# history dump fail after a full training run.
json_dir = os.path.join(main_path, 'json')
os.makedirs(json_dir, exist_ok=True)

for i in range(140, 150):
    # Drop Keras global state left by the previous model so memory does not
    # grow with every column.
    tf.keras.backend.clear_session()

    # NOTE(review): datasets() scales with the min/max of the whole series,
    # so the validation tail influences the scaling of the training part.
    dataset = datasets(cols[i])
    TRAIN_SPLIT = len(dataset) - int(0.2 * len(dataset))

    # Labels are read from a 1-D view of the series so that y has shape
    # (samples, future_target), the same as the model's Dense output. This
    # avoids relying on the loss implicitly squeezing a trailing axis.
    target = dataset[:, 0]

    x_train_multi, y_train_multi = multivariate_data(dataset, target, 0,
                                                     TRAIN_SPLIT, past_history,
                                                     future_target, STEP,
                                                     single_step=False)
    x_val_multi, y_val_multi = multivariate_data(dataset, target,
                                                 TRAIN_SPLIT, None, past_history,
                                                 future_target, STEP,
                                                 single_step=False)

    # Split each window into n_seq sub-sequences for the TimeDistributed CNN.
    x_train_multi = x_train_multi.reshape((x_train_multi.shape[0], n_seq, n_steps, n_features))
    x_val_multi = x_val_multi.reshape((x_val_multi.shape[0], n_seq, n_steps, n_features))

    # Both pipelines repeat forever; fit() bounds them with steps_per_epoch
    # and validation_steps.
    train_data_multi = tf.data.Dataset.from_tensor_slices((x_train_multi, y_train_multi))
    train_data_multi = train_data_multi.cache().shuffle(BUFFER_SIZE).batch(BATCH_SIZE).repeat()

    val_data_multi = tf.data.Dataset.from_tensor_slices((x_val_multi, y_val_multi))
    val_data_multi = val_data_multi.batch(BATCH_SIZE).repeat()

    # Checkpoint file pattern; the epoch number is embedded in the file name
    # (`str.format` style placeholder filled in by ModelCheckpoint).
    checkpoint_path = "training_2_"+cols[i]
    end_point = "cp-{epoch:04d}.ckpt"
    checkpoint_dir = os.path.join(main_path, checkpoint_path, end_point)

    # Callbacks: stop after 7 epochs without val_loss improvement, and save
    # the weights whenever val_loss reaches a new best.
    cp_callback = [tf.keras.callbacks.EarlyStopping(monitor='val_loss',
                                                    patience=7),

                   tf.keras.callbacks.ModelCheckpoint(checkpoint_dir,
                                                      save_weights_only=True,
                                                      verbose=1,
                                                      monitor='val_loss',
                                                      save_best_only=True)]

    # Conv1D/MaxPooling/Flatten run on every sub-sequence, then two stacked
    # LSTMs read the per-sub-sequence features and Dense emits the forecast.
    multi_step_model = tf.keras.models.Sequential()
    multi_step_model.add(TimeDistributed(Conv1D(filters=64, kernel_size=1, activation='relu'), input_shape=(None, n_steps, n_features)))
    multi_step_model.add(TimeDistributed(MaxPooling1D(pool_size=2)))
    multi_step_model.add(TimeDistributed(Flatten()))

    # No input_shape here: this is not the first layer, so Sequential derives
    # its input from the previous layer's output.
    multi_step_model.add(tf.keras.layers.LSTM(16,
                                              return_sequences=True,
                                              activation='softsign'))
    multi_step_model.add(tf.keras.layers.LSTM(8, activation='relu'))
    # One output unit per forecast step.
    multi_step_model.add(tf.keras.layers.Dense(future_target))

    multi_step_model.compile(optimizer=tf.keras.optimizers.RMSprop(clipvalue=1.0), loss='mae')

    multi_step_history = multi_step_model.fit(train_data_multi, epochs=EPOCHS,
                                              steps_per_epoch=EVALUATION_INTERVAL,
                                              validation_data=val_data_multi,
                                              validation_steps=50,
                                              callbacks=cp_callback)

    # Persist the per-epoch metrics. Values are cast to plain floats because
    # json cannot serialise numpy scalar types such as float32.
    history_log = {metric: [float(value) for value in values]
                   for metric, values in multi_step_history.history.items()}
    with open(os.path.join(json_dir, cols[i]+'.json'), 'w') as file:
        json.dump(history_log, file)

Train for 200 steps, validate for 50 steps
Epoch 1/40
Epoch 00001: val_loss improved from inf to 0.11683, saving model to drive/My Drive/Colab Notebooks/competition/training_2_X333/cp-0001.ckpt
Epoch 2/40
Epoch 00002: val_loss improved from 0.11683 to 0.11346, saving model to drive/My Drive/Colab Notebooks/competition/training_2_X333/cp-0002.ckpt
Epoch 3/40
Epoch 00003: val_loss improved from 0.11346 to 0.11207, saving model to drive/My Drive/Colab Notebooks/competition/training_2_X333/cp-0003.ckpt
Epoch 4/40
Epoch 00004: val_loss improved from 0.11207 to 0.11065, saving model to drive/My Drive/Colab Notebooks/competition/training_2_X333/cp-0004.ckpt
Epoch 5/40
Epoch 00005: val_loss improved from 0.11065 to 0.11011, saving model to drive/My Drive/Colab Notebooks/competition/training_2_X333/cp-0005.ckpt
Epoch 6/40
Epoch 00006: val_loss did not improve from 0.11011
Epoch 7/40
Epoch 00007: val_loss improved from 0.11011 to 0.10957, saving model to drive/My Drive/Colab Notebooks/competitio