In [1]:
import pandas as pd
import numpy as np
import os

from utils import *
import tensorflow as tf
from sklearn.preprocessing import minmax_scale

In [2]:
df = pd.read_csv('./data/train.csv')
df_tst = pd.read_csv('./data/test.csv')
subm = pd.read_csv('./data/sample_submission.csv')

In [3]:
df[Xs] = minmax_scale(df[Xs])

Ys = ['Y{}'.format(str(i).zfill(2)) for i in range(16, 19)]
df['Y'] = df.loc[:, Ys].mean(axis=1)

df_trn = df[df['Y18'].isna()]
df_val = df[df['Y00'].isna()]

In [4]:
def df2seqs(df, time_range=14, vals=200):
    seqs = []
    for i in range(len(df) - time_range+1):
        seqs.append(df.iloc[i:i+time_range])

    data_len = len(seqs)
    trn_X, trn_Y = [], []
    val_X, val_Y = [], []

    for i in range(data_len - vals):
        seq = seqs[i]
        trn_X.append(seq[Xs].values)
        trn_Y.append(seq['Y'].values[-1])

    for i in range(data_len-vals, data_len):
        seq = seqs[i]
        val_X.append(seq[Xs].values)
        val_Y.append(seq['Y'].values[-1])


    trn_X = np.array(trn_X)
    trn_Y = np.array(trn_Y)
    val_X = np.array(val_X)
    val_Y = np.array(val_Y)
    
    return trn_X, trn_Y, val_X, val_Y

In [5]:
def df2seqs_test(df_tst, df_val, time_range=14):
    df = pd.concat([df_val, df_tst]).reset_index()
    seqs = []
    for i in range(len(df) - time_range +1):
        seqs.append(df.iloc[i:i+time_range])

    data_len = len(seqs)
    test_X = []

    for i in range(data_len):
        seq = seqs[i]
        test_X.append(seq[Xs].values)

    return np.array(test_X)

In [6]:
time_range = 14

trn_X, trn_Y, val_X, val_Y = df2seqs(df_trn, time_range)
trn_X2, trn_Y2, val_X2, val_Y2 = df2seqs(df_val, time_range)

test_X = df2seqs_test(df_tst, df[-time_range+1:], time_range)

In [7]:
test_X.shape

(11520, 14, 40)

# Model

In [8]:
tf.keras.backend.clear_session()

inp = tf.keras.layers.Input([time_range, 40])

x = tf.keras.layers.LSTM(120, return_sequences=True)(inp)
x = tf.keras.layers.LSTM(80)(x)
x = tf.keras.layers.Dense(64, activation='relu')(x)
x = tf.keras.layers.Dense(32, activation='relu')(x)

outp = tf.keras.layers.Dense(1, activation='linear')(x)

In [9]:
mdl = tf.keras.models.Model(inputs=inp, outputs=outp)
mdl.compile(optimizer='adam', loss='mse')

In [11]:
mdl.fit(x=trn_X, y=trn_Y, validation_data=(val_X, val_Y), batch_size=256, epochs=40, verbose=2)

Train on 4107 samples, validate on 200 samples
Epoch 1/40
4107/4107 - 0s - loss: 2.0120 - val_loss: 3.6633
Epoch 2/40
4107/4107 - 0s - loss: 1.9127 - val_loss: 5.5786
Epoch 3/40
4107/4107 - 0s - loss: 2.1476 - val_loss: 4.1241
Epoch 4/40
4107/4107 - 0s - loss: 1.9272 - val_loss: 4.5608
Epoch 5/40
4107/4107 - 0s - loss: 1.8828 - val_loss: 5.8930
Epoch 6/40
4107/4107 - 0s - loss: 2.2200 - val_loss: 3.7342
Epoch 7/40
4107/4107 - 0s - loss: 2.3342 - val_loss: 6.9067
Epoch 8/40
4107/4107 - 0s - loss: 2.7866 - val_loss: 3.6823
Epoch 9/40
4107/4107 - 0s - loss: 1.9123 - val_loss: 4.7364
Epoch 10/40
4107/4107 - 0s - loss: 1.7812 - val_loss: 3.7088
Epoch 11/40
4107/4107 - 0s - loss: 1.6623 - val_loss: 4.4460
Epoch 12/40
4107/4107 - 0s - loss: 1.6727 - val_loss: 4.9219
Epoch 13/40
4107/4107 - 0s - loss: 1.8325 - val_loss: 4.5071
Epoch 14/40
4107/4107 - 0s - loss: 1.5520 - val_loss: 3.8104
Epoch 15/40
4107/4107 - 0s - loss: 1.5768 - val_loss: 5.8429
Epoch 16/40
4107/4107 - 0s - loss: 1.7163 - val

<tensorflow.python.keras.callbacks.History at 0x28655a69288>

In [12]:
for i in range(1, 3):
    mdl.layers[i].trainable = False

In [13]:
mdl.fit(x=trn_X2, y=trn_Y2, validation_data=(val_X2, val_Y2), batch_size=256, epochs=40, verbose=2)

Train on 219 samples, validate on 200 samples
Epoch 1/40
219/219 - 0s - loss: 8.6974 - val_loss: 7.4568
Epoch 2/40
219/219 - 0s - loss: 7.3086 - val_loss: 6.1254
Epoch 3/40
219/219 - 0s - loss: 5.7711 - val_loss: 5.4187
Epoch 4/40
219/219 - 0s - loss: 5.1058 - val_loss: 4.3536
Epoch 5/40
219/219 - 0s - loss: 4.3148 - val_loss: 3.6687
Epoch 6/40
219/219 - 0s - loss: 3.8518 - val_loss: 3.9144
Epoch 7/40
219/219 - 0s - loss: 3.7970 - val_loss: 4.3517
Epoch 8/40
219/219 - 0s - loss: 3.7185 - val_loss: 3.9862
Epoch 9/40
219/219 - 0s - loss: 3.2652 - val_loss: 3.4846
Epoch 10/40
219/219 - 0s - loss: 3.0891 - val_loss: 3.5750
Epoch 11/40
219/219 - 0s - loss: 3.0672 - val_loss: 4.1452
Epoch 12/40
219/219 - 0s - loss: 2.7922 - val_loss: 4.8632
Epoch 13/40
219/219 - 0s - loss: 2.6445 - val_loss: 4.7305
Epoch 14/40
219/219 - 0s - loss: 2.5973 - val_loss: 4.3898
Epoch 15/40
219/219 - 0s - loss: 2.3952 - val_loss: 4.6099
Epoch 16/40
219/219 - 0s - loss: 2.1060 - val_loss: 4.9804
Epoch 17/40
219/219

<tensorflow.python.keras.callbacks.History at 0x288c919f1c8>

# Test

In [14]:
pred_test = mdl.predict(test_X)
pred_test_adj = pred_test.reshape([-1])

In [15]:
writeSubm(pred_test_adj)

Submission file is written on ./subm/subm_0310011814.csv


# End