Written by Matthieu Sarkis, https://github.com/MatthieuSarkis

This code is licensed under the Apache License, Version 2.0. You may
obtain a copy of this license in the LICENSE.txt file in the root directory
of this source tree or at http://www.apache.org/licenses/LICENSE-2.0.

Any modifications or derivative works of this code must retain this
copyright notice, and modified files need to carry a notice indicating
that they have been altered from the originals

In [54]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
%matplotlib inline
sns.set_style()
import get_data

In [55]:
DATA_PATH = '~/data_cfm_auction'
X_train_path = os.path.join(DATA_PATH, 'input_training.csv')
y_train_path = os.path.join(DATA_PATH, 'output_training_IxKGwDV.csv')
X_test_path = os.path.join(DATA_PATH, 'input_test.csv')
submission_example_path = os.path.join(DATA_PATH, 'submission_csv_file_random_example.csv')

In [56]:
X_train = pd.read_csv(X_train_path)
y_train = pd.read_csv(y_train_path)
X_test = pd.read_csv(X_test_path)

In [57]:
temp = pd.concat([X_train.fillna(method='ffill', axis=1).fillna(method='bfill', axis=1), y_train['target']], axis=1).dropna()
y_train = temp['target']
X_train = temp.drop(['target'], axis = 1)

In [58]:
(X_train.shape, y_train.shape)

((684482, 127), (684482,))

In [69]:
X_train_2d_timeSequence = X_train[['abs_ret{}'.format(i) for i in range(0, 61)] + ['rel_vol{}'.format(i) for i in range(0, 61)]].copy().values.reshape(-1, 61, 2)

In [70]:
X_train_2d_timeSequence

array([[[0.00000000e+00, 0.00000000e+00],
        [0.00000000e+00, 0.00000000e+00],
        [2.93169159e-02, 0.00000000e+00],
        ...,
        [9.46154175e-03, 1.26973539e-02],
        [1.95071487e-02, 5.10070533e-03],
        [1.06539458e-02, 6.97085563e-03]],

       [[0.00000000e+00, 0.00000000e+00],
        [0.00000000e+00, 2.20555801e-02],
        [0.00000000e+00, 3.67107195e-02],
        ...,
        [5.27637403e-03, 1.16484133e-02],
        [1.18287058e-02, 1.55580697e-02],
        [1.78764606e-03, 5.25071213e-03]],

       [[0.00000000e+00, 0.00000000e+00],
        [0.00000000e+00, 0.00000000e+00],
        [2.22667557e-02, 0.00000000e+00],
        ...,
        [1.27279014e-02, 7.92064677e-03],
        [6.43157335e-04, 2.96920398e-02],
        [1.64399882e-02, 1.19702532e-02]],

       ...,

       [[1.08626198e+00, 0.00000000e+00],
        [5.68900126e-01, 4.71846493e-01],
        [1.89753321e-01, 4.75285171e-01],
        ...,
        [1.45520612e-02, 1.61738930e-02],
     