In [1]:
%reload_ext autoreload
%autoreload 2

import glob
import os, gc
import numpy as numpy
import pandas as pd
import scipy as sp
import datatable as dt
from collections import defaultdict
from tqdm.notebook import tqdm
from sklearn.utils import shuffle
from sklearn.metrics import r2_score
from numba import njit
from utils import *

from IPython.display import clear_output

os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3' 
# TF
import tensorflow as tf
import tensorflow.keras.backend as K
from tensorflow.keras.callbacks import Callback, ReduceLROnPlateau, ModelCheckpoint, EarlyStopping

In [2]:
N_FOLD = 5
N_MINS = 5
MIN_SIZE = 600 // N_MINS

SOL_NAME = '501-WaveNet'
DATA_NAME = '501'
mkdir(f'./models/{SOL_NAME}/')

In [3]:
# CONSTANT
MEAN = -5.762330803300896
STD = 0.6339307835941186
EPS = 1e-9

In [4]:
# get ids
list_stock_id = get_stock_id()
list_time_id = get_time_id()

# Functions

In [5]:
def transform_target(target):
    return (np.log(target + EPS) - MEAN) / STD

def inverse_target(target):
    return np.exp(MEAN + STD * target) - EPS

def np_rmspe(y_true, y_pred):
    y_true = inverse_target(y_true)
    y_pred = inverse_target(y_pred)
    return np.sqrt(np.mean(np.square((y_true - y_pred) / y_true)))

def mspe_loss(y_true, y_pred):
    y_true = K.exp(MEAN + STD * y_true) - EPS
    y_pred = K.exp(MEAN + STD * y_pred) - EPS
    return K.sqrt(K.mean(K.square((y_true - y_pred) / y_true)))

def rmspe_keras(y_true, y_pred):
    return K.sqrt(K.mean(K.square((y_true - y_pred) / y_true)))

In [6]:
def cbr(x, out_layer, kernel, stride, dilation):
        x = tf.keras.layers.Conv1D(out_layer, kernel_size=kernel, dilation_rate=dilation, strides=stride, padding="same")(x)
        x = tf.keras.layers.BatchNormalization()(x)
        x = tf.keras.layers.Activation("relu")(x)
        return x
    
def WaveNetResidualConv1D(num_filters, kernel_size, stacked_layer):

    def build_residual_block(l_input):
        resid_input = l_input
        skip_connections = []
        for dilation_rate in [2**i for i in range(stacked_layer)]:
            l_sigmoid_conv1d = tf.keras.layers.Conv1D(
              num_filters, kernel_size, dilation_rate=dilation_rate,
              padding='same', activation='sigmoid')(l_input)
              #padding='same', activation='linear')(l_input)
            l_tanh_conv1d = tf.keras.layers.Conv1D(
             num_filters, kernel_size, dilation_rate=dilation_rate,
             padding='same', activation='tanh')(l_input)
             #padding='same', activation='mish')(l_input)
            l_input = tf.keras.layers.Multiply()([l_sigmoid_conv1d, l_tanh_conv1d])
            l_skip = tf.keras.layers.Conv1D(num_filters, 1, padding='same')(l_input)
            skip_connections.append(l_skip)
            l_input = tf.keras.layers.Conv1D(num_filters, 1, padding='same')(l_input)
            resid_input = tf.keras.layers.Add()([resid_input ,l_input])
        return resid_input ,skip_connections
    return build_residual_block
            
def wave_block(x, filters, kernel_size, n):
        dilation_rates = [2**i for i in range(n)]
        x = tf.keras.layers.Conv1D(filters = filters,
                   kernel_size = 1,
                   padding = 'same')(x)
        res_x = x
        for dilation_rate in dilation_rates:
            tanh_out = tf.keras.layers.Conv1D(filters = filters,
                              kernel_size = kernel_size,
                              padding = 'same', 
                              activation = 'tanh', 
                              dilation_rate = dilation_rate)(x)
            sigm_out = tf.keras.layers.Conv1D(filters = filters,
                              kernel_size = kernel_size,
                              padding = 'same',
                              activation = 'sigmoid', 
                              dilation_rate = dilation_rate)(x)
            x = tf.keras.layers.Multiply()([tanh_out, sigm_out])
            x = tf.keras.layers.Conv1D(filters = filters,
                       kernel_size = 1,
                       padding = 'same')(x)
            res_x = tf.keras.layers.Add()([res_x, x])
        return res_x      
    
def create_wavenet(num_columns, num_labels, learning_rate, filters, kernel_size, n):
    num_filters_ = filters[0]
    
    # input
    inp = tf.keras.layers.Input(shape=(num_columns,))
    x = tf.keras.layers.BatchNormalization()(inp)
    
    # 1dcnn
    x = tf.keras.layers.Dense(4096, activation='relu')(x)
    x = tf.keras.layers.Reshape((64, 64))(x)
    
    x1 = tf.keras.layers.Dense(256, activation='linear', name='in_dense')(x)
    x1 = tf.keras.layers.BatchNormalization()(x1)
    x2 = tf.keras.layers.Dense(256, activation='linear', name='in_dense2')(x)
    x2 = tf.keras.layers.BatchNormalization()(x2)
    x4 = tf.keras.layers.Conv1D(32, 3,activation='linear', padding='same')(x)
    x4 = tf.keras.layers.Conv1D(64, 3,activation='linear', padding='same')(x4)
    
    
    x5 = tf.keras.layers.Conv1D(32, 7,activation='linear', padding='same')(x)
    x5 = tf.keras.layers.Conv1D(64, 7,activation='linear', padding='same')(x5)
    
    x6 = tf.keras.layers.Conv1D(16, 3,activation='tanh', padding='same')(x)
    x6 = tf.keras.layers.Conv1D(32, 3,activation='tanh', padding='same')(x6)
    x6 = tf.keras.layers.Conv1D(64, 3,activation='tanh', padding='same')(x6)       
    
    x7 = tf.keras.layers.Conv1D(16, 7,activation='tanh', padding='same')(x)
    x7 = tf.keras.layers.Conv1D(32, 7,activation='tanh', padding='same')(x7)
    x7 = tf.keras.layers.Conv1D(64, 7,activation='tanh', padding='same')(x7)
    
    x_merge = tf.keras.layers.Concatenate()([x4, x5,x6,x7]) 
    x = tf.keras.layers.Add()([x1,x2,x_merge]) 
                      

    x00 = tf.keras.layers.Conv1D(num_filters_, 1, padding='same')(x)
    x,skip1 = WaveNetResidualConv1D(num_filters_, kernel_size[0], int(n[0]/2))(x00)
    x = tf.keras.layers.BatchNormalization()(x)
    x = tf.keras.layers.Conv1D(num_filters_, 1, padding='same')(x)

    x,skip2 = WaveNetResidualConv1D(num_filters_, kernel_size[1], int(n[1]))(x)
    x = tf.keras.layers.BatchNormalization()(x)
    x = tf.keras.layers.Conv1D(num_filters_, 1, padding='same')(x)

    x,skip3 = WaveNetResidualConv1D(num_filters_, kernel_size[2], int(n[2]))(x)
    x = tf.keras.layers.BatchNormalization()(x)
    x = tf.keras.layers.Conv1D(num_filters_, 1, padding='same')(x)

    x,skip4 = WaveNetResidualConv1D(num_filters_, kernel_size[3], 1)(x)  
    
    skip_cons = skip4+skip1+skip2+skip3
    x = tf.keras.layers.Add()(skip_cons) 
    x = tf.keras.layers.Flatten()(x)
    x = tf.keras.layers.Dense(256, activation='relu')(x)
    x = tf.keras.layers.Dense(num_labels)(x)
    
    model = tf.keras.models.Model(inputs=inp, outputs=x)
    model.compile(
        optimizer=tf.keras.optimizers.Adam(learning_rate=learning_rate),
        loss=mspe_loss,
    )
    return model     


# Loading data

In [7]:
# train
df_train = dt.fread(f'./dataset/train_{DATA_NAME}_NN.csv').to_pandas()
fea_cols = [f for f in df_train.columns if f.startswith('B_') or f.startswith('T_') or f.startswith('Z_')]
# result
df_result = dt.fread('./dataset/train.csv').to_pandas()
df_result = gen_row_id(df_result)

In [8]:
fea_cols_TA = [f for f in fea_cols if 'min_' not in f]
df_time_mean = df_train.groupby('time_id')[fea_cols_TA].mean()
df_time_mean.columns = [f'{c}_TA_mean' for c in df_time_mean.columns]
df_time_mean = df_time_mean.reset_index()
df_train = df_train.merge(df_time_mean, on='time_id', how='left')

del df_time_mean
gc.collect()

43

In [9]:
df_train['target'] = transform_target(df_train['target'])
df_train = gen_row_id(df_train)
df_train = add_time_fold(df_train, N_FOLD)

# Evaluation

In [10]:
def add_time_stats(df_train):
    time_cols = [f for f in df_train.columns if f.endswith('_time')]
    df_gp_stock = df_train.groupby('stock_id')
    #
    df_stats = df_gp_stock[time_cols].mean().reset_index()
    df_stats.columns = ['stock_id'] + [f'{f}_mean' for f in time_cols]
    df_train = df_train.merge(df_stats, on=['stock_id'], how='left')
    #
    df_stats = df_gp_stock[time_cols].std().reset_index()
    df_stats.columns = ['stock_id'] + [f'{f}_std' for f in time_cols]
    df_train = df_train.merge(df_stats, on=['stock_id'], how='left')
    #
    df_stats = df_gp_stock[time_cols].skew().reset_index()
    df_stats.columns = ['stock_id'] + [f'{f}_skew' for f in time_cols]
    df_train = df_train.merge(df_stats, on=['stock_id'], how='left')
    #
    df_stats = df_gp_stock[time_cols].min().reset_index()
    df_stats.columns = ['stock_id'] + [f'{f}_min' for f in time_cols]
    df_train = df_train.merge(df_stats, on=['stock_id'], how='left')
    #
    df_stats = df_gp_stock[time_cols].max().reset_index()
    df_stats.columns = ['stock_id'] + [f'{f}_max' for f in time_cols]
    df_train = df_train.merge(df_stats, on=['stock_id'], how='left')
    #
    df_stats = df_gp_stock[time_cols].quantile(0.25).reset_index()
    df_stats.columns = ['stock_id'] + [f'{f}_q1' for f in time_cols]
    df_train = df_train.merge(df_stats, on=['stock_id'], how='left')
    #
    df_stats = df_gp_stock[time_cols].quantile(0.50).reset_index()
    df_stats.columns = ['stock_id'] + [f'{f}_q2' for f in time_cols]
    df_train = df_train.merge(df_stats, on=['stock_id'], how='left')
    #
    df_stats = df_gp_stock[time_cols].quantile(0.75).reset_index()
    df_stats.columns = ['stock_id'] + [f'{f}_q3' for f in time_cols]
    df_train = df_train.merge(df_stats, on=['stock_id'], how='left')
    return df_train

In [11]:
batch_size = 1024
filters = [8]
kernel_size = [5, 2, 1, 1]
n_params = [2, 2, 4]
learning_rate = 2.4e-05
epochs = 1000

list_seeds = [0, 11, 42]

In [12]:
list_rmspe = []
for i_seed, seed in enumerate(list_seeds):
    df_train = add_time_fold(df_train, N_FOLD, seed=seed)
    list_rmspe += [[]]
    for i_fold in range(N_FOLD):
        gc.collect()
        df_tr = df_train.loc[df_train.fold!=i_fold]
        df_te = df_train.loc[df_train.fold==i_fold]
        df_tr = add_time_stats(df_tr)
        df_te = add_time_stats(df_te)

        fea_cols = [f for f in df_tr if f.startswith('B_') or f.startswith('T_') or f.startswith('Z_')]

        X_train = df_tr[fea_cols].values
        y_train = df_tr[['target']].values
        X_test = df_te[fea_cols].values
        y_test = df_te[['target']].values
        idx_test = df_train.loc[df_train.fold==i_fold].index
        print(f'Fold {i_seed+1}/{len(list_seeds)} | {i_fold+1}/{N_FOLD}', X_train.shape, X_test.shape)


        # Callbacks
        ckp_path = f'./models/{SOL_NAME}/model_{i_seed}_{i_fold}.hdf5'
        rlr = ReduceLROnPlateau(monitor='val_loss', factor=0.1, patience=8, min_delta=1e-5, verbose=2)
        es = EarlyStopping(monitor='val_loss', min_delta=1e-5, patience=30, restore_best_weights=True, verbose=2)

        model = create_wavenet(X_train.shape[1], 1, learning_rate, filters, kernel_size, n_params)
        history = model.fit(X_train, y_train,
            epochs=epochs,
            validation_data=(X_test, y_test),
            validation_batch_size=len(y_test),
            batch_size=batch_size,
            verbose=2,
            callbacks=[rlr, es]
        ) 
        # model = tf.keras.models.load_model(ckp_path, custom_objects={'mspe_loss': mspe_loss})

        y_pred = model.predict(X_test, batch_size=len(y_test))
        curr_rmspe = np_rmspe(y_test, y_pred)
        list_rmspe[-1] += [curr_rmspe]
        model.save(ckp_path)
        # generate and save preds
        df_result.loc[idx_test, f'pred_{i_seed}'] = inverse_target(y_pred)
        clear_output()
        print(list_rmspe)

Fold 1/3 | 1/5 (343145, 674) (85787, 674)
Epoch 1/1000


ResourceExhaustedError:  OOM when allocating tensor with shape[5490368,256] and type float on /job:localhost/replica:0/task:0/device:GPU:0 by allocator GPU_0_bfc
	 [[node model/in_dense/Tensordot/MatMul (defined at tmp/ipykernel_2827372/2430019839.py:34) ]]
Hint: If you want to see a list of allocated tensors when OOM happens, add report_tensor_allocations_upon_oom to RunOptions for current allocation info.
 [Op:__inference_test_function_9531]

Function call stack:
test_function


In [None]:
df_result.to_csv(f'./results/{SOL_NAME}.csv', index=False)

In [None]:
for i in range(len(list_seeds)):
    print(i, rmspe(df_result['target'], df_result[f'pred_{i}']))
print('All: ', rmspe(df_result['target'], df_result[[f'pred_{i}' for i in range(len(list_seeds))]].mean(axis=1)))

0 0.21217646313524488
1 0.21264498482027988
2 0.21260948057035714
3 0.21338003692486984
4 0.21225971098150143
All:  0.2093912807451296
