In [1]:
# Install the treelite 0.93 wheel from the local ../input/treelite directory (quiet mode).
!pip --quiet install ../input/treelite/treelite-0.93-py3-none-manylinux2010_x86_64.whl

In [2]:
# Install the matching treelite_runtime 0.93 wheel from the same local directory (quiet mode).
!pip --quiet install ../input/treelite/treelite_runtime-0.93-py3-none-manylinux2010_x86_64.whl

In [3]:
# ../input/jane-street-save-as-feather
# ../input/jane-street-xgb-treelite
# ../input/jane-street-resnet-cv
# ../input/jane-street-1dcnn-cv
# ../input/jane-street-mlp-stratifiedgroupkfold

# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
import sys
import pathlib
from pathlib import Path

import tensorflow as tf
import tensorflow_addons as tfa

import operator
import seaborn as sns

import sklearn
from sklearn import model_selection
from sklearn.model_selection import GroupKFold
from sklearn.model_selection import train_test_split
from sklearn.model_selection import KFold
from sklearn.model_selection._split import _BaseKFold, indexable, _num_samples
from sklearn.utils.validation import _deprecate_positional_args

from tqdm import tqdm
import random
from random import choices

from collections import Counter, defaultdict


import warnings
# Install an "ignore" filter so that no warnings are shown from this point on.
warnings.filterwarnings("ignore")
# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [4]:
# tf setup
print("Tensorflow version " + tf.__version__)
AUTO = tf.data.experimental.AUTOTUNE

# Switches for the two optional TensorFlow speed-ups configured below.
MIXED_PRECISION = False
XLA_ACCELERATE = True

if MIXED_PRECISION:
    from tensorflow.keras.mixed_precision import experimental as mixed_precision
    # No cell in this notebook defines `tpu`, so reading it directly raises
    # NameError as soon as MIXED_PRECISION is switched on. Look it up
    # defensively: a missing (or falsy) `tpu` selects the float16 policy.
    on_tpu = bool(globals().get('tpu'))
    policy = mixed_precision.Policy('mixed_bfloat16' if on_tpu else 'mixed_float16')
    mixed_precision.set_policy(policy)
    print('Mixed precision enabled')

if XLA_ACCELERATE:
    # Turn on XLA JIT compilation through the TF optimizer config.
    tf.config.optimizer.set_jit(True)
    print('Accelerated Linear Algebra enabled')

Tensorflow version 2.3.1
Accelerated Linear Algebra enabled


# Config

In [5]:
SEED: int = 2021        # seed tag embedded in the per-fold weight file names
START_DATE: int = 86    # training rows with date < START_DATE are discarded
FOLDS: int = 5          # number of per-fold weight files iterated when loading models

### Loading the training data

In [6]:
# Read the feather copy of the training set and keep rows from START_DATE onward.
train = (
    pd.read_feather('../input/jane-street-save-as-feather/train.feather')
    .query(f'date >= {START_DATE}')
    .reset_index(drop=True)
)

# Downcast every float64 column to float32 to limit memory use.
float64_cols = train.select_dtypes(include='float64').columns
train = train.astype(dict.fromkeys(float64_cols, np.float32))

# Impute missing values with the column means (computed before the weight
# filter), then keep only rows with strictly positive weight.
train = train.fillna(train.mean())
train = train.query('weight > 0').reset_index(drop=True)

# Alternative filter / label definitions kept for reference:
# train = train.query('weight != 0').reset_index(drop = True)
# train['action'] = (train['resp'] > 0).astype('int')

# action is 1 only when resp and all four resp_N columns are strictly positive.
positive_resp = train[['resp_1', 'resp_2', 'resp_3', 'resp_4', 'resp']] > 0
train['action'] = positive_resp.all(axis=1).astype('int')

# Every column whose name contains 'feature', in frame order.
features = list(filter(lambda col: 'feature' in col, train.columns))

resp_cols = ['resp_1', 'resp_2', 'resp_3', 'resp', 'resp_4']

In [7]:
import treelite
import treelite_runtime 

In [8]:
# Load the compiled tree model (presumably the XGBoost model, per the dataset name) from its
# shared library; verbose=True makes the runtime log its loading messages.
predictor = treelite_runtime.Predictor('../input/jane-street-xgb-treelite/mymodel.so', verbose=True)

[09:03:18] /workspace/src/predictor/predictor.cc:262: Dynamic shared library `/kaggle/input/jane-street-xgb-treelite/mymodel.so' does not contain valid get_pred_transform() function
[09:03:18] /workspace/src/predictor/predictor.cc:276: Dynamic shared library `/kaggle/input/jane-street-xgb-treelite/mymodel.so' does not contain valid get_sigmoid_alpha() function
[09:03:18] /workspace/src/predictor/predictor.cc:288: Dynamic shared library `/kaggle/input/jane-street-xgb-treelite/mymodel.so' does not contain valid get_global_bias() function
[09:03:18] /opt/conda/lib/python3.7/site-packages/treelite_runtime/predictor.py:309: Dynamic shared library /kaggle/input/jane-street-xgb-treelite/mymodel.so has been successfully loaded into memory


In [9]:
def create_autoencoder(input_dim,output_dim,noise=0.05):
    """Build an autoencoder with an auxiliary classification head.

    Args:
        input_dim: Passed to ``Input`` and reused as the width of the
            ``decoded`` reconstruction layer.
        output_dim: Number of sigmoid units in the ``label_output`` head.
        noise: Value handed to the ``GaussianNoise`` layer.

    Returns:
        Tuple ``(autoencoder, encoder)``. ``autoencoder`` is compiled with
        Adam(0.001), MSE on ``decoded`` and binary cross-entropy on
        ``label_output``. ``encoder`` maps the same input to the 64-unit
        code, shares its layers with ``autoencoder`` and is not compiled.
    """
    layers = tf.keras.layers

    # Encoder: normalise, add Gaussian noise, compress to a 64-unit code.
    inputs = layers.Input(input_dim)
    code = layers.BatchNormalization()(inputs)
    code = layers.GaussianNoise(noise)(code)
    code = layers.Dense(64, activation='relu')(code)

    # Decoder: dropout on the code, then a linear map back to input_dim.
    reconstruction = layers.Dropout(0.2)(code)
    reconstruction = layers.Dense(input_dim, name='decoded')(reconstruction)

    # Classification head stacked on the reconstruction.
    labels = layers.Dense(32, activation='relu')(reconstruction)
    labels = layers.BatchNormalization()(labels)
    labels = layers.Dropout(0.2)(labels)
    labels = layers.Dense(output_dim, activation='sigmoid', name='label_output')(labels)

    encoder = tf.keras.models.Model(inputs=inputs, outputs=code)
    autoencoder = tf.keras.models.Model(inputs=inputs, outputs=[reconstruction, labels])

    # Losses are keyed by output layer name.
    losses = {'decoded': 'mse', 'label_output': 'binary_crossentropy'}
    autoencoder.compile(optimizer=tf.keras.optimizers.Adam(0.001), loss=losses)
    return autoencoder, encoder

In [10]:
# Selects which pretrained network (and matching encoder weights) the later cells load.
NN_NAME = 'mlp' # 1dcnn, resnet, mlp

In [11]:
# Feature matrix: one row per training sample, one column per entry of `features`.
X = train[features].values

# Multitarget labels: one 0/1 column per entry of resp_cols (1 where the response is > 0).
y = (train[resp_cols] > 0).astype('int').values

# Column means of every feature except the first; used at inference to replace NaNs.
f_mean = train[features[1:]].values.mean(axis=0)

In [12]:
autoencoder, encoder = create_autoencoder(X.shape[-1], y.shape[-1], noise=0.1)

# Pretrained encoder checkpoint for each supported NN_NAME.
encoder_weights = {
    'mlp': '../input/jane-street-mlp-stratifiedgroupkfold/encoder.hdf5',
    'resnet': '../input/jane-street-resnet-cv/encoder.hdf5',
    '1dcnn': '../input/jane-street-1dcnn-cv/encoder.hdf5',
}

# Any other NN_NAME loads nothing and leaves the encoder with its initial weights.
if NN_NAME in encoder_weights:
    encoder.load_weights(encoder_weights[NN_NAME])

# The encoder is frozen before being used inside the downstream networks.
encoder.trainable = False

# NN

In [13]:
def create_1dcnn(input_dim, output_dim, encoder):
    """Build and compile the 1D-CNN classifier on raw plus encoded features.

    Args:
        input_dim: Passed to ``Input`` as the shape of the raw feature vector.
        output_dim: Number of sigmoid output units.
        encoder: Callable Keras model applied to the raw input; its output is
            concatenated with the raw features.

    Returns:
        A ``tf.keras`` model compiled with SWA-wrapped RectifiedAdam (lr 1e-3),
        label-smoothed binary cross-entropy and an ``auc`` metric.
    """
    layers = tf.keras.layers

    # input
    raw = layers.Input(input_dim)

    # use both raw and encoded features
    encoded = encoder(raw)
    h = layers.Concatenate()([encoded, raw])

    # normalize
    h = layers.BatchNormalization()(h)

    # 1dcnn: project to 4096 units, view them as 256 steps x 16 channels,
    # then convolve and pool along the step axis
    h = layers.Dense(4096, activation='relu')(h)
    h = layers.Reshape((256, 16))(h)
    h = layers.Conv1D(filters=16, kernel_size=7, strides=1, activation='relu')(h)
    h = layers.MaxPooling1D(pool_size=2)(h)
    h = layers.Flatten()(h)

    # ffn: two dense stages of 256 and 128 units
    for width in (256, 128):
        h = layers.Dense(width, activation='relu')(h)
        h = layers.BatchNormalization()(h)
        h = layers.GaussianNoise(0.01)(h)
        h = layers.Dropout(0.2)(h)
    outputs = layers.Dense(output_dim, activation='sigmoid')(h)

    model = tf.keras.models.Model(inputs=raw, outputs=outputs)

    # compile
    optimizer = tfa.optimizers.SWA(tfa.optimizers.RectifiedAdam(learning_rate=1e-03))
    model.compile(
        optimizer=optimizer,
        loss=tf.keras.losses.BinaryCrossentropy(label_smoothing=1e-02),
        metrics=[tf.keras.metrics.AUC(name = 'auc')],
    )
    return model

In [14]:
def create_resnet(n_features, n_labels, encoder, label_smoothing = 0.0005):
    """Build and compile the three-head residual-style classifier.

    Args:
        n_features: Width of the raw feature vector.
        n_labels: Number of sigmoid output units.
        encoder: Callable Keras model applied to the raw input; its output is
            concatenated with the Head1 output before Head2.
        label_smoothing: Smoothing factor for the binary cross-entropy loss.

    Returns:
        A ``tf.keras`` model compiled with SWA-wrapped RectifiedAdam (lr 1e-3),
        label-smoothed binary cross-entropy and the ``'AUC'`` metric.
    """
    layers = tf.keras.layers

    raw_in = layers.Input(shape=(n_features,))
    encoded = encoder(raw_in)

    # Head1: raw features -> 256 units.
    head_1 = tf.keras.Sequential(
        [
            layers.BatchNormalization(),
            layers.Dense(512, activation="elu"),
            layers.BatchNormalization(),
            layers.Dropout(0.4),
            layers.Dense(256, activation="elu"),
        ],
        name='Head1',
    )
    head_1_out = head_1(raw_in)
    head_2_in = layers.Concatenate()([encoded, head_1_out])

    # Head2: encoder output + Head1 output -> 256 units.
    head_2 = tf.keras.Sequential(
        [
            layers.BatchNormalization(),
            layers.Dense(512, activation="relu"),
            layers.BatchNormalization(),
            layers.Dropout(0.3),
            layers.Dense(512, activation="elu"),
            layers.BatchNormalization(),
            layers.Dropout(0.3),
            layers.Dense(256, activation="relu"),
            layers.BatchNormalization(),
            layers.Dropout(0.3),
            layers.Dense(256, activation="elu"),
        ],
        name='Head2',
    )
    head_2_out = head_2(head_2_in)

    # Skip connection: element-wise average of the two 256-unit head outputs.
    skip_avg = layers.Average()([head_1_out, head_2_out])

    # Head3: SELU stack, L2-normalised along axis 1, then the sigmoid outputs.
    head_3 = tf.keras.Sequential(
        [
            layers.BatchNormalization(),
            layers.Dense(256, kernel_initializer='lecun_normal', activation='selu'),
            layers.BatchNormalization(),
            layers.Dropout(0.2),
            layers.Dense(128, kernel_initializer='lecun_normal', activation='selu'),
            layers.BatchNormalization(),
            layers.Dropout(0.2),
            layers.Lambda(lambda t: tf.math.l2_normalize(t, axis=1), name='l2_norm'),
            layers.Dense(n_labels, activation="sigmoid"),
        ],
        name='Head3',
    )
    output = head_3(skip_avg)

    model = tf.keras.models.Model(inputs=[raw_in], outputs=output)
    optimizer = tfa.optimizers.SWA(tfa.optimizers.RectifiedAdam(learning_rate=1e-03))
    model.compile(
        optimizer=optimizer,
        loss=tf.keras.losses.BinaryCrossentropy(label_smoothing=label_smoothing),
        metrics=['AUC'],
    )

    return model

In [15]:
def create_mlp(
    num_columns, num_labels, hidden_units, dropout_rates, label_smoothing, learning_rate
):
    """Build and compile a plain MLP classifier.

    Args:
        num_columns: Width of the input feature vector.
        num_labels: Number of sigmoid output units.
        hidden_units: Sequence of hidden layer widths, one per dense stage.
        dropout_rates: Sequence of dropout rates; index 0 is applied to the
            normalised input and index ``k + 1`` after hidden stage ``k``, so
            it needs ``len(hidden_units) + 1`` entries.
        label_smoothing: Smoothing factor for the binary cross-entropy loss.
        learning_rate: Learning rate for the RectifiedAdam optimizer.

    Returns:
        The compiled ``tf.keras`` model, tracking a metric named ``"AUC"``.
    """
    layers = tf.keras.layers

    inp = layers.Input(shape=(num_columns,))
    h = layers.BatchNormalization()(inp)
    h = layers.Dropout(dropout_rates[0])(h)

    # Hidden stages: Dense -> BatchNorm -> swish -> Dropout.
    for stage, units in enumerate(hidden_units, start=1):
        h = layers.Dense(units)(h)
        h = layers.BatchNormalization()(h)
        h = layers.Activation(tf.keras.activations.swish)(h)
        h = layers.Dropout(dropout_rates[stage])(h)

    # Linear projection to the labels followed by a separate sigmoid activation.
    logits = layers.Dense(num_labels)(h)
    out = layers.Activation("sigmoid")(logits)

    model = tf.keras.models.Model(inputs=inp, outputs=out)
    model.compile(
        optimizer=tfa.optimizers.RectifiedAdam(learning_rate=learning_rate),
        loss=tf.keras.losses.BinaryCrossentropy(label_smoothing=label_smoothing),
        metrics=tf.keras.metrics.AUC(name="AUC"),
    )

    return model

In [16]:
%%time

if NN_NAME == '1dcnn':
    # Rebuild the 1D-CNN once per fold and restore that fold's weights.
    weights_dir = Path('../input/jane-street-1dcnn-cv')
    models = []

    for fold in range(FOLDS):
        tf.keras.backend.clear_session()
        model = create_1dcnn(X.shape[-1], y.shape[-1], encoder)
        model.load_weights(weights_dir / f'model_{SEED}_{fold}.hdf5')
        models.append(model)

    # NOTE(review): every fold is loaded, but only the last fold's model is kept for inference.
    models = [models[-1]]

CPU times: user 5 µs, sys: 0 ns, total: 5 µs
Wall time: 10.3 µs


In [17]:
%%time

if NN_NAME == 'resnet':
    # Rebuild the resnet once per fold and restore that fold's weights.
    weights_dir = Path('../input/jane-street-resnet-cv')
    models = []

    for fold in range(FOLDS):
        tf.keras.backend.clear_session()
        model = create_resnet(X.shape[-1], y.shape[-1], encoder)
        model.load_weights(weights_dir / f'model_{SEED}_{fold}.hdf5')
        models.append(model)

    # NOTE(review): every fold is loaded, but only the last fold's model is kept for inference.
    models = [models[-1]]

CPU times: user 3 µs, sys: 2 µs, total: 5 µs
Wall time: 10 µs


In [18]:
%%time

if NN_NAME == 'mlp':
    # The MLP is restored from one full-model file, so no per-fold rebuild happens here.
    mlp_model_path = '../input/jane-street-mlp-stratifiedgroupkfold/model.h5'
    model = tf.keras.models.load_model(mlp_model_path)
    models = [model]

CPU times: user 236 ms, sys: 6.91 ms, total: 242 ms
Wall time: 289 ms


In [19]:
import janestreet

f = np.median   # reduces the fold-averaged NN outputs to a single score
th = 0.500      # decision threshold applied to the blended score

env = janestreet.make_env()
for (test_df, pred_df) in tqdm(env.iter_test()):
    # Rows whose weight is not strictly positive get action 0 without running any model.
    if not test_df['weight'].item() > 0:
        pred_df.action = 0
        env.predict(pred_df)
        continue

    x_tt = test_df.loc[:, features].values

    # GBDT inference with treelite (runs before the NaN imputation below)
    batch = treelite_runtime.Batch.from_npy2d(x_tt)
    xgb_pred = predictor.predict(batch)

    # NN inference: NaNs in every column except the first are replaced in place
    # by the training means (NaN -> 0 via nan_to_num, plus mask * mean)
    tail = x_tt[:, 1:]
    if np.isnan(tail.sum()):
        x_tt[:, 1:] = np.nan_to_num(tail) + np.isnan(tail) * f_mean

    nn_outputs = [model(x_tt, training = False).numpy() for model in models]
    pred = f(np.mean(nn_outputs, axis=0))

    # ensemble: 90% NN score, 10% GBDT score, thresholded at th
    blended = 0.9*pred + 0.1*xgb_pred
    pred_df.action = np.where(blended >= th, 1, 0).astype(int)
    env.predict(pred_df)

15219it [03:37, 70.00it/s]
