In [1]:
import warnings
warnings.filterwarnings('ignore')

from xgboost import XGBRegressor
import numpy as np
from numba import njit
import pandas as pd 
import os
import janestreet
import datatable as dt

In [2]:
features_to_drop=[
'feature_27',
'feature_28',
'feature_18',
'feature_17',
'feature_7',
'feature_8',
'feature_84', 
'feature_78',
'feature_72',
'feature_96',
'feature_114', 
'feature_90',
'feature_108', 
'feature_102',
'feature_32',
'feature_31',
'feature_22',
'feature_21',
'feature_12',
'feature_11',
'feature_120',
'feature_121',
'feature_55',
'feature_74',
'feature_116',
'feature_104',
'feature_98',
'feature_92',
'feature_110',
'feature_80',
'feature_86']

features = [f'feature_{x}' for x in range(0,130) if f'feature_{x}' not in features_to_drop]

no_zero_feature = [f for f in features if f != 'feature_0']

columns_to_keep = features + ['weight','date','resp']

columns_to_keep_test = features + ['weight']


lower = 0
upper = 10

TARGET='resp'

# Load data
def get_data():
    data = dt.fread('../input/jane-street-market-prediction/train.csv').to_pandas()[columns_to_keep]

    data['linear_weights']=1.0 / data.groupby("date")['resp'].std()[data.date].values
    
    data.dropna(inplace=True)
    
    return data

In [3]:
def tree_model():
    train = get_data()

    sample_weights_train = train['linear_weights'].values
    
    X_train = train[features].values
    y_train = train[TARGET].values
    
    tree_params = {'tree_method':'gpu_hist', 'gpu_id':0,
                   'learning_rate':0.05,'n_estimators':150,'n_jobs':-1,'verbose':False}
    
    tree_model = XGBRegressor(**tree_params).fit(X_train, y_train, sample_weight=sample_weights_train)
    
    return tree_model

In [4]:
@njit
def any_nan(array):
    return np.isnan(array.sum())

In [5]:
env = janestreet.make_env()
iter_test = env.iter_test()

tree_model = tree_model()

for (test_df, sample_prediction_df) in iter_test:
    
    try:
        b = any_nan(test_df.loc[:,columns_to_keep_test].values)
        if not b:
            w = test_df['weight'].item()
            if w > lower and w < upper:
                tree_resp_preds = tree_model.predict(test_df.loc[:,features].values)
                
                sample_prediction_df['action'] = (tree_resp_preds > 0).astype('int')
            else:
                sample_prediction_df['action'] = 0
        else:
            sample_prediction_df['action'] = 0
    except:
        sample_prediction_df['action'] = 0
        env.predict(sample_prediction_df)
        
        continue
        
    env.predict(sample_prediction_df)

Parameters: { verbose } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.


