In [1]:
import gc  
import os  
import time  
import warnings 
from itertools import combinations  
from warnings import simplefilter 
import joblib  
import lightgbm as lgb  
import numpy as np  
import pandas as pd  
from sklearn.metrics import mean_absolute_error 
from sklearn.model_selection import KFold, TimeSeriesSplit  
warnings.filterwarnings("ignore")
simplefilter(action="ignore", category=pd.errors.PerformanceWarning)
is_offline = False 
is_train = True  
is_infer = True 
max_lookback = np.nan 
split_day = 435  




# Optive Trading Competition Light Gradient Boosting Machine






In [2]:
df = pd.read_csv("/kaggle/input/optiver-trading-at-the-close/train.csv")
df = df.dropna(subset=["target"])
df.reset_index(drop=True, inplace=True)
df_shape = df.shape

# Memory Optimization

In [3]:
def reduce_mem_usage(df, verbose=0):
    start_mem = df.memory_usage().sum() / 1024**2
    for col in df.columns:
        col_type = df[col].dtype
        if col_type != object:
            c_min = df[col].min()
            c_max = df[col].max()
            
            if str(col_type)[:3] == "int":
                if c_min > np.iinfo(np.int8).min and c_max < np.iinfo(np.int8).max:
                    df[col] = df[col].astype(np.int8)
                elif c_min > np.iinfo(np.int16).min and c_max < np.iinfo(np.int16).max:
                    df[col] = df[col].astype(np.int16)
                elif c_min > np.iinfo(np.int32).min and c_max < np.iinfo(np.int32).max:
                    df[col] = df[col].astype(np.int32)
                elif c_min > np.iinfo(np.int64).min and c_max < np.iinfo(np.int64).max:
                    df[col] = df[col].astype(np.int64)
            else:
               
                if c_min > np.finfo(np.float16).min and c_max < np.finfo(np.float16).max:
                    df[col] = df[col].astype(np.float32)
                elif c_min > np.finfo(np.float32).min and c_max < np.finfo(np.float32).max:
                    df[col] = df[col].astype(np.float32)
                else:
                    df[col] = df[col].astype(np.float32)
    if verbose:
        logger.info(f"Memory usage of dataframe is {start_mem:.2f} MB")
        end_mem = df.memory_usage().sum() / 1024**2
        logger.info(f"Memory usage after optimization is: {end_mem:.2f} MB")
        decrease = 100 * (start_mem - end_mem) / start_mem
        logger.info(f"Decreased by {decrease:.2f}%")
    return df


 # Parallel Triplet Imbalance Calculation

In [4]:
from numba import njit, prange

@njit(parallel=True)
def compute_triplet_imbalance(df_values, comb_indices):
    num_rows = df_values.shape[0]
    num_combinations = len(comb_indices)
    imbalance_features = np.empty((num_rows, num_combinations))
    for i in prange(num_combinations):
        a, b, c = comb_indices[i]
        for j in range(num_rows):
            max_val = max(df_values[j, a], df_values[j, b], df_values[j, c])
            min_val = min(df_values[j, a], df_values[j, b], df_values[j, c])
            mid_val = df_values[j, a] + df_values[j, b] + df_values[j, c] - min_val - max_val
            
            if mid_val == min_val:
                imbalance_features[j, i] = np.nan
            else:
                imbalance_features[j, i] = (max_val - mid_val) / (mid_val - min_val)

    return imbalance_features

def calculate_triplet_imbalance_numba(price, df):
    df_values = df[price].values
    comb_indices = [(price.index(a), price.index(b), price.index(c)) for a, b, c in combinations(price, 3)]
    features_array = compute_triplet_imbalance(df_values, comb_indices)
    columns = [f"{a}_{b}_{c}_imb2" for a, b, c in combinations(price, 3)]
    features = pd.DataFrame(features_array, columns=columns)
    return features


# Feature Generation Functions 

In [5]:
def imbalance_features(df):
    import cudf
    df = cudf.from_pandas(df)
    # Define lists of price and size-related column names
    prices = ["reference_price", "far_price", "near_price", "ask_price", "bid_price", "wap"]
    sizes = ["matched_size", "bid_size", "ask_size", "imbalance_size"]
    df["volume"] = df.eval("ask_size + bid_size")
    df["mid_price"] = df.eval("ask_price + bid_price")/2
    df["liquidity_imbalance"] = df.eval("(bid_size-ask_size)/(bid_size+ask_size)")
    df["matched_imbalance"] = df.eval("imbalance_size-matched_size")/df.eval("matched_size+imbalance_size")
    df["size_imbalance"] = df.eval("bid_size / ask_size")
    ####################################NEW V5 FEATURES################################################
    df['imb_s2'] = df.eval('(imbalance_size - matched_size) / (matched_size + imbalance_size)') 
    #relative order book imbalance between unmatched or unexecuted orders (imbalance_size) and executed orders (matched_size) in a financial market.
         # Calculate and add imbalance feature 1 (bid_ask_spread)
    df['bid_ask_spread'] = df.eval('(bid_size - ask_size) / (bid_size + ask_size)')  
    #bid-ask spread imbalance, which is a measure of the relative order book depth or liquidity in a financial market.

    ###Binary Features for Nasdaq###
    df['LOC_CLOSE'] = df['seconds_in_bucket'].apply(lambda x: 1 if x >= 480 else 0)
    df['CLOSING_INFO'] = df['seconds_in_bucket'].apply(lambda x: 1 if x >= 300 else 0)  
    
    ######################################################################################################
    for c in combinations(prices, 2):
        df[f"{c[0]}_{c[1]}_imb"] = df.eval(f"({c[0]} - {c[1]})/({c[0]} + {c[1]})")

    df["imbalance_momentum"] = df.groupby(['stock_id'])['imbalance_size'].diff(periods=1) / df['matched_size']
    df["price_spread"] = df["ask_price"] - df["bid_price"]
    df["spread_intensity"] = df.groupby(['stock_id'])['price_spread'].diff()
    df['price_pressure'] = df['imbalance_size'] * (df['ask_price'] - df['bid_price'])
    df['market_urgency'] = df['price_spread'] * df['liquidity_imbalance']
    df['depth_pressure'] = (df['ask_size'] - df['bid_size']) * (df['far_price'] - df['near_price'])
   
    for col in ['matched_size', 'imbalance_size', 'reference_price', 'imbalance_buy_sell_flag']:
        for window in [1, 2, 3, 10]:
            df[f"{col}_shift_{window}"] = df.groupby('stock_id')[col].shift(window)
            df[f"{col}_ret_{window}"] = df.groupby('stock_id')[col].pct_change(window)
    
    
    for col in ['ask_price', 'bid_price', 'ask_size', 'bid_size']:
        for window in [1, 2, 3, 10]:
            df[f"{col}_diff_{window}"] = df.groupby("stock_id")[col].diff(window)
    df = df.to_pandas()
   
    return df.replace([np.inf, -np.inf], 0)

def numba_imb_features(df):
    prices = ["reference_price", "far_price", "near_price", "ask_price", "bid_price", "wap"]
    sizes = ["matched_size", "bid_size", "ask_size", "imbalance_size"]
    
    for func in ["mean", "std", "skew", "kurt"]:
        df[f"all_prices_{func}"] = df[prices].agg(func, axis=1)
        df[f"all_sizes_{func}"] = df[sizes].agg(func, axis=1)
    for c in [['ask_price', 'bid_price', 'wap', 'reference_price'], sizes]:
        triplet_feature = calculate_triplet_imbalance_numba(c, df)
        df[triplet_feature.columns] = triplet_feature.values
    return df


####################################NEEDS DEBUGING FEATURES################################################
def data_clean(df):
    df = df.dropna(subset=["wap"])
######################################################################################################

def time_features(df): #ADDED INFO
    ###NEW V3.1 FEATURES###
    df["dow"] = df["date_id"] % 5  # Day of the week
    df["seconds"] = df["seconds_in_bucket"] % 60  # Seconds
    df["minute"] = df["seconds_in_bucket"] // 60  # Minutes
    
    df= df.fillna(method='ffill').fillna(0) #fillna
    
    # Map global features to the DataFrame
    for key, value in global_stock_id_feats.items():
        df[f"global_{key}"] = df["stock_id"].map(value.to_dict())

    return df



def generate_all_features(df):
    cols = [c for c in df.columns if c not in ["row_id", "time_id", "target"]]
    df = df[cols]
    df = imbalance_features(df) 
    df = numba_imb_features(df)
    df = time_features(df) 
    #df = data_clean(df) ###NEW V3.2 FEATURES - Attempt debug###
    
    gc.collect()  
    feature_name = [i for i in df.columns if i not in ["row_id", "target", "time_id", "date_id"]]
    
    return df[feature_name]

In [6]:
weights = [
    0.004, 0.001, 0.002, 0.006, 0.004, 0.004, 0.002, 0.006, 0.006, 0.002, 0.002, 0.008,
    0.006, 0.002, 0.008, 0.006, 0.002, 0.006, 0.004, 0.002, 0.004, 0.001, 0.006, 0.004,
    0.002, 0.002, 0.004, 0.002, 0.004, 0.004, 0.001, 0.001, 0.002, 0.002, 0.006, 0.004,
    0.004, 0.004, 0.006, 0.002, 0.002, 0.04 , 0.002, 0.002, 0.004, 0.04 , 0.002, 0.001,
    0.006, 0.004, 0.004, 0.006, 0.001, 0.004, 0.004, 0.002, 0.006, 0.004, 0.006, 0.004,
    0.006, 0.004, 0.002, 0.001, 0.002, 0.004, 0.002, 0.008, 0.004, 0.004, 0.002, 0.004,
    0.006, 0.002, 0.004, 0.004, 0.002, 0.004, 0.004, 0.004, 0.001, 0.002, 0.002, 0.008,
    0.02 , 0.004, 0.006, 0.002, 0.02 , 0.002, 0.002, 0.006, 0.004, 0.002, 0.001, 0.02,
    0.006, 0.001, 0.002, 0.004, 0.001, 0.002, 0.006, 0.006, 0.004, 0.006, 0.001, 0.002,
    0.004, 0.006, 0.006, 0.001, 0.04 , 0.006, 0.002, 0.004, 0.002, 0.002, 0.006, 0.002,
    0.002, 0.004, 0.006, 0.006, 0.002, 0.002, 0.008, 0.006, 0.004, 0.002, 0.006, 0.002,
    0.004, 0.006, 0.002, 0.004, 0.001, 0.004, 0.002, 0.004, 0.008, 0.006, 0.008, 0.002,
    0.004, 0.002, 0.001, 0.004, 0.004, 0.004, 0.006, 0.008, 0.004, 0.001, 0.001, 0.002,
    0.006, 0.004, 0.001, 0.002, 0.006, 0.004, 0.006, 0.008, 0.002, 0.002, 0.004, 0.002,
    0.04 , 0.002, 0.002, 0.004, 0.002, 0.002, 0.006, 0.02 , 0.004, 0.002, 0.006, 0.02,
    0.001, 0.002, 0.006, 0.004, 0.006, 0.004, 0.004, 0.004, 0.004, 0.002, 0.004, 0.04,
    0.002, 0.008, 0.002, 0.004, 0.001, 0.004, 0.006, 0.004,
]

weights = {int(k):v for k,v in enumerate(weights)}

## Data Splitting

In [7]:
if is_offline:
    
    df_train = df[df["date_id"] <= split_day]
    df_valid = df[df["date_id"] > split_day]
    print("Offline mode")
    print(f"train : {df_train.shape}, valid : {df_valid.shape}")
else:
    df_train = df
    print("Online mode")


Online mode


In [8]:
if is_train:
    global_stock_id_feats = {
        "median_size": df_train.groupby("stock_id")["bid_size"].median() + df_train.groupby("stock_id")["ask_size"].median(),
        "std_size": df_train.groupby("stock_id")["bid_size"].std() + df_train.groupby("stock_id")["ask_size"].std(),
        "ptp_size": df_train.groupby("stock_id")["bid_size"].max() - df_train.groupby("stock_id")["bid_size"].min(),
        "median_price": df_train.groupby("stock_id")["bid_price"].median() + df_train.groupby("stock_id")["ask_price"].median(),
        "std_price": df_train.groupby("stock_id")["bid_price"].std() + df_train.groupby("stock_id")["ask_price"].std(),
        "ptp_price": df_train.groupby("stock_id")["bid_price"].max() - df_train.groupby("stock_id")["ask_price"].min(),
    }
    if is_offline:
        df_train_feats = generate_all_features(df_train)
        print("Build Train Feats Finished.")
        df_valid_feats = generate_all_features(df_valid)
        print("Build Valid Feats Finished.")
        df_valid_feats = reduce_mem_usage(df_valid_feats)
    else:
        df_train_feats = generate_all_features(df_train)
        print("Build Online Train Feats Finished.")

    df_train_feats = reduce_mem_usage(df_train_feats)


Build Online Train Feats Finished.


In [9]:
df_train_feats.tail(10)

Unnamed: 0,stock_id,seconds_in_bucket,imbalance_size,imbalance_buy_sell_flag,reference_price,matched_size,far_price,near_price,bid_price,bid_size,...,bid_size_ask_size_imbalance_size_imb2,dow,seconds,minute,global_median_size,global_std_size,global_ptp_size,global_median_price,global_std_price,global_ptp_price
5237882,190,540,466320.2,-1,0.999295,39294716.0,0.998983,0.999139,0.999295,19725.859375,...,1.989666,0,0,9,29245.080078,117374.4,826622.2,2.0001,0.004897,0.020989
5237883,191,540,36937780.0,1,0.995983,608326144.0,0.999086,0.997746,0.995913,84744.0,...,1003.650452,0,0,9,59573.050781,1248712.0,30287730.0,2.000334,0.005736,0.030926
5237884,192,540,67162.62,1,1.000762,11790079.0,1.000834,1.000762,1.000545,168412.859375,...,36.795734,0,0,9,27031.480469,58276.11,506346.1,2.000119,0.003898,0.017389
5237885,193,540,6500948.0,1,1.001599,80357528.0,1.002908,1.002326,1.001454,413142.25,...,122.713997,0,0,9,55169.0,183620.2,4570198.0,2.000003,0.004552,0.034248
5237886,194,540,262271.5,-1,1.001504,3922295.5,1.000677,1.000677,1.001504,20829.199219,...,1.032201,0,0,9,30714.400391,128342.3,7549251.0,2.000059,0.005415,0.024909
5237887,195,540,2440723.0,-1,1.000317,28280362.0,0.999734,0.999734,1.000317,32257.039062,...,7.374204,0,0,9,51941.550781,98218.03,2761659.0,1.99993,0.003051,0.014076
5237888,196,540,349510.5,-1,1.000643,9187699.0,1.000129,1.000386,1.000643,205108.40625,...,1.29259,0,0,9,42476.949219,78070.06,459657.4,2.000042,0.003416,0.017398
5237889,197,540,0.0,0,0.995789,12725436.0,0.995789,0.995789,0.995789,16790.660156,...,9.722528,0,0,9,30070.039062,71964.17,1575294.0,1.999984,0.004696,0.020387
5237890,198,540,1000899.0,1,0.99921,94773272.0,0.99921,0.99921,0.99897,125631.71875,...,0.608175,0,0,9,304739.25,354682.8,2159163.0,1.999917,0.003146,0.015738
5237891,199,540,1884286.0,-1,1.002129,24073678.0,1.000859,1.001494,1.002129,250081.4375,...,31.627888,0,0,9,114127.101562,194210.1,4564502.0,2.000128,0.004325,0.022793


In [10]:
df_train_feats.sample(n=10)

Unnamed: 0,stock_id,seconds_in_bucket,imbalance_size,imbalance_buy_sell_flag,reference_price,matched_size,far_price,near_price,bid_price,bid_size,...,bid_size_ask_size_imbalance_size_imb2,dow,seconds,minute,global_median_size,global_std_size,global_ptp_size,global_median_price,global_std_price,global_ptp_price
537411,85,420,1017930.0,1,1.002664,8170532.0,1.012725,1.006584,1.002534,3222.659912,...,367.35202,0,0,7,22173.580078,71459.914062,957145.8,1.999738,0.007886,0.047194
3941563,15,80,26259800.0,1,1.000607,20988820.0,1.0025,1.001852,1.000405,14240.160156,...,1909.377563,3,20,1,35564.878906,148938.40625,3595858.0,2.000153,0.005008,0.037349
1644070,65,130,1665239.0,-1,1.00042,20197050.0,0.993843,0.996013,1.00042,26587.839844,...,8.123763,3,10,2,147240.3125,198862.453125,1453068.0,1.999957,0.003458,0.018259
342511,11,250,1092035.0,1,1.000442,15968730.0,0.996631,0.997155,1.000278,243.880005,...,10.506478,2,10,4,23147.960938,188907.625,2272204.0,2.000454,0.008526,0.047561
1824918,67,510,83197.99,1,0.999579,69653600.0,0.999579,0.999579,0.999463,17216.0,...,9.583162,4,30,8,28384.0,308200.34375,5946907.0,2.000113,0.006094,0.045951
4109470,122,220,1601953.0,1,1.003004,25946710.0,1.001581,1.001581,1.002414,81685.0,...,39.628204,3,40,3,485814.625,528909.125,5727107.0,2.00014,0.004296,0.021397
2592702,86,130,108790.4,1,1.00344,839788.9,1.000042,1.000042,1.003057,524.0,...,13.752203,0,10,2,20821.800781,153601.921875,3018322.0,1.999994,0.009851,0.056946
4914263,171,310,549780.4,1,1.002974,14032970.0,1.021199,1.009362,1.002692,129997.140625,...,3.358819,1,10,5,30478.5,70335.523438,937948.4,1.999991,0.003932,0.016723
4764463,170,520,5282110.0,-1,1.002083,24763510.0,1.001056,1.001056,1.002083,43900.0,...,40.047791,2,40,8,38630.71875,74916.96875,694996.1,1.999896,0.003406,0.016535
776159,47,30,6437282.0,-1,0.999182,1750707.0,0.999162,0.999534,0.999072,9049.0,...,343.336151,3,30,0,29909.050781,65103.703125,801178.8,1.999979,0.004039,0.019678


In [11]:
# Assuming df_train_feats is your DataFrame
sampled_rows = df_train_feats[["seconds_in_bucket",'LOC_CLOSE', 'CLOSING_INFO']].sample(n=10)

sampled_rows


Unnamed: 0,seconds_in_bucket,LOC_CLOSE,CLOSING_INFO
5090658,330,0,1
907996,150,0,0
4686144,460,0,1
4621659,530,1,1
275462,50,0,0
3793986,400,0,1
4655083,0,0,0
2265494,190,0,0
3431122,400,0,1
284880,540,1,1


In [12]:
import numpy as np
import lightgbm as lgb
from sklearn.metrics import mean_absolute_error
import gc

lgb_params = {
    "objective": "mae",
    "n_estimators": 6000, #V5
    "num_leaves": 256, #V5
    "subsample": 0.6,
    'learning_rate': 0.0212375595260159, 
    "learning_rate": 0.00871,
    "colsample_bytree" : 0.8466335026104166,
    "n_jobs": 4,
    "device": "gpu",
    "verbosity": -1,
    "importance_type": "gain",
}
feature_name = list(df_train_feats.columns)
print(f"Feature length = {len(feature_name)}")

num_folds = 5 
fold_size = 480 // num_folds
gap = 5

models = []
scores = []

model_save_path = 'model_path_save' 
if not os.path.exists(model_save_path):
    os.makedirs(model_save_path)

date_ids = df_train['date_id'].values

for i in range(num_folds):
    start = i * fold_size
    end = start + fold_size
    if i < num_folds - 1:  # No need to purge after the last fold
        purged_start = end - 2
        purged_end = end + gap + 2
        train_indices = (date_ids >= start) & (date_ids < purged_start) | (date_ids > purged_end)
    else:
        train_indices = (date_ids >= start) & (date_ids < end)
    
    test_indices = (date_ids >= end) & (date_ids < end + fold_size)
    
    df_fold_train = df_train_feats[train_indices]
    df_fold_train_target = df_train['target'][train_indices]
    df_fold_valid = df_train_feats[test_indices]
    df_fold_valid_target = df_train['target'][test_indices]

    print(f"Fold {i+1} Model Training")
    
    # Train a LightGBM model for the current fold
    lgb_model = lgb.LGBMRegressor(**lgb_params)
    lgb_model.fit(
        df_fold_train[feature_name],
        df_fold_train_target,
        eval_set=[(df_fold_valid[feature_name], df_fold_valid_target)],
        callbacks=[
            lgb.callback.early_stopping(stopping_rounds=100),
            lgb.callback.log_evaluation(period=100),
        ],
    )

    models.append(lgb_model)
    # Save the model to a file
    model_filename = os.path.join(model_save_path, f'doblez_{i+1}.txt')
    lgb_model.booster_.save_model(model_filename)
    print(f"Model for fold {i+1} saved to {model_filename}")

    # Evaluate model performance on the validation set
    fold_predictions = lgb_model.predict(df_fold_valid[feature_name])
    fold_score = mean_absolute_error(fold_predictions, df_fold_valid_target)
    scores.append(fold_score)
    print(f"Fold {i+1} MAE: {fold_score}")

    # Free up memory by deleting fold specific variables
    del df_fold_train, df_fold_train_target, df_fold_valid, df_fold_valid_target
    gc.collect()

# Calculate the average best iteration from all regular folds
average_best_iteration = int(np.mean([model.best_iteration_ for model in models]))

# Update the lgb_params with the average best iteration
final_model_params = lgb_params.copy()
final_model_params['n_estimators'] = average_best_iteration

print(f"Training final model with average best iteration: {average_best_iteration}")

# Train the final model on the entire dataset
final_model = lgb.LGBMRegressor(**final_model_params)
final_model.fit(
    df_train_feats[feature_name],
    df_train['target'],
    callbacks=[
        lgb.callback.log_evaluation(period=100),
    ],
)

# Append the final model to the list of models
models.append(final_model)

# Save the final model to a file
final_model_filename = os.path.join(model_save_path, 'doblez-conjunto.txt')
final_model.booster_.save_model(final_model_filename)
print(f"Final model saved to {final_model_filename}")

# Now 'models' holds the trained models for each fold and 'scores' holds the validation scores
print(f"Average MAE across all folds: {np.mean(scores)}")


Feature length = 116
Fold 1 Model Training
Training until validation scores don't improve for 100 rounds
[100]	valid_0's l1: 7.10166
[200]	valid_0's l1: 7.05592
[300]	valid_0's l1: 7.03032
[400]	valid_0's l1: 7.00749
[500]	valid_0's l1: 6.98535
[600]	valid_0's l1: 6.96686
[700]	valid_0's l1: 6.95147
[800]	valid_0's l1: 6.93803
[900]	valid_0's l1: 6.92611
[1000]	valid_0's l1: 6.91585
[1100]	valid_0's l1: 6.90453
[1200]	valid_0's l1: 6.89585
[1300]	valid_0's l1: 6.88837
[1400]	valid_0's l1: 6.88115
[1500]	valid_0's l1: 6.87572
[1600]	valid_0's l1: 6.8683
[1700]	valid_0's l1: 6.86181
[1800]	valid_0's l1: 6.85528
[1900]	valid_0's l1: 6.84949
[2000]	valid_0's l1: 6.8461
[2100]	valid_0's l1: 6.84123
[2200]	valid_0's l1: 6.83651
[2300]	valid_0's l1: 6.83309
[2400]	valid_0's l1: 6.82959
[2500]	valid_0's l1: 6.82631
[2600]	valid_0's l1: 6.82293
[2700]	valid_0's l1: 6.8194
[2800]	valid_0's l1: 6.81606
[2900]	valid_0's l1: 6.81371
[3000]	valid_0's l1: 6.81079
[3100]	valid_0's l1: 6.80984
[3200]	v

In [13]:
def zero_sum(prices, volumes):
    std_error = np.sqrt(volumes)
    step = np.sum(prices) / np.sum(std_error)
    out = prices - std_error * step
    return out

if is_infer:
    import optiver2023
    env = optiver2023.make_env()
    iter_test = env.iter_test()
    counter = 0
    y_min, y_max = -64, 64
    qps, predictions = [], []
    cache = pd.DataFrame()

    # Weights for each fold model
    model_weights = [1/len(models)] * len(models) 
    
    for (test, revealed_targets, sample_prediction) in iter_test:
        now_time = time.time()
        cache = pd.concat([cache, test], ignore_index=True, axis=0)
        if counter > 0:
            cache = cache.groupby(['stock_id']).tail(21).sort_values(by=['date_id', 'seconds_in_bucket', 'stock_id']).reset_index(drop=True)
        feat = generate_all_features(cache)[-len(test):]

        # Generate predictions for each model and calculate the weighted average
        lgb_predictions = np.zeros(len(test))
        for model, weight in zip(models, model_weights):
            lgb_predictions += weight * model.predict(feat)

        lgb_predictions = zero_sum(lgb_predictions, test['bid_size'] + test['ask_size'])
        clipped_predictions = np.clip(lgb_predictions, y_min, y_max)
        sample_prediction['target'] = clipped_predictions
        env.predict(sample_prediction)
        counter += 1
        qps.append(time.time() - now_time)
        if counter % 10 == 0:
            print(counter, 'qps:', np.mean(qps))

    time_cost = 1.146 * np.mean(qps)


This version of the API is not optimized and should not be used to estimate the runtime of your code on the hidden test set.
10 qps: 0.7131677389144897
20 qps: 0.6911598443984985
30 qps: 0.6841653029123942
40 qps: 0.6851037085056305
50 qps: 0.6918981456756592
60 qps: 0.6890775124231975
70 qps: 0.6861701692853656
80 qps: 0.6850508689880371
90 qps: 0.6845424148771498
100 qps: 0.6872595691680908
110 qps: 0.6868800770152699
120 qps: 0.6859490811824799
130 qps: 0.6851266695902898
140 qps: 0.6860523785863604
150 qps: 0.6856607580184937
160 qps: 0.6849943429231644
