In [1]:
import os
import polars as pl
import pandas as pd
import numpy as np
from catboost import CatBoostRegressor
from sklearn.model_selection import TimeSeriesSplit
from sklearn.metrics import mean_squared_error
from lightgbm import LGBMRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.impute import SimpleImputer
import copy
import kaggle_evaluation.default_inference_server

In [2]:
# 1. Load Training Data
# Location of the competition training file inside the Kaggle input mount.
train_path = "/kaggle/input/hull-tactical-market-prediction/train.csv"

# Read the CSV into a polars frame and show its first rows as plain text.
df_train = pl.read_csv(train_path)
train_preview = df_train.head()
print(train_preview)

shape: (5, 98)
┌─────────┬─────┬─────┬─────┬───┬──────┬─────────────────┬────────────────┬────────────────────────┐
│ date_id ┆ D1  ┆ D2  ┆ D3  ┆ … ┆ V9   ┆ forward_returns ┆ risk_free_rate ┆ market_forward_excess_ │
│ ---     ┆ --- ┆ --- ┆ --- ┆   ┆ ---  ┆ ---             ┆ ---            ┆ returns                │
│ i64     ┆ i64 ┆ i64 ┆ i64 ┆   ┆ str  ┆ f64             ┆ f64            ┆ ---                    │
│         ┆     ┆     ┆     ┆   ┆      ┆                 ┆                ┆ f64                    │
╞═════════╪═════╪═════╪═════╪═══╪══════╪═════════════════╪════════════════╪════════════════════════╡
│ 0       ┆ 0   ┆ 0   ┆ 0   ┆ … ┆ null ┆ -0.002421       ┆ 0.000301       ┆ -0.003038              │
│ 1       ┆ 0   ┆ 0   ┆ 0   ┆ … ┆ null ┆ -0.008495       ┆ 0.000303       ┆ -0.009114              │
│ 2       ┆ 0   ┆ 0   ┆ 0   ┆ … ┆ null ┆ -0.009624       ┆ 0.000301       ┆ -0.010243              │
│ 3       ┆ 0   ┆ 0   ┆ 0   ┆ … ┆ null ┆ 0.004662        ┆ 0.000299       ┆ 

In [3]:
# 2. Preprocessing
# The target plus the bookkeeping/return columns are kept out of the feature set.
target_col = 'market_forward_excess_returns'
ignore_cols = ['date_id', 'forward_returns', 'market_forward_excess_returns', 'risk_free_rate']
feature_cols = [name for name in df_train.columns if name not in ignore_cols]

# Target vector: go through pandas and flatten the single column to 1-D.
target_frame = df_train.select(target_col).to_pandas()
y = target_frame.to_numpy().ravel()

# Feature matrix as a pandas frame; anything that cannot be parsed as a
# number becomes NaN so the imputer below can fill it.
raw_features = df_train.select(feature_cols).to_pandas()
numeric_features = raw_features.apply(lambda column: pd.to_numeric(column, errors='coerce'))

# Replace every NaN with the mean of its column. The fitted imputer is kept
# at module level so the column means remain available after this cell.
imputer = SimpleImputer(strategy='mean')
X_imputed_array = imputer.fit_transform(numeric_features)
X = pd.DataFrame(X_imputed_array, columns=numeric_features.columns)

# Last 60 rows of forward returns, used to seed the volatility controller.
train_returns = df_train.tail(60).select('forward_returns').to_numpy().flatten()

In [4]:
# 4. Final Model Training
from sklearn.ensemble import StackingRegressor
from sklearn.linear_model import RidgeCV
from catboost import CatBoostRegressor
from xgboost import XGBRegressor
from lightgbm import LGBMRegressor

# 1. CatBoost hyper-parameters (GPU training on device "0", silent, no files written)
cat_params = dict(
    iterations=3000,
    learning_rate=0.01,
    depth=8,
    random_state=42,
    loss_function="RMSE",
    task_type="GPU",
    devices="0",
    allow_writing_files=False,
    verbose=False,
)

# 2. XGBoost hyper-parameters (histogram tree method on the CUDA device)
xgb_params = dict(
    n_estimators=3000,
    learning_rate=0.01,
    max_depth=8,
    device="cuda",
    tree_method="hist",
    random_state=42,
    n_jobs=-1,
)

# 3. LightGBM hyper-parameters (GPU device, logging silenced)
lgbm_params = dict(
    n_estimators=3000,
    learning_rate=0.01,
    max_depth=8,
    device="gpu",
    random_state=42,
    n_jobs=-1,
    verbosity=-1,
)

# 4. Stacking: three boosted-tree base learners blended by a RidgeCV
#    meta-learner that picks its regularisation strength from the alpha grid.
base_learners = [
    ("CatBoost", CatBoostRegressor(**cat_params)),
    ("XGB", XGBRegressor(**xgb_params)),
    ("LGBM", LGBMRegressor(**lgbm_params)),
]
meta_learner = RidgeCV(alphas=[0.1, 1.0, 5.0, 10.0, 50.0, 100.0])

final_model = StackingRegressor(
    estimators=base_learners,
    final_estimator=meta_learner,
    cv=5,               # 5-fold split used by the stacker
    n_jobs=1,           # base learners are fitted one after another
    passthrough=False,  # meta-learner sees base predictions only, not raw features
    verbose=1,
)

# Fit on the full imputed training matrix.
final_model.fit(X, y)
print("Final model training completed.")

[Parallel(n_jobs=1)]: Done   5 out of   5 | elapsed:  6.9min finished
Potential solutions:
- Use a data structure that matches the device ordinal in the booster.
- Set the device for booster before call to inplace_predict.


[Parallel(n_jobs=1)]: Done   5 out of   5 | elapsed:   53.7s finished


Final model training completed.


[Parallel(n_jobs=1)]: Done   5 out of   5 | elapsed:  1.1min finished


In [5]:
# 5. Volatility Management Class
class VolatilityController:
    """Scale a proposed weight when recent strategy volatility runs too hot.

    The controller keeps a rolling window of ``{'market_ret', 'weight'}``
    records. On every call it compares the standard deviation of
    ``weight * market_ret`` (the strategy) with the standard deviation of
    ``market_ret`` (the benchmark) over that window, and shrinks the proposed
    weight when the ratio is above ``target_ratio``.

    Parameters
    ----------
    window_size : int
        Maximum number of records kept in ``history``.
    target_ratio : float
        Largest strategy/benchmark volatility ratio tolerated before scaling.
    initial_returns : iterable of float, optional
        Returns used to pre-fill the window; each is recorded with weight 1.0.
        Only the most recent ``window_size`` usable values are kept.
    """

    # Below this many records the ratio is considered too noisy to act on.
    MIN_HISTORY = 10
    # Benchmark volatility below this is treated as zero (avoids dividing by ~0).
    MIN_BENCH_VOL = 1e-7

    def __init__(self, window_size=60, target_ratio=1.15, initial_returns=None):
        self.window_size = window_size
        self.target_ratio = target_ratio
        self.history = []

        # Initialize buffer with the end of training data
        if initial_returns is not None:
            for ret in initial_returns:
                self._record(ret, 1.0)
        # A seed longer than the window must not inflate the window forever.
        self._trim()

    def _record(self, market_ret, weight):
        """Append one record, ignoring missing or non-finite values."""
        if market_ret is None or weight is None:
            return
        try:
            market_ret = float(market_ret)
            weight = float(weight)
        except (TypeError, ValueError):
            return
        # A single NaN/inf would turn both standard deviations into NaN and
        # silently switch the control off until it leaves the window.
        if not (np.isfinite(market_ret) and np.isfinite(weight)):
            return
        self.history.append({'market_ret': market_ret, 'weight': weight})

    def _trim(self):
        """Drop the oldest records so at most ``window_size`` remain."""
        excess = len(self.history) - self.window_size
        if excess > 0:
            del self.history[:excess]

    def calculate_safe_weight(self, raw_weight, current_lagged_return, last_weight):
        """Return ``raw_weight``, scaled down if recent volatility is too high.

        Parameters
        ----------
        raw_weight : float
            Weight proposed for the current step.
        current_lagged_return : float or None
            Realized market return of the previous step; ``None`` or a
            non-finite value leaves the history unchanged.
        last_weight : float
            Weight that was held while ``current_lagged_return`` was realized.

        Returns
        -------
        float
            ``raw_weight`` multiplied by a factor in (0, 1].
        """
        # Update history with realized return from the previous step
        self._record(current_lagged_return, last_weight)
        self._trim()

        # Warm-up check
        if len(self.history) < self.MIN_HISTORY:
            return raw_weight

        # Calculate Volatility Ratio
        market_rets = np.array([rec['market_ret'] for rec in self.history], dtype=float)
        weights = np.array([rec['weight'] for rec in self.history], dtype=float)
        strategy_rets = weights * market_rets

        vol_bench = np.std(market_rets)
        vol_strat = np.std(strategy_rets)

        if vol_bench < self.MIN_BENCH_VOL:
            return raw_weight

        current_ratio = vol_strat / vol_bench

        # Apply scaling if ratio exceeds target (1.15 to be safe for 1.20 limit)
        scaling_factor = 1.0
        if current_ratio > self.target_ratio:
            scaling_factor = self.target_ratio / current_ratio

        return raw_weight * scaling_factor

In [6]:
# Build the volatility controller, pre-filled with the tail of the training
# returns so it has a full window before the first inference step.
vol_manager = VolatilityController(
    initial_returns=train_returns,
    target_ratio=1.15,
    window_size=60,
)

# Weight assumed to have been held before the first call to predict().
last_submitted_weight = 1.0

# 6. Inference Function
def predict(test: pl.DataFrame) -> float:
    """Return the position weight for one inference step.

    Steps: select the training feature columns from ``test``, apply the same
    numeric coercion and fitted mean imputer that produced the training
    matrix, predict the excess return with ``final_model``, map it to (0, 2)
    with a scaled sigmoid (5x steeper for negative predictions), pass the
    result through ``vol_manager`` and clip it to [0, 2].

    Parameters
    ----------
    test : pl.DataFrame
        Frame for the current step; only its first row is used. It must
        contain every column in ``feature_cols`` plus
        ``lagged_forward_returns`` and ``date_id``.

    Returns
    -------
    float
        Weight in [0.0, 2.0]. If any step raises, the error is printed and
        0.0 is returned.

    Side effects
    ------------
    Updates the module-level ``last_submitted_weight`` and the history held
    by ``vol_manager``; prints a short trace of the step.
    """
    global last_submitted_weight
    try:
        # Prepare test features exactly like the training matrix: coerce to
        # numeric, then fill gaps with the column means learned at fit time.
        # Feeding raw NaNs would show the model inputs it never saw in training.
        X_test = test.select(feature_cols).to_pandas()
        X_test = X_test.apply(pd.to_numeric, errors='coerce')
        X_test = pd.DataFrame(imputer.transform(X_test), columns=feature_cols)

        # Get lagged return for volatility tracking
        lagged_ret = test.select("lagged_forward_returns").item(0, 0)

        # 1. Prediction using the FINAL MODEL (Trained on all data)
        pred_excess_return = float(final_model.predict(X_test)[0])
        # max(0.0, min(2.0, nan)) evaluates to 2.0, so a NaN prediction would
        # silently become maximum exposure; reject it instead.
        if not np.isfinite(pred_excess_return):
            raise ValueError(f"non-finite prediction: {pred_excess_return}")

        # 2. Sigmoid Betting Strategy (Output 0 ~ 2)
        # Using a fixed scale factor (heuristic)
        scale_factor = 10000.0
        if pred_excess_return < 0:
            # Negative: Aggressive scaling
            # Even a small negative prediction will drop weight significantly
            scale_factor *= 5  # 5x more sensitive to downside

        # Clip the exponent so np.exp cannot overflow; the sigmoid is already
        # saturated at 0 or 1 far inside this range.
        exponent = np.clip(-pred_excess_return * scale_factor, -500.0, 500.0)
        sigmoid_value = 1 / (1 + np.exp(exponent))
        raw_weight = 2.0 * sigmoid_value

        # 3. Volatility Control
        final_weight = vol_manager.calculate_safe_weight(raw_weight, lagged_ret, last_submitted_weight)

        # Hard clip to ensure valid submission
        final_weight = max(0.0, min(2.0, final_weight))

        # Update state for next iteration
        last_submitted_weight = final_weight

        # Print test information
        print(f"current test : {test.select('date_id').item(0, 0)}")
        print(f"pred_excess_return : {pred_excess_return}")
        print(f"raw_weight : {raw_weight}")
        print(f"final_weight : {final_weight}")
        print()

        return float(final_weight)

    except Exception as e:
        # Safety fallback: report the failure instead of hiding it, and record
        # that 0.0 is the weight actually submitted so the next step's
        # volatility bookkeeping pairs the realized return with it.
        print(f"predict() failed, falling back to weight 0.0: {e!r}")
        last_submitted_weight = 0.0
        return 0.0

# 7. Start Inference Server
# Wrap predict() in the evaluation server provided by the competition package.
inference_server = kaggle_evaluation.default_inference_server.DefaultInferenceServer(predict)

# When KAGGLE_IS_COMPETITION_RERUN is unset or empty, run the local gateway
# against the competition input directory; otherwise serve requests.
rerun_flag = os.getenv('KAGGLE_IS_COMPETITION_RERUN')
if not rerun_flag:
    inference_server.run_local_gateway(('/kaggle/input/hull-tactical-market-prediction/',))
else:
    inference_server.serve()

current test : 8980
pred_excess_return : 0.00011046456597672851
raw_weight : 1.5022590998124175
final_weight : 1.5022590998124175

current test : 8981
pred_excess_return : -2.9048079216809636e-06
raw_weight : 0.9275071921187277
final_weight : 0.9275071921187277

current test : 8982
pred_excess_return : 6.723777174110579e-06
raw_weight : 1.033606225908608
final_weight : 1.033606225908608

current test : 8983
pred_excess_return : 4.880056485332415e-05
raw_weight : 1.2392730033890778
final_weight : 1.2392730033890778

current test : 8984
pred_excess_return : 3.3056882799186715e-05
raw_weight : 1.1637955500288701
final_weight : 1.1637955500288701

current test : 8985
pred_excess_return : -2.44424421431643e-05
raw_weight : 0.455126429516752
final_weight : 0.455126429516752

current test : 8986
pred_excess_return : 8.24934546547121e-05
raw_weight : 1.3905656045068582
final_weight : 1.3905656045068582

current test : 8987
pred_excess_return : 4.20954175529952e-05
raw_weight : 1.20742310625652