# Hull Tactical Market Prediction - EXP-02 Inference Server

**Model:** LightGBM + Enhanced Tier 1 Features  
**Local CV Performance:** Adjusted Sharpe 1.7461 (+273% vs baseline)  
**IC:** 0.1067 ± 0.1315  

**CRITICAL:** This is an INFERENCE SERVER, not a static file submission!  
The predict() function is called once per timestep and must return a single float.

## Setup and Imports

In [None]:
import os
import numpy as np
import pandas as pd
import polars as pl
import lightgbm as lgb
from pathlib import Path
import warnings
warnings.filterwarnings('ignore')

import kaggle_evaluation.default_inference_server

# Startup banner: one line per argument, framed by 80-character rules,
# followed by a blank line.
banner_rule = "=" * 80
print(
    banner_rule,
    "HULL TACTICAL - EXP-02 INFERENCE SERVER",
    banner_rule,
    "Model: LightGBM + Enhanced Tier 1 Features",
    "Local CV Performance: Adjusted Sharpe 1.7461 (+273% vs baseline)",
    banner_rule,
    sep="\n",
)
print()

## Configuration

In [None]:
# LightGBM hyperparameters (taken from experiment EXP-02); unpacked as keyword
# arguments when the regressor is constructed.
LGBM_PARAMS = dict(
    objective='regression',
    metric='rmse',
    num_leaves=31,
    learning_rate=0.05,
    n_estimators=500,
    min_child_samples=20,
    verbose=-1,
    random_state=42,
)

# Look-back settings shared by training and inference feature engineering.
LAG_PERIODS = [1, 5, 10, 21]         # lags used for the return_lag_* features
ROLLING_WINDOWS = [5, 10, 21, 63]    # windows for volatility_* / mean_return_*
DOWNSIDE_WINDOWS = [21, 63]          # windows for downside_dev_*

# Position mapping: the predicted return is scaled by SIGNAL_MULTIPLIER,
# shifted by 1.0, and then clipped into [MIN_POSITION, MAX_POSITION].
SIGNAL_MULTIPLIER = 2.0
MIN_POSITION, MAX_POSITION = 0.0, 2.0

# Name of the column the model is trained to predict.
TARGET_COL = 'market_forward_excess_returns'

## Train Model (runs once at startup)

In [None]:
print("=" * 80)
print("TRAINING MODEL (runs once at startup)")
print("=" * 80)
print()

# Load training data
train_file = '/kaggle/input/hull-tactical-market-prediction/train.csv'
train_df = pd.read_csv(train_file)

print(f"✅ Loaded {len(train_df):,} training rows")

# Feature engineering on training data. Every feature at row t is built only
# from target values at rows < t (via shift), so no feature sees its own label.
returns = train_df[TARGET_COL]

# Loop-invariant inputs, computed once: the series shifted back one row, and
# the same series with positive values clipped to zero.
prior_returns = returns.shift(1)
prior_negative_returns = prior_returns.clip(upper=0)

# Lag features
for lag in LAG_PERIODS:
    train_df[f'return_lag_{lag}'] = returns.shift(lag)

# Rolling statistics. Note that pandas' rolling std is the sample standard
# deviation (ddof=1); inference code must use the same estimator.
for window in ROLLING_WINDOWS:
    rolling_prior = prior_returns.rolling(window)
    train_df[f'volatility_{window}d'] = rolling_prior.std()
    train_df[f'mean_return_{window}d'] = rolling_prior.mean()

# Downside deviation features: std of the clipped (non-positive) returns
for window in DOWNSIDE_WINDOWS:
    train_df[f'downside_dev_{window}d'] = prior_negative_returns.rolling(window).std()

# Define feature columns. The order here is the column order the model is
# trained on, so inference must build its feature vector in the same order.
FEATURE_COLS = []
FEATURE_COLS.extend([f'return_lag_{lag}' for lag in LAG_PERIODS])
for window in ROLLING_WINDOWS:
    FEATURE_COLS.extend([f'volatility_{window}d', f'mean_return_{window}d'])
FEATURE_COLS.extend([f'downside_dev_{window}d' for window in DOWNSIDE_WINDOWS])

print(f"Features: {len(FEATURE_COLS)} total")

# Prepare training data
X_train = train_df[FEATURE_COLS].copy()
y_train = train_df[TARGET_COL].copy()

# Drop rows where any feature or the target is NaN (this removes the warm-up
# rows at the start, where lags/windows are not yet available).
valid_mask = X_train.notna().all(axis=1) & y_train.notna()
X_train = X_train[valid_mask]
y_train = y_train[valid_mask]

# Clean infinities: turn them into NaN, then impute with the column median
X_train = X_train.replace([np.inf, -np.inf], np.nan)
X_train = X_train.fillna(X_train.median())

print(f"✅ Training matrix: {len(X_train):,} samples × {len(FEATURE_COLS)} features")

# Per-feature medians, kept so inference can impute non-finite feature values
feature_medians = {col: X_train[col].median() for col in FEATURE_COLS}

# Train LightGBM model
model = lgb.LGBMRegressor(**LGBM_PARAMS)
model.fit(X_train, y_train)

print(f"✅ Model trained (LightGBM with {LGBM_PARAMS['n_estimators']} trees)")
print()

## Define Predict Function (returns position allocation [0, 2])

In [None]:
# Global state carried across predict() calls so the rolling features can be
# rebuilt one timestep at a time.
inference_state = {
    'returns_history': [],
    'timestep': 0
}

# Maximum number of returns retained in the history buffer.
MAX_HISTORY = 300


def _sample_std(values, default):
    """Return the sample standard deviation (ddof=1) of `values`.

    ddof=1 matches pandas' ``rolling(...).std()``, which is what the training
    features were computed with. With fewer than two observations the sample
    std is undefined, so `default` is returned instead.
    """
    if len(values) < 2:
        return default
    return float(np.std(values, ddof=1))


def _engineer_features(past):
    """Build the feature dict for the current timestep.

    Args:
        past: 1-D float array of returns strictly before the current
            timestep, oldest first (so ``past[-1]`` is the lag-1 return).

    Returns:
        dict mapping feature name -> value. Values may be NaN if `past`
        contains NaN; the caller is responsible for imputing those.
    """
    features = {}
    n_past = len(past)

    # Lag features: past[-lag] is the return `lag` steps back; 0.0 until
    # enough history has accumulated.
    for lag in LAG_PERIODS:
        features[f'return_lag_{lag}'] = past[-lag] if n_past >= lag else 0.0

    # Rolling statistics. past[-window:] yields the full window when
    # available and otherwise all the history there is (partial window).
    for window in ROLLING_WINDOWS:
        recent = past[-window:]
        features[f'volatility_{window}d'] = _sample_std(recent, 0.01)
        features[f'mean_return_{window}d'] = float(np.mean(recent)) if n_past else 0.0

    # Downside deviation: std of returns with positive values clipped to zero
    for window in DOWNSIDE_WINDOWS:
        downside = np.clip(past[-window:], None, 0)
        features[f'downside_dev_{window}d'] = _sample_std(downside, 0.01)

    return features


def predict(test: pl.DataFrame) -> float:
    """
    Predict position allocation for inference server.
    Called once per timestep by Kaggle's evaluation system.

    Each call appends one return observation to the shared history in
    `inference_state`, rebuilds the lag / rolling features from that history,
    runs the trained model, and maps the predicted return to a position via
    ``predicted_return * SIGNAL_MULTIPLIER + 1.0`` clipped to
    ``[MIN_POSITION, MAX_POSITION]``.

    Args:
        test: Polars DataFrame with test features (one row = one timestep).
            Only the first row is read.

    Returns:
        float: Position allocation in [MIN_POSITION, MAX_POSITION].
    """
    # Convert Polars to Pandas
    test_pd = test.to_pandas()

    # Extract the return observation for this call and record how it is
    # aligned relative to the current timestep.
    if 'lagged_market_forward_excess_returns' in test_pd.columns:
        raw_return = test_pd['lagged_market_forward_excess_returns'].iloc[0]
        # NOTE(review): per the column name (and the competition's data
        # description) this is the PREVIOUS timestep's target, i.e. it is
        # already the lag-1 return and belongs inside the look-back window.
        # Confirm against the data documentation.
        newest_is_past = True
    elif 'market_forward_excess_returns' in test_pd.columns:
        raw_return = test_pd['market_forward_excess_returns'].iloc[0]
        # This is the current row's own target: keep it in the history for
        # future calls but exclude it from the current features.
        newest_is_past = False
    else:
        # No return information: store a 0.0 placeholder for the current row.
        raw_return = 0.0
        newest_is_past = False

    # Coerce to float; anything non-numeric (e.g. a null) becomes NaN and is
    # handled by the median imputation below.
    try:
        latest_return = float(raw_return)
    except (TypeError, ValueError):
        latest_return = float('nan')

    # Update returns history, keeping only the last MAX_HISTORY timesteps
    history = inference_state['returns_history']
    history.append(latest_return)
    del history[:-MAX_HISTORY]

    returns_arr = np.array(history, dtype=float)

    # Returns strictly before the current timestep, oldest first.
    past = returns_arr if newest_is_past else returns_arr[:-1]

    # --- Feature Engineering for Current Timestep ---
    features_dict = _engineer_features(past)

    # Build the feature vector in training column order, replacing any
    # non-finite value with that feature's training median.
    features = []
    for feat in FEATURE_COLS:
        val = features_dict.get(feat, 0.0)
        if not np.isfinite(val):
            val = feature_medians.get(feat, 0.0)
        features.append(val)

    # Predict market excess return
    predicted_return = model.predict([features])[0]

    # Convert to position allocation [0, 2]
    position = predicted_return * SIGNAL_MULTIPLIER + 1.0
    position = np.clip(position, MIN_POSITION, MAX_POSITION)

    # Increment timestep counter
    inference_state['timestep'] += 1

    # Debug logging (first few timesteps only)
    if inference_state['timestep'] <= 3:
        print(f"Timestep {inference_state['timestep']}: predicted_return={predicted_return:.6f}, position={position:.4f}")

    return float(position)

print("✅ Predict function defined")
print()

## Setup Inference Server

In [None]:
print("=" * 80)
print("SETTING UP INFERENCE SERVER")
print("=" * 80)
print()

# Create inference server with our predict function
inference_server = kaggle_evaluation.default_inference_server.DefaultInferenceServer(predict)

print("✅ Inference server created")
print()

# Start server (competition mode) or run local test. The mode is chosen by
# whether the KAGGLE_IS_COMPETITION_RERUN environment variable is set to a
# non-empty value.
if os.getenv('KAGGLE_IS_COMPETITION_RERUN'):
    print("🚀 Starting inference server for competition...")
    inference_server.serve()
else:
    print("🧪 Running local gateway test...")
    # The gateway is given a one-element tuple holding the competition
    # input directory.
    inference_server.run_local_gateway((
        '/kaggle/input/hull-tactical-market-prediction/',
    ))
    print("✅ Local test complete")

print()
print("=" * 80)
print("INFERENCE SERVER READY!")
print("=" * 80)