In [None]:
# ============================================================
# STEP 5 — FINAL SUBMISSION MODEL
# Hull Tactical Market Prediction (Kaggle Inference Server)
# LightGBM + Interest(I*) & Valuation(P*) Features
# ============================================================

import os
import numpy as np
import pandas as pd
import polars as pl
import lightgbm as lgb
import kaggle_evaluation.default_inference_server

# -------------------------
# 0. Paths & target column
# -------------------------
# Location of the training CSV; the training step below only runs when this
# file exists.
TRAIN_PATH = "/kaggle/input/hull-tactical-market-prediction/train.csv"
# Column of the training data used as the regression label.
TARGET_NAME = "market_forward_excess_returns"

# Module-level state read by predict(). The defaults below describe the
# "no model" case, in which predict() returns a constant allocation of 1.0.
model = None       # trained LightGBM model, or None if training did not run
feature_cols = []  # feature column names, in the order used for training
y_mean = 0.0       # mean of the training target (set after training)
y_std = 1e-9       # std of the training target; tiny non-zero default

print("=== STEP 5: Loading training data & training final model ===")

# ============================================================
# 1. Train LightGBM Model (Interest + Valuation)
# ============================================================
# Training runs at module top level and publishes its results through the
# globals model, feature_cols, y_mean and y_std, which predict() reads.
if os.path.exists(TRAIN_PATH):
    # Load the training data ordered by date_id.
    df = pd.read_csv(TRAIN_PATH).sort_values("date_id")

    # Drop rows whose target is NaN.
    df = df.dropna(subset=[TARGET_NAME])

    # Feature selection: every column whose name starts with "I" or "P".
    all_cols = df.columns.tolist()
    interest_cols  = [c for c in all_cols if c.startswith("I")]
    valuation_cols = [c for c in all_cols if c.startswith("P")]

    # Sorted lexicographically (so "P10" precedes "P2"); predict() builds its
    # feature matrix in this same order.
    feature_cols = sorted(interest_cols + valuation_cols)

    print(f"Using {len(feature_cols)} features:")
    print(feature_cols)

    # Missing feature values are replaced with 0.0; predict() applies the
    # same fill at inference time.
    X_train = df[feature_cols].fillna(0.0).values
    y_train = df[TARGET_NAME].values

    # LightGBM hyperparameters for an RMSE regression objective.
    params = {
        "objective": "regression",
        "metric": "rmse",
        "learning_rate": 0.02,
        "num_leaves": 64,
        "feature_fraction": 0.8,
        "bagging_fraction": 0.8,
        "bagging_freq": 5,
        "min_data_in_leaf": 50,
        "verbosity": -1,   # Kaggle-safe
    }

    dtrain = lgb.Dataset(X_train, label=y_train)

    # Fixed number of boosting rounds; no validation set is passed to
    # lgb.train here.
    print("Training LightGBM...")
    model = lgb.train(params, dtrain, num_boost_round=1200)
    print("LightGBM training done.")

    # Statistics used for the z-score based conversion of predictions into
    # allocation weights.
    # NOTE(review): these are the mean/std of the training *target*, while
    # predict() z-scores the model's *predictions* against them — confirm
    # that this is the intended scaling.
    y_mean = float(y_train.mean())
    y_std = float(y_train.std() + 1e-9)  # epsilon keeps the std non-zero

else:
    # Without training data no model is fitted; predict() then falls back to
    # a constant allocation of 1.0.
    print("[ERROR] train.csv not found — Using fallback model.")
    model = None
    feature_cols = []


# ============================================================
# 2. Prediction → Allocation (0~2)
# ============================================================

def pred_to_allocation(y_pred, mean, std, alpha=0.5):
    """Map predicted forward excess returns to allocation weights in [0, 2].

    The prediction is standardised against ``mean`` and ``std``, scaled by
    ``alpha`` and centred on a weight of 1.0; the result is then clipped to
    the closed interval [0.0, 2.0].

    Args:
        y_pred: Predicted value(s); a scalar or a NumPy array.
        mean: Value subtracted from the prediction.
        std: Scale the centred prediction is divided by. 1e-9 is added to
            it so the divisor is non-zero when ``std`` is 0.
        alpha: Weight change per unit of standardised prediction.

    Returns:
        The clipped weight(s), as produced by ``np.clip``.
    """
    lower_bound, upper_bound = 0.0, 2.0
    divisor = std + 1e-9
    standardised = (y_pred - mean) / divisor
    return np.clip(1.0 + alpha * standardised, lower_bound, upper_bound)


# ============================================================
# 3. predict() — called repeatedly by Kaggle evaluation server
# ============================================================

def predict(test: pl.DataFrame) -> pl.DataFrame:
    """Turn one batch of test rows into allocation weights.

    Reads the module-level ``model``, ``feature_cols``, ``y_mean`` and
    ``y_std``.

    Args:
        test: Polars DataFrame holding the batch; it must contain a
            ``date_id`` column.

    Returns:
        Polars DataFrame with columns ``date_id`` and ``prediction``. When
        no model or no feature list is available, ``prediction`` is 1.0 for
        every row; otherwise it is the float32 output of
        ``pred_to_allocation`` applied to the model's predictions.
    """
    date_ids = test["date_id"]

    # Fallback: nothing was trained, so emit a constant weight of 1.0.
    if model is None or not feature_cols:
        return pl.DataFrame({"date_id": date_ids, "prediction": np.ones(len(test))})

    # Align the batch to the training feature order. Features absent from
    # the batch come out of reindex as NaN and are zero-filled together with
    # missing values inside existing columns.
    batch = test.to_pandas()
    features = batch.reindex(columns=feature_cols).fillna(0.0).to_numpy()

    raw_pred = model.predict(features)
    weights = pred_to_allocation(raw_pred, y_mean, y_std)

    return pl.DataFrame({
        "date_id": date_ids,
        "prediction": weights.astype("float32"),
    })


# ============================================================
# 4. Run inference server (Do NOT modify)
# ============================================================

# Wrap predict() in the inference server provided by kaggle_evaluation.
inference_server = kaggle_evaluation.default_inference_server.DefaultInferenceServer(predict)

# When the KAGGLE_IS_COMPETITION_RERUN environment variable is set, start
# serving (presumably the scored rerun on Kaggle — confirm against the
# competition docs); otherwise run the local gateway against the competition
# input directory.
if os.getenv("KAGGLE_IS_COMPETITION_RERUN"):
    inference_server.serve()
else:
    inference_server.run_local_gateway(
        ("/kaggle/input/hull-tactical-market-prediction/",)
    )

=== STEP 5: Loading training data & training final model ===
Using 22 features:
['I1', 'I2', 'I3', 'I4', 'I5', 'I6', 'I7', 'I8', 'I9', 'P1', 'P10', 'P11', 'P12', 'P13', 'P2', 'P3', 'P4', 'P5', 'P6', 'P7', 'P8', 'P9']
Training LightGBM...
LightGBM training done.
