Thanks @tmoroder.  
<a href='https://www.kaggle.com/code/morodertobias/hull-leak-safe-baseline'>https://www.kaggle.com/code/morodertobias/hull-leak-safe-baseline</a>

I replaced ElasticNet with Ridge, Lasso, or LinearRegression, purely for teaching purposes. 
From version 9 onward, the model is free of data leakage, but it is not very stable.

v1: Ridge

v2: LinearRegression

v3: Lasso

v4-v5: Lasso, parameter tuning.

v6: Lasso, printing the selected features.

v7: Lasso, based on all features.

v8: Lasso, adding cross-feature interactions.

v9: Lasso, TimeSeriesSplit.

v10: Lasso, keeping only recent data.

v11: Lasso, parameter tuning.

v12: ARIMA (the code contains bugs).

v13-*: ARIMA.

## Import & Settings

In [None]:
import os
import warnings
from pathlib import Path
import polars as pl
import numpy as np
from statsmodels.tsa.arima.model import ARIMA
from dataclasses import dataclass
from collections import deque  # 用 deque 更高效

# Silence every warning category for the rest of the session.
warnings.filterwarnings("ignore")

# =================== PATHS ===================
# Directory holding the competition files; train.csv is read from here.
DATA_PATH: Path = Path('/kaggle/input/hull-tactical-market-prediction/')

# ============ SIGNAL CONVERSION CONFIGS ============
# Lower and upper bounds the final signal is clipped to.
MIN_SIGNAL: float = 0.0
MAX_SIGNAL: float = 2.0
# Factor a predicted return is multiplied by before 1 is added and the
# result is clipped.
SIGNAL_MULTIPLIER: float = 200.0

@dataclass(frozen=True)
class RetToSignalParameters:
    """Immutable settings for turning a predicted return into a signal.

    ``convert_ret_to_signal`` multiplies the return by ``signal_multiplier``,
    adds 1, and clips the result to ``[min_signal, max_signal]``.
    """

    # Scale applied to the raw return; required, there is no default.
    signal_multiplier: float
    # Lower clip bound; defaults to the module-level MIN_SIGNAL.
    min_signal: float = MIN_SIGNAL
    # Upper clip bound; defaults to the module-level MAX_SIGNAL.
    max_signal: float = MAX_SIGNAL

# Conversion settings shared by every call to predict().
ret_signal_params = RetToSignalParameters(signal_multiplier=SIGNAL_MULTIPLIER)

# =================== GLOBALS ===================
# Double-ended queue of (date_id, return) pairs; holds at most 60 entries.
rolling_history = deque(maxlen=60)  # maxlen discards the oldest entry automatically

# =================== DATA LOADING ===================
def load_trainset() -> pl.DataFrame:
    """Load the training set, keeping only rows with ``date_id`` below 8980.

    Returns:
        A frame with two columns: ``date_id`` and ``target``, where
        ``target`` is ``market_forward_excess_returns`` shifted down by one
        row, so each row carries the previous row's value and the first
        row is null.
    """
    raw_frame = pl.read_csv(DATA_PATH / "train.csv")
    # Rows with date_id >= 8980 are dropped before the shift is applied.
    kept_rows = raw_frame.filter(pl.col("date_id") < 8980)
    lagged_target = (
        pl.col("market_forward_excess_returns").shift(1).alias("target")
    )
    return kept_rows.select(["date_id", lagged_target])

# =================== SIGNAL CONVERSION ===================
def convert_ret_to_signal(
    ret_arr: np.ndarray,
    params: RetToSignalParameters
) -> np.ndarray:
    """Map predicted returns to signals.

    Each return is multiplied by ``params.signal_multiplier``, offset by 1,
    and clipped to ``[params.min_signal, params.max_signal]``.

    Args:
        ret_arr: Predicted returns.
        params: Multiplier and clip bounds to apply.

    Returns:
        The clipped signals, one per input return.
    """
    lower_bound, upper_bound = params.min_signal, params.max_signal
    # A return of zero lands on the offset value 1 before clipping.
    unclipped = 1 + params.signal_multiplier * ret_arr
    return np.clip(unclipped, lower_bound, upper_bound)

# =================== PREDICTION FUNCTION ===================
def _seed_history_from_trainset() -> None:
    """Fill ``rolling_history`` with the last 60 rows of the training set."""
    seed_rows = (
        load_trainset()
        .select(["date_id", "target"])
        .tail(60)
        .rows()  # [(date_id, target), ...]
    )
    rolling_history.extend(seed_rows)
    print(f"Initialized rolling_history with {len(rolling_history)} days")


def _forecast_next_return(ts: np.ndarray) -> float:
    """Return a one-step-ahead ARIMA(1, 1, 2) forecast for ``ts``.

    Falls back to the mean of the last five observations when fitting
    raises or when the forecast is not a finite number, so the caller never
    receives NaN or infinity.
    """
    try:
        fitted_model = ARIMA(ts, order=(1, 1, 2)).fit()
        forecast = float(fitted_model.forecast(steps=1)[0])
    except Exception as e:
        # Deliberate best-effort: any fitting problem degrades to the
        # moving-average fallback instead of aborting inference.
        print(f"ARIMA fitting failed: {e}, using fallback")
        return float(np.mean(ts[-5:]))

    if not np.isfinite(forecast):
        # np.clip would pass NaN straight through to the returned signal.
        print("ARIMA forecast was not finite, using fallback")
        return float(np.mean(ts[-5:]))
    return forecast


def predict(test: pl.DataFrame) -> float:
    """Produce the signal for the single date described by ``test``.

    The first call seeds ``rolling_history`` from the training set. Every
    call then records the incoming ``(date_id, lagged_forward_returns)``
    pair unless that ``date_id`` is already present, forecasts the next
    return from the history, and converts the forecast to a signal.

    Args:
        test: Frame whose first row supplies ``date_id`` and
            ``lagged_forward_returns``.

    Returns:
        The signal as a Python float, clipped by ``convert_ret_to_signal``.
    """
    current_date = test["date_id"][0]
    # NOTE(review): the history is seeded from market_forward_excess_returns
    # but extended with lagged_forward_returns - confirm that mixing the two
    # series is intended.
    current_return = test["lagged_forward_returns"][0]

    # First call: load the most recent training rows.
    if not rolling_history:
        _seed_history_from_trainset()

    # Only record the observation if this date_id has not been seen yet.
    already_recorded = any(
        date_id == current_date for date_id, _ in rolling_history
    )
    if not already_recorded:
        rolling_history.append((current_date, current_return))

    # dtype=float turns missing (None) returns into NaN; those are dropped
    # so that neither the ARIMA fit nor the fallback mean sees them.
    ts = np.array([ret for _, ret in rolling_history], dtype=float)
    ts = ts[np.isfinite(ts)]

    # Too little usable data to fit a model: predict a zero return.
    if len(ts) < 10:
        raw_pred = 0.0
    else:
        raw_pred = _forecast_next_return(ts)

    # Convert the predicted return to a signal.
    signal = convert_ret_to_signal(np.array([raw_pred]), ret_signal_params)[0]
    print(signal)
    return float(signal)


# =================== LAUNCH SERVER ===================
if __name__ == "__main__":
    # Wrap predict() in the Kaggle inference server. A competition rerun
    # serves requests; any other run replays the local gateway against the
    # files under DATA_PATH.
    try:
        import kaggle_evaluation.default_inference_server
        server_cls = kaggle_evaluation.default_inference_server.DefaultInferenceServer
        inference_server = server_cls(predict)

        if not os.getenv('KAGGLE_IS_COMPETITION_RERUN'):
            inference_server.run_local_gateway((str(DATA_PATH),))
        else:
            inference_server.serve()
    except Exception as e:
        # Report the failure, then re-raise so the run still fails.
        print(f"Error launching server: {e}")
        raise