In [1]:
import pandas as pd
import numpy as np
import warnings
warnings.filterwarnings('ignore')

from pathlib import Path
import lightgbm as lgb
from sklearn.metrics import mean_squared_error
from scipy import stats

# 设置随机种子
np.random.seed(42)

print("✅ 库导入完成")

✅ 库导入完成


## 1. 加载数据 (仅使用2020年后)

In [2]:
# 加载数据
train_df = pd.read_csv('../data/train.csv')
test_df = pd.read_csv('../data/test.csv')

# 转换时间戳
train_df['Timestamp'] = pd.to_datetime(train_df['Timestamp'])
test_df['Timestamp'] = pd.to_datetime(test_df['Timestamp'])

# 按时间排序
train_df = train_df.sort_values('Timestamp').reset_index(drop=True)
test_df = test_df.sort_values('Timestamp').reset_index(drop=True)

print(f"原始训练集: {train_df.shape[0]:,} 样本")
print(f"时间范围: {train_df['Timestamp'].min()} 到 {train_df['Timestamp'].max()}")

# 只保留2020年之后的数据
train_df = train_df[train_df['Timestamp'] >= '2020-01-01'].reset_index(drop=True)

print(f"\n筛选后训练集 (2020年后): {train_df.shape[0]:,} 样本")
print(f"时间范围: {train_df['Timestamp'].min()} 到 {train_df['Timestamp'].max()}")
print(f"测试集: {test_df.shape[0]:,} 样本")

原始训练集: 484,202 样本
时间范围: 2012-01-01 10:00:00 到 2025-10-23 23:15:00

筛选后训练集 (2020年后): 203,730 样本
时间范围: 2020-01-01 00:00:00 到 2025-10-23 23:15:00
测试集: 2,881 样本


In [3]:
# 查看价格变化
print("2020年后价格统计:")
print(train_df[['Open', 'High', 'Low', 'Close']].describe())

print("\nTarget统计:")
print(train_df['Target'].describe())

2020年后价格统计:
                Open           High            Low          Close
count  203730.000000  203730.000000  203730.000000  203730.000000
mean    45591.813094   45675.806126   45504.542246   45592.283779
std     30657.587163   30697.537463   30616.593039   30657.716800
min      3957.840000    4284.470000    3850.000000    3902.700000
25%     21350.000000   21397.587500   21309.000000   21350.000000
50%     38581.915000   38684.065000   38468.030000   38583.455000
75%     62601.317500   62713.750000   62474.750000   62604.980000
max    126094.000000  126272.000000  125675.000000  126111.000000

Target统计:
count    203730.000000
mean          0.000013
std           0.003495
min          -0.121894
25%          -0.001259
50%           0.000009
75%           0.001308
max           0.179620
Name: Target, dtype: float64


## 2. 特征工程

In [4]:
def calculate_rsi(series, period=14):
    """计算RSI"""
    delta = series.diff()
    gain = delta.where(delta > 0, 0).rolling(window=period).mean()
    loss = (-delta.where(delta < 0, 0)).rolling(window=period).mean()
    rs = gain / (loss + 1e-10)
    return 100 - (100 / (1 + rs))

def calculate_macd(series, fast=12, slow=26, signal=9):
    """计算MACD"""
    exp_fast = series.ewm(span=fast, adjust=False).mean()
    exp_slow = series.ewm(span=slow, adjust=False).mean()
    macd = exp_fast - exp_slow
    macd_signal = macd.ewm(span=signal, adjust=False).mean()
    macd_hist = macd - macd_signal
    return macd, macd_signal, macd_hist

def calculate_atr(high, low, close, period=14):
    """计算ATR"""
    prev_close = close.shift(1)
    tr = pd.concat([
        high - low,
        (high - prev_close).abs(),
        (low - prev_close).abs()
    ], axis=1).max(axis=1)
    return tr.rolling(window=period).mean()

print("✅ 技术指标函数定义完成")

✅ 技术指标函数定义完成


In [5]:
def create_features(df, is_train=True):
    """
    特征工程
    
    时间周期 (15分钟间隔):
    - 4 = 1小时
    - 96 = 1天
    - 672 = 1周
    """
    df = df.copy()
    
    # =============================================
    # 1. 时间特征
    # =============================================
    df['hour'] = df['Timestamp'].dt.hour
    df['day_of_week'] = df['Timestamp'].dt.dayofweek
    df['day_of_month'] = df['Timestamp'].dt.day
    df['month'] = df['Timestamp'].dt.month
    
    # 周期性编码
    df['hour_sin'] = np.sin(2 * np.pi * df['hour'] / 24)
    df['hour_cos'] = np.cos(2 * np.pi * df['hour'] / 24)
    df['day_sin'] = np.sin(2 * np.pi * df['day_of_week'] / 7)
    df['day_cos'] = np.cos(2 * np.pi * df['day_of_week'] / 7)
    df['month_sin'] = np.sin(2 * np.pi * df['month'] / 12)
    df['month_cos'] = np.cos(2 * np.pi * df['month'] / 12)
    
    # 交易时段
    df['is_asia_session'] = ((df['hour'] >= 0) & (df['hour'] < 8)).astype(int)
    df['is_europe_session'] = ((df['hour'] >= 8) & (df['hour'] < 16)).astype(int)
    df['is_us_session'] = ((df['hour'] >= 16) & (df['hour'] < 24)).astype(int)
    df['is_weekend'] = (df['day_of_week'] >= 5).astype(int)
    
    # =============================================
    # 2. 价格特征
    # =============================================
    # 收益率
    df['return_1'] = df['Close'].pct_change(1)
    for lag in [2, 4, 8, 12, 24, 48, 96]:
        df[f'return_{lag}'] = df['Close'].pct_change(lag)
    
    # 对数收益率
    df['log_return_1'] = np.log(df['Close'] / df['Close'].shift(1))
    for lag in [4, 12, 24, 96]:
        df[f'log_return_{lag}'] = np.log(df['Close'] / df['Close'].shift(lag))
    
    # 价格范围
    df['price_range'] = df['High'] - df['Low']
    df['price_change'] = df['Close'] - df['Open']
    df['price_change_pct'] = df['price_change'] / (df['Open'] + 1e-10)
    
    # OHLC比率
    df['high_low_ratio'] = df['High'] / (df['Low'] + 1e-10)
    df['close_open_ratio'] = df['Close'] / (df['Open'] + 1e-10)
    
    # 蜡烛图形态
    df['upper_shadow'] = df['High'] - df[['Open', 'Close']].max(axis=1)
    df['lower_shadow'] = df[['Open', 'Close']].min(axis=1) - df['Low']
    df['body_size'] = (df['Close'] - df['Open']).abs()
    df['body_direction'] = np.sign(df['Close'] - df['Open'])
    
    # =============================================
    # 3. 成交量特征
    # =============================================
    df['volume_log'] = np.log1p(df['Volume'])
    df['volume_change'] = df['Volume'].pct_change(1)
    
    for window in [4, 12, 24, 48, 96]:
        df[f'volume_ma_{window}'] = df['Volume'].shift(1).rolling(window=window).mean()
        df[f'volume_ratio_{window}'] = df['Volume'] / (df[f'volume_ma_{window}'] + 1e-10)
    
    # 价量关系
    df['price_volume'] = df['price_change'] * df['Volume']
    
    # =============================================
    # 4. 技术指标
    # =============================================
    # RSI
    for period in [6, 14, 28]:
        df[f'RSI_{period}'] = calculate_rsi(df['Close'], period)
    
    # MACD
    macd, macd_signal, macd_hist = calculate_macd(df['Close'])
    df['MACD'] = macd
    df['MACD_signal'] = macd_signal
    df['MACD_hist'] = macd_hist
    
    # 布林带
    for period in [20]:
        sma = df['Close'].rolling(window=period).mean()
        std = df['Close'].rolling(window=period).std()
        df[f'BB_width_{period}'] = (2 * std) / (sma + 1e-10)
        df[f'BB_position_{period}'] = (df['Close'] - (sma - 2*std)) / (4 * std + 1e-10)
    
    # ATR
    for period in [14, 28]:
        df[f'ATR_{period}'] = calculate_atr(df['High'], df['Low'], df['Close'], period)
        df[f'ATR_ratio_{period}'] = df[f'ATR_{period}'] / (df['Close'] + 1e-10)
    
    # =============================================
    # 5. 移动平均
    # =============================================
    for window in [4, 12, 24, 48, 96]:
        df[f'SMA_{window}'] = df['Close'].shift(1).rolling(window=window).mean()
        df[f'close_SMA_ratio_{window}'] = df['Close'] / (df[f'SMA_{window}'] + 1e-10)
        df[f'EMA_{window}'] = df['Close'].shift(1).ewm(span=window, adjust=False).mean()
        df[f'close_EMA_ratio_{window}'] = df['Close'] / (df[f'EMA_{window}'] + 1e-10)
    
    # 均线交叉
    df['SMA_cross_12_48'] = df['SMA_12'] / (df['SMA_48'] + 1e-10)
    df['EMA_cross_12_48'] = df['EMA_12'] / (df['EMA_48'] + 1e-10)
    
    # =============================================
    # 6. 波动率特征
    # =============================================
    for window in [12, 24, 48, 96]:
        df[f'volatility_{window}'] = df['log_return_1'].shift(1).rolling(window=window).std() * np.sqrt(window)
    
    # 波动率比率
    df['vol_ratio_12_48'] = df['volatility_12'] / (df['volatility_48'] + 1e-10)
    
    # =============================================
    # 7. 滞后特征
    # =============================================
    target_col = 'Target' if is_train and 'Target' in df.columns else 'Close'
    
    lags = [1, 2, 3, 4, 6, 8, 12, 24, 48, 96]
    for lag in lags:
        df[f'target_lag_{lag}'] = df[target_col].shift(lag)
        if lag <= 24:
            df[f'close_lag_{lag}'] = df['Close'].shift(lag)
    
    # 目标差分
    for lag in [1, 4, 12, 24]:
        df[f'target_diff_{lag}'] = df[target_col].diff(lag)
    
    # =============================================
    # 8. 滚动统计
    # =============================================
    windows = [12, 24, 48, 96]
    
    for window in windows:
        df[f'target_rolling_mean_{window}'] = df[target_col].shift(1).rolling(window=window).mean()
        df[f'target_rolling_std_{window}'] = df[target_col].shift(1).rolling(window=window).std()
        df[f'target_rolling_min_{window}'] = df[target_col].shift(1).rolling(window=window).min()
        df[f'target_rolling_max_{window}'] = df[target_col].shift(1).rolling(window=window).max()
        
        df[f'return_rolling_mean_{window}'] = df['return_1'].shift(1).rolling(window=window).mean()
        df[f'return_rolling_std_{window}'] = df['return_1'].shift(1).rolling(window=window).std()
    
    # 偏度和峰度
    for window in [48, 96]:
        df[f'target_skew_{window}'] = df[target_col].shift(1).rolling(window=window).skew()
        df[f'target_kurt_{window}'] = df[target_col].shift(1).rolling(window=window).kurt()
    
    # =============================================
    # 9. 动量特征
    # =============================================
    for period in [4, 12, 24, 48]:
        df[f'momentum_{period}'] = df['Close'] - df['Close'].shift(period)
        df[f'ROC_{period}'] = (df['Close'] - df['Close'].shift(period)) / (df['Close'].shift(period) + 1e-10)
    
    return df

print("✅ 特征工程函数定义完成")

✅ 特征工程函数定义完成


In [6]:
# 创建训练集特征
print("创建训练集特征...")
train_featured = create_features(train_df.copy(), is_train=True)

print(f"\n原始列数: {train_df.shape[1]}")
print(f"特征后列数: {train_featured.shape[1]}")
print(f"新增特征数: {train_featured.shape[1] - train_df.shape[1]}")

创建训练集特征...

原始列数: 7
特征后列数: 153
新增特征数: 146


## 3. 数据准备

In [7]:
# 定义特征列
exclude_cols = ['Timestamp', 'Target', 'Open', 'High', 'Low', 'Close', 'Volume',
                'hour', 'day_of_week', 'day_of_month', 'month']

# 排除SMA, EMA原始值 (保留比率)
for window in [4, 12, 24, 48, 96]:
    exclude_cols.append(f'SMA_{window}')
    exclude_cols.append(f'EMA_{window}')

# 排除volume_ma原始值
for window in [4, 12, 24, 48, 96]:
    exclude_cols.append(f'volume_ma_{window}')

feature_cols = [col for col in train_featured.columns if col not in exclude_cols]
print(f"特征数量: {len(feature_cols)}")

特征数量: 127


In [8]:
# 处理无穷值和NaN
train_featured = train_featured.replace([np.inf, -np.inf], np.nan)

# 删除NaN行
valid_idx = train_featured[feature_cols + ['Target']].notna().all(axis=1)
train_clean = train_featured[valid_idx].reset_index(drop=True)
timestamps_clean = train_featured.loc[valid_idx, 'Timestamp'].reset_index(drop=True)

print(f"清洗前: {len(train_featured):,} 样本")
print(f"清洗后: {len(train_clean):,} 样本")
print(f"保留比例: {len(train_clean) / len(train_featured) * 100:.2f}%")

清洗前: 203,730 样本
清洗后: 203,526 样本
保留比例: 99.90%


In [9]:
# 准备特征和目标
X = train_clean[feature_cols].values.astype(np.float32)
y = train_clean['Target'].values.astype(np.float32)

# 时间序列分割 (80% 训练, 20% 验证)
val_ratio = 0.2
train_size = int(len(X) * (1 - val_ratio))

X_train = X[:train_size]
y_train = y[:train_size]
X_val = X[train_size:]
y_val = y[train_size:]

print(f"训练集: {X_train.shape[0]:,} 样本")
print(f"验证集: {X_val.shape[0]:,} 样本")
print(f"\n训练集时间: {timestamps_clean.iloc[0]} 到 {timestamps_clean.iloc[train_size-1]}")
print(f"验证集时间: {timestamps_clean.iloc[train_size]} 到 {timestamps_clean.iloc[-1]}")

训练集: 162,820 样本
验证集: 40,706 样本

训练集时间: 2020-01-02 00:15:00 到 2024-08-24 22:30:00
验证集时间: 2024-08-24 22:45:00 到 2025-10-23 23:15:00


## 4. LightGBM 模型训练

In [10]:
# LightGBM 参数
lgb_params = {
    'objective': 'regression',
    'metric': 'rmse',
    'boosting_type': 'gbdt',
    'num_leaves': 63,
    'max_depth': 8,
    'min_child_samples': 100,
    'learning_rate': 0.02,
    'feature_fraction': 0.7,
    'bagging_fraction': 0.8,
    'bagging_freq': 5,
    'reg_alpha': 0.3,
    'reg_lambda': 0.5,
    'verbose': -1,
    'random_state': 42,
    'n_jobs': -1,
    'force_col_wise': True
}

print("LightGBM 参数:")
for key, value in lgb_params.items():
    print(f"  {key}: {value}")

LightGBM 参数:
  objective: regression
  metric: rmse
  boosting_type: gbdt
  num_leaves: 63
  max_depth: 8
  min_child_samples: 100
  learning_rate: 0.02
  feature_fraction: 0.7
  bagging_fraction: 0.8
  bagging_freq: 5
  reg_alpha: 0.3
  reg_lambda: 0.5
  verbose: -1
  random_state: 42
  n_jobs: -1
  force_col_wise: True


In [11]:
# 创建数据集
train_data = lgb.Dataset(X_train, label=y_train, feature_name=feature_cols)
val_data = lgb.Dataset(X_val, label=y_val, reference=train_data, feature_name=feature_cols)

# 训练模型
print("开始训练LightGBM模型...\n")

evals_result = {}
model = lgb.train(
    lgb_params,
    train_data,
    num_boost_round=3000,
    valid_sets=[train_data, val_data],
    valid_names=['train', 'valid'],
    callbacks=[
        lgb.early_stopping(stopping_rounds=100),
        lgb.log_evaluation(period=200),
        lgb.record_evaluation(evals_result)
    ]
)

print(f"\n✅ 训练完成!")
print(f"最佳迭代次数: {model.best_iteration}")
print(f"最佳验证RMSE: {evals_result['valid']['rmse'][model.best_iteration-1]:.6f}")

开始训练LightGBM模型...

Training until validation scores don't improve for 100 rounds
[200]	train's rmse: 0.000989193	valid's rmse: 0.000290498
[400]	train's rmse: 0.000829604	valid's rmse: 0.000206105
[600]	train's rmse: 0.00078478	valid's rmse: 0.000193759
[800]	train's rmse: 0.000765437	valid's rmse: 0.00019151
[1000]	train's rmse: 0.000753805	valid's rmse: 0.000190726
Early stopping, best iteration is:
[970]	train's rmse: 0.000755577	valid's rmse: 0.000190513

✅ 训练完成!
最佳迭代次数: 970
最佳验证RMSE: 0.000191


## 5. 模型评估

In [12]:
# 验证集预测
y_val_pred = model.predict(X_val, num_iteration=model.best_iteration)

# 评估指标
rmse = np.sqrt(mean_squared_error(y_val, y_val_pred))
corr, _ = stats.pearsonr(y_val, y_val_pred)

print("=" * 60)
print("验证集评估结果:")
print("=" * 60)
print(f"  RMSE: {rmse:.6f}")
print(f"  Pearson相关系数: {corr:.6f}")
print("=" * 60)

验证集评估结果:
  RMSE: 0.000191
  Pearson相关系数: 0.996919


In [13]:
# 特征重要性
importance = model.feature_importance(importance_type='gain')
feature_importance_df = pd.DataFrame({
    'feature': feature_cols,
    'importance': importance
}).sort_values('importance', ascending=False)

print("Top 20 重要特征:")
print(feature_importance_df.head(20).to_string(index=False))

Top 20 重要特征:
              feature  importance
        target_diff_1   10.060475
       target_diff_24    8.543150
       target_diff_12    7.401908
        target_diff_4    6.543511
             return_1    1.682583
        target_lag_24    1.557057
        target_lag_12    1.302143
         target_lag_4    1.104912
         log_return_1    0.549237
         target_lag_1    0.254325
     price_change_pct    0.121386
         price_volume    0.102139
     close_open_ratio    0.089821
       high_low_ratio    0.066071
target_rolling_min_12    0.026377
target_rolling_max_24    0.023313
         price_change    0.016999
target_rolling_min_24    0.016657
target_rolling_max_12    0.016066
    close_EMA_ratio_4    0.015377


## 6. 测试集预测

In [14]:
# 获取历史数据用于特征计算
max_lag = 200
train_tail = train_df.tail(max_lag).copy()

# 合并历史数据和测试集
test_with_history = pd.concat([train_tail, test_df], ignore_index=True)

print(f"训练集尾部: {len(train_tail)} 行")
print(f"测试集: {len(test_df)} 行")
print(f"合并后: {len(test_with_history)} 行")

训练集尾部: 200 行
测试集: 2881 行
合并后: 3081 行


In [15]:
# 创建测试集特征
print("创建测试集特征...")
test_featured = create_features(test_with_history.copy(), is_train=False)

# 只保留测试集行
test_featured = test_featured.tail(len(test_df)).reset_index(drop=True)

print(f"测试集特征形状: {test_featured.shape}")

创建测试集特征...
测试集特征形状: (2881, 153)


In [16]:
# 确保所有特征可用
available_features = [col for col in feature_cols if col in test_featured.columns]
missing_features = set(feature_cols) - set(available_features)

if missing_features:
    print(f"缺失特征数: {len(missing_features)}")
    for feat in missing_features:
        test_featured[feat] = 0

# 准备测试特征
X_test = test_featured[feature_cols].values.astype(np.float32)

# 处理NaN和Inf
X_test = np.where(np.isinf(X_test), np.nan, X_test)
if np.isnan(X_test).any():
    print("处理测试集NaN值...")
    train_means = np.nanmean(X_train, axis=0)
    for i in range(X_test.shape[1]):
        mask = np.isnan(X_test[:, i])
        if mask.any():
            X_test[mask, i] = train_means[i] if not np.isnan(train_means[i]) else 0

print(f"测试集特征矩阵: {X_test.shape}")
print(f"NaN: {np.isnan(X_test).any()}, Inf: {np.isinf(X_test).any()}")

测试集特征矩阵: (2881, 127)
NaN: False, Inf: False


In [17]:
# 预测
print("进行预测...")
test_predictions = model.predict(X_test, num_iteration=model.best_iteration)

print(f"\n预测完成!")
print(f"预测数量: {len(test_predictions)}")
print(f"预测值范围: [{test_predictions.min():.6f}, {test_predictions.max():.6f}]")
print(f"预测值均值: {test_predictions.mean():.6f}")
print(f"预测值标准差: {test_predictions.std():.6f}")

进行预测...

预测完成!
预测数量: 2881
预测值范围: [-0.032826, 0.044293]
预测值均值: 0.004582
预测值标准差: 0.020700


## 7. 后处理与提交

In [18]:
# 后处理 - 标准化到训练分布
train_target_mean = y.mean()
train_target_std = y.std()
test_pred_mean = test_predictions.mean()
test_pred_std = test_predictions.std()

print(f"训练集Target: 均值={train_target_mean:.6f}, 标准差={train_target_std:.6f}")
print(f"测试集预测(原始): 均值={test_pred_mean:.6f}, 标准差={test_pred_std:.6f}")

# 标准化
test_predictions_normalized = (test_predictions - test_pred_mean) / test_pred_std * train_target_std + train_target_mean

print(f"测试集预测(标准化后): 均值={test_predictions_normalized.mean():.6f}, 标准差={test_predictions_normalized.std():.6f}")

训练集Target: 均值=0.000013, 标准差=0.003495
测试集预测(原始): 均值=0.004582, 标准差=0.020700
测试集预测(标准化后): 均值=0.000013, 标准差=0.003495


In [19]:
# 创建提交文件
submission_df = pd.DataFrame({
    'Timestamp': test_df['Timestamp'],
    'Prediction': test_predictions_normalized
})

# 保存
submission_dir = Path('../submissions')
submission_dir.mkdir(exist_ok=True)

submission_file = submission_dir / 'lgbm_2020_submission.csv'
submission_df.to_csv(submission_file, index=False)

print(f"✅ 提交文件已保存: {submission_file}")

print(f"\n提交文件预览:")
print(submission_df.head(10))
print("\n...")
print(submission_df.tail(10))

✅ 提交文件已保存: ..\submissions\lgbm_2020_submission.csv

提交文件预览:
            Timestamp  Prediction
0 2025-10-23 23:30:00   -0.001138
1 2025-10-23 23:45:00    0.002199
2 2025-10-24 00:00:00    0.002391
3 2025-10-24 00:15:00   -0.001228
4 2025-10-24 00:30:00    0.002425
5 2025-10-24 00:45:00    0.002409
6 2025-10-24 01:00:00    0.000564
7 2025-10-24 01:15:00   -0.001577
8 2025-10-24 01:30:00    0.005605
9 2025-10-24 01:45:00    0.005742

...
               Timestamp  Prediction
2871 2025-11-22 21:15:00   -0.000755
2872 2025-11-22 21:30:00    0.001576
2873 2025-11-22 21:45:00    0.001552
2874 2025-11-22 22:00:00    0.003573
2875 2025-11-22 22:15:00    0.003393
2876 2025-11-22 22:30:00    0.002299
2877 2025-11-22 22:45:00    0.005816
2878 2025-11-22 23:00:00    0.000591
2879 2025-11-22 23:15:00    0.000179
2880 2025-11-22 23:30:00    0.000238


In [20]:
# 提交统计
print("\n提交文件统计:")
print(submission_df['Prediction'].describe())


提交文件统计:
count    2881.000000
mean        0.000013
std         0.003495
min        -0.006302
25%        -0.002668
50%        -0.000501
75%         0.002448
max         0.006718
Name: Prediction, dtype: float64


## 8. 保存模型

In [21]:
# 保存模型
model_dir = Path('../models')
model_dir.mkdir(exist_ok=True)

model_file = model_dir / 'lgbm_2020_model.txt'
model.save_model(str(model_file))
print(f"✅ 模型已保存: {model_file}")

# 保存特征列表
feature_file = model_dir / 'lgbm_2020_features.txt'
with open(feature_file, 'w') as f:
    for feat in feature_cols:
        f.write(f"{feat}\n")
print(f"✅ 特征列表已保存: {feature_file}")

# 保存特征重要性
importance_file = model_dir / 'lgbm_2020_feature_importance.csv'
feature_importance_df.to_csv(importance_file, index=False)
print(f"✅ 特征重要性已保存: {importance_file}")

✅ 模型已保存: ..\models\lgbm_2020_model.txt
✅ 特征列表已保存: ..\models\lgbm_2020_features.txt
✅ 特征重要性已保存: ..\models\lgbm_2020_feature_importance.csv


## 总结

### 方案特点:
1. **仅使用2020年后数据** - 排除早期价格过低的历史数据
2. **单一LightGBM模型** - 简洁高效
3. **丰富的特征工程** - 技术指标、时间特征、滞后特征等
4. **标准化后处理** - 将预测值调整到训练分布