# 股票预测模型工作流 (最终版)

---

### **工作流说明**
1.  **阶段零 (Setup)**: 导入所有库、加载配置文件。
2.  **阶段一 (Data Pipeline)**: **独立运行**。负责获取、处理并保存所有数据。只有在数据源或特征逻辑变更时才需运行。
3.  **阶段二 (Model Pipeline)**: **独立运行**。负责加载已处理好的数据，并进行模型训练与评估。这是进行模型实验的主要区域。

**操作指南**: 首次运行时，请按顺序执行所有单元格。后续实验中，如果数据未变，只需先运行阶段零（导入与配置加载），然后跳过阶段一，从阶段二开始依次运行其余单元格。

## 0. 通用设置与导入

In [None]:
import os
import sys
import yaml
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from pathlib import Path
from tqdm.autonotebook import tqdm

# --- OpenCL environment variable ---
# Set PYOPENCL_CTX to '0' for this process before any project module loads.
os.environ.update(PYOPENCL_CTX='0')

# --- Matplotlib style ---
# Apply the style sheet first: rcParams set afterwards must win over it.
plt.style.use('seaborn-v0_8-whitegrid')
plt.rcParams.update({
    # The figures in this notebook use Chinese titles/labels, so a CJK font
    # is selected as the sans-serif family.
    'font.sans-serif': ['SimHei'],
    # Presumably keeps the minus sign renderable with that font — confirm.
    'axes.unicode_minus': False,
})
print("INFO: Matplotlib and Seaborn styles configured.")

# --- Robust project-module import ---
# Attempt the imports as-is first. If that fails, append the current working
# directory (assumed to be the project root) to sys.path and retry once.
# The import list appears a single time so the two attempts cannot drift
# apart; a failure on the retry is re-raised unchanged.
for _is_retry in (False, True):
    try:
        # All public functions used by the cells below.
        from data_process.get_data import initialize_apis, shutdown_apis
        from data_process.save_data import run_data_pipeline, get_processed_data_path
        from model_builders.build_models import run_training_for_ticker
        from model_builders.hpo_utils import run_hpo_for_ticker
    except ImportError as e:
        if _is_retry:
            # The path adjustment did not help; surface the real error.
            raise
        print(f"WARNING: Standard import failed: {e}. Adding project root to sys.path.")
        project_root = str(Path().resolve())
        if project_root not in sys.path:
            sys.path.append(project_root)
    else:
        if _is_retry:
            print("INFO: Project modules imported successfully after path adjustment.")
        else:
            print("INFO: Project modules imported successfully.")
        break

# --- Load the configuration file ---
CONFIG_PATH = 'configs/config.yaml'
try:
    with open(CONFIG_PATH, 'r', encoding='utf-8') as f:
        config = yaml.safe_load(f)
except FileNotFoundError:
    print(f"ERROR: Configuration file not found at '{CONFIG_PATH}'. Please check the path.")
    config = {}
else:
    # safe_load yields None for an empty file and may yield a list/scalar for
    # other documents; every later cell expects a (possibly empty) mapping.
    if isinstance(config, dict) and config:
        print(f"SUCCESS: Unified configuration file loaded from '{CONFIG_PATH}'.")
    else:
        print(f"ERROR: Configuration file '{CONFIG_PATH}' is empty or is not a YAML mapping.")
        config = {}

# --- Extract the core configuration sections (done once) ---
# Always bind these names, even when the config is empty, so later cells can
# reference them without raising NameError. `or` also covers keys that are
# present in the YAML but set to null.
global_settings = config.get('global_settings') or {}
strategy_config = config.get('strategy_config') or {}
hpo_config = config.get('hpo_config') or {}
default_model_params = config.get('default_model_params') or {}
stocks_to_process = config.get('stocks_to_process') or []

  from tqdm.autonotebook import tqdm


INFO: Matplotlib and Seaborn styles configured.
INFO: Project modules imported successfully.
SUCCESS: Unified configuration file loaded from 'configs/config.yaml'.


# **阶段一：数据准备与特征工程**

此单元格负责完整的ETL流程：登录API -> 处理所有股票数据 -> 保存到磁盘 -> 登出API。

In [None]:
# Stage 1 driver: initialise the data APIs, run the data pipeline from the
# config file, and always call shutdown_apis() afterwards.
print("--- Starting Stage 1: Data Preparation and Feature Engineering ---\n")
try:
    if not config:
        print("ERROR: Config is empty. Cannot start data preparation.")
    else:
        initialize_apis(config)
        run_data_pipeline(config_path=CONFIG_PATH)
        print("\n--- Stage 1 Finished: All data has been processed and saved. ---")
finally:
    # Runs on success, on failure, and when the config is empty. Per the
    # original author's note, shutdown_apis checks the login state itself
    # and resets its flag — verify in data_process.get_data.
    shutdown_apis()

--- Starting Stage 1: Data Preparation and Feature Engineering ---

INFO: Attempting to log in to Baostock...
login success!
INFO: Baostock API 登录成功。SDK版本: 00.8.90
INFO: 未在配置中提供有效的 Tushare Token。将跳过宏观数据获取。
开始执行数据管道协调任务...
将使用配置文件: configs/config.yaml

--- Starting Batch Feature Generation Process ---
Using config file: configs/config.yaml

--- Generating features for 贵州茅台 (600519.SH) ---
  - [1/7] 正在从本地缓存加载 sh.600519 的原始日线数据...
INFO: Starting feature calculation pipeline...
  - [Calculating Features] Running: Technical Indicators...
    - Calculated: ema with params {'length': 10}
    - Calculated: ema with params {'length': 30}
    - Calculated: rsi with params {'length': 14}
    - Calculated: macd with params {'fast': 12, 'slow': 26, 'signal': 9}
    - Calculated: bbands with params {'length': 20, 'std': 2}
  - [Calculating Features] Running: Calendar Features...
  - [Calculating Features] INFO: No candlestick patterns specified in config. Skipping Candlestick Patterns.
INFO: Feature

# **阶段二：模型训练与评估**

### 2.1 (可选) 超参数优化

In [None]:
# Optional hyper-parameter optimisation: run HPO for each configured ticker,
# average the best parameters across tickers, and write the result into the
# LGBM defaults used by the training cell.
RUN_HPO = True  # Set to True to run the optimisation, False to skip it.

# Read the HPO section straight from `config` so this cell does not raise
# NameError when the configuration failed to load in the setup cell.
_hpo_settings = (config or {}).get('hpo_config') or {}
HPO_TRIALS = _hpo_settings.get('n_trials', 50)

if RUN_HPO and config:
    hpo_tickers = _hpo_settings.get('tickers_for_hpo') or []

    if not hpo_tickers:
        print("INFO: No tickers specified for HPO in config file. Skipping HPO.")
    else:
        print(f"--- Starting HPO for tickers: {hpo_tickers} ---\n")
        model_type_for_hpo = 'lgbm'
        best_params_all = []

        for ticker in hpo_tickers:
            # .get() so an entry without a 'ticker' key is skipped, not fatal.
            stock_info = next((s for s in stocks_to_process if s.get('ticker') == ticker), None)
            if not stock_info:
                print(f"WARNING: Config for HPO ticker {ticker} not found in 'stocks_to_process'. Skipping.")
                continue

            keyword = stock_info.get('keyword', ticker)

            data_path = get_processed_data_path(stock_info, config)
            if not data_path.exists():
                print(f"ERROR: Processed data for HPO on {keyword} not found at {data_path}. Please run Stage 1 first. Skipping.")
                continue

            try:
                # NOTE(review): read_pickle must only be used on files this
                # project wrote itself (Stage 1 output), never untrusted data.
                df_loaded = pd.read_pickle(data_path)
                print(f"INFO: Loaded data for HPO on {keyword}. Shape: {df_loaded.shape}")
            except Exception as e:
                print(f"ERROR: Failed to load data for HPO on {keyword}: {e}. Skipping.")
                continue

            # Full configuration expected by the HPO routine, narrowed to the
            # single stock being optimised.
            hpo_run_config = {
                'global_settings': global_settings,
                'strategy_config': strategy_config,
                'default_model_params': default_model_params,
                'stocks_to_process': [stock_info],
                'hpo_config': _hpo_settings
            }

            best_params = run_hpo_for_ticker(
                df=df_loaded,
                ticker=ticker,
                config=hpo_run_config,
                model_type=model_type_for_hpo,
                n_trials=HPO_TRIALS
            )
            if best_params:
                best_params_all.append(best_params)

        if best_params_all:
            # NOTE(review): the recorded output of this cell reports best ICIR
            # scores in the 1e7 range, which looks like a near-zero denominator
            # in the objective (presumably inside run_hpo_for_ticker) — verify
            # before trusting the averaged parameters.
            # Average the per-ticker optima; numeric_only keeps the mean from
            # raising if a non-numeric parameter is ever added to the search.
            final_hpo_params = pd.DataFrame(best_params_all).mean(numeric_only=True).to_dict()
            # The mean turns integer parameters into floats; restore them.
            for int_param in ['num_leaves', 'min_child_samples']:
                if int_param in final_hpo_params:
                    final_hpo_params[int_param] = int(round(final_hpo_params[int_param]))

            # Merge into the existing LGBM defaults, creating the section if the
            # YAML did not define it, and keep `config` and the extracted
            # `default_model_params` pointing at the same data.
            lgbm_params = default_model_params.get('lgbm_params') or {}
            lgbm_params.update(final_hpo_params)
            default_model_params['lgbm_params'] = lgbm_params
            config['default_model_params'] = default_model_params

            print("SUCCESS: HPO finished. The following parameters will be used for subsequent LGBM training:")
            print(yaml.dump(default_model_params['lgbm_params']))
        else:
            print("WARNING: HPO produced no parameters. Models will be trained with parameters from the YAML file.")

else:
    print("INFO: Skipping HPO. Models will be trained with parameters from the YAML file.")

[I 2025-10-12 18:23:38,129] A new study created in memory with name: no-name-ef4cc385-c04b-433c-826a-b104c83eb35c


--- Starting HPO for tickers: ['603099.SH', '000100.SZ', '600519.SH'] ---

INFO: Loaded data for HPO on 长白山. Shape: (2622, 23)
INFO: HPO memory cache cleared.

--- Starting HPO for 长白山 (603099.SH) with 50 trials ---


  0%|          | 0/50 [00:00<?, ?it/s]

[I 2025-10-12 18:23:41,540] Trial 0 finished with value: 0.44661154978708617 and parameters: {'num_leaves': 25, 'learning_rate': 0.07969454818643935, 'min_child_samples': 47, 'feature_fraction': 0.8394633936788146, 'bagging_fraction': 0.6624074561769746, 'reg_alpha': 0.029375384576328288, 'reg_lambda': 0.014936568554617643}. Best is trial 0 with value: 0.44661154978708617.
[I 2025-10-12 18:23:45,152] Trial 1 finished with value: 4.745188331051218 and parameters: {'num_leaves': 45, 'learning_rate': 0.015930522616241012, 'min_child_samples': 46, 'feature_fraction': 0.608233797718321, 'bagging_fraction': 0.9879639408647978, 'reg_alpha': 3.142880890840109, 'reg_lambda': 0.04335281794951567}. Best is trial 1 with value: 4.745188331051218.
[I 2025-10-12 18:23:50,226] Trial 2 finished with value: 2.6022404681958244 and parameters: {'num_leaves': 17, 'learning_rate': 0.002327067708383781, 'min_child_samples': 25, 'feature_fraction': 0.8099025726528951, 'bagging_fraction': 0.7727780074568463, '

  return spearmanr(a, b)[0]


[I 2025-10-12 18:23:53,353] Trial 3 finished with value: 20446032.182117146 and parameters: {'num_leaves': 15, 'learning_rate': 0.00383962929980417, 'min_child_samples': 28, 'feature_fraction': 0.7824279936868144, 'bagging_fraction': 0.9140703845572055, 'reg_alpha': 0.039721107273819126, 'reg_lambda': 0.34890188454913873}. Best is trial 3 with value: 20446032.182117146.
[I 2025-10-12 18:24:01,376] Trial 4 finished with value: 8.131055487189634 and parameters: {'num_leaves': 34, 'learning_rate': 0.001238513729886093, 'min_child_samples': 40, 'feature_fraction': 0.6682096494749166, 'bagging_fraction': 0.6260206371941118, 'reg_alpha': 7.025166339242156, 'reg_lambda': 7.886714129990489}. Best is trial 3 with value: 20446032.182117146.
[I 2025-10-12 18:24:17,963] Trial 5 finished with value: 3.303323068089707 and parameters: {'num_leaves': 43, 'learning_rate': 0.0040665633135147945, 'min_child_samples': 14, 'feature_fraction': 0.8736932106048627, 'bagging_fraction': 0.7760609974958406, 'reg

  return spearmanr(a, b)[0]


[I 2025-10-12 18:24:23,679] Trial 8 finished with value: 50364769.07197171 and parameters: {'num_leaves': 13, 'learning_rate': 0.0024658447214487376, 'min_child_samples': 12, 'feature_fraction': 0.7301321323053057, 'bagging_fraction': 0.7554709158757928, 'reg_alpha': 0.06516990611177176, 'reg_lambda': 3.063462210622083}. Best is trial 8 with value: 50364769.07197171.
[I 2025-10-12 18:24:28,388] Trial 9 finished with value: 4.528171570176002 and parameters: {'num_leaves': 24, 'learning_rate': 0.0036464395589807202, 'min_child_samples': 37, 'feature_fraction': 0.6563696899899051, 'bagging_fraction': 0.9208787923016158, 'reg_alpha': 0.01673601016782578, 'reg_lambda': 9.133995846860973}. Best is trial 8 with value: 50364769.07197171.
[I 2025-10-12 18:24:31,193] Trial 10 finished with value: 3.4168149477572367 and parameters: {'num_leaves': 31, 'learning_rate': 0.012978384473886913, 'min_child_samples': 59, 'feature_fraction': 0.7387403565626488, 'bagging_fraction': 0.8560354009870224, 'reg

  return spearmanr(a, b)[0]


[I 2025-10-12 18:25:58,901] Trial 25 finished with value: 32530992.970753573 and parameters: {'num_leaves': 20, 'learning_rate': 0.0016268296613238982, 'min_child_samples': 29, 'feature_fraction': 0.8412415836469934, 'bagging_fraction': 0.622983208385484, 'reg_alpha': 0.045607691484005904, 'reg_lambda': 0.6982014150255583}. Best is trial 8 with value: 50364769.07197171.
[I 2025-10-12 18:26:04,809] Trial 26 finished with value: 5.23384152337885 and parameters: {'num_leaves': 20, 'learning_rate': 0.001443640502533287, 'min_child_samples': 33, 'feature_fraction': 0.8584152652373682, 'bagging_fraction': 0.6282572457368231, 'reg_alpha': 0.021669746908757108, 'reg_lambda': 0.917199958961051}. Best is trial 8 with value: 50364769.07197171.
[I 2025-10-12 18:26:08,585] Trial 27 finished with value: 2.9509851073252604 and parameters: {'num_leaves': 14, 'learning_rate': 0.0015523400449131272, 'min_child_samples': 27, 'feature_fraction': 0.9036059223781863, 'bagging_fraction': 0.6013536020636991, 

  return spearmanr(a, b)[0]


[I 2025-10-12 18:26:10,910] Trial 28 finished with value: -42617838.42370073 and parameters: {'num_leaves': 13, 'learning_rate': 0.0030400687921604223, 'min_child_samples': 42, 'feature_fraction': 0.7603692855571655, 'bagging_fraction': 0.8845781795379228, 'reg_alpha': 0.01821579635542878, 'reg_lambda': 1.2674610600747123}. Best is trial 8 with value: 50364769.07197171.


  return spearmanr(a, b)[0]


[I 2025-10-12 18:26:13,155] Trial 29 finished with value: 17224614.69597444 and parameters: {'num_leaves': 20, 'learning_rate': 0.0016958139803229764, 'min_child_samples': 59, 'feature_fraction': 0.8377121162156264, 'bagging_fraction': 0.6493683700479091, 'reg_alpha': 0.03234581690549684, 'reg_lambda': 3.540608725895572}. Best is trial 8 with value: 50364769.07197171.


  return spearmanr(a, b)[0]


[I 2025-10-12 18:26:18,311] Trial 30 finished with value: 36975889.93935105 and parameters: {'num_leaves': 27, 'learning_rate': 0.0010660930603616416, 'min_child_samples': 53, 'feature_fraction': 0.717346150366539, 'bagging_fraction': 0.95122757266606, 'reg_alpha': 0.032513393743865325, 'reg_lambda': 0.37887116825384176}. Best is trial 8 with value: 50364769.07197171.


  return spearmanr(a, b)[0]


[I 2025-10-12 18:26:23,318] Trial 31 finished with value: 43359833.41966472 and parameters: {'num_leaves': 27, 'learning_rate': 0.0012934521824038653, 'min_child_samples': 53, 'feature_fraction': 0.7166665700370984, 'bagging_fraction': 0.9472394772122598, 'reg_alpha': 0.03145911688666272, 'reg_lambda': 0.3426025352002481}. Best is trial 8 with value: 50364769.07197171.
[I 2025-10-12 18:26:27,766] Trial 32 finished with value: 8.130491866124151 and parameters: {'num_leaves': 28, 'learning_rate': 0.0010164772461987218, 'min_child_samples': 52, 'feature_fraction': 0.703557439944841, 'bagging_fraction': 0.984754048611218, 'reg_alpha': 0.028607418256467877, 'reg_lambda': 0.219346887283408}. Best is trial 8 with value: 50364769.07197171.
[I 2025-10-12 18:26:31,950] Trial 33 finished with value: 55.07329437213755 and parameters: {'num_leaves': 26, 'learning_rate': 0.0015834208433151988, 'min_child_samples': 54, 'feature_fraction': 0.6478174484741557, 'bagging_fraction': 0.9535539690171989, 'r

  return spearmanr(a, b)[0]


[I 2025-10-12 18:26:41,907] Trial 34 finished with value: 37150240.365559846 and parameters: {'num_leaves': 32, 'learning_rate': 0.0010287607642569636, 'min_child_samples': 49, 'feature_fraction': 0.7277671318316582, 'bagging_fraction': 0.9503554672507261, 'reg_alpha': 0.0722145139444157, 'reg_lambda': 0.5147712775998768}. Best is trial 8 with value: 50364769.07197171.


  return spearmanr(a, b)[0]


[I 2025-10-12 18:26:53,691] Trial 35 finished with value: 3863633.477025581 and parameters: {'num_leaves': 32, 'learning_rate': 0.0010029978956720565, 'min_child_samples': 47, 'feature_fraction': 0.7221166152931349, 'bagging_fraction': 0.999011400658367, 'reg_alpha': 0.07115086920530882, 'reg_lambda': 0.14111976901256626}. Best is trial 8 with value: 50364769.07197171.
[I 2025-10-12 18:26:58,725] Trial 36 finished with value: 6.637666718204715 and parameters: {'num_leaves': 33, 'learning_rate': 0.0012878389355779776, 'min_child_samples': 54, 'feature_fraction': 0.6808177986707222, 'bagging_fraction': 0.9453634451054127, 'reg_alpha': 0.023979708515065572, 'reg_lambda': 0.30827250987390326}. Best is trial 8 with value: 50364769.07197171.
[I 2025-10-12 18:27:00,596] Trial 37 finished with value: -0.12268847360700323 and parameters: {'num_leaves': 35, 'learning_rate': 0.0029067513057184848, 'min_child_samples': 44, 'feature_fraction': 0.6233068585260121, 'bagging_fraction': 0.9763228788135

  return spearmanr(a, b)[0]


[I 2025-10-12 18:27:14,710] Trial 40 finished with value: 40862485.775312126 and parameters: {'num_leaves': 30, 'learning_rate': 0.002423416710009879, 'min_child_samples': 57, 'feature_fraction': 0.7608837024072702, 'bagging_fraction': 0.8873960497987674, 'reg_alpha': 2.7529736572780705, 'reg_lambda': 0.5289254365487744}. Best is trial 8 with value: 50364769.07197171.


  return spearmanr(a, b)[0]


[I 2025-10-12 18:27:17,320] Trial 41 finished with value: 39450209.08111209 and parameters: {'num_leaves': 30, 'learning_rate': 0.002724730745114738, 'min_child_samples': 57, 'feature_fraction': 0.7545083631353052, 'bagging_fraction': 0.8910612922493566, 'reg_alpha': 4.247768436025432, 'reg_lambda': 0.3649785070475841}. Best is trial 8 with value: 50364769.07197171.


  return spearmanr(a, b)[0]


[I 2025-10-12 18:27:19,885] Trial 42 finished with value: 46700912.359973006 and parameters: {'num_leaves': 30, 'learning_rate': 0.002635385012902868, 'min_child_samples': 58, 'feature_fraction': 0.7607486952834577, 'bagging_fraction': 0.8817743965322397, 'reg_alpha': 4.25212066160426, 'reg_lambda': 0.2394852373293467}. Best is trial 8 with value: 50364769.07197171.


  return spearmanr(a, b)[0]


[I 2025-10-12 18:27:22,611] Trial 43 finished with value: 37371450.31083172 and parameters: {'num_leaves': 23, 'learning_rate': 0.002589103740519137, 'min_child_samples': 57, 'feature_fraction': 0.7520935190511249, 'bagging_fraction': 0.886889935672853, 'reg_alpha': 4.003485838855542, 'reg_lambda': 0.2669504314195018}. Best is trial 8 with value: 50364769.07197171.


  return spearmanr(a, b)[0]


[I 2025-10-12 18:27:24,525] Trial 44 finished with value: 20931953.095491894 and parameters: {'num_leaves': 37, 'learning_rate': 0.004857965097032927, 'min_child_samples': 60, 'feature_fraction': 0.7633526470621793, 'bagging_fraction': 0.8418054916866987, 'reg_alpha': 3.307926357251946, 'reg_lambda': 0.18413994411420678}. Best is trial 8 with value: 50364769.07197171.
[I 2025-10-12 18:27:27,219] Trial 45 finished with value: 10.885878316187767 and parameters: {'num_leaves': 30, 'learning_rate': 0.058296961141996534, 'min_child_samples': 57, 'feature_fraction': 0.771153097163323, 'bagging_fraction': 0.8865361398993207, 'reg_alpha': 1.6370354012300028, 'reg_lambda': 0.08598392324684694}. Best is trial 8 with value: 50364769.07197171.
[I 2025-10-12 18:27:29,298] Trial 46 finished with value: 1.206353567468527 and parameters: {'num_leaves': 24, 'learning_rate': 0.003484948027883734, 'min_child_samples': 56, 'feature_fraction': 0.8123499972988603, 'bagging_fraction': 0.7760397148357534, 're

  return spearmanr(a, b)[0]


[I 2025-10-12 18:27:33,210] Trial 47 finished with value: 38224383.5587146 and parameters: {'num_leaves': 30, 'learning_rate': 0.0024863721393705144, 'min_child_samples': 51, 'feature_fraction': 0.7497970802227766, 'bagging_fraction': 0.8706862266940255, 'reg_alpha': 1.9114384099872999, 'reg_lambda': 1.2532158088265213}. Best is trial 8 with value: 50364769.07197171.
[I 2025-10-12 18:27:35,299] Trial 48 finished with value: 16.587942490679794 and parameters: {'num_leaves': 34, 'learning_rate': 0.0047912430847702305, 'min_child_samples': 46, 'feature_fraction': 0.6710559300757173, 'bagging_fraction': 0.9161449094502747, 'reg_alpha': 6.523077884334183, 'reg_lambda': 0.577594142496869}. Best is trial 8 with value: 50364769.07197171.


  return spearmanr(a, b)[0]
[I 2025-10-12 18:27:37,134] A new study created in memory with name: no-name-4d1c917e-f66a-4f65-b29d-3d633df92c8e


[I 2025-10-12 18:27:37,127] Trial 49 finished with value: 63646846.74585483 and parameters: {'num_leaves': 17, 'learning_rate': 0.00198276537521263, 'min_child_samples': 58, 'feature_fraction': 0.7355597564812602, 'bagging_fraction': 0.8956574698845071, 'reg_alpha': 0.5439691017598229, 'reg_lambda': 0.13550092029907773}. Best is trial 49 with value: 63646846.74585483.

--- HPO Results for 长白山 (603099.SH) ---
Best Score (ICIR): 63646846.7459
Best Parameters:
  num_leaves: 17
  learning_rate: 0.00198276537521263
  min_child_samples: 58
  feature_fraction: 0.7355597564812602
  bagging_fraction: 0.8956574698845071
  reg_alpha: 0.5439691017598229
  reg_lambda: 0.13550092029907773
INFO: Loaded data for HPO on TCL科技. Shape: (4854, 23)
INFO: HPO memory cache cleared.

--- Starting HPO for TCL科技 (000100.SZ) with 50 trials ---


  0%|          | 0/50 [00:00<?, ?it/s]

[I 2025-10-12 18:27:41,266] Trial 0 finished with value: 4.22665386799851 and parameters: {'num_leaves': 25, 'learning_rate': 0.07969454818643935, 'min_child_samples': 47, 'feature_fraction': 0.8394633936788146, 'bagging_fraction': 0.6624074561769746, 'reg_alpha': 0.029375384576328288, 'reg_lambda': 0.014936568554617643}. Best is trial 0 with value: 4.22665386799851.
[I 2025-10-12 18:27:58,040] Trial 1 finished with value: 15.146619080670574 and parameters: {'num_leaves': 45, 'learning_rate': 0.015930522616241012, 'min_child_samples': 46, 'feature_fraction': 0.608233797718321, 'bagging_fraction': 0.9879639408647978, 'reg_alpha': 3.142880890840109, 'reg_lambda': 0.04335281794951567}. Best is trial 1 with value: 15.146619080670574.
[I 2025-10-12 18:28:22,671] Trial 2 finished with value: 7.6532564976273765 and parameters: {'num_leaves': 17, 'learning_rate': 0.002327067708383781, 'min_child_samples': 25, 'feature_fraction': 0.8099025726528951, 'bagging_fraction': 0.7727780074568463, 'reg_

### 2.2 模型训练

In [None]:
# Stage 2 training: for every configured stock, load its processed data from
# disk and train each configured model type, collecting the returned IC
# history frames in `all_ic_history` for the evaluation cell.
FORCE_RETRAIN = True
all_ic_history = []

# A real newline here: the previous "\\n" printed a literal backslash-n.
print("--- Starting Stage 2: Model Training ---\n")
if config and stocks_to_process:
    models_to_train = global_settings.get('models_to_train', ['lgbm', 'lstm'])

    stock_iterator = tqdm(stocks_to_process, desc="Processing Stocks")

    for stock_info in stock_iterator:
        ticker = stock_info.get('ticker')
        # Skip malformed entries before touching the progress bar, so it
        # never shows "Processing None".
        if not ticker:
            continue
        keyword = stock_info.get('keyword', ticker)

        # Show the stock currently being processed in the progress bar.
        stock_iterator.set_description(f"Processing {keyword}")

        # 1. Locate and load this stock's processed data from disk.
        data_path = get_processed_data_path(stock_info, config)
        if not data_path.exists():
            print(f"\nERROR: Processed data for {keyword} not found at {data_path}. Please run Stage 1 first. Skipping.")
            continue

        try:
            # NOTE(review): read_pickle must only be used on files this
            # project wrote itself (Stage 1 output), never untrusted data.
            df_loaded = pd.read_pickle(data_path)
        except Exception as e:
            print(f"\nERROR: Failed to load data for {keyword}: {e}. Skipping.")
            continue

        # 2. Train every requested model type on this stock.
        for model_type in models_to_train:
            # Configuration passed to the training function, narrowed to the
            # single stock being trained.
            run_config = {
                'global_settings': global_settings,
                'strategy_config': strategy_config,
                'default_model_params': default_model_params,
                'stocks_to_process': [stock_info]
            }

            ic_history = run_training_for_ticker(
                df=df_loaded,
                ticker=ticker,
                model_type=model_type,
                config=run_config,
                force_retrain=FORCE_RETRAIN,
                keyword=keyword
            )

            # Keep only non-empty results for the aggregation step.
            if ic_history is not None and not ic_history.empty:
                all_ic_history.append(ic_history)
else:
    print("ERROR: Config is empty or 'stocks_to_process' list is missing/empty. Cannot start training.")

### 2.3 结果聚合、评估与可视化

In [None]:
# Aggregate the per-stock / per-model IC histories collected during training,
# show a summary table (mean, std, ICIR of rank IC) and draw two plots.
print("\n--- Stage 2 Finished: Aggregating and Visualizing Results ---")
if all_ic_history:
    full_ic_df = pd.concat(all_ic_history)
    # Map ticker -> display name. Entries without a ticker are ignored (the
    # training cell skips them too); rows whose ticker has no usable name fall
    # back to the ticker itself so they are not dropped by the groupby below.
    name_by_ticker = {
        s['ticker']: s.get('keyword', s['ticker'])
        for s in stocks_to_process
        if s.get('ticker')
    }
    full_ic_df['ticker_name'] = full_ic_df['ticker'].map(name_by_ticker).fillna(full_ic_df['ticker'])

    # Aggregate the evaluation results.
    evaluation_summary = full_ic_df.groupby(['ticker_name', 'model_type'])['rank_ic'].agg(['mean', 'std']).reset_index()
    # ICIR = mean / std. A zero std would give +/-inf and wreck the colour
    # gradient and the bar chart, so treat it as "undefined" (NaN) instead.
    safe_std = evaluation_summary['std'].replace(0, float('nan'))
    evaluation_summary['icir'] = evaluation_summary['mean'] / safe_std

    # --- 1. Print and display the evaluation table ---
    print("\n--- Model Performance Evaluation Summary ---")
    # `display` is provided by the notebook environment.
    display(evaluation_summary.style.format({
        'mean': '{:.4f}', 'std': '{:.4f}', 'icir': '{:.4f}'
    }).background_gradient(cmap='viridis', subset=['icir']))

    # --- 2. ICIR comparison bar chart ---
    plt.figure(figsize=(12, 6))
    sns.barplot(data=evaluation_summary, x='ticker_name', y='icir', hue='model_type')
    plt.title('模型信息比率 (ICIR) 对比', fontsize=16)
    plt.xlabel('股票', fontsize=12)
    plt.ylabel('ICIR (信息比率)', fontsize=12)
    plt.axhline(0, color='grey', linestyle='--')
    # Reference line the author labels as the "good" ICIR level.
    plt.axhline(0.5, color='red', linestyle='--', label='ICIR=0.5 (良好)')
    plt.xticks(rotation=45)
    plt.legend()
    plt.tight_layout()
    plt.show()

    # --- 3. Cumulative rank-IC curves ---
    plot_df = full_ic_df.copy()
    plot_df['date'] = pd.to_datetime(plot_df['date'])
    # Sort chronologically first so the per-group cumulative sum runs in
    # date order.
    plot_df.sort_values('date', inplace=True)
    plot_df['cumulative_ic'] = plot_df.groupby(['ticker_name', 'model_type'])['rank_ic'].cumsum()

    plt.figure(figsize=(14, 8))
    sns.lineplot(data=plot_df, x='date', y='cumulative_ic', hue='ticker_name', style='model_type', marker='o', markersize=4, linestyle='--')
    plt.title('模型累积 Rank IC 曲线', fontsize=16)
    plt.xlabel('日期', fontsize=12)
    plt.ylabel('累积 Rank IC', fontsize=12)
    plt.legend(title='股票/模型')
    plt.tight_layout()
    plt.show()

else:
    print("\nWARNING: No IC history was generated during training. Skipping aggregation and evaluation.")