# 股票预测模型工作流

---
### 工作流说明
1.  **阶段零 (Setup)**: 导入库、加载配置。
2.  **阶段一 (Data Pipeline)**: 独立运行。负责处理并保存数据，生成 L2 特征数据缓存。
3.  **阶段二 (Model Pipeline)**: 独立运行。包含以下子步骤：
    - **2.1 (预处理)**: 智能地加载或生成 L3 预处理数据缓存。
    - **2.2 (HPO)**: 自动调参（可选，由 `RUN_HPO` 开关控制）。
    - **2.3 (模型训练)**: 使用 L3 缓存进行高效的模型训练。
    - **2.3.5 (融合模型训练)**: 为每只股票训练融合元模型。
    - **2.4 (评估)**: 对训练结果进行聚合与可视化。

## 0. 通用设置与导入

In [1]:
import os, sys, yaml, torch, joblib, numpy as np, pandas as pd, seaborn as sns, matplotlib.pyplot as plt
from pathlib import Path
from tqdm.autonotebook import tqdm
from sklearn.preprocessing import StandardScaler

# Process-wide environment and plotting defaults for the notebook.
# PYOPENCL_CTX is exported before any training code runs
# (presumably so OpenCL-backed libraries pick device 0 without prompting -- TODO confirm).
os.environ.update({'PYOPENCL_CTX': '0'})

# Apply the seaborn grid theme first, then override the font settings on top of it.
plt.style.use('seaborn-v0_8-whitegrid')
plt.rcParams.update({
    'font.sans-serif': ['SimHei'],   # CJK-capable font for the Chinese titles/labels used below
    'axes.unicode_minus': False,     # draw the minus sign as ASCII '-' instead of U+2212
})

# Import the project-local pipeline entry points used by the cells below.
# If the packages are not importable, append the current working directory to
# sys.path and retry once.
try:
    from data_process.get_data import initialize_apis, shutdown_apis
    from data_process.save_data import run_data_pipeline, get_processed_data_path
    from model_builders.build_models import run_training_for_ticker, _walk_forward_split
    from model_builders.hpo_utils import run_hpo_for_ticker
    from model_builders.model_fuser import ModelFuser
    from model_builders.lstm_builder import LSTMBuilder
    print("INFO: 项目模型导入成功.")
except ImportError as e:
    print(f"WARNNING: 导入失败: {e}. 正在添加项目根目录...")
    # Path().resolve() is the current working directory; it is appended only when
    # it is not already on sys.path.
    project_root = str(Path().resolve()); sys.path.append(project_root) if project_root not in sys.path else None
    # Second attempt with the same import list. An ImportError raised here is not
    # caught and propagates to the caller.
    from data_process.get_data import initialize_apis, shutdown_apis
    from data_process.save_data import run_data_pipeline, get_processed_data_path
    from model_builders.build_models import run_training_for_ticker, _walk_forward_split
    from model_builders.hpo_utils import run_hpo_for_ticker
    from model_builders.model_fuser import ModelFuser
    from model_builders.lstm_builder import LSTMBuilder
    print("INFO: 导入成功.")

CONFIG_PATH = 'configs/config.yaml'

# Load the YAML configuration. A missing file, or an empty file (for which
# yaml.safe_load returns None), both result in an empty dict, so every later
# `if config:` guard behaves the same way in either case.
try:
    with open(CONFIG_PATH, 'r', encoding='utf-8') as f:
        config = yaml.safe_load(f) or {}
    print(f"SUCCESS: 从 '{CONFIG_PATH}' 加载 Config.")
except FileNotFoundError:
    print("ERROR: 未找到 Config.")
    config = {}

# Always bind the section shortcuts, even when the config is empty: later cells
# read these names unconditionally and would otherwise fail with NameError
# before reaching their own `if config` checks. `or {}` / `or []` also covers
# sections that are present in the YAML but set to null.
global_settings = config.get('global_settings') or {}
strategy_config = config.get('strategy_config') or {}
hpo_config = config.get('hpo_config') or {}
default_model_params = config.get('default_model_params') or {}
stocks_to_process = config.get('stocks_to_process') or []

  from tqdm.autonotebook import tqdm


INFO: 项目模型导入成功.
SUCCESS: 从 'configs/config.yaml' 加载 Config.


# **阶段一：数据准备与特征工程**

In [None]:
# Stage 1: initialise the data APIs and run the data pipeline against the
# config file. shutdown_apis() runs in every case, including when the config is
# empty or the pipeline raises.
try:
    if not config:
        print("ERROR: Config 为空.")
    else:
        initialize_apis(config)
        run_data_pipeline(config_path=CONFIG_PATH)
finally:
    shutdown_apis()

--- 开始步骤 1: 数据准备 ---

INFO: 尝试登陆 Baostock...
login success!
INFO: Baostock API 登录成功。SDK版本: 00.8.90
INFO: 未在配置中提供有效的 Tushare Token。将跳过宏观数据获取。
开始执行数据管道协调任务...
将使用配置文件: configs/config.yaml
INFO: 特征文件已存在于 data\processed\000001.SZ\None_to_2025-09-30\features_3426a81a0a66.pkl，跳过 平安银行 的数据处理。
INFO: 特征文件已存在于 data\processed\000100.SZ\None_to_2025-09-30\features_a496eedfeef4.pkl，跳过 TCL科技 的数据处理。
INFO: 特征文件已存在于 data\processed\000426.SZ\None_to_2025-09-30\features_7735d66ba2d8.pkl，跳过 兴业矿业 的数据处理。
INFO: 特征文件已存在于 data\processed\002083.SZ\None_to_2025-09-30\features_31355922e534.pkl，跳过 孚日股份 的数据处理。
INFO: 特征文件已存在于 data\processed\000150.SZ\None_to_2025-09-30\features_d3643490c4b0.pkl，跳过 宜华健康 的数据处理。
INFO: 特征文件已存在于 data\processed\300013.SZ\None_to_2025-09-30\features_a496eedfeef4.pkl，跳过 新宁物流 的数据处理。
INFO: 特征文件已存在于 data\processed\300242.SZ\None_to_2025-09-30\features_4f6e73a5f824.pkl，跳过 佳云科技 的数据处理。
INFO: 特征文件已存在于 data\processed\002006.SZ\None_to_2025-09-30\features_665dc9104f40.pkl，跳过 精功科技 的数据处理。
INFO: 特征文件已存在

# **阶段二：模型训练与评估**

### 2.1 数据预加载与全局预处理 (L3 缓存)

In [3]:
FORCE_REPROCESS = True     # True: always rebuild the L3 cache; False: reuse the cache file when it exists.

# In-memory L3 cache, keyed by ticker. Each entry holds the full L2 frame plus
# the per-fold pre-processed inputs for LGBM and LSTM.
global_data_cache = {}

L3_CACHE_DIR = Path(global_settings.get('output_dir', 'data/processed'))
L3_CACHE_DIR.mkdir(parents=True, exist_ok=True)
L3_CACHE_PATH = L3_CACHE_DIR / "_preprocessed_cache.joblib"

# --- Step 1: try to reuse the on-disk cache ---
if L3_CACHE_PATH.exists() and not FORCE_REPROCESS:
    print(f"INFO: 已找到 L3 缓存. 正在从 {L3_CACHE_PATH} 加载...")
    try:
        # NOTE: joblib.load unpickles arbitrary objects; only load cache files
        # written by this project.
        global_data_cache = joblib.load(L3_CACHE_PATH)
        print("SUCCESS: 已将 L3 缓存加载入内存.")
    except Exception as e:
        # Best effort: a corrupt or incompatible cache falls back to a rebuild.
        print(f"WARNNING: 加载 L3 缓存失败: {e}. 将重新预处理数据.")
        global_data_cache = {}

# --- Step 2: rebuild the cache when nothing usable was loaded ---
if not global_data_cache:
    print("INFO: 未找到 L3 缓存或为空. 开始重新预处理数据...\n")
    if config and stocks_to_process:
        lstm_builder_for_preprocessing = LSTMBuilder(config)

        # Settings that are identical for every stock and fold are read once.
        label_col = global_settings.get('label_column', 'label_alpha')
        lstm_in_models_to_train = 'lstm' in global_settings.get('models_to_train', [])

        for stock_info in tqdm(stocks_to_process, desc="Pre-processing Stocks"):
            ticker = stock_info.get('ticker')
            keyword = stock_info.get('keyword', ticker)
            if not ticker:
                continue

            data_path = get_processed_data_path(stock_info, config)
            if not data_path.exists():
                print(f"\nERROR: 未找到 {keyword} 的 L2 数据, 跳过预处理.")
                continue

            # NOTE: read_pickle executes pickle payloads; the file is expected
            # to be the L2 feature file produced by stage 1.
            df = pd.read_pickle(data_path)
            df.index.name = 'date'

            folds = _walk_forward_split(df, strategy_config)
            if not folds:
                print(f"\nWARNNING: 未为 {keyword} 生成 folds. 跳过预处理.")
                continue

            preprocessed_folds_lgbm, preprocessed_folds_lstm = [], []
            # Model inputs: every column except the label and any 'future_*' column.
            features_for_model = [c for c in df.columns if c != label_col and not c.startswith('future_')]

            # Hierarchical override: a per-stock 'use_lstm' wins; when it is
            # absent (None) the global 'use_lstm_globally' switch applies.
            # This does not depend on the fold, so it is resolved once per stock.
            use_lstm_for_this_stock = stock_info.get('use_lstm')
            if use_lstm_for_this_stock is None:
                use_lstm_for_this_stock = global_settings.get('use_lstm_globally', True)
            build_lstm_folds = lstm_in_models_to_train and use_lstm_for_this_stock

            if build_lstm_folds:
                lstm_seq_len = lstm_builder_for_preprocessing.sequence_length
                lstm_precision = default_model_params.get('lstm_params', {}).get('precision', 32)
                torch_dtype = torch.float16 if lstm_precision == 16 else torch.float32

            for train_df, val_df in folds:
                # ---- LGBM inputs: scaler is fitted on the training window only ----
                X_train_model, y_train = train_df[features_for_model], train_df[label_col]
                X_val_model, y_val = val_df[features_for_model], val_df[label_col]

                scaler_lgbm = StandardScaler()
                X_train_scaled = pd.DataFrame(
                    scaler_lgbm.fit_transform(X_train_model),
                    index=X_train_model.index,
                    columns=features_for_model,
                )
                X_val_scaled = pd.DataFrame(
                    scaler_lgbm.transform(X_val_model),
                    index=X_val_model.index,
                    columns=features_for_model,
                )
                preprocessed_folds_lgbm.append({
                    'X_train_scaled': X_train_scaled,
                    'y_train': y_train,
                    'X_val_scaled': X_val_scaled,
                    'y_val': y_val,
                })

                # ---- LSTM inputs ----
                # Folds whose training window is shorter than one sequence are
                # skipped for LSTM only, so the LSTM fold list may end up
                # shorter than the LGBM fold list.
                if not build_lstm_folds or len(train_df) < lstm_seq_len:
                    continue

                # Prepend the tail of the training window to the validation
                # window before sequences are built from it.
                train_history_for_val = train_df.iloc[-lstm_seq_len:]
                combined_df_for_lstm_val = pd.concat([train_history_for_val, val_df])

                scaler_lstm = StandardScaler()
                train_df_scaled = train_df.copy()
                combined_df_for_lstm_val_scaled = combined_df_for_lstm_val.copy()
                train_df_scaled[features_for_model] = scaler_lstm.fit_transform(train_df[features_for_model])
                combined_df_for_lstm_val_scaled[features_for_model] = scaler_lstm.transform(
                    combined_df_for_lstm_val[features_for_model]
                )

                X_train_seq, y_train_seq, _ = lstm_builder_for_preprocessing._create_sequences(
                    train_df_scaled, features_for_model
                )
                X_val_seq, y_val_seq, dates_val_seq = lstm_builder_for_preprocessing._create_sequences(
                    combined_df_for_lstm_val_scaled, features_for_model
                )

                preprocessed_folds_lstm.append({
                    'X_train_tensor': torch.from_numpy(X_train_seq).to(dtype=torch_dtype),
                    'y_train_tensor': torch.from_numpy(y_train_seq).unsqueeze(1).to(dtype=torch_dtype),
                    'X_val_tensor': torch.from_numpy(X_val_seq).to(dtype=torch_dtype),
                    'y_val_tensor': torch.from_numpy(y_val_seq).unsqueeze(1).to(dtype=torch_dtype),
                    'y_val_seq': y_val_seq,
                    'dates_val_seq': dates_val_seq,
                })

            global_data_cache[ticker] = {
                'full_df': df,
                'lgbm_folds': preprocessed_folds_lgbm,
                'lstm_folds': preprocessed_folds_lstm,
            }
            print(f"  - 已为  {keyword} 缓存 {len(preprocessed_folds_lgbm)} 个 LGBM folds 和 {len(preprocessed_folds_lstm)} 个 LSTM folds.")

        # --- Step 3: persist the rebuilt cache (best effort) ---
        print(f"\nINFO: 预处理完成. 正在将 L3 缓存保存至 {L3_CACHE_PATH}...")
        try:
            joblib.dump(global_data_cache, L3_CACHE_PATH)
            print("SUCCESS: L3 缓存已保存.")
        except Exception as e:
            print(f"ERROR: 保存 L3 缓存失败: {e}")

INFO: L3 cache not found or is empty. Starting pre-processing...



Pre-processing Stocks:   0%|          | 0/10 [00:00<?, ?it/s]

  - Cached 104 folds for LGBM and 104 folds for LSTM for 平安银行.
  - Cached 98 folds for LGBM and 98 folds for LSTM for TCL科技.
  - Cached 95 folds for LGBM and 95 folds for LSTM for 兴业矿业.
  - Cached 104 folds for LGBM and 104 folds for LSTM for 孚日股份.
  - Cached 76 folds for LGBM and 76 folds for LSTM for 宜华健康.
  - Cached 98 folds for LGBM and 98 folds for LSTM for 新宁物流.
  - Cached 94 folds for LGBM and 94 folds for LSTM for 佳云科技.

ERROR: L2 data for ST南化 not found. Skipping pre-processing.
  - Cached 93 folds for LGBM and 93 folds for LSTM for 精功科技.
  - Cached 94 folds for LGBM and 94 folds for LSTM for 佳云科技.

INFO: Pre-processing finished. Saving L3 cache to data\processed\_preprocessed_cache.joblib...
SUCCESS: L3 cache saved.

--- Stage 2.1 Finished: All data is cached in memory. ---


### 2.2 超参数优化

In [4]:
RUN_HPO = False # Set to True to run hyper-parameter optimisation, False to skip it.

if RUN_HPO and config:
    MODELS_FOR_HPO = ['lgbm', 'lstm']

    hpo_tickers = hpo_config.get('tickers_for_hpo', [])

    if not hpo_tickers:
        print("INFO: 在配置文件中未指定用于 HPO 的股票，跳过此步骤。")
    elif 'global_data_cache' not in locals() or not global_data_cache:
        print("ERROR: 全局数据缓存 (global_data_cache) 为空。请先成功运行 2.1 预处理单元格。")
    else:
        print(f"--- 开始为模型 {MODELS_FOR_HPO} 和股票 {hpo_tickers} 进行超参数优化 ---\n")

        # One full HPO pass per model type.
        for model_type_for_hpo in MODELS_FOR_HPO:
            print("\n" + "#"*80)
            print(f"# 开始为模型 [{model_type_for_hpo.upper()}] 进行 HPO")
            print("#"*80)

            hpo_results_list = []

            # A model-specific 'hpo_num_eval_folds' overrides the global one; default is 2.
            model_hpo_config = hpo_config.get(f'{model_type_for_hpo}_hpo_config', {})
            num_eval_folds = model_hpo_config.get('hpo_num_eval_folds', hpo_config.get('hpo_num_eval_folds', 2))

            for ticker in hpo_tickers:
                # .get keeps entries without a 'ticker' key from raising KeyError,
                # matching how the other cells read stock entries.
                stock_info = next((s for s in stocks_to_process if s.get('ticker') == ticker), None)
                if not stock_info:
                    print(f"WARNNING: 未在 'stocks_to_process' 中找到 HPO 股票 {ticker} 的配置。跳过。")
                    continue

                keyword = stock_info.get('keyword', ticker)

                # Per-stock 'use_lstm' overrides the global 'use_lstm_globally' switch.
                use_lstm_for_this_stock = stock_info.get('use_lstm')
                if use_lstm_for_this_stock is None:
                    use_lstm_for_this_stock = global_settings.get('use_lstm_globally', True)

                if model_type_for_hpo == 'lstm' and not use_lstm_for_this_stock:
                    print(f"\nINFO: {keyword} 已配置为不使用 LSTM，跳过 LSTM 的 HPO。")
                    continue

                if ticker not in global_data_cache:
                    print(f"ERROR: 预处理数据缓存中未找到 {keyword} 的数据。跳过。")
                    continue

                all_preprocessed_folds = global_data_cache[ticker].get(f'{model_type_for_hpo}_folds', [])
                if not all_preprocessed_folds:
                    print(f"WARNNING: 缓存中未找到 {keyword} 的 '{model_type_for_hpo}' 预处理数据。跳过 HPO。")
                    continue

                # Evaluate on the most recent folds only.
                # NOTE: with num_eval_folds == 0 the slice [-0:] selects ALL folds.
                hpo_folds_data = all_preprocessed_folds[-num_eval_folds:]

                print(f"\nINFO: 已为 {keyword} 加载最后 {len(hpo_folds_data)} 个 folds 用于 {model_type_for_hpo.upper()} HPO。")

                # Config view restricted to the single stock being optimised.
                hpo_run_config = {
                    'global_settings': global_settings, 'strategy_config': strategy_config,
                    'default_model_params': default_model_params, 'stocks_to_process': [stock_info],
                    'hpo_config': hpo_config
                }

                best_params, best_value = run_hpo_for_ticker(
                    preprocessed_folds=hpo_folds_data,
                    ticker=ticker,
                    config=hpo_run_config,
                    model_type=model_type_for_hpo
                )

                if best_params and best_value is not None:
                    hpo_results_list.append({'ticker': ticker, 'keyword': keyword, 'best_score': best_value, **best_params})

            if hpo_results_list:
                hpo_log_dir = Path("hpo_logs"); hpo_log_dir.mkdir(exist_ok=True)
                hpo_best_results_path = hpo_log_dir / f"hpo_best_results_{model_type_for_hpo}.csv"

                current_hpo_df = pd.DataFrame(hpo_results_list).set_index('ticker')

                # Merge this run into the persisted leaderboard: a row is replaced
                # only when the ticker is new or its score improved.
                if hpo_best_results_path.exists():
                    print(f"\nINFO: 正在加载 [{model_type_for_hpo.upper()}] 的历史最佳 HPO 结果...")
                    historical_best_df = pd.read_csv(hpo_best_results_path).set_index('ticker')

                    for ticker, current_row in current_hpo_df.iterrows():
                        is_known_ticker = ticker in historical_best_df.index
                        if not is_known_ticker or current_row['best_score'] > historical_best_df.loc[ticker, 'best_score']:
                            keyword = current_row.get('keyword', ticker)
                            if is_known_ticker:
                                historical_score_text = f"{historical_best_df.loc[ticker, 'best_score']:.4f}"
                            else:
                                historical_score_text = 'N/A'
                            print(f"  - 新纪录! [{model_type_for_hpo.upper()}] {keyword} 的最佳分数从 {historical_score_text} 提升至 {current_row['best_score']:.4f}.")
                            historical_best_df.loc[ticker] = current_row
                    final_best_df = historical_best_df
                else:
                    print(f"\nINFO: 未找到 [{model_type_for_hpo.upper()}] 的历史 HPO 结果，将本次结果作为初始最佳记录。")
                    final_best_df = current_hpo_df

                final_best_df.to_csv(hpo_best_results_path)
                print(f"SUCCESS: 最新的 [{model_type_for_hpo.upper()}] HPO 冠军榜已保存至 {hpo_best_results_path}")

                # Chinese display names for the leaderboard columns.
                PARAM_MAP_CN = {'best_score': '最佳分数 (ICIR)', 'keyword': '股票名称', 'num_leaves': '叶子节点数', 'learning_rate': '学习率', 'min_child_samples': '叶节点最小样本数', 'feature_fraction': '特征采样比例', 'bagging_fraction': '数据采样比例', 'reg_alpha': 'L1正则化', 'reg_lambda': 'L2正则化', 'units_1': '隐藏层1单元数', 'units_2': '隐藏层2单元数', 'dropout': 'Dropout率'}
                display_df = final_best_df.reset_index().rename(columns=PARAM_MAP_CN)
                if '股票名称' in display_df.columns: display_df = display_df.set_index(['ticker', '股票名称'])

                print("\n" + "="*80)
                print(f"--- {model_type_for_hpo.upper()} HPO 最佳参数冠军榜 ---")
                display(display_df.style.format({'最佳分数 (ICIR)': '{:.4f}'}).background_gradient(cmap='viridis', subset=['最佳分数 (ICIR)']))

                # Average every tuned parameter across the leaderboard; these
                # averages become the default parameters for the training step.
                param_cols_original = [c for c in hpo_results_list[0].keys() if c not in ['ticker', 'keyword', 'best_score']]
                final_hpo_params = final_best_df[param_cols_original].mean().to_dict()
                average_best_score = final_best_df['best_score'].mean()

                # These parameters are cast back to int after averaging.
                for p in ['num_leaves', 'min_child_samples', 'units_1', 'units_2']:
                    if p in final_hpo_params: final_hpo_params[p] = int(round(final_hpo_params[p]))

                # Write the averaged parameters back into both the config dict and
                # the default_model_params shortcut. The section / key is created
                # when the YAML did not define it, instead of raising KeyError.
                param_key = f"{model_type_for_hpo}_params"
                if not isinstance(config.get('default_model_params'), dict):
                    config['default_model_params'] = default_model_params
                model_params = config['default_model_params'].get(param_key) or {}
                model_params.update(final_hpo_params)
                config['default_model_params'][param_key] = model_params
                default_model_params[param_key] = model_params

                print(f"--- {model_type_for_hpo.upper()} HPO 综合结果 ---")
                print(f"本轮 HPO 冠军榜平均最高分 (ICIR): {average_best_score:.4f}")
                print(f"将用于后续训练的【{model_type_for_hpo.upper()} 平均参数】如下:")
                print(yaml.dump(default_model_params[param_key], allow_unicode=True))

else:
    print("INFO: 跳过 HPO 步骤。")

INFO: 跳过 HPO 步骤。


### 2.3 模型训练

In [None]:
# 2.3 Model training
#
# For every stock present in the L3 cache and every configured model type,
# either reuse the saved IC history (when model files and the history CSV
# already exist and FORCE_RETRAIN is False) or run walk-forward training.
# Every available IC history is collected in `all_ic_history`.

FORCE_RETRAIN = False # Whether to retrain even when saved models exist.
all_ic_history = []

print(f"INFO: 强制重新训练基础模型设置为：{FORCE_RETRAIN}")

if config and stocks_to_process:
    models_to_train = global_settings.get('models_to_train', ['lgbm', 'lstm'])
    # File extension of the saved model artefacts per model type.
    file_suffixes = {'lgbm': '.pkl', 'lstm': '.pt'}
    stock_iterator = tqdm(stocks_to_process, desc="Processing Stocks")

    for stock_info in stock_iterator:
        ticker = stock_info.get('ticker')
        # Stocks that were skipped during pre-processing have no cache entry.
        if not ticker or ticker not in global_data_cache:
            continue

        keyword = stock_info.get('keyword', ticker)
        stock_iterator.set_description(f"Processing {keyword}")

        cached_stock_data = global_data_cache[ticker]
        full_df = cached_stock_data['full_df']

        # Per-stock values that do not depend on the model type.
        # A per-stock 'use_lstm' overrides the global 'use_lstm_globally' switch.
        use_lstm_for_this_stock = stock_info.get('use_lstm')
        if use_lstm_for_this_stock is None:
            use_lstm_for_this_stock = global_settings.get('use_lstm_globally', True)
        model_dir = Path(global_settings.get('model_dir', 'models')) / ticker

        for model_type in models_to_train:
            if model_type == 'lstm' and not use_lstm_for_this_stock:
                print(f"INFO: {keyword} 已配置为不使用 LSTM, 跳过.")
                continue

            ic_history_path = model_dir / f"{model_type}_ic_history.csv"
            # Also check for the model files themselves. An unknown model type
            # falls back to matching any extension instead of raising KeyError.
            model_files = list(model_dir.glob(f"{model_type}_model_*{file_suffixes.get(model_type, '')}"))

            if ic_history_path.exists() and model_files and not FORCE_RETRAIN:
                print(f"\nINFO: 已为 {keyword} [{model_type.upper()}] 找到现有模型和 IC 历史记录. 跳过训练.")
                try:
                    ic_history = pd.read_csv(ic_history_path, index_col='date', parse_dates=True)
                    all_ic_history.append(ic_history)
                except Exception as e:
                    # Best effort: the models exist, so training is still skipped.
                    print(f"  - WARNNING: 无法加载现有的 IC 历史记录: {e}")
                continue

            folds_key = f"{model_type}_folds"
            preprocessed_folds = cached_stock_data.get(folds_key)
            if not preprocessed_folds:
                print(f"\nWARNNING: 未找到 {keyword} 的 '{model_type}' 的预处理 folds. 跳过.")
                continue

            # Config view restricted to this stock, plus the full frame that is
            # passed along under 'full_df_for_final_model'.
            run_config = {
                'global_settings': global_settings,
                'strategy_config': strategy_config,
                'default_model_params': default_model_params,
                'stocks_to_process': [stock_info],
                'full_df_for_final_model': full_df
            }

            ic_history = run_training_for_ticker(
                preprocessed_folds=preprocessed_folds,
                ticker=ticker,
                model_type=model_type,
                config=run_config,
                force_retrain=FORCE_RETRAIN,
                keyword=keyword
            )

            if ic_history is not None and not ic_history.empty:
                all_ic_history.append(ic_history)
else:
    print("ERROR: Config 或 stocks_to_process 为空.")

INFO: 强制重新训练基础模型设置为：False


Processing Stocks:   0%|          | 0/10 [00:00<?, ?it/s]


--- Starting LGBM training for 平安银行 (000001.SZ) ---
INFO: 开始对 平安银行 进行跨 104 folds 的前向验证...


正在 平安银行 上训练 LGBM :   0%|          | 0/104 [00:00<?, ?it/s]

### 2.3.5 融合模型训练

In [None]:
FORCE_FUSER_RETRAIN = True  # True: always retrain the fusion model; False: skip stocks whose meta file exists.

if config and stocks_to_process:
    fuser_iterator = tqdm(stocks_to_process, desc="Training Fusers")
    for stock_info in fuser_iterator:
        ticker = stock_info.get('ticker')
        # Validate the entry before it is used for the progress text or the fuser.
        if not ticker:
            continue

        keyword = stock_info.get('keyword', ticker)
        fuser_iterator.set_description(f"Training Fuser for {keyword}")

        # Config view restricted to the single stock being fused.
        run_config = {
            'global_settings': global_settings,
            'strategy_config': strategy_config,
            'default_model_params': default_model_params,
            'stocks_to_process': [stock_info]
        }
        fuser = ModelFuser(ticker, run_config)

        # Unless a retrain is forced, skip stocks whose fuser meta file
        # (fuser.meta_path) already exists.
        if not FORCE_FUSER_RETRAIN and fuser.meta_path.exists():
            print(f"INFO: Fusion model meta for {keyword} already exists. Skipping training.")
            continue

        print(f"\n--- 正在为 {keyword} ({ticker}) 训练融合元模型... ---")
        fuser.train()

### 2.4 结果聚合、评估与可视化

In [None]:
print("\n--- Stage 2.4: Aggregating, Fusing, and Visualizing Results ---\n")

if all_ic_history:
    # 1. Build the base table with one row per (ticker, model_type, date).
    #    Histories re-loaded from CSV in step 2.3 carry 'date' as their index,
    #    while this cell needs it as a column, so those frames are normalised first.
    ic_frames = [
        frame.reset_index() if 'date' not in frame.columns and frame.index.name == 'date' else frame
        for frame in all_ic_history
    ]
    full_ic_df = pd.concat(ic_frames, ignore_index=True)
    # Use one datetime dtype so de-duplication treats equal dates as equal.
    full_ic_df['date'] = pd.to_datetime(full_ic_df['date'])
    full_ic_df = full_ic_df.drop_duplicates(subset=['ticker', 'model_type', 'date'], keep='last')

    # Display name per ticker; tickers without a configured keyword keep the raw ticker.
    ticker_names = {
        s['ticker']: s.get('keyword', s['ticker'])
        for s in stocks_to_process
        if s.get('ticker')
    }
    full_ic_df['ticker_name'] = full_ic_df['ticker'].map(ticker_names).fillna(full_ic_df['ticker'])

    # 2. Simulate the fusion model: blend LGBM and LSTM rank IC with weights
    #    proportional to each model's exponentially weighted mean |rank IC|.
    #    NOTE(review): the weight at date t includes the IC observed at t itself;
    #    confirm whether a one-step lag is intended for this simulation.
    fusion_ic_list = []
    for ticker, group_df in full_ic_df.groupby('ticker'):
        try:
            pivot_df = group_df.pivot(index='date', columns='model_type', values='rank_ic')
            if 'lgbm' not in pivot_df.columns or 'lstm' not in pivot_df.columns:
                continue

            pivot_df.dropna(inplace=True)
            if len(pivot_df) < 2:
                continue  # at least two overlapping dates are needed to be meaningful

            span = strategy_config.get('fusion_ic_span', 120)
            rolling_ic_lgbm = pivot_df['lgbm'].abs().ewm(span=span, adjust=False).mean()
            rolling_ic_lstm = pivot_df['lstm'].abs().ewm(span=span, adjust=False).mean()
            total_rolling_ic = rolling_ic_lgbm + rolling_ic_lstm
            # 0/0 (both models at zero IC) yields NaN and falls back to equal weights.
            w_lgbm = (rolling_ic_lgbm / total_rolling_ic).fillna(0.5)
            w_lstm = 1 - w_lgbm

            pivot_df['fusion'] = (pivot_df['lgbm'] * w_lgbm) + (pivot_df['lstm'] * w_lstm)

            fusion_ic_stock_df = pivot_df[['fusion']].rename(columns={'fusion': 'rank_ic'}).reset_index()
            fusion_ic_stock_df['ticker'] = ticker
            fusion_ic_stock_df['model_type'] = 'FUSION'
            fusion_ic_stock_df['ticker_name'] = group_df['ticker_name'].iloc[0]
            fusion_ic_list.append(fusion_ic_stock_df)
        except Exception as e:
            # Best effort: one failing stock must not abort the whole evaluation.
            print(f"WARNNING: 为 {ticker} 计算融合模型 IC 时出错: {e}")

    # 3. Combine base-model rows with the simulated fusion rows.
    final_eval_df = pd.concat([full_ic_df] + fusion_ic_list, ignore_index=True) if fusion_ic_list else full_ic_df

    # --- 4. Aggregation and ICIR ---
    def safe_std(x):
        """Return the population std (ddof=0) of x, or 0.0 for a single observation."""
        return x.std(ddof=0) if len(x) > 1 else 0.0

    evaluation_summary = final_eval_df.groupby(['ticker_name', 'model_type'])['rank_ic'].agg(
        mean='mean',
        std=safe_std
    ).reset_index()

    # ICIR = mean / std when std is meaningfully positive. With std ~ 0 the
    # score falls back to mean * 100 (a mean-driven score for a perfectly
    # stable series).
    evaluation_summary['icir'] = np.where(
        evaluation_summary['std'] > 1e-8,
        evaluation_summary['mean'] / evaluation_summary['std'],
        evaluation_summary['mean'] * 100
    )

    # --- 5. Visualisation ---
    print("\n--- ICIR 对比图 (缩放至合理范围) ---")

    # Visible y-range of the bar chart; the label offset below is derived from
    # it so both stay in sync.
    REASONABLE_ICIR_RANGE = [-2.0, 2.0]

    # One fixed colour per model type.
    custom_palette = {
        "lgbm": "#49b6ff",
        "lstm": "#ffa915",
        "FUSION": "#2ecc71"
    }

    # Wide canvas so each stock's group of bars has room.
    fig, ax = plt.subplots(figsize=(20, 10))
    sns.barplot(
        data=evaluation_summary,
        x='ticker_name',
        y='icir',
        hue='model_type',
        palette=custom_palette,
        ax=ax
    )

    # Numeric label on every bar, placed 3% of the y-range beyond the bar end.
    y_offset = 0.03 * (REASONABLE_ICIR_RANGE[1] - REASONABLE_ICIR_RANGE[0])
    for p in ax.patches:
        height = p.get_height()
        if np.isnan(height):
            continue  # skip NaN bars

        # Positive bars are labelled above the top, negative bars below the bottom.
        y_pos = height + y_offset if height >= 0 else height - y_offset
        va = 'bottom' if height >= 0 else 'top'

        ax.text(
            p.get_x() + p.get_width() / 2.,  # x: centre of the bar
            y_pos,                           # y: just beyond the bar end
            f'{height:.2f}',                 # two decimals
            ha='center',
            va=va,
            fontsize=10,
            color='dimgray',
            fontweight='semibold'
        )

    # Axis range, titles and labels.
    ax.set_ylim(REASONABLE_ICIR_RANGE)
    ax.set_title(f'模型信息比率 (ICIR) 对比 - 缩放视图 (Y轴范围: {REASONABLE_ICIR_RANGE})', fontsize=20, fontweight='bold', pad=20)
    ax.set_xlabel('股票', fontsize=14, fontweight='bold')
    ax.set_ylabel('ICIR (信息比率)', fontsize=14, fontweight='bold')

    # Horizontal grid lines only.
    ax.grid(axis='y', linestyle='--', alpha=0.7)
    ax.grid(axis='x', linestyle='', alpha=0)

    ax.tick_params(axis='x', rotation=45, labelsize=12)
    ax.tick_params(axis='y', labelsize=12)

    # Remove the top and right spines.
    sns.despine(ax=ax)

    # Reference line at ICIR = 0.5, added before the legend is built so that a
    # single legend call covers both the bars and the line.
    ax.axhline(0.5, color='red', linestyle='--', label='ICIR=0.5 (良好)')
    handles, labels = ax.get_legend_handles_labels()
    ax.legend(handles=handles, labels=labels, title='模型类型', fontsize=12, title_fontsize=13, loc='upper left')

    plt.tight_layout()
    plt.show()

    # Cumulative rank-IC curves per (stock, model type), fusion rows included.
    plot_df = final_eval_df.copy()
    plot_df['date'] = pd.to_datetime(plot_df['date'])
    plot_df.sort_values('date', inplace=True)
    plot_df['cumulative_ic'] = plot_df.groupby(['ticker_name', 'model_type'])['rank_ic'].cumsum()

    plt.figure(figsize=(16, 9))
    sns.lineplot(data=plot_df, x='date', y='cumulative_ic', hue='ticker_name', style='model_type', marker='o', markersize=4, linestyle='--')
    plt.title('模型累积 Rank IC 曲线 (含融合模型)', fontsize=16)
    plt.xlabel('日期', fontsize=12)
    plt.ylabel('累积 Rank IC', fontsize=12)
    plt.legend(title='股票/模型')
    plt.tight_layout()
    plt.show()

else:
    print("\nWARNNING: 训练期间未生成 IC 历史。跳过汇总和评估。")