# 模型增量更新与每日预测引擎 (批量版)

---

### **目标**
本 Notebook 是项目的**每日自动化运行脚本**。它会遍历配置文件中定义的所有股票，为每一只成功训练过模型的股票执行在线学习（热更新），并生成新的交易信号。

### **工作流程**
1.  **环境设置**: 导入库，加载配置。
2.  **主循环 (遍历股票)**: 对股票池中的每一只股票，执行以下步骤：
    - **(a) 加载模型**: 加载该股票的基础模型 (LGBM, LSTM) 和融合模型 (Fuser)。如果任何模型缺失，则跳过该股票。
    - **(b) 获取增量数据**: 智能地获取从上次批量训练至今的所有新数据。
    - **(c) 执行增量训练**: 批量生成“历史预测”与“真实标签”，对融合模型进行 `partial_train`。
    - **(d) 生成今日新预测**: 使用更新后的融合模型，生成用于次日交易的决策建议。

## 1. 环境设置与导入

In [None]:
import sys
import json
import yaml
from pathlib import Path
import matplotlib.pyplot as plt
import pandas as pd
import torch
import joblib
from tqdm.autonotebook import tqdm
from sklearn.preprocessing import StandardScaler
from utils.config_utils import load_and_merge_configs_for_notebook

# --- 1. Environment and path setup ---
# Register the current working directory on sys.path so project-local
# packages can be imported. The notebook is expected to be launched from
# the project root.
project_root = str(Path().resolve())
if sys.path.count(project_root) == 0:
    print(f"将项目根目录添加到 sys.path: {project_root}")
    sys.path.append(project_root)

# Plot defaults: whitegrid theme, the SimHei font for the sans-serif family
# (presumably so Chinese labels render), and unicode minus disabled.
plt.style.use('seaborn-v0_8-whitegrid')
plt.rcParams.update({
    'font.sans-serif': ['SimHei'],
    'axes.unicode_minus': False,
})

# --- 2. Load the configuration and all project modules ---
# a. Load the merged configuration
config = load_and_merge_configs_for_notebook()

# b. Import every project module explicitly
print("--- 正在加载所有项目模块... ---")
try:
    from main_train import run_periodic_retraining_workflow
    from data_process.get_data import initialize_apis, shutdown_apis, get_full_feature_df, get_latest_global_data
    from data_process.save_data import run_data_pipeline, get_processed_data_path
    from model.build_models import run_training_for_ticker
    from utils.hpo_utils import run_hpo_for_ticker
    from model.builders.model_fuser import ModelFuser
    from model.builders.lgbm_builder import LGBMBuilder
    from model.builders.lstm_builder import LSTMBuilder, LSTMModel
    from model.builders.tabtransformer_builder import TabTransformerBuilder, TabTransformerModel
    from risk_management.risk_manager import RiskManager
    from utils.date_utils import resolve_data_pipeline_dates
    from utils.encoding_utils import encode_categorical_features
    from utils.file_utils import find_latest_artifact_paths
    from utils.ml_utils import walk_forward_split
except ImportError as e:
    # Re-raise with a hint about the working directory. Chain explicitly and
    # carry over `name`/`path` so the module that actually failed to import
    # stays identifiable from the new exception.
    raise ImportError(
        f"模块导入失败，请确保 Notebook 的运行目录在项目根目录。错误: {e}",
        name=e.name,
        path=e.path,
    ) from e
else:
    # Reported only once every import above has succeeded.
    print("INFO: 项目模块导入成功。")

# c. Build the `modules` dictionary: every project callable/class and
# third-party dependency imported above, keyed by its own name.
modules = {
    # Data access and pipeline
    'initialize_apis': initialize_apis,
    'shutdown_apis': shutdown_apis,
    'get_full_feature_df': get_full_feature_df,
    'get_latest_global_data': get_latest_global_data,
    'run_data_pipeline': run_data_pipeline,
    'get_processed_data_path': get_processed_data_path,
    # Training and hyper-parameter optimisation entry points
    'run_training_for_ticker': run_training_for_ticker,
    'run_hpo_for_ticker': run_hpo_for_ticker,
    # Model builders and model classes
    'ModelFuser': ModelFuser,
    'LGBMBuilder': LGBMBuilder,
    'LSTMBuilder': LSTMBuilder,
    'LSTMModel': LSTMModel,
    'TabTransformerBuilder': TabTransformerBuilder,
    'TabTransformerModel': TabTransformerModel,
    # Risk management
    'RiskManager': RiskManager,
    # Utilities
    'resolve_data_pipeline_dates': resolve_data_pipeline_dates,
    'encode_categorical_features': encode_categorical_features,
    'find_latest_artifact_paths': find_latest_artifact_paths,
    'walk_forward_split': walk_forward_split,
    # Third-party / standard-library handles
    'pd': pd,
    'torch': torch,
    'joblib': joblib,
    'tqdm': tqdm,
    'StandardScaler': StandardScaler,
    'Path': Path,
    'yaml': yaml,
    'json': json,
}

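## 2. 更新流程

下面的单元调用 `run_periodic_retraining_workflow(config, modules)`，对配置项 `stocks_to_process` 中列出的股票执行周期性再训练；若执行过程中抛出异常，会打印错误信息与完整堆栈。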

In [None]:
# Run the periodic retraining workflow for the stocks listed in the config.
if config and modules:
    # `or []` also covers a key that is present but holds None (e.g. an
    # empty YAML entry), which would otherwise make len() raise a TypeError
    # before the guarded section is even entered.
    stock_list = config.get('stocks_to_process') or []
    print("=== 准备启动：周期性自动化再训练 ===")
    print(f"=== 将会为配置文件中的 {len(stock_list)} 只股票重新生成所有模型 ===")
    try:
        run_periodic_retraining_workflow(config, modules)
    except Exception as e:
        # Top-level notebook boundary: report the failure with its full
        # traceback instead of propagating it.
        print("\n--- ERROR: 在执行周期性再训练时发生严重错误 ---")
        print(e)
        import traceback
        traceback.print_exc()
    else:
        # Printed only when the workflow returned without raising.
        print("=== 自动化再训练工作流成功执行完毕！ ===")
else:
    # Make the skip visible rather than finishing the cell with no output.
    print("WARN: 配置或模块字典为空，已跳过周期性再训练。")