# In [None]:  (notebook cell prompt left over from export; kept as a comment so the file parses)
# run_training.py
import torch
from data import load_market_data
from combination import AlphaCombinationModel
from tokenizer import AlphaTokenizer
from envs import AlphaGenerationEnv
from generator import RLAlphaGenerator

# Step 1 -- load the market data and hand it to the combination model.
df = load_market_data("data/rb_20250606_primary.csv")
combo = AlphaCombinationModel(max_pool_size=50)
# Per the original author's note, the "target" column holds the return
# over the next 10 s.
combo.inject_data(df, target_col="target")

# Step 2 -- create the tokenizer and the alpha-generation environment.
tokenizer = AlphaTokenizer()
env = AlphaGenerationEnv(combo, tokenizer, max_len=20)

# Step 3 -- PPO configuration passed to the generator.
cfg = {
    "vocab_size": tokenizer.vocab_size,
    "hidden_dim": 128,
    "batch_size": 256,  # number of steps collected per round
    "update_epochs": 4,
    "gamma": 0.99,
    "clip_eps": 0.2,
    "entropy_coef": 0.01,
    "value_coef": 0.5,
}
# "device" is filled in last so the key order matches the other settings
# above; CUDA is selected whenever torch reports it as available.
if torch.cuda.is_available():
    cfg["device"] = "cuda"
else:
    cfg["device"] = "cpu"

# Step 4 -- build the agent and train it.
agent = RLAlphaGenerator(env, cfg)
agent.train(num_iterations=500)

# Step 5 -- report what ended up in the combination model.
print("已发现 alpha 数量:", len(combo.expr_list))
print("最新组合 IC:", combo.score())
weighted_exprs = zip(combo.expr_list, combo.weights)
for expr, w in weighted_exprs:
    # One line per expression: signed weight (3 decimals) followed by the expression.
    print(f"{w:+.3f} × {expr}")
