In [1]:
import os
import yaml
import pandas as pd

from utility import set_random_seed
from data import load_symbol_dfs
from combination import AlphaCombinationModel
from tokenizer import AlphaTokenizer
from alpha_generation_env import AlphaGenerationEnv
from generator import RLAlphaGenerator
from glob import glob
import warnings
from datetime import datetime
# Suppress every warning category for the remainder of the session.
warnings.filterwarnings("ignore")

# The second assignment wins: the trial configuration is the file actually loaded.
config_path = "configs/config.yaml"
config_path = "configs/trial_config.yaml"

with open(config_path, "r", encoding="utf-8") as f:
    cfg = yaml.safe_load(f)

# Insert a run timestamp between the stem and the extension of the configured
# weights path, so the path used later in the notebook is unique per run.
now = datetime.now().strftime("%Y%m%d_%H%M%S")
orig_path = cfg["output"]["alphas_weights_path"]
base, ext = os.path.splitext(orig_path)
cfg["output"]["alphas_weights_path"] = "".join((base, "_", now, ext))

# Seed the run; 42 is used when the config has no "random_seed" entry.
set_random_seed(cfg.get("random_seed", 42))

# Per-section views into the loaded configuration.
gen_cfg, model_cfg, data_cfg, envs_cfg = (
    cfg["generator"],
    cfg["model"],
    cfg["data"],
    cfg["env"],
)

In [2]:
# Load data

# Unpack the data section of the config into notebook-level names.
n = data_cfg["n"]
symbol_dict = data_cfg["symbol"]
path = data_cfg["path"]
start_date = data_cfg["date_range"][0]
end_date = data_cfg["date_range"][1]

data_dfs = load_symbol_dfs(
    directory=path,
    symbols=symbol_dict,
    start_date=start_date,
    end_date=end_date,
    n=n,
    base_fields=envs_cfg["base_fields"],
)

# Only the "rb" entry is used from here on.
# NOTE(review): presumably a list of DataFrames for that symbol — confirm against load_symbol_dfs.
data = pd.concat(data_dfs["rb"])

In [3]:
# Display the keyword arguments configured for the combiner.
model_cfg["combiner_kwargs"]

{'alpha': 0.3, 'l1_ratio': 0.7, 'max_iter': 5000}

In [4]:
def reload_components():
    """Reload the project modules and return their freshly loaded components.

    Meant for interactive development, so that edits to the module source
    files take effect without restarting the kernel. The modules are reloaded
    in this order: ``data``, ``tokenizer``, ``combination``,
    ``alpha_generation_env``, ``generator``.

    ``importlib.reload`` does not rebind names that were previously imported
    with ``from module import name``. Callers must therefore take the objects
    from the returned mapping (for example
    ``globals().update(reload_components())``); otherwise they keep using the
    pre-reload objects.

    Returns:
        dict: maps ``"AlphaTokenizer"``, ``"AlphaCombinationModel"``,
        ``"AlphaGenerationEnv"``, ``"RLAlphaGenerator"`` and
        ``"load_symbol_dfs"`` to the corresponding reloaded objects.
    """
    import importlib

    import data, tokenizer, combination, alpha_generation_env, generator

    # NOTE(review): the order looks like dependencies-first (env and generator
    # last) — confirm against the modules' own imports before changing it.
    for module in (data, tokenizer, combination, alpha_generation_env, generator):
        importlib.reload(module)

    # Read the attributes after reloading so the mapping holds the new objects.
    return {
        "AlphaTokenizer": tokenizer.AlphaTokenizer,
        "AlphaCombinationModel": combination.AlphaCombinationModel,
        "AlphaGenerationEnv": alpha_generation_env.AlphaGenerationEnv,
        "RLAlphaGenerator": generator.RLAlphaGenerator,
        "load_symbol_dfs": data.load_symbol_dfs,
    }

components = reload_components()
# importlib.reload() creates new class objects but leaves the names bound by
# the earlier `from module import ...` statements pointing at the old ones.
# Rebind them here so the cells below instantiate the reloaded classes.
globals().update(components)


In [5]:
# Module initialization: build the combination model, tokenizer, environment
# and RL agent from the loaded configuration.
combo = AlphaCombinationModel(
    max_pool_size=model_cfg["max_pool_size"],
    combiner=model_cfg["combiner"],
    combiner_kwargs=model_cfg["combiner_kwargs"],
)
combo.inject_data(data, target_col=data_cfg["target_col"])

tokenizer = AlphaTokenizer(base_fields=envs_cfg["base_fields"])
env = AlphaGenerationEnv(
    combo_model=combo, tokenizer=tokenizer, max_len=envs_cfg["max_len"]
)

# The generator config is completed with values only known at runtime:
# the tokenizer's vocabulary size and the environment's maximum length.
gen_cfg["vocab_size"] = tokenizer.vocab_size
gen_cfg["max_seq_len"] = envs_cfg["max_len"]
agent = RLAlphaGenerator(env=env, config=gen_cfg)

print(f"Starting training for {gen_cfg['num_iterations']} iterations...")
agent.train(num_iterations=gen_cfg["num_iterations"])

# Persist the expressions, ICs and weights held by the combination model.
out_cfg = cfg["output"]
out_dir = os.path.dirname(out_cfg["alphas_weights_path"])
# dirname() returns "" for a bare file name, and os.makedirs("") raises
# FileNotFoundError; only create the directory when there is one to create.
if out_dir:
    os.makedirs(out_dir, exist_ok=True)
results = pd.DataFrame(
    {"expr": combo.expr_list, "ic": combo.ic_list, "weight": combo.weights}
)
results.to_csv(out_cfg["alphas_weights_path"], index=False)
print(f"Saved discovered alphas and weights to {out_cfg['alphas_weights_path']}")

Starting training for 300 iterations...
[Iter 0001]  AvgReturn=+0.0190   ComboIC=+0.1390
[Iter 0002]  AvgReturn=-0.0062   ComboIC=+0.0834
[Iter 0003]  AvgReturn=-0.0011   ComboIC=-0.0653
[Iter 0004]  AvgReturn=+0.0107   ComboIC=-0.0329
[Iter 0005]  AvgReturn=-0.0040   ComboIC=-0.0326
[Iter 0006]  AvgReturn=+0.0078   ComboIC=-0.0040
[Iter 0007]  AvgReturn=+0.0109   ComboIC=-0.0437
[Iter 0008]  AvgReturn=-0.0077   ComboIC=-0.0998
[Iter 0009]  AvgReturn=+0.0056   ComboIC=-0.1288
[Iter 0010]  AvgReturn=+0.0148   ComboIC=-0.1071
[Iter 0011]  AvgReturn=+0.0056   ComboIC=-0.1423


KeyboardInterrupt: 