In [1]:

import os

# Restrict CUDA-aware libraries in this kernel to device 0. This runs in the
# first cell, ahead of the heavier imports in the following cell.
os.environ.update(CUDA_VISIBLE_DEVICES="0")

# Instrument universe name; used below to build output paths and to load data.
instruments = "csi300"

In [2]:
import json
from collections import Counter
from alphagen.data.expression import *
from alphagen.models.alpha_pool import AlphaPool
from alphagen.utils.correlation import batch_pearsonr, batch_spearmanr
from alphagen_generic.features import *
from gan.utils.data import get_data_by_year


def pred_pool(capacity,data,cache):
    """Build an ensemble alpha from the highest-scoring cached expressions.

    The ``capacity`` entries of ``cache`` with the largest values are turned
    back into expression objects, loaded into a fresh ``AlphaPool``, the pool
    is optimised, and the resulting weighted combination is evaluated on
    ``data``.

    Relies on the notebook-level global ``target`` (not defined in this
    function).

    Args:
        capacity: Number of top-ranked expressions to load; also used as the
            pool capacity.
        data: Stock data object handed to both ``AlphaPool`` and
            ``QLibStockDataCalculator``.
        cache: Mapping of expression source string -> numeric score
            (presumably the search fitness -- confirm against the producer
            of the JSON file).

    Returns:
        The value returned by ``make_ensemble_alpha`` for the pool's
        expressions and weights.
    """
    from alphagen_qlib.calculator import QLibStockDataCalculator

    alpha_pool = AlphaPool(
        capacity=capacity,
        stock_data=data,
        target=target,
        ic_lower_bound=None,
    )

    # most_common() orders entries by value, largest first, and keeps at most
    # `capacity` of them.
    ranked = Counter(cache).most_common(capacity)

    parsed = []
    for source, _score in ranked:
        # NOTE(security): eval() executes whatever code the string contains.
        # Only feed this function caches produced by trusted runs.
        parsed.append(eval(source))

    alpha_pool.force_load_exprs(parsed)
    # Private AlphaPool API; presumably re-fits the combination weights for
    # the freshly loaded expressions -- confirm against AlphaPool.
    alpha_pool._optimize(alpha=5e-3, lr=5e-4, n_iter=2000)

    # Only the first `size` slots of the pool are populated.
    n_used = alpha_pool.size
    kept_exprs = alpha_pool.exprs[:n_used]
    kept_weights = alpha_pool.weights[:n_used]

    calculator = QLibStockDataCalculator(data, target)
    return calculator.make_ensemble_alpha(kept_exprs, kept_weights)



Gym has been unmaintained since 2022 and does not support NumPy 2.0 amongst other critical functionality.
Please upgrade to Gymnasium, the maintained drop-in replacement of Gym, or contact the authors of your software and request that they upgrade.
Users of this version of Gym should be able to simply replace 'import gym' with 'import gymnasium as gym' in the vast majority of cases.
See the migration guide at https://gymnasium.farama.org/introduction/migration_guide/ for additional information.


# Infer

In [3]:
# Token vocabulary of the expression search. Nothing in this cell reads these
# three lists; they are kept as notebook globals in case other cells do.
# They do not depend on any loop variable, so they are built once.
features = ['open_', 'close', 'high', 'low', 'volume', 'vwap']
constants = [f'Constant({v})' for v in [-30., -10., -5., -2., -1., -0.5, -0.01, 0.01, 0.5, 1., 2., 5., 10., 30.]]
terminals = features + constants

# Single-seed run; switch to range(5) to sweep all five seeds.
for seed in range(1):
    for train_end in range(2021,2024):
        save_dir = f'out_gp/{instruments}_{train_end}_day_{seed}'

        # The data split and the expression cache depend only on
        # (seed, train_end), so load them once per directory instead of once
        # per pool size.
        returned = get_data_by_year(
            train_start = 2011,train_end=train_end,valid_year=train_end+1,test_year =train_end+2,
            instruments=instruments, target=target,freq='day',
        )
        data_all,data,data_valid,data_valid_withhead,data_test,data_test_withhead,name = returned

        # Context manager so the file handle is closed deterministically.
        with open(f'{save_dir}/40.json') as cache_file:
            cache = json.load(cache_file)['cache']

        for num in [1,10,20,50]:
            print(save_dir)

            # The ensemble is evaluated on the full data range; only the
            # trailing test-window rows are kept and saved.
            pred = pred_pool(num,data_all,cache=cache)
            pred = pred[-data_test.n_days:]
            torch.save(pred.detach().cpu(),f"{save_dir}/pred_{num}.pt")
            


out_gp/csi300_2021_day_0
Data not exist, load from qlib


[4818:MainThread](2025-12-12 11:21:51,933) INFO - qlib.Initialization - [config.py:452] - default_conf: client.
[4818:MainThread](2025-12-12 11:21:52,443) INFO - qlib.Initialization - [__init__.py:82] - qlib successfully initialized based on client settings.
[4818:MainThread](2025-12-12 11:21:52,445) INFO - qlib.Initialization - [__init__.py:84] - data_path={'day': PosixPath('/root/autodl-tmp/qlib_data/cn_data_202512')}


real_start_time: 2010-08-04 00:00:00
real_end_time: 2022-02-21 00:00:00
real_start_time: 2021-08-05 00:00:00
real_end_time: 2023-02-20 00:00:00
real_start_time: 2019-08-06 00:00:00
real_end_time: 2023-02-20 00:00:00
real_start_time: 2022-08-05 00:00:00
real_end_time: 2024-02-20 00:00:00
real_start_time: 2020-08-06 00:00:00
real_end_time: 2024-02-20 00:00:00
real_start_time: 2010-08-04 00:00:00
real_end_time: 2024-02-20 00:00:00
out_gp/csi300_2021_day_0
out_gp/csi300_2021_day_0
out_gp/csi300_2021_day_0
out_gp/csi300_2022_day_0
Data not exist, load from qlib
real_start_time: 2010-08-04 00:00:00
real_end_time: 2023-02-20 00:00:00
real_start_time: 2022-08-05 00:00:00
real_end_time: 2024-02-20 00:00:00
real_start_time: 2020-08-06 00:00:00
real_end_time: 2024-02-20 00:00:00
real_start_time: 2023-08-04 00:00:00
real_end_time: 2025-02-20 00:00:00
real_start_time: 2021-08-05 00:00:00
real_end_time: 2025-02-20 00:00:00
real_start_time: 2010-08-04 00:00:00
real_end_time: 2025-02-20 00:00:00
out_g

# Read and combine the results for display

In [None]:
import pandas as pd

# One summary row per (num, seed): IC / rank-IC statistics pooled over all
# test years.
result = []
for num in [1]:
    # Single-seed run; switch to range(5) to sweep all five seeds.
    for seed in range(1):
        cur_seed_ic = []
        cur_seed_ric = []
        for train_end in range(2021, 2024):
            save_dir = f'out_gp/{instruments}_{train_end}_day_{seed}'

            returned = get_data_by_year(
                train_start=2011,
                train_end=train_end,
                valid_year=train_end + 1,
                test_year=train_end + 2,
                instruments=instruments,
                target=target,
                freq='day',
            )
            data_all, data, data_valid, data_valid_withhead, data_test, data_test_withhead, name = returned

            pred = torch.load(f"{save_dir}/pred_{num}.pt").to('cuda:0')

            # Evaluate the target on the full range and keep the trailing
            # test-window rows (rather than evaluating on data_test
            # directly), mirroring how the saved predictions were sliced.
            tgt = target.evaluate(data_all)[-data_test.n_days:, :]

            # NaN correlations are counted as 0.
            ic_s = torch.nan_to_num(batch_pearsonr(pred, tgt), nan=0)
            rank_ic_s = torch.nan_to_num(batch_spearmanr(pred, tgt), nan=0)

            cur_seed_ic.append(ic_s)
            cur_seed_ric.append(rank_ic_s)

        # Pool the per-year correlation series into one tensor per metric.
        ic = torch.cat(cur_seed_ic)
        rank_ic = torch.cat(cur_seed_ric)

        ic_mean = ic.mean().item()
        rank_ic_mean = rank_ic.mean().item()
        ic_std = ic.std().item()
        rank_ic_std = rank_ic.std().item()

        # ICIR / rank-ICIR are the mean divided by the standard deviation.
        tmp = {
            'seed': seed,
            'num': num,
            'ic': ic_mean,
            'ric': rank_ic_mean,
            'icir': ic_mean / ic_std,
            'ricir': rank_ic_mean / rank_ic_std,
        }
        result.append(tmp)

# Collapse to one row per (num, seed), then report mean and std across seeds.
per_seed = pd.DataFrame(result).groupby(['num','seed']).mean()
print(per_seed.groupby('num').agg(['mean','std']))

[{'seed': 0, 'num': 1, 'ic': 0.004189000930637121, 'ric': 0.053484391421079636, 'icir': 0.04278258347401724, 'ricir': 0.34536648024986094}]
