In [1]:
import pandas as pd
import numpy as np
from itertools import combinations
from random import choice, sample
from joblib import Parallel, delayed
import GPminer as GPm
import FreeBack as FB
import time, datetime, os, re, shutil

# 数据

In [2]:
# Backtest window; both bounds are used as label slices on the date index.
start = '2024-1-2'
end = '2024-8-30'

# Market data, indexed by (date, code) and restricted to the window.
market = (
    pd.read_csv('./market.csv.xz', parse_dates=['date'])
    .set_index(['date', 'code'])
    .loc[start:end]
)
# Benchmark (index) data, indexed by date and restricted to the same window.
benchmark = (
    pd.read_csv('./benchmark.csv.xz', parse_dates=['date'])
    .set_index(['date'])
    .loc[start:end]
)

# Turn the credit rating labels into a numeric factor.
# Missing ratings are filled with the most frequent label first.
market['rating'] = market['rating'].fillna(market['rating'].mode().iloc[0])
replace_dict = {'AAA': 0, 'AA+': 1, 'AA': 2, 'AA-': 3, 'A+': 4, 'A': 5, 'A-': 6, 'BBB+': 7, 'BBB': 8, 'BBB-': 9, 'BB+': 10, 'BB': 11, 'BB-': 12, 'B+': 13, 'B': 14, 'B-': 15, 'CCC': 16, 'CC': 17, 'C':18}
# `Series.replace` with a str -> int mapping depends on pandas silently
# downcasting the object column, which is deprecated and emits a FutureWarning.
# `map` produces the numeric codes directly; any label that is absent from the
# table is kept in place (as `replace` did) instead of becoming NaN.
rating_codes = market['rating'].map(replace_dict)
market['rating'] = rating_codes.where(rating_codes.notna(), market['rating'])

  market['rating'] = market['rating'].replace(replace_dict)


# 遗传算法参数

In [3]:
# --- Objective and selection ---
# Fitness objective.
fitness = 'sharpe'
# Offspring selection method: truncation ('cut') or 'tournament'.
select_alg = 'cut'
# Share of the population selected in each generation.
evolution_ratio = 0.2
# Empty dict: every mutation operator runs with equal probability.
prob_dict = dict()

# --- Population and stopping rules ---
# Population size.
population_size = 10
# Maximum number of generations; evolution stops once this is exceeded.
max_g = 30
# Maximum number of consecutive generations without a fitness improvement;
# evolution stops once this is exceeded.
tolerance_g = 3
# Number of initial populations (batches) to evolve.
n_batch = 3

# --- Execution ---
# Number of cores for parallel computation; 1 means serial.
n_core = 4

# 策略参数

## Score打分因子挖掘参数，以平衡型转债为例

In [4]:
##### Strategy hyper-parameters #####
# Every strategy shares one selection pool: balanced convertible bonds.
share = GPm.ind.Pool(";list_days>1800|close<100|close>135|is_call=公告实施强赎,公告提示强赎,公告到期赎回")
#GPm.ind.Score.max_exp_len = 10  # max number of factors in a generated strategy (default 10)
#GPm.ind.Score.max_mul = 50 # max factor coefficient (default 50)

# Factor library: scoring factors are mined, the pool is fixed by `share`.
score_basket = [
    'close', 'conv_prem', 'bond_prem', 'mod_conv_prem', 'dblow',
    'list_days', 'volatility', 'volatility_stk', 'remain_size', 'pe_ttm', 'pb',
]
pool_basket = None

# Optional initial seed that is mutated into the starting population;
# with no initial score the seeds are generated at random.
#p0 = GPm.ind.Score("1*True*volatility+1*False*dblow")
p0 = None

# Number of positions held.
hold_num = 5
# Settlement price column.
price = 'close'
# One-way transaction cost.
comm = 10 / 1e4
# Exclude at most 100% of the instruments.
max_extract = 1

## Pool排除因子挖掘参数，以转股溢价率为例

In [14]:
##### Strategy hyper-parameters #####
# Every strategy shares one score: the conversion premium rate.
share = GPm.ind.Score("1*False*conv_prem")

# Factor library: exclusion (pool) factors are mined, the score is fixed by `share`.
score_basket = None
pool_basket = [
    'close',
    'list_days',
    'remain_size',
    'pb',
]

# Optional initial seed that is mutated into the starting population;
# with no initial seed the seeds are generated at random.
#p0 = GPm.ind.Pool(";list_days>1800|close<100|close>135|is_call=公告实施强赎,公告提示强赎,公告到期赎回")
p0 = None

# Number of positions held.
hold_num = 5
# Settlement price column.
price = 'close'
# One-way transaction cost.
comm = 10 / 1e4
# Exclude at most 80% of the instruments.
max_extract = 0.8

## SP策略挖掘参数

In [8]:
##### Strategy hyper-parameters #####
# Nothing is shared: both the score and the pool are mined.
share = None

# Factor libraries for the score part and the pool part.
score_basket = [
    'close', 'conv_prem', 'bond_prem', 'mod_conv_prem', 'dblow',
    'list_days', 'volatility', 'volatility_stk', 'remain_size', 'pe_ttm', 'pb',
]
pool_basket = [
    'close',
    'list_days',
    'remain_size',
    'pb',
]

# Optional initial seed that is mutated into the starting population;
# with no initial SP the seeds are generated at random.
# NOTE(review): the example below builds a combined "score&;pool" expression
# with GPm.ind.Pool -- confirm that this is the intended constructor.
#p0 = GPm.ind.Pool("1*False*dblow&;list_days>1800|close<100|close>135|is_call=公告实施强赎,公告提示强赎,公告到期赎回")
p0 = None

# Number of positions held.
hold_num = 5
# Settlement price column.
price = 'close'
# One-way transaction cost.
comm = 10 / 1e4
# Exclude at most 80% of the instruments.
max_extract = 0.8

# 运行

In [15]:
# Per-instrument return series computed as close / pre_close - 1, with missing
# values replaced by 0. The commented alternative passes no return series.
# NOTE(review): the saved output of this cell is
# KeyError: "['cash'] not in index"; the cause is not visible in this notebook.
#code_returns = None
price_ratio = market['close'] / market['pre_close']
code_returns = (price_ratio - 1).fillna(0)

# Build the miner from the data, the shared component and the strategy settings.
miner0 = GPm.work.Miner(
    market,
    benchmark,
    share,
    score_basket=score_basket,
    pool_basket=pool_basket,
    p0=p0,
    hold_num=hold_num,
    price=price,
    code_returns=code_returns,
    comm=comm,
    max_extract=max_extract,
)

# Hand over the genetic-algorithm settings.
miner0.prepare(
    fitness=fitness,
    population_size=population_size,
    evolution_ratio=evolution_ratio,
    prob_dict=prob_dict,
    select_alg=select_alg,
    n_core=n_core,
    max_g=max_g,
    tolerance_g=tolerance_g,
)

# One run() call per batch.
for batch_idx in range(n_batch):
    miner0.run()

KeyError: "['cash'] not in index"

In [16]:
# Build an evaluator on the market data, passing `share` as the score.
# NOTE(review): `share` is a GPm.ind.Score only when the Pool-mining parameter
# cell was the last configuration cell executed; in the other two configuration
# cells it is a Pool or None -- confirm the intended configuration before running.
eval0 = GPm.eval.Eval(market, score=share)

In [17]:
# Evaluate one hand-written pool expression on list_days with the evaluator.
# NOTE(review): presumably a candidate produced by the mining run -- confirm.
eval0.eval_pool(';list_days<570.0|list_days>1213.0')

In [18]:
# Evaluate the score held by the evaluator (the one passed as `score=` when
# eval0 was constructed).
eval0.eval_score()

In [20]:
# Backtest the evaluated strategy with the configured number of holdings,
# settlement price and return series.
strat0 = eval0.backtest(hold_num, price, code_returns)

# Post-analysis of the backtest against the benchmark; show=False is passed so
# nothing is displayed at construction time (presumably -- confirm in FreeBack).
post0 = FB.post.StratPost(
    strat0,
    eval0.market,
    benchmark=benchmark,
    comm=comm,
    show=False,
)

In [22]:
# Display the backtest's return series; the saved output is a date-indexed
# float64 series with 162 entries covering 2024-01-02 to 2024-08-30.
strat0.returns

date
2024-01-02    0.000000
2024-01-03   -0.000178
2024-01-04    0.006636
2024-01-05   -0.006494
2024-01-08   -0.014095
                ...   
2024-08-26    0.002324
2024-08-27    0.000674
2024-08-28   -0.000205
2024-08-29   -0.006874
2024-08-30    0.004634
Length: 162, dtype: float64

In [11]:
# Development helper: reload GPm.work so edits to the GPminer source take effect
# without restarting the kernel.
# NOTE(review): this cell's execution count (11) is out of order with the cells
# above it, so it is not part of a clean top-to-bottom run; objects created
# before the reload (e.g. miner0) still use the previously loaded code.
import importlib
importlib.reload(GPm.work)

<module 'GPminer.work' from 'd:\\gitcode\\gpstrat\\GPminer\\work.py'>