In [1]:
import pandas as pd
import numpy as np
from itertools import combinations
from random import choice, sample
from joblib import Parallel, delayed
import GPminer as GPm
import FreeBack as FB
import time, datetime, os, re, shutil

# 超参数与数据

In [2]:
###### Genetic-algorithm parameters #####
# Upper bound on the number of generations; evolution stops once it is reached.
max_g = 500
# Patience: evolution stops when the best fitness has not improved for more
# than this many consecutive generations.
tolerance_g = 20
# Number of individuals kept by selection in each generation.
population_size = 10
# Fraction of each generation that survives selection.
evolution_ratio = 0.5
# Per-operator probabilities; an empty dict is meant to give every mutation
# operator the same probability.
# NOTE(review): no cell visible in this notebook reads prob_dict - confirm
# where it is consumed.
prob_dict = dict()
#prob_dict = {'mutation_exp_dw':0.3, 'mutation_exp_replace':0.3, 'mutation_exp_and':0.3,\
#             'cross_exp_exchange':0.1}  # probabilities of the individual mutation / crossover operators
# Offspring selection scheme: 'cut' = truncation, 'tournament' = tournament selection.
select_alg = 'tournament'
# Fitness target: name of the metric column the population is ranked by.
fitfactor = 'excess_sharpe'
# Number of parallel workers used when evaluating fitness.
n_core = 10

##### Strategy hyperparameters #####
# Start / end dates of the evaluation window.
# NOTE(review): neither `start` nor `end` is referenced by any other cell
# visible in this notebook - confirm where the date window is applied.
start = '2024-1-1'
end = '2024-9-30'
hold_num = 5  # number of positions held
price = 'close'  # price field used for settlement
comm = 10/1e4  # one-way transaction cost
# A single selection pool shared by every strategy; the conditions that follow
# the ';' are exclusion rules.
pool0 = GPm.ind.Pool(
    ";left_years<1|close<100|close>135|is_call=公告实施强赎|"
    "is_call=公告提示强赎|is_call=已满足强赎条件|is_call=公告到期赎回"
)
# The number of parallel workers (n_core) is configured once, together with the
# genetic-algorithm parameters; the second assignment that used to live here
# silently overrode any edit made to the first one and has been removed.
# Scoring factors (Score)
#GPm.ind.Score.max_exp_len = 10  # maximum number of factors in a generated strategy (default 10)
#GPm.ind.Score.max_mul = 50 # maximum factor coefficient (default 50)
# Factor library:
# conversion premium rate, double-low, pure-bond premium rate, remaining size,
# convertible-bond market-cap share, 5-day price change, volatility,
# price-to-book ratio of the underlying stock
basket = ['conv_prem', 'dblow', 'bond_prem', 'remain_size', 'cap_mv_rate', 'pct_chg_5', 'volatility', 'pb']

## 数据

In [3]:
# Market data: a pd.DataFrame indexed by a (date, code) MultiIndex.
market = (
    pd.read_csv('./market.csv.xz', parse_dates=['date'])
    .set_index(['date', 'code'])
)
# Benchmark index data: a pd.DataFrame indexed by date holding simple-return
# series. Per the original note, the first column is taken as the benchmark by
# default (behaviour of the consuming library - not verifiable from this cell).
benchmark = (
    pd.read_csv('./benchmark.csv.xz', parse_dates=['date'])
    .set_index(['date'])
)

# 运行

In [4]:
def _reset_result_dir(path='result'):
    """Leave an empty directory at ``path``, deleting a previous one if present."""
    if os.path.isdir(path):
        shutil.rmtree(path)
    os.mkdir(path)


def _tournament_select(fitness, n_select, key):
    """Pick distinct index labels of ``fitness`` by tournaments without replacement.

    Each round samples ``int(len(fitness)/10)`` not-yet-chosen labels (at least
    one, and never more than are left) and keeps the one with the largest
    ``key`` value.

    Parameters
    ----------
    fitness : pd.DataFrame
        Fitness table; its index labels are the candidates.
    n_select : int
        Number of labels wanted; capped at the number of distinct candidates.
    key : str
        Column of ``fitness`` that decides each tournament.

    Returns
    -------
    set
        The chosen index labels.
    """
    labels = set(fitness.index)
    # Never ask for more winners than there are candidates, otherwise the loop
    # below could not terminate.
    n_select = min(n_select, len(labels))
    # A tournament of size 0 would never produce a winner.
    tour_size = max(1, int(len(fitness)/10))
    winners = set()
    while len(winners) < n_select:
        remaining = list(labels - winners)
        # random.sample raises ValueError when asked for more than is available.
        entrants = sample(remaining, min(tour_size, len(remaining)))
        ranked = fitness.loc[entrants].sort_values(by=key, ascending=False)
        winners.add(ranked.index[0])
    return winners


def _archive_result_dir(mark, popu):
    """Rename the ``result`` directory to its final name.

    With ``mark`` None the name is today's date (YYYYMMDD) followed by
    ``popu.get_name()``. Otherwise ``mark + popu.get_name(k)`` is tried for
    k = 1..4 in turn; the error of the last attempt is propagated.
    """
    if mark is None:
        os.rename('result', datetime.datetime.today().date().strftime("%Y%m%d")+popu.get_name())
        return
    last_attempt = 4
    for attempt in range(1, last_attempt+1):
        try:
            # NOTE(review): get_name(k) presumably yields alternative names so
            # that runs sharing the same mark do not collide - confirm in GPminer.
            os.rename('result', mark+popu.get_name(attempt))
            return
        except Exception:
            if attempt == last_attempt:
                raise


def workflow(mark=None):
    """Run one evolutionary search from a freshly generated initial population.

    Reads the notebook-level configuration (``basket``, ``population_size``,
    ``evolution_ratio``, ``max_g``, ``tolerance_g``, ``select_alg``,
    ``fitfactor``, ``n_core``, ``hold_num``, ``price``, ``comm``, ``pool0``,
    ``market``, ``benchmark``).

    Every generation: evaluate the strategies that have no cached fitness yet,
    write the ranked table to ``result/fitness<g>.csv``, select survivors and
    breed the next generation. The run stops when the best ``fitfactor`` has
    not improved for more than ``tolerance_g`` generations or after ``max_g``
    generations; the surviving population is then written to
    ``result/fitness<g+1>.csv`` and the ``result`` directory is renamed.

    Parameters
    ----------
    mark : str or None
        Prefix of the final directory name. None uses today's date (YYYYMMDD).

    Raises
    ------
    ValueError
        If ``select_alg`` is neither 'tournament' nor 'cut'.
    """
    # Fail before any expensive work instead of hitting an unbound `select`
    # after the first generation has been evaluated.
    if select_alg not in ('tournament', 'cut'):
        raise ValueError("select_alg must be 'tournament' or 'cut', got %r" % (select_alg,))
    t0 = time.time()
    # Build the initial population from a random half of the factor basket.
    select_basket = sample(basket, int(len(basket)/2))
    gen0 = GPm.gen.Gen(select_basket)
    seeds = gen0.get_seeds()
    popu0 = GPm.popu.Population()
    popu0.add(seeds)
    GPm.ino.log('从%s个p中选择%s个p作为初始种群,耗时%.1lfs'%(len(popu0.codes),\
                            int(population_size/evolution_ratio), time.time()-t0))
    popu0 = popu0.subset(int(population_size/evolution_ratio))
    _reset_result_dir('result')
    fitness_all = pd.DataFrame()  # cache of every fitness row computed so far
    fitness = pd.DataFrame()      # fitness rows of the current generation
    eval0 = GPm.eval.Eval(market, pool0)
    # From here on breeding draws on the full factor basket.
    gen0 = GPm.gen.Gen(basket, popu0)

    def single(p):
        """Backtest strategy ``p`` and return its metrics as a one-row DataFrame."""
        eval0.eval_score(p)
        strat0 = eval0.backtest(hold_num, price)
        post0 = FB.post.StratPost(strat0, eval0.market, benchmark=benchmark, comm=comm, show=False)
        # Risk figures are negated so that, like every other column, a larger
        # value ranks higher under the descending sort used for selection.
        metrics = {
            'return_total': post0.return_total,
            'return_annual': post0.return_annual,
            'sigma': -post0.sigma,
            'sharpe': post0.sharpe,
            'drawdown': -max(post0.drawdown),
            'excess_annual': post0.excess_return_annual,
            'excess_sigma': -post0.excess_sigma,
            'excess_sharpe': post0.excess_sharpe,
            'excess_drawdown': -max(post0.excess_drawdown),
            'beta': post0.beta,
            # presumably annualised (250 trading days) and in percent - TODO confirm
            'alpha': post0.alpha*250*100,
        }
        result = pd.DataFrame()
        for column, value in metrics.items():
            result.loc[p, column] = value
        return result

    best_fitness = float('-inf')
    best_generation = 0
    for g in range(max_g):
        GPm.ino.log('第%s代'%(g))
        t0 = time.time()
        # Fitness evaluation: reuse rows that were already computed.
        if g!=0:
            fitness = fitness_all.loc[list(popu0.codes&set(fitness_all.index))]
        GPm.ino.log('本代%d个策略，其中%d个策略已有计算结果'%(len(popu0.codes), len(fitness)))
        pending = list(popu0.codes-set(fitness.index))
        if pending:
            fresh = pd.concat(Parallel(n_jobs=n_core)(delayed(single)(p) for p in pending))
            fitness_all = pd.concat([fitness_all, fresh])
            # De-duplicate on the strategy label. drop_duplicates() compares the
            # column values only, so it would also discard distinct strategies
            # whose metrics happen to coincide and force them to be re-evaluated.
            fitness_all = fitness_all[~fitness_all.index.duplicated(keep='first')]
            fitness = pd.concat([fitness, fresh])
        GPm.ino.log('第%s轮进化适应度计算完成，耗时%.1lfs'%(g, time.time()-t0))
        fitness = fitness.sort_values(fitfactor, ascending=False)
        if fitness.iloc[0][fitfactor]>best_fitness:
            best_fitness = fitness.iloc[0][fitfactor]
            best_generation = g
        fitness.to_csv('result/'+'fitness%s.csv'%g)
        # Selection
        if select_alg=='tournament':
            # Tournament selection without replacement.
            select = _tournament_select(fitness, population_size, fitfactor)
        else:
            # Truncation selection: keep the top rows of the ranked table.
            select = set(fitness[:population_size].index)
        popu0.reset(select)
        GPm.ino.log('第%s轮进化完成，最大%s:%.2lf'%(g, fitfactor, fitness.iloc[0][fitfactor]))
        if (g-best_generation)>tolerance_g or g==(max_g-1):
            GPm.ino.log('=====此初始种群进化完成=====')
            fitness.loc[list(popu0.codes)].sort_values(by=fitfactor, ascending=False).\
                to_csv('result/'+'fitness%s.csv'%(g+1))
            # Give the result directory its final name.
            _archive_result_dir(mark, popu0)
            break
        # Breed the next generation.
        gen0.multiply(1/evolution_ratio)

In [5]:
# Launch three independent evolution runs. Each run is tagged with the current
# date and hour (YYYYMMDDHH), which workflow() uses as the prefix of the
# directory it renames its results to.
for run_idx in range(3):
    run_mark = datetime.datetime.now().strftime('%Y%m%d%H')
    workflow(run_mark)

Error: [Errno 13] Permission denied: 'log.txt'. You don't have permission to access the specified file.
Error: [Errno 13] Permission denied: 'log.txt'. You don't have permission to access the specified file.
Error: [Errno 13] Permission denied: 'log.txt'. You don't have permission to access the specified file.
Error: [Errno 13] Permission denied: 'log.txt'. You don't have permission to access the specified file.
Error: [Errno 13] Permission denied: 'log.txt'. You don't have permission to access the specified file.
