In [1]:
from factor_analysis_adv import *

### 1 基础设置

In [2]:
# Sample window: start and end dates as strings.
start_date, end_date = '2020-01-01', '2024-04-01'

print('训练开始:', start_date, '\n训练结束:', end_date)

# Universe: index code handed to INDEX_FIX together with the window.
index_item = '000852.XSHG'
index_fix = INDEX_FIX(start_date, end_date, index_item)
# The columns of index_fix are used as the stock list and its index as the date list.
stock_list, date_list = index_fix.columns.tolist(), index_fix.index.tolist()

训练开始: 2020-01-01 
训练结束: 2024-04-01


### 2 数据获取

In [3]:
# Raw fields requested from get_price for every stock in stock_list.
factor_names = [
    'volume', 'high', 'prev_close', 'total_turnover',
    'close', 'num_trades', 'open', 'low',
]
price = get_price(stock_list, start_date, end_date, fields=factor_names)

# Supplementary factor: turnover rate
def turnover_rate(order_book_ids,start_date,end_date):
    """Return the 'today' turnover-rate field as a date x instrument table.

    Args:
        order_book_ids: Instrument ids; also used as the column order of the result.
        start_date: Start of the query window, forwarded to rqdatac.
        end_date: End of the query window, forwarded to rqdatac.

    Returns:
        A DataFrame whose index is the trading dates reported by
        ``rqdatac.get_trading_dates(start_date, end_date)`` and whose columns are
        ``order_book_ids``. Date/instrument pairs absent from the raw data are NaN.
    """
    raw = rqdatac.get_turnover_rate(order_book_ids, start_date, end_date, fields='today')
    # Pivot the 'order_book_id' index level into columns.
    wide = raw['today'].unstack('order_book_id')
    trading_days = pd.to_datetime(rqdatac.get_trading_dates(start_date, end_date))
    # Reindex onto the full calendar and requested ids so the shape is predictable.
    return wide.reindex(index=trading_days, columns=order_book_ids)

# Register turnover_rate as a user-defined leaf factor under the name 'DAILY_TURNOVER_RATE'.
DAILY_TURNOVER_RATE = UserDefinedLeafFactor(
    'DAILY_TURNOVER_RATE',
    turnover_rate,
)

# Evaluate the factor for stock_list over the configured window.
turnover_ratio = execute_factor(
    DAILY_TURNOVER_RATE,
    stock_list,
    start_date,
    end_date,
)

### 3 特征构建

In [6]:
# Collect the raw inputs: one table per price field (the 'order_book_id'
# index level pivoted into columns), plus the turnover-ratio table.
raw_factor = {
    field: price[field].unstack('order_book_id')
    for field in factor_names
}
raw_factor['turnover_ratio'] = turnover_ratio


# Clean every raw table with data_clean, passing the index table and index code.
neu_factor = {
    name: data_clean(table, index_fix, index_item)
    for name, table in raw_factor.items()
}


# Build the feature table: each cleaned table is stacked into one column.
# The first column assigned to the empty frame fixes the row index; every
# later column is aligned to that index on assignment.
gp_data = pd.DataFrame()
for name in neu_factor:
    gp_data[name] = neu_factor[name].stack()


# Prediction target (T+1): forward one-period return computed from 'open' prices.
# pct_change(1) followed by shift(-2) puts open[t+2] / open[t+1] - 1 on row t.
# The price window is extended with get_next_trading_date(end_date, 2),
# presumably so the last dates of the sample still get a value -- confirm.
ret_1 = (
    get_price(
        stock_list,
        start_date,
        get_next_trading_date(end_date, 2),
        fields='open',
    )['open']
    .unstack('order_book_id')
    .pct_change(1)
    .shift(-2)
    .dropna(how='all')
)
gp_data['ret_1'] = ret_1.stack()
# Excess return: each row of ret_1 minus that row's mean across all columns.
gp_data['excess_return'] = ret_1.sub(ret_1.mean(axis='columns'), axis='index').stack()

### 4 数据存储

In [7]:
# Persist the assembled feature/target table under ./data.
create_dir_not_exist('./data')
gp_data.to_pickle(path='./data/gp_data.pkl')