In [2]:
import pandas as pd
import numpy as np
import statsmodels.api as sm
import matplotlib
import matplotlib.pyplot as plt
import seaborn as sns
import datetime
import sys
import os
from tqdm import tqdm
from matplotlib.colors import LinearSegmentedColormap

from plottable import ColumnDefinition, Table
from plottable.cmap import normed_cmap, centered_cmap
from plottable.formatters import decimal_to_percent
from plottable.plots import circled_image

# sns.set_theme(style='darkgrid')  # 图形主题
custom_params = {"axes.spines.right": False, "axes.spines.top": False}
sns.set_theme(style="ticks", rc=custom_params)
plt.rcParams['font.sans-serif'] = ['KaiTi']  # 用来正常显示中文标签
plt.rcParams['axes.unicode_minus'] = False  # 用来正常显示负号
plt.rcParams["savefig.bbox"] = "tight"  # 图形保存时去除白边

from utils import BacktestUtils, PerfUtils, FactorProcess

import warnings
warnings.filterwarnings("ignore")

In [3]:
stock_close = pd.read_pickle(r'D:\Desktop\营业利润财务质量\data\test_close_day')
stock_open = pd.read_pickle(r'D:\Desktop\营业利润财务质量\data\test_open_day')

In [4]:
# 个股收盘价
stock_close.index = pd.to_datetime(stock_close.index, format="%Y%m%d")
# 个股开盘价
stock_open.index = pd.to_datetime(stock_open.index, format="%Y%m%d")

# 日频交易日期
daily_dates = stock_open.index.tolist()
# 所有日频交易日期序列
date_series = pd.Series(daily_dates, index=daily_dates)

# 因子评价

In [11]:
factor_name= ['基于营业利润计算过程的财务质量因子']
factors_dict = pd.read_pickle("D:/Desktop/营业利润财务质量/data/财务质量因子mini")

for key, value in factors_dict.items():

In [None]:
为什么只有一个股？应该是第一个因子----Yes

eval_model.factor_eval

In [12]:
factor_orig = factors_dict.loc[daily_dates[0]:daily_dates[-1], :]
factor_orig = factor_orig.dropna(how='all', axis=1)
factor_orig = factor_orig.dropna(how='all', axis=0)
factor_orig = factor_orig.replace([np.inf, -np.inf], np.nan)

# 因子预处理
print('因子预处理: Winsorize')
factor_processed = FactorProcess.winsorize(factor_orig)
print('因子预处理: Normalize')
factor_processed = FactorProcess.normalize(factor_processed)


因子预处理: Winsorize
因子预处理: Normalize


分层回测

eval_model.factor_eval.clsfy_backtest

In [42]:
origin_factor = factor_processed
freq='M'
start_date='2012-04-30'
end_date=factor_processed.index[-1]
layer_number = 2

In [17]:
# 回测净值
nav = pd.DataFrame(columns=['分层1', '分层2', '基准',
                            '分层1相对净值', '分层2相对净值'])

merge_factor是分层？---Q：每一层的权重矩阵，不包含在该层的权重为0

In [43]:
merge_factor = origin_factor.resample(freq).last().copy()
merge_factor = merge_factor.fillna(method='bfill', axis=0)
merge_factor = merge_factor.dropna(how='all')

# 当指标给定的最后一个日期大于最后一个交易日时（月末容易出现）
# 最后一个交易信号无法调仓
if merge_factor.index[-1] >= daily_dates[-1]:
    merge_factor = merge_factor.drop(index=merge_factor.index[-1])
    
# 调仓日期为生成信号的下一天，即月度初的第一个交易日
daily_dates = pd.Series(data=daily_dates, index=daily_dates)
merge_factor.index = [daily_dates[daily_dates > i].index[0] for i in merge_factor.index]

# 指标进行排序
merge_factor_rank = merge_factor.rank(method='average', ascending=False, axis=1)

# 各层持仓权重
port = []

回测,计算策略净值

划分各层归属

In [4]:
for layer_id in tqdm(range(layer_number), desc='分层回测中', leave=False):
    

分层回测中:   0%|                                                                                     | 0/5 [00:00<?, ?it/s]

0
1
2
3
4


                                                                                                                       

测试第一层

In [37]:
layer_id = 0

方案2: 仅从存在因子值的股票中进行分层, 每层股票数在不同调仓期可能会改变

In [44]:
thres_up = 1 / layer_number * (layer_id + 1)  #该层占所有股票的权重的上限
thres_down = 1 / layer_number * layer_id      #该层占所有股票的权重的下限

factor_layer = pd.DataFrame(np.zeros_like(merge_factor.values), index=merge_factor.index,
                                    columns=merge_factor.columns)
#选出哪些股票属于这一层，左开右闭
factor_layer[(merge_factor_rank.apply(lambda x: x > x.max() * thres_down, axis=1)) &
                 (merge_factor_rank.apply(lambda x: x <= x.max() * thres_up, axis=1))] = 1

# 全为零行替换为全仓=均仓
factor_layer[(factor_layer == 0).sum(axis=1) == factor_layer.shape[1]] = 1 / factor_layer.shape[1]

# 空值替换为全仓
factor_layer[factor_layer.isnull().sum(axis=1) == factor_layer.shape[1]] = 1 / factor_layer.shape[1]

# 无因子值的股票权重置为0
factor_layer[merge_factor_rank.isnull()] = 0

# 持仓归一化
factor_layer = (factor_layer.multiply(1 / factor_layer.sum(axis=1), axis=0)).loc[start_date:end_date, :]

port.append(factor_layer)

In [45]:
factor_layer   #第一层

S_INFO_WINDCODE,000004.SZ,000005.SZ,000007.SZ,000008.SZ,000009.SZ
2012-05-02,0.0,0.5,0.0,0.0,0.5
2012-06-01,0.0,0.5,0.0,0.0,0.5
2012-07-02,0.0,0.5,0.0,0.0,0.5
2012-08-01,0.0,0.5,0.0,0.0,0.5
2012-09-03,0.0,0.5,0.0,0.0,0.5
2012-10-08,0.0,0.5,0.0,0.0,0.5
2012-11-01,0.0,0.5,0.0,0.0,0.5
2012-12-03,0.0,0.5,0.0,0.0,0.5
2013-01-04,0.0,0.5,0.0,0.0,0.5
2013-02-01,0.0,0.5,0.0,0.0,0.5


回测,计算策略净值

eval_model.factor_eval.clsfy_backtest.BacktestUtils.cal_nav

In [87]:
from utils import BacktestUtilsOpenClose

In [89]:
nav[f'分层{layer_id + 1}'], _ = BacktestUtilsOpenClose.cal_nav(factor_layer,
                                                    stock_open.loc[start_date:end_date,
                                                    factor_layer.columns],
                                                    stock_close.loc[start_date:end_date,
                                                    factor_layer.columns],  
                                                    base_nav=0,
                                                    fee=0)

In [90]:
nav

2012-05-02 00:00:00                                             0.924037
2012-05-03 00:00:00                                             0.918839
2012-05-04 00:00:00                                                  NaN
2012-05-07 00:00:00                                                  NaN
2012-05-08 00:00:00                                                  NaN
                                             ...                        
2014-12-26 00:00:00                                                  NaN
2014-12-29 00:00:00                                                  NaN
2014-12-30 00:00:00                                                  NaN
2014-12-31 00:00:00                                                  NaN
分层1                    2012-05-02    1.000000
2012-05-03    0.994375
...
Name: 策略, Length: 652, dtype: object

# 拆分debug  cal_nav

In [52]:
df_port = factor_layer  #调仓矩阵，行索引为调仓日期，列索引为所有股票
backtest_open = stock_open.loc[start_date:end_date,factor_layer.columns]   #回测区间所有股票的开盘价
backtest_close = stock_close.loc[start_date:end_date,factor_layer.columns]   #回测区间所有股票的收盘价
fee = 0

In [53]:
# 获取所有调仓日期
refresh_dates = df_port.index.tolist()

# 节选出回测区间内的开盘价
backtest_open = backtest_open.loc[refresh_dates[0]:,:]

# 节选出回测区间内的收盘价，改成前一天的收盘价
backtest_close = backtest_close.loc[refresh_dates[0]:,:]

# 获取回测区间日频交易日
backtest_dates = backtest_open.index.tolist()

# nav：该层总净值。初始化净值曲线，从正式调仓日开始
nav = pd.Series(index=backtest_dates, name='策略', dtype=float)

# 初始化换手率记录，有手续费时，换手率必须计算
turn = pd.Series(index=refresh_dates, name='当期换手', dtype=float)


In [None]:
# 遍历每个日期
for date_index in range(len(backtest_dates)):

In [75]:
date_index = 1

In [76]:
# -----------------------------------------------------------------
# 获取对应日期
# -----------------------------------------------------------------
date = backtest_dates[date_index]

In [77]:
# 如果是回测期首日，则执行初次建仓
date_index == 0

False

In [63]:
# 获取当前调仓权重
new_weight = df_port.loc[date,:]

# portfolio：持仓个股净值。计算当前持仓个股净值，考虑第一次调仓的手续费
portfolio = (1 - fee) * new_weight

# 记录净值
nav[date] = 1 - fee

每到一个日期，都根据个股涨跌幅更新组合净值，将日期计数器自增1

In [78]:
# 当天开盘价
cur_open = backtest_open.iloc[date_index, :]

# 上一天的收盘价
prev_close = backtest_close.iloc[date_index-1, :]

# 判断最新的开盘价是否存在空值
cur_open_nan = cur_open[cur_open.isna()].index

In [79]:
# 当存在持有资产价格为空的情况时，重新计算权重分布，剔除此种资产
# 此种情况很少见，不做细节处理
if np.nansum(portfolio[cur_open_nan])> 0:

    # 提取前一个日期
    prev_date = backtest_dates[date_index-1]

    # 归一化当前持仓中个股权重, 空值记为0
    old_weight = portfolio / np.nansum(np.abs(portfolio))
    old_weight[old_weight.isnull()] = 0

    # 获取最新的持仓权重
    new_weight = old_weight.copy()
    new_weight[cur_open_nan]=0

    # 归一化当前持仓中个股权重, 空值记为0
    new_weight = new_weight / np.nansum(np.abs(new_weight))
    new_weight[new_weight.isnull()] = 0

    # 直接按照新的持仓组合分配权重
    portfolio = new_weight * nav[prev_date]


In [80]:
# 根据涨跌幅更新组合净值
portfolio = cur_open / prev_close * portfolio

# 未持有资产时，组合净值维持不变
if np.nansum(portfolio) == 0:
    nav[date] = nav.iloc[backtest_dates.index(date) - 1]
else:
    nav[date] = np.nansum(portfolio)

In [81]:
nav

2012-05-02    0.924037
2012-05-03    0.918839
2012-05-04         NaN
2012-05-07         NaN
2012-05-08         NaN
                ...   
2014-12-25         NaN
2014-12-26         NaN
2014-12-29         NaN
2014-12-30         NaN
2014-12-31         NaN
Name: 策略, Length: 651, dtype: float64

In [None]:
# 如果当前是调仓日，还需执行调仓操作
if date in refresh_dates:

In [71]:
# 归一化当前持仓中个股权重
old_weight = portfolio / np.nansum(np.abs(portfolio))
old_weight[old_weight.isnull()] = 0

# 获取最新的持仓权重
new_weight = df_port.loc[date,:]

# 计算换手率，最小为0，也即不换仓，最大为2，也就是全部换仓
turn_over = np.sum(np.abs(new_weight - old_weight))
turn[date] = turn_over / 2

# 更新换仓后的净值，也即扣除手续费
nav[date] = nav[date] * (1 - turn_over * fee)

# 更新持仓组合中个股的最新净值
portfolio = new_weight * nav[date]

In [72]:
nav

2012-05-02    0.924037
2012-05-03         NaN
2012-05-04         NaN
2012-05-07         NaN
2012-05-08         NaN
                ...   
2014-12-25         NaN
2014-12-26         NaN
2014-12-29         NaN
2014-12-30         NaN
2014-12-31         NaN
Name: 策略, Length: 651, dtype: float64