## 导入库

In [135]:
from atrader import *
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import math
from statsmodels import  regression
import statsmodels.api as sm
import scipy.stats as st
import datetime as dt

## 数据处理

### 去极值

In [136]:
# Winsorize outliers with the n-sigma rule
def extreme_3sigma(dt, n=3):
    """Clip every column of ``dt`` to ``mean ± n * std`` of that column.

    Parameters
    ----------
    dt : pandas.DataFrame
        Values to winsorize. Mean and standard deviation are taken per
        column (pandas defaults), so each column is treated as one
        cross-section.
    n : float, default 3
        Half-width of the allowed band in standard deviations.

    Returns
    -------
    pandas.DataFrame
        Same shape as ``dt``; values outside the band are replaced by the
        nearest bound.
    """
    center = dt.mean()
    half_width = n * dt.std()
    lower_bound = center - half_width
    upper_bound = center + half_width
    # axis=1 aligns the per-column bounds with the columns of dt.
    return dt.clip(lower=lower_bound, upper=upper_bound, axis=1)

### 标准化

In [137]:
# Z-score standardization
def standardize_z(dt):
    """Return ``dt`` centred on its mean and scaled by its standard deviation.

    For a DataFrame the statistics are computed per column (pandas
    defaults), so every column is standardized independently.

    Parameters
    ----------
    dt : pandas.DataFrame or pandas.Series
        Values to standardize.

    Returns
    -------
    Same type and shape as ``dt`` holding ``(dt - mean) / std``.
    """
    centred = dt.sub(dt.mean())
    return centred.div(dt.std())

### 中性化

In [138]:
# Shenwan (SW) level-1 industries.
# Maps each industry block name to an index code of the form 'sse.801xxx'.
# Within this notebook only the keys are used: industry_exposure() iterates
# them in insertion order as the dummy-variable columns and passes each key to
# get_code_list(). The values are not read by any code visible here.
# NOTE(review): the keys look like pinyin abbreviations of the industry names
# (e.g. 'SWYH1' presumably banks) — confirm against the data vendor's list.
shenwan_industry = {
'SWNLMY1':'sse.801010',
'SWCJ1':'sse.801020',
'SWHG1':'sse.801030',
'SWGT1':'sse.801040',
'SWYSJS1':'sse.801050',
'SWDZ1':'sse.801080',
'SWJYDQ1':'sse.801110',
'SWSPYL1':'sse.801120',
'SWFZFZ1':'sse.801130',
'SWQGZZ1':'sse.801140',
'SWYYSW1':'sse.801150',
'SWGYSY1':'sse.801160',
'SWJTYS1':'sse.801170',
'SWFDC1':'sse.801180',
'SWSYMY1':'sse.801200',
'SWXXFW1':'sse.801210',
'SWZH1':'sse.801230',
'SWJZCL1':'sse.801710',
'SWJZZS1':'sse.801720',
'SWDQSB1':'sse.801730',
'SWGFJG1':'sse.801740',
'SWJSJ1':'sse.801750',
'SWCM1':'sse.801760',
'SWTX1':'sse.801770',
'SWYH1':'sse.801780',
'SWFYJR1':'sse.801790',
'SWQC1':'sse.801880',
'SWJXSB1':'sse.801890'
}

In [139]:
def industry_exposure(target_idx):
    """Build a 0/1 industry dummy matrix for the given stock codes.

    Parameters
    ----------
    target_idx : iterable of str
        Stock codes. They are lower-cased to form the row index of the result.

    Returns
    -------
    pandas.DataFrame
        One row per code in ``target_idx`` (lower-cased, original order) and
        one column per key of ``shenwan_industry``. A cell is 1 when the code
        is in the list returned by ``get_code_list(<industry key>)``, else 0.
        The frame is integer-typed (the previous implementation returned an
        object-dtype frame holding the same 0/1 values).
    """
    codes = [x.lower() for x in target_idx]
    # Start from zeros instead of NaN + fillna(0): keeps a numeric dtype and
    # avoids the object-dtype downcast performed by fillna.
    df = pd.DataFrame(0, index=codes, columns=list(shenwan_industry.keys()))
    for m in df.columns:        # one lookup per industry
        # Normalise the constituent codes to lower case as well; the row index
        # is lower-cased, so a case mismatch would otherwise silently leave the
        # whole column at 0.
        members = {str(c).lower() for c in get_code_list(m).code.tolist()}
        df.loc[df.index.isin(members), m] = 1
    return df

In [140]:
# Takes the factor values and the total market capitalisation
def neutralization(factor, mkv, industry=True):
    """Neutralize factor columns against industry and/or log market cap.

    Each column of ``factor`` is regressed by OLS (no intercept is added) on
    the chosen regressors; the regression residuals are returned as the
    neutralized factor.

    Parameters
    ----------
    factor : pandas.DataFrame
        Factor values, one row per stock code and one column per factor
        series. Missing values are replaced by 0 before the regression.
    mkv : pandas.DataFrame, pandas.Series or other
        Market capitalisation indexed by stock code. For a DataFrame only the
        first column is used. Any other type (e.g. ``None``) means "do not
        neutralize against market cap". The object is not modified.
    industry : bool, default True
        Whether to include the industry dummies from ``industry_exposure``.

    Returns
    -------
    pandas.DataFrame
        Residuals, with the lower-cased index of ``factor`` and its columns.

    Raises
    ------
    ValueError
        If neither market cap nor industry is selected (there would be
        nothing to regress on).
    """
    Y = factor.fillna(0).rename(index=str.lower)
    has_mkv = isinstance(mkv, (pd.DataFrame, pd.Series))
    if not has_mkv and not industry:
        raise ValueError('neutralization needs mkv data and/or industry=True')

    # The design matrix does not depend on the factor column, so build it once
    # (industry_exposure queries the data service once per industry).
    regressors = []
    if industry:
        regressors.append(industry_exposure(Y.index.tolist()))
    if has_mkv:
        cap = mkv.iloc[:, 0] if isinstance(mkv, pd.DataFrame) else mkv
        # rename() returns a copy, so the caller's mkv stays untouched.
        cap = cap.rename(index=str.lower).astype(float)
        # Log market cap. Non-positive caps have no logarithm and are treated
        # like missing values; missing values become 0 as before.
        # reindex() guarantees the rows follow Y's order.
        lnmkv = np.log(cap.where(cap > 0)).reindex(Y.index).fillna(0)
        regressors.append(lnmkv)
    X = pd.concat(regressors, axis=1, sort=False).astype(float)
    # X = sm.add_constant(X)     # optional intercept (left disabled)

    df = pd.DataFrame(index=Y.index, columns=Y.columns, dtype=float)   # output matrix
    for i in range(Y.shape[1]):       # one regression per factor column
        result = sm.OLS(Y.iloc[:, i].astype(float), X).fit()
        df.iloc[:, i] = np.asarray(result.resid)
    return df

## 单因子检验-IC值法

In [141]:
# Single-factor test: IC (information coefficient) method
def factortest_ICvalue(factor, stock):
    """Compute per-period Normal (Pearson) and Rank (Spearman) IC values.

    For every column ``i`` except the last, the factor values in column ``i``
    are correlated with the return from column ``i`` to column ``i + 1`` of
    ``stock`` (``(p[i+1] - p[i]) / p[i]``).

    Parameters
    ----------
    factor : pandas.DataFrame
        Factor values, one row per stock and one column per period.
    stock : pandas.DataFrame
        Prices with the same columns-per-period layout. If both indexes are
        unique and hold the same labels, ``stock`` is reordered to the row
        order of ``factor``; otherwise rows are paired by position.

    Returns
    -------
    numpy.ndarray
        Shape ``(n_periods - 1, 4)`` with columns
        ``[pearson_r, pearson_p, spearman_rho, spearman_p]``.
        With fewer than two periods the result has shape ``(0, 4)``.

    Notes
    -----
    Missing factor values and missing returns are replaced by 0 before the
    correlations are computed.
    """
    # Pair rows by label when that is unambiguous; positional pairing of
    # differently ordered frames would silently produce wrong ICs.
    same_labels = (
        factor.index.is_unique
        and stock.index.is_unique
        and len(factor.index) == len(stock.index)
        and factor.index.isin(stock.index).all()
    )
    if same_labels and not factor.index.equals(stock.index):
        stock = stock.loc[factor.index]

    # Forward return of each period, stored in the column of the period start.
    stock_return = (-stock.diff(-1, axis=1).div(stock)).fillna(0)
    factor = factor.fillna(0)

    rows = []
    for i in range(factor.shape[1] - 1):      # the last period has no forward return
        ret_i = stock_return.iloc[:, i].to_numpy(dtype=float)
        fac_i = factor.iloc[:, i].to_numpy(dtype=float)
        pearson = st.pearsonr(ret_i, fac_i)
        spearman = st.spearmanr(ret_i, fac_i)
        rows.append([pearson[0], pearson[1], spearman[0], spearman[1]])
    # reshape keeps four columns even when there are no rows.
    return np.array(rows, dtype=float).reshape(-1, 4)

## 案例实现

### 获取因子数据

In [142]:
# Fetch the factor data.
# Resolve the HS300 constituent list (as of 2018-01-01) once and reuse it for
# both requests instead of querying the same list twice.
hs300_codes = list(get_code_list('hs300', date='2018-01-01').code)
factor = get_factor_by_factor(factor='rev_grow_f3', target_list=hs300_codes, begin_date='2018-01-01', end_date='2020-12-31')
mkv = get_factor_by_factor(factor='mkv', target_list=hs300_codes, begin_date='2018-01-01', end_date='2020-12-31')
# Use 'date' as the row label, then transpose so the dates become columns.
mkv = mkv.set_index('date').T
factor = factor.set_index('date').T

In [143]:
# Display the factor table built above (indexed by 'date', then transposed).
factor

date,2018-01-02,2018-01-03,2018-01-04,2018-01-05,2018-01-08,2018-01-09,2018-01-10,2018-01-11,2018-01-12,2018-01-15,2018-01-16,2018-01-17,2018-01-18,2018-01-19,2018-01-22,2018-01-23,2018-01-24,...,2020-12-09,2020-12-10,2020-12-11,2020-12-14,2020-12-15,2020-12-16,2020-12-17,2020-12-18,2020-12-21,2020-12-22,2020-12-23,2020-12-24,2020-12-25,2020-12-28,2020-12-29,2020-12-30,2020-12-31
SZSE.000001,11.8393,11.8393,11.8393,11.8393,11.8393,11.8393,11.8393,11.8393,11.8393,11.8393,11.8393,11.8393,11.8393,11.8393,11.8393,11.8393,11.8393,...,10.92267,10.92267,10.92267,10.92267,10.92267,10.92267,10.92267,10.92267,10.92267,10.92267,10.92267,10.92267,10.92267,10.92267,10.92267,10.92267,10.92267
SZSE.000002,18.3888,18.3888,18.3888,18.3888,18.3888,18.3888,18.3888,18.3888,18.3888,18.3888,18.3888,18.3888,18.3888,18.3888,18.3888,18.3888,18.3888,...,17.60687,17.60687,17.60687,17.60687,17.60687,17.60687,17.60687,17.60687,17.60687,17.60687,17.60687,17.60687,17.60687,17.60687,17.60687,17.60687,17.60687
SZSE.000063,13.7852,13.7852,13.7852,13.7852,13.7852,13.7852,13.7852,13.7852,13.7852,14.5547,14.5547,14.5547,14.5547,14.5547,14.5547,14.5547,14.5547,...,14.37027,14.37027,14.37027,14.37027,14.37027,14.37027,14.37027,14.37027,14.37027,14.37027,14.37027,14.37027,14.37027,14.33758,14.30089,14.30089,14.30089
SZSE.000069,20.5873,20.5873,20.5873,20.5873,20.5873,20.5873,20.5873,20.5873,20.5873,20.5873,20.5873,20.5873,20.5873,20.5873,20.5873,20.5873,20.5873,...,20.94493,20.94493,20.94493,20.94493,20.94493,20.94493,20.94493,20.94493,20.94493,20.94493,20.94493,20.94493,20.94493,20.94493,20.94493,20.94493,20.94493
SZSE.000100,8.5209,8.5209,8.5209,8.5209,8.5209,8.5209,8.5209,8.5209,8.5209,8.5209,8.5209,8.5209,8.5209,8.5209,8.5209,8.5209,8.5209,...,19.07602,19.07602,19.07602,19.07602,19.07602,19.03775,19.05101,19.05101,19.05101,19.05101,19.05101,19.05101,19.05101,19.05101,19.05101,19.05101,19.05101
SZSE.000157,13.4983,13.4983,13.4983,13.4983,13.4983,13.4983,13.4983,13.4983,13.4983,13.4983,13.4983,13.4983,13.4983,13.4983,13.4983,13.4983,13.4983,...,8.62199,8.62138,8.66379,8.66379,8.66379,8.66379,8.66379,8.66379,8.66379,8.66379,8.66379,8.66379,8.66379,8.66379,8.66379,8.66379,8.66379
SZSE.000166,-4.58,-4.58,-4.58,-4.58,-4.58,-4.58,-4.58,-4.58,-4.58,-4.58,-4.58,-4.58,-4.58,-4.58,-4.58,-4.58,-4.58,...,10.82797,10.82797,10.82797,10.82797,10.82797,10.82797,10.82797,10.82797,10.82797,10.82797,10.82797,10.82797,10.82797,10.82797,10.82797,10.82797,10.82797
SZSE.000333,12.7611,12.7611,12.7611,12.7611,12.7611,12.7611,12.7611,12.7611,12.7611,12.7611,12.7611,12.7611,12.7611,12.7611,12.7611,12.7611,12.7611,...,9.04697,9.04697,9.04697,9.04697,9.04697,9.04697,9.01029,9.01029,9.01029,9.01029,9.01029,9.01029,9.00832,9.00832,9.00832,9.00832,9.00832
SZSE.000338,7.5346,7.5346,7.5346,7.5346,7.5346,7.5346,7.5346,7.5346,7.5346,7.5346,7.5346,7.5346,7.5346,7.5346,7.5346,7.5346,7.5346,...,6.23004,6.23004,6.23004,6.23004,6.23004,6.23004,6.23004,6.23004,6.23004,6.23004,6.23004,6.23004,6.23004,6.22495,6.26046,6.26046,6.27159
SZSE.000413,27.3383,27.3383,27.3383,27.3383,27.3383,27.3383,27.3383,27.3383,27.3383,27.3383,27.3383,27.3383,27.3383,27.3383,27.3383,27.3383,27.3383,...,,,,,,,,,,,,,,,,,


### 获取行情数据

In [144]:
# Fetch monthly bars for the HS300 constituents (list as of 2018-01-01).
data = get_kdata(
    target_list=list(get_code_list('hs300', date='2018-01-01').code),
    frequency='month',
    fre_num=1,
    begin_date='2018-01-01',
    end_date='2020-12-31',
    fq=1,
    fill_up=True,
    df=True,
    sort_by_date=False,
)
# Pivot into a close-price table: one row per code, one column per bar time.
close = data.pivot_table(index='code', columns='time', values='close')
close

time,2018-01-31 15:00:00,2018-02-28 15:00:00,2018-03-30 15:00:00,2018-04-27 15:00:00,2018-05-31 15:00:00,2018-06-29 15:00:00,2018-07-31 15:00:00,2018-08-31 15:00:00,2018-09-28 15:00:00,2018-10-31 15:00:00,2018-11-30 15:00:00,2018-12-28 15:00:00,2019-01-31 15:00:00,2019-02-28 15:00:00,2019-03-29 15:00:00,2019-04-30 15:00:00,2019-05-31 15:00:00,...,2019-08-30 15:00:00,2019-09-30 15:00:00,2019-10-31 15:00:00,2019-11-29 15:00:00,2019-12-31 15:00:00,2020-01-23 15:00:00,2020-02-28 15:00:00,2020-03-31 15:00:00,2020-04-30 15:00:00,2020-05-29 15:00:00,2020-06-30 15:00:00,2020-07-31 15:00:00,2020-08-31 15:00:00,2020-09-30 15:00:00,2020-10-30 15:00:00,2020-11-30 15:00:00,2020-12-31 15:00:00
code,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1
sse.600000,11.41092,10.79575,10.09394,10.05928,9.14086,8.28309,8.90467,9.04476,9.29868,9.61389,9.37748,8.5807,9.39499,10.27933,9.87656,10.48071,9.74522,...,10.18356,10.68913,11.294,10.75232,11.16761,10.24676,9.79536,9.1634,9.59674,9.54257,9.5516,9.86222,9.86222,8.93883,8.81508,9.57664,9.21489
sse.600004,16.21239,15.70395,15.00365,14.60074,15.94378,12.5574,13.53008,11.91076,12.45704,9.55007,9.88174,9.8037,11.52056,12.01807,14.41778,14.8665,15.28597,...,18.42014,22.11401,18.02612,16.45007,17.18884,15.11042,15.24833,12.36218,15.56354,16.10531,15.01192,13.98748,15.14634,13.54413,12.48926,14.95726,14.06161
sse.600009,45.60046,47.52601,47.49684,48.42071,52.38851,53.95423,58.6125,53.90601,57.72697,48.6706,49.21084,49.85913,48.42504,56.24377,61.04698,69.37648,67.91292,...,83.20436,78.97732,75.48284,74.37411,77.95769,68.28598,65.04888,60.19818,69.98868,71.66167,71.34489,67.24655,76.4,68.78,66.1,78.52,75.66
sse.600010,2.46104,2.35188,2.16333,2.0641,1.69693,1.53815,1.66274,1.50343,1.65278,1.61296,1.52335,1.47357,1.46361,1.7623,1.82204,1.69261,1.75235,...,1.5,1.46,1.37,1.28,1.32,1.25,1.17,1.15,1.08,1.1,1.08,1.17,1.16,1.15,1.14,1.21,1.17
sse.600011,5.68053,5.5725,6.18467,5.63552,6.42577,5.81336,6.91022,6.41663,7.04732,5.90476,6.30694,6.74569,5.8682,6.2978,5.99617,6.16984,5.94132,...,5.85036,5.38604,5.29318,5.56248,5.18174,5.02388,4.36455,4.35527,3.91881,4.02096,3.91881,4.4909,5.27608,5.18033,4.67283,4.92179,4.28981
sse.600015,8.61818,8.13696,7.79573,7.62075,7.28827,6.65207,6.79493,6.91994,7.29495,7.14316,6.95565,6.5985,6.75922,7.42889,7.36639,7.16102,6.69671,...,6.61522,6.74314,6.84365,6.78883,7.00811,6.76141,6.35025,5.91167,5.9939,5.8203,5.82047,6.04872,6.1153,5.82998,5.7634,6.2104,5.94411
sse.600016,6.32964,5.93142,5.5821,5.39347,5.35155,4.89045,5.16562,5.07216,5.38652,5.429,5.18261,4.86826,5.05517,5.54794,5.38652,5.43749,5.19111,...,5.21563,5.40415,5.52085,5.5388,5.66448,5.45801,5.13484,5.12586,5.28745,5.10791,5.08995,5.28514,5.29468,5.05618,5.0371,5.05618,4.96078
sse.600018,6.49797,6.8121,6.32745,6.25565,5.84279,5.49163,5.58377,5.16913,4.98485,4.97564,5.13228,4.77293,4.94799,5.50084,6.69868,7.43581,6.9751,...,5.40686,5.35976,5.47279,5.24672,5.43512,4.8982,4.2765,4.21999,3.9374,3.81494,3.95624,4.38872,4.32061,4.0676,4.0676,4.5055,4.44711
sse.600019,7.62672,7.67257,6.51099,7.06121,6.43457,6.25963,6.98282,6.33195,6.30784,6.14713,5.39984,5.22306,5.63286,6.0266,5.80964,5.76143,5.45661,...,5.03221,5.11882,5.02355,4.73772,4.97158,4.70308,4.50387,4.21805,4.21805,4.23537,4.18891,4.51962,4.53799,4.58392,4.90544,5.70464,5.4658
sse.600023,4.79527,4.59684,4.57204,4.21653,4.31574,3.85275,4.3072,4.17046,4.34138,4.04227,3.83717,4.04227,3.82862,4.085,4.16191,4.05082,3.98245,...,3.55857,3.46961,3.45182,3.42513,3.52299,3.33616,3.06927,3.01589,3.10485,3.08706,3.12265,3.31299,3.56639,3.40684,3.3693,3.53824,3.40684


In [145]:
# pivot_table sorts the row labels, so they no longer follow the order of the
# raw data. Recover the codes in order of first appearance;
# drop_duplicates() keeps the first occurrence of each code, which is the same
# ordering as sorted(set(codes), key=codes.index) without the quadratic scan.
code = data['code'].drop_duplicates().tolist()
stock_close = close.loc[code]        # close prices with rows in raw-data order
stock_close

time,2018-01-31 15:00:00,2018-02-28 15:00:00,2018-03-30 15:00:00,2018-04-27 15:00:00,2018-05-31 15:00:00,2018-06-29 15:00:00,2018-07-31 15:00:00,2018-08-31 15:00:00,2018-09-28 15:00:00,2018-10-31 15:00:00,2018-11-30 15:00:00,2018-12-28 15:00:00,2019-01-31 15:00:00,2019-02-28 15:00:00,2019-03-29 15:00:00,2019-04-30 15:00:00,2019-05-31 15:00:00,...,2019-08-30 15:00:00,2019-09-30 15:00:00,2019-10-31 15:00:00,2019-11-29 15:00:00,2019-12-31 15:00:00,2020-01-23 15:00:00,2020-02-28 15:00:00,2020-03-31 15:00:00,2020-04-30 15:00:00,2020-05-29 15:00:00,2020-06-30 15:00:00,2020-07-31 15:00:00,2020-08-31 15:00:00,2020-09-30 15:00:00,2020-10-30 15:00:00,2020-11-30 15:00:00,2020-12-31 15:00:00
code,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1
szse.000001,13.3486,11.44844,10.35585,10.30835,9.6718,8.63621,9.09055,9.77572,10.66354,10.52844,9.99767,9.05195,10.71179,11.92773,12.37164,13.36562,11.75402,...,13.81392,15.20897,15.86259,14.9163,16.04795,15.16019,14.14561,12.48716,13.58954,12.89857,12.70013,13.23592,14.96234,15.05164,17.61151,19.58598,19.1891
szse.000002,31.69887,27.59726,28.09519,23.96826,21.59675,20.76124,19.68098,21.09061,21.30984,21.24846,22.30957,20.88891,24.33532,24.54578,26.93985,25.32627,23.41452,...,23.53176,23.62297,24.19758,25.26472,29.35085,26.43218,26.98856,23.39495,24.44384,23.44055,23.84187,24.48033,25.78922,26.4985,26.05402,29.03297,27.14157
szse.000063,30.78573,31.8937,29.82615,30.97369,30.97369,12.89004,14.44318,18.88495,18.10344,16.74815,19.65657,19.37958,19.89399,29.47991,28.88636,31.80467,28.45108,...,28.50055,31.66617,33.04124,30.38014,35.00987,38.74927,49.46294,42.34028,40.64864,35.66278,39.69896,38.74927,38.78506,32.91758,32.09215,34.5386,33.46455
szse.000069,8.61616,7.0844,6.85963,6.44339,6.55994,6.01883,5.89012,5.47189,5.48932,5.02752,5.23663,5.53288,5.57645,6.02954,6.70916,6.92699,6.20265,...,6.19356,6.39365,6.40274,6.26632,7.08485,6.43912,5.93891,5.81158,5.92981,5.31136,5.79143,6.85224,6.83313,6.47952,6.26927,6.95736,6.77578
szse.000100,3.2459,3.40956,3.15498,2.95495,2.92068,2.72346,2.81738,2.67651,2.63894,2.31964,2.31964,2.30086,2.58259,3.03337,3.81285,3.4588,3.18904,...,3.10233,3.4299,3.16977,3.69967,4.30664,4.87508,5.66512,3.9887,4.57637,5.14841,6.11497,6.17415,6.9829,6.06566,5.9473,6.91386,6.9829
szse.000157,3.85541,3.67509,3.61498,3.6064,3.58064,3.52912,3.58922,3.49172,3.37443,3.01353,3.203,3.21202,3.35638,3.78946,4.04209,4.46615,4.8451,...,4.97422,5.34964,5.49042,5.58427,6.2694,5.91276,5.89399,5.34025,6.00661,5.97845,6.03477,7.75228,7.9963,7.6115,7.13213,8.25014,9.54163
szse.000166,5.04751,4.73616,4.68899,4.37765,4.37765,4.17086,4.23767,4.25676,4.29493,4.25676,4.23767,3.88453,4.21858,5.24936,5.26845,4.90577,4.55223,...,4.6101,4.6101,4.61974,4.59081,4.93801,4.64867,4.51365,4.25325,4.21467,4.16645,4.8705,5.62277,5.51494,5.19226,4.92825,5.28027,5.16293
szse.000333,54.37237,49.94586,49.56411,46.97365,49.20145,48.55981,44.27293,38.68418,37.4753,34.4438,35.97815,34.27642,40.47891,44.43102,45.31443,48.72719,47.34896,...,50.38464,48.78089,52.9812,51.83566,55.60639,52.59935,50.64239,46.22252,51.31062,56.34145,58.61911,70.34489,69.15859,71.17825,76.34504,85.14918,96.51221
szse.000338,7.72918,7.4827,7.27142,7.28022,7.68517,7.70278,7.73022,7.24991,7.74835,6.90339,6.96826,7.13505,8.20067,8.85858,10.98056,11.44388,10.84157,...,10.97326,10.63212,11.09559,12.66011,15.24204,12.98645,13.75431,11.47952,12.86167,12.51613,13.16882,15.53959,14.34483,14.61585,14.7827,16.30689,15.42755
szse.000413,7.95884,8.00755,7.53995,7.62763,7.39383,5.90337,5.99572,5.2857,5.34487,4.64471,4.63485,4.43762,4.04317,5.59141,5.95628,6.08448,4.93069,...,4.55,5.68,4.77,4.8,3.36,3.05,3.64,3.0,2.78,2.77,2.67,2.8,3.14,3.03,2.73,2.57,2.56


### 获取月末交易日

In [146]:
# Reduce the daily calendar to monthly observations:
# keep the last trading day of every month.
days = get_trading_days('SSE', '2018-01-01', '2020-12-31')
months = np.vectorize(lambda day: day.month)(days)
month_series = pd.Series(months)
# A day is a month end when the next trading day falls in a different month
# (the final day has no successor and is therefore always kept).
is_month_end = month_series != month_series.shift(-1)
month_end = days[is_month_end]       # sequence of month-end dates
month_end

array([datetime.datetime(2018, 1, 31, 0, 0),
       datetime.datetime(2018, 2, 28, 0, 0),
       datetime.datetime(2018, 3, 30, 0, 0),
       datetime.datetime(2018, 4, 27, 0, 0),
       datetime.datetime(2018, 5, 31, 0, 0),
       datetime.datetime(2018, 6, 29, 0, 0),
       datetime.datetime(2018, 7, 31, 0, 0),
       datetime.datetime(2018, 8, 31, 0, 0),
       datetime.datetime(2018, 9, 28, 0, 0),
       datetime.datetime(2018, 10, 31, 0, 0),
       datetime.datetime(2018, 11, 30, 0, 0),
       datetime.datetime(2018, 12, 28, 0, 0),
       datetime.datetime(2019, 1, 31, 0, 0),
       datetime.datetime(2019, 2, 28, 0, 0),
       datetime.datetime(2019, 3, 29, 0, 0),
       datetime.datetime(2019, 4, 30, 0, 0),
       datetime.datetime(2019, 5, 31, 0, 0),
       datetime.datetime(2019, 6, 28, 0, 0),
       datetime.datetime(2019, 7, 31, 0, 0),
       datetime.datetime(2019, 8, 30, 0, 0),
       datetime.datetime(2019, 9, 30, 0, 0),
       datetime.datetime(2019, 10, 31, 0, 0),
      

In [147]:
# Convert the datetime objects to pandas Timestamps
month_end_Timestamp = list(map(pd.Timestamp, month_end))
month_end_Timestamp

[Timestamp('2018-01-31 00:00:00'),
 Timestamp('2018-02-28 00:00:00'),
 Timestamp('2018-03-30 00:00:00'),
 Timestamp('2018-04-27 00:00:00'),
 Timestamp('2018-05-31 00:00:00'),
 Timestamp('2018-06-29 00:00:00'),
 Timestamp('2018-07-31 00:00:00'),
 Timestamp('2018-08-31 00:00:00'),
 Timestamp('2018-09-28 00:00:00'),
 Timestamp('2018-10-31 00:00:00'),
 Timestamp('2018-11-30 00:00:00'),
 Timestamp('2018-12-28 00:00:00'),
 Timestamp('2019-01-31 00:00:00'),
 Timestamp('2019-02-28 00:00:00'),
 Timestamp('2019-03-29 00:00:00'),
 Timestamp('2019-04-30 00:00:00'),
 Timestamp('2019-05-31 00:00:00'),
 Timestamp('2019-06-28 00:00:00'),
 Timestamp('2019-07-31 00:00:00'),
 Timestamp('2019-08-30 00:00:00'),
 Timestamp('2019-09-30 00:00:00'),
 Timestamp('2019-10-31 00:00:00'),
 Timestamp('2019-11-29 00:00:00'),
 Timestamp('2019-12-31 00:00:00'),
 Timestamp('2020-01-23 00:00:00'),
 Timestamp('2020-02-28 00:00:00'),
 Timestamp('2020-03-31 00:00:00'),
 Timestamp('2020-04-30 00:00:00'),
 Timestamp('2020-05-

### 得到月末的因子数据

In [148]:
# Keep only the month-end columns (selected by column label) and convert the
# upper-case letters of the row labels to lower case.
factor = factor.loc[:, month_end_Timestamp].rename(index=str.lower)
mkv = mkv.loc[:, month_end_Timestamp].rename(index=str.lower)
factor

date,2018-01-31,2018-02-28,2018-03-30,2018-04-27,2018-05-31,2018-06-29,2018-07-31,2018-08-31,2018-09-28,2018-10-31,2018-11-30,2018-12-28,2019-01-31,2019-02-28,2019-03-29,2019-04-30,2019-05-31,...,2019-08-30,2019-09-30,2019-10-31,2019-11-29,2019-12-31,2020-01-23,2020-02-28,2020-03-31,2020-04-30,2020-05-29,2020-06-30,2020-07-31,2020-08-31,2020-09-30,2020-10-30,2020-11-30,2020-12-31
szse.000001,11.8393,11.8393,9.8152,10.0683,10.0683,10.0683,10.0683,10.1463,10.2101,10.4728,10.3503,10.3453,10.3453,10.3453,12.0581,11.8448,11.8207,...,11.8607,11.79,11.8486,11.8486,11.8573,11.8573,12.4452,3.7746,11.917,11.917,11.917,9.2522,9.9869,10.1826,10.90783,10.90783,10.92267
szse.000002,18.3888,18.3888,28.4769,27.8523,27.7218,27.9679,27.874,27.5129,27.3613,26.5428,26.4731,26.4701,26.4701,26.4701,18.9502,18.7339,18.7416,...,19.2866,19.2866,19.6345,19.5955,19.5928,19.5928,19.5928,17.6123,18.0688,18.0688,18.0688,17.9899,17.888,17.88,17.8572,17.61718,17.60687
szse.000063,14.5547,14.5547,18.7891,19.7364,20.1893,20.1893,19.8801,19.9481,19.6516,19.6298,19.5682,19.5682,19.5682,19.5682,19.3168,19.2315,18.127,...,18.2773,18.5047,18.5744,18.52,18.4581,18.4581,12.0015,10.5209,14.8979,14.8979,14.8979,15.2088,14.8462,14.6419,14.60642,14.37027,14.30089
szse.000069,20.5873,20.5873,20.5873,20.5873,21.7967,21.7967,21.5971,21.1617,21.1617,20.7428,20.1371,19.9531,19.9531,19.9531,19.9531,19.0115,19.0101,...,18.7121,18.8962,18.9465,19.2346,19.4737,19.4737,19.4737,19.4737,19.4737,19.4737,19.4737,18.9568,19.9704,20.2429,20.70105,20.94493,20.94493
szse.000100,8.5209,8.5209,8.5209,8.5209,4.4,4.4,9.3202,9.4956,8.4899,7.9158,8.0279,7.6515,7.6515,7.6515,17.1906,14.9286,14.9616,...,12.8923,13.6364,13.6193,14.0523,14.0523,14.0523,14.0523,14.0523,12.9051,12.9051,12.9051,21.4214,21.4214,20.0502,20.05019,19.07602,19.05101
szse.000157,13.4983,13.4983,13.4983,11.982,12.1993,12.1993,12.1993,12.1993,12.1664,12.1664,9.0386,9.0386,9.0386,9.0386,9.0386,4.596,5.1384,...,7.3473,8.2946,8.4554,8.9621,8.9621,8.9621,8.9621,8.9621,8.0784,8.0784,8.0784,7.8578,8.2095,8.074,8.29993,8.52331,8.66379
szse.000166,-4.58,-4.58,-4.58,-4.58,-4.58,-4.58,-4.58,-4.58,-4.58,14.3219,14.3219,14.3219,14.3219,14.3219,8.0,8.0,8.0,...,8.0,8.0,8.0,8.0,8.0,8.0,8.0,8.0,11.5539,11.5539,11.5539,10.828,10.828,10.828,10.82797,10.82797,10.82797
szse.000333,12.7611,12.7611,12.7611,15.7037,15.8364,15.299,15.299,14.6691,13.2247,13.0275,12.2513,12.2902,12.2902,12.2902,12.2902,8.8511,9.3344,...,9.301,9.3946,9.3491,9.4012,9.4011,9.4011,9.4011,9.4011,9.4011,9.4011,9.4011,8.8383,8.8298,8.9663,8.9862,9.04697,9.00832
szse.000338,7.5346,7.5346,7.5346,6.5532,6.0322,5.8912,6.0162,6.2312,6.2894,6.3161,6.5679,6.5679,6.5679,6.5679,5.2846,5.5096,5.4651,...,5.5914,5.86,5.86,5.6085,5.6,5.6,5.6,6.7161,6.6459,6.6459,6.6459,6.3708,6.2806,6.125,6.12495,6.23004,6.27159
szse.000413,27.3383,27.3383,27.3383,22.7971,22.7971,22.7971,22.7971,22.7971,24.3961,24.3961,24.9968,24.9968,24.9968,24.9968,24.9968,24.9968,24.9968,...,24.9968,12.5375,12.5375,12.5375,12.5375,12.5375,12.5375,12.5375,12.5375,12.5375,12.5375,12.5375,12.5375,12.5375,,,


### 因子数据处理

In [149]:
# Winsorize (band of 5.2 standard deviations), then z-score standardize
factor_clipped = extreme_3sigma(factor, 5.2)
factor_S = standardize_z(factor_clipped)


### 获取IC值

In [150]:
# Compute the IC values. The last period has no IC row, so the index is every
# factor column except the final one.
IC = factortest_ICvalue(factor_S, stock_close)
ic_columns = ['Normal_IC', 'Normal_pvalue', 'Rank_IC', 'Rank_pvalue']
df = pd.DataFrame(IC, index=factor_S.columns[:-1], columns=ic_columns)
df

Unnamed: 0_level_0,Normal_IC,Normal_pvalue,Rank_IC,Rank_pvalue
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2018-01-31,0.25537,0.0,0.27788,0.0
2018-02-28,0.37016,0.0,0.35024,0.0
2018-03-30,-0.12656,0.0283924,-0.08521,0.140918
2018-04-27,0.00231,0.968198,0.06371,0.271347
2018-05-31,-0.01978,0.733003,0.05083,0.380296
2018-06-29,-0.15308,0.00790839,-0.16041,0.00535397
2018-07-31,0.03878,0.503363,0.04644,0.422869
2018-08-31,-0.2656,0.0,-0.22955,0.0
2018-09-28,-0.17616,0.00219608,-0.26156,0.0
2018-10-31,0.24792,0.0,0.25167,0.0


### IC值评价

In [None]:
ic_values = df[['Normal_IC', 'Rank_IC']]

# a. Mean of each IC series
ma = ic_values.mean()
print('IC值序列均值: \n',ma)

# b. Standard deviation of each IC series.
# Stored as ic_std: the name `st` is the scipy.stats alias used by
# factortest_ICvalue, and rebinding it would break a re-run of the IC cell.
ic_std = ic_values.std()
print('IC值序列的标准差: \n',ic_std)

# c. IR ratio (mean / standard deviation), reusing the statistics above
IR = ma / ic_std
print('IR比率: \n',IR)

# Share of periods with IC > 0
NormalIC_ratio = float((df['Normal_IC'] > 0).mean())
RankIC_ratio = float((df['Rank_IC'] > 0).mean())
print('Normal_IC值序列大于0的占比: ',NormalIC_ratio)
print('Rank_IC值序列大于0的占比: ',RankIC_ratio)

# Share of periods whose IC p-value is below 0.1
Normal_pvalue_ratio = float((df['Normal_pvalue'] < 0.1).mean())
Rank_pvalue_ratio = float((df['Rank_pvalue'] < 0.1).mean())
print('Normal_IC的P值序列小于0.1的占比: ',Normal_pvalue_ratio)
print('Rank_IC的P值序列小于0.1的占比: ',Rank_pvalue_ratio)

# Cumulative IC curves, drawn on an explicit Axes
fig, ax = plt.subplots(figsize=(14, 8))
df['Normal_IC'].cumsum().plot(kind='line', label='Normal_IC', ax=ax)
df['Rank_IC'].cumsum().plot(kind='line', label='Rank_IC', ax=ax)
ax.legend()
plt.show()