In [15]:
import pandas as pd 
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import scipy.stats as st
import statsmodels.api as api
import warnings
import os
from joblib import Parallel,delayed
from dateutil.relativedelta import relativedelta
from sklearn.linear_model import LinearRegression
warnings.filterwarnings('ignore')

In [16]:
# Directory that holds the factor files (one file per factor).
old_path= 'D:/causis/pca_factor/'
# Read every directory entry whose name contains 'csv', in os.listdir order.
# Column 0 is parsed as dates and used as the index; the first 12 rows and the
# last row of each frame are dropped.
all_data= list(
    map(
        lambda name: pd.read_csv(old_path+name,index_col=0,parse_dates=[0]).iloc[12:-1],
        filter(lambda name: 'csv' in name, os.listdir(old_path)),
    )
)

In [17]:
# Align the month_return data with the factor data.
# Steps, in order: read the file, shift every column up by one row (so each date
# carries the following row's value), keep 2012-01-01 .. 2020-12-31, and drop the
# last remaining row.
month_return=(
    pd.read_csv("D:/causis/month_return.csv",index_col=0,parse_dates=[0])
    .shift(-1)["20120101":"20201231"]
    .iloc[:-1]
)
# Overwrite the index with the first factor frame's index (lengths must match),
# then keep only that frame's columns, in its column order.
month_return.index=all_data[0].index
month_return=month_return[all_data[0].columns]

In [18]:
# Standardization.
# month_return is placed at the end of the list so it can be processed together
# with the factor frames. Note: running this cell again appends it once more.
all_data=[*all_data,month_return]
def stand(factor):
    """Return the z-score of ``factor``: (value - mean) / standard deviation.

    ``factor`` is any object exposing ``mean()`` and ``std()`` and supporting
    arithmetic with their results (e.g. a pandas Series).
    """
    centered = factor - factor.mean()
    spread = factor.std()
    return centered / spread

# Replace every frame in the list by its row-wise (axis=1) standardized version.
for i, frame in enumerate(all_data):
    all_data[i] = frame.apply(stand, axis=1)

In [19]:
# Split the list back apart: every frame except the last is a factor,
# the last frame is month_return.
factor, month_return = all_data[:-1], all_data[-1]

In [20]:
# Build the row labels for the result table: one label per loaded factor file.
# The same `'csv' in name` filter as the loading cell is applied, so that an
# unrelated directory entry cannot make the number of labels differ from the
# number of factor frames. The label is the entry name without its last four
# characters (the ".csv" suffix).
lscolumns=[name[:-4] for name in os.listdir(old_path) if 'csv' in name]

In [21]:
# IC metrics.
# For each factor frame, the IC series is the row-wise (axis=1) correlation with
# month_return. It is computed once per factor and reused for every statistic.
ic_series=[x.corrwith(month_return,axis=1) for x in factor]
ICmean=pd.Series([ic.mean() for ic in ic_series])
ICstd=pd.Series([ic.std() for ic in ic_series])
# Information ratio: mean IC divided by the standard deviation of IC.
IR=ICmean/ICstd
# Share of rows with a strictly positive IC; rows whose IC is NaN stay in the
# denominator.
positive_rate=pd.Series([len(ic[ic>0]) / len(ic) for ic in ic_series])

In [22]:
# Regression helper for the factor return; this one returns the factor return itself.
def factor_return_params(ser):
    """Return the factor return for one row of a factor frame.

    The factor return is the slope of an OLS regression (with intercept) of the
    returns on the factor values:

        month_return = const + factor_return * factor + residual

    Parameters
    ----------
    ser : pandas.Series
        One row of a factor frame. ``ser.name`` must be a label of the
        module-level ``month_return`` frame's index, which supplies the matching
        row of returns.

    Returns
    -------
    float
        Estimated slope on the factor values.
    """
    month_return_ser = month_return.loc[ser.name]
    # Keep only the columns where both the factor value and the return are present.
    index = ser.notna() & month_return_ser.notna()
    # Dependent variable: returns; regressors: constant + factor values.
    model = api.OLS(month_return_ser[index],api.add_constant(ser[index])).fit()
    # Position 0 is the constant, position 1 the factor slope. `.iloc` makes the
    # positional lookup explicit because `params` is indexed by labels.
    params=model.params.iloc[1]
    return params

In [23]:
# Regression helper for the factor return; this one returns the t-value of the factor return.
def factor_return_t(ser):
    """Return the t-value of the factor return for one row of a factor frame.

    Fits an OLS regression (with intercept) of the returns on the factor values
    and reports the t-statistic of the slope:

        month_return = const + factor_return * factor + residual

    Parameters
    ----------
    ser : pandas.Series
        One row of a factor frame. ``ser.name`` must be a label of the
        module-level ``month_return`` frame's index, which supplies the matching
        row of returns.

    Returns
    -------
    float
        t-statistic of the slope on the factor values.
    """
    month_return_ser = month_return.loc[ser.name]
    # Keep only the columns where both the factor value and the return are present.
    index = ser.notna() & month_return_ser.notna()
    # Dependent variable: returns; regressors: constant + factor values.
    model = api.OLS(month_return_ser[index],api.add_constant(ser[index])).fit()
    # Position 0 is the constant, position 1 the factor slope. `.iloc` makes the
    # positional lookup explicit because `tvalues` is indexed by labels.
    t=model.tvalues.iloc[1]
    return t

In [30]:
# Mean factor return and mean t-value per factor: run the row-wise regression on
# every row of a factor frame, then average over the rows.
factor_return=pd.Series([frame.apply(factor_return_params,axis=1).mean() for frame in factor])
factor_t=pd.Series([frame.apply(factor_return_t,axis=1).mean() for frame in factor])

In [31]:
# Combine all metrics into one DataFrame: one column per metric, one row per factor.
result=pd.concat(
    [ICmean,ICstd,IR,positive_rate,factor_return,factor_t],
    axis=1,
    keys=['IC均值', 'IC标准差','IR值','正值次数比例','因子收益率均值','因子收益率t检验均值'],
)
# Label the rows with the factor names.
result.index=lscolumns

In [32]:
# For readability, move the "主成分10" row to the end of the table.
# The row is selected by its label rather than by a fixed position, and the table
# is left unchanged when that label is not present. pd.concat is used because
# DataFrame.append no longer exists in pandas 2.0+.
last_label="主成分10"
if last_label in result.index:
    result=pd.concat([result.drop(index=last_label),result.loc[[last_label]]])
round(result,3)

Unnamed: 0,IC均值,IC标准差,IR值,正值次数比例,因子收益率均值,因子收益率t检验均值
主成分0,-0.039,0.161,-0.244,0.364,-0.039,-2.358
主成分1,-0.039,0.084,-0.464,0.299,-0.039,-2.285
主成分2,0.001,0.084,0.007,0.533,0.001,0.029
主成分3,-0.007,0.061,-0.114,0.411,-0.007,-0.408
主成分4,0.003,0.058,0.046,0.523,0.003,0.16
主成分5,-0.022,0.096,-0.234,0.43,-0.022,-1.311
主成分6,-0.012,0.065,-0.181,0.477,-0.012,-0.701
主成分7,-0.0,0.045,-0.003,0.486,-0.0,-0.008
主成分8,-0.004,0.052,-0.083,0.495,-0.004,-0.252
主成分9,0.0,0.048,0.009,0.449,0.0,0.025
