# 因子相关性分析

## 导入模块

In [1]:
import numpy as np
import pandas as pd
import feather
from matplotlib import pyplot as plt
from scipy import stats
import statsmodels.api as sm
import sunlandsdatasdk as sd

## 设置回测区间

In [2]:
# Date window for the analysis, as ISO ``YYYY-MM-DD`` strings.
start_date, end_date = '2019-08-01', '2024-12-31'

## 读入数据

### 读入风险因子

In [3]:
# Names of the risk-factor exposure columns used throughout the notebook.
fields = [
    'beta',
    'book_to_price',
    'earnings_yield',
    'growth',
    'leverage',
    'liquidity',
    'momentum',
    'size',
]

In [4]:
# One-off data pull, intentionally left commented out: it queries the factor
# exposures listed in `fields` through the `sd` SDK for the configured date
# window and caches them as '../data/risk_factors_daily.feather'. The notebook
# reads that cached file instead of re-running the query.
# NOTE(review): '999998' is presumably an index code and the '*' arguments are
# redacted credentials -- confirm before re-enabling.
# sd.auth('*', '*')
# issues = sd.get_index_stocks('999998', weight=False)
# factor_exposure = sd.get_factor_exposure(
#     issues,
#     start_date=start_date, end_date=end_date,
#     factors=fields
# )

# risk_factors = (
#     factor_exposure[fields]
#         .reset_index()
#         .rename(columns={'order_book_id': 'issue'})
# )
# feather.write_dataframe(risk_factors, '../data/risk_factors_daily.feather')

In [5]:
# Load the cached risk-factor exposures from disk.
risk_factors_daily = feather.read_dataframe(
    '../data/risk_factors_daily.feather'
)

### 读入点度中心性因子、相对动量因子

In [6]:
# Degree-centrality factor: load it and give the generic factor columns
# names specific to this factor so they stay distinct after merging.
# dirname_connect = '../data/N_connect/N_connect_0_1/'
dirname_connect = '../data/N_connect_daily/N_connect_0_1/'
N_connect = (
    feather
        .read_dataframe(dirname_connect + '/neutral_N_connect.feather')
        .rename(columns=dict(
            indus_factor='indus_connect',
            neutral_factor='neutral_connect',
        ))
)

# Relative-momentum (peer return) factor: same treatment.
# dirname_peer = '../data/peer_ret/'
dirname_peer = '../data/peer_ret_daily/'
relative_without_posjump = (
    feather
        .read_dataframe(dirname_peer + '/neutral_peer_without_posjump.feather')
        .rename(columns=dict(
            indus_factor='indus_peer',
            neutral_factor='neutral_peer',
        ))
)

In [7]:
# Combine the three data sets on (issue, date); inner joins keep only the
# stock/date pairs that are present in all of them.
factors = (
    risk_factors_daily
        .merge(
            N_connect[['issue', 'date', 'N_connect', 'indus_connect', 'neutral_connect']],
            on=['issue', 'date'],
            how='inner',
        )
        .merge(
            relative_without_posjump[['issue', 'date', 'peer_relative_ret', 'indus_peer', 'neutral_peer']],
            on=['issue', 'date'],
            how='inner',
        )
)

## 计算相关性

In [8]:
def get_corr(factors: pd.DataFrame, focus: str, factor_list: list) -> pd.Series:
    """Return the Spearman rank correlation of ``focus`` with each listed factor.

    Args:
        factors: Frame holding one column per factor (one cross-section when
            called through ``groupby('date').apply``).
        focus: Name of the column every other factor is correlated against.
        factor_list: Columns of ``factors`` to correlate with ``focus``. Only
            these columns are used, in this order.

    Returns:
        A float Series indexed by ``factor_list`` holding the correlation
        coefficients. Missing values are handled with the defaults of
        ``scipy.stats.spearmanr``.

    Raises:
        KeyError: If ``focus`` or an entry of ``factor_list`` is not a column
            of ``factors``.
    """
    target = factors[focus]
    # Compute each coefficient explicitly instead of relying on
    # DataFrame.apply expanding the (statistic, pvalue) tuples into rows.
    # Index [0] is the statistic on both old and new scipy result types.
    coefficients = [
        stats.spearmanr(factors[name], target)[0]
        for name in factor_list
    ]
    # name=0 mirrors the label of the previous return value (row 0 of the
    # expanded apply result), so downstream frames keep the same shape.
    return pd.Series(coefficients, index=list(factor_list), dtype=float, name=0)

In [9]:
# Columns holding the two factors under study.
connect, peer = 'N_connect', 'peer_relative_ret'
factor_list = [connect, peer, *fields]

# For each focus factor: compute the per-date rank correlations with every
# factor in factor_list, then average them over dates into a one-row frame.
df_corr_connect, df_corr_peer = (
    factors
        .groupby('date')[factor_list]
        .apply(get_corr, focus=focus, factor_list=factor_list)
        .mean()
        .to_frame()
        .T
    for focus in (connect, peer)
)

# Row 0: correlations with `connect`; row 1: correlations with `peer`.
df_corr = pd.concat([df_corr_connect, df_corr_peer], ignore_index=True)
df_corr

Unnamed: 0,N_connect,peer_relative_ret,beta,book_to_price,earnings_yield,growth,leverage,liquidity,momentum,size
0,1.0,0.424904,0.111463,0.201216,0.095384,-0.00212,-0.017475,-0.152909,-0.185992,-0.086891
1,0.424904,1.0,-0.110948,0.244906,0.128905,-0.032163,0.092816,-0.252872,-0.190264,0.006846


## 多元线性回归剔除因子

In [10]:
# Regress both target factors on the risk-factor exposures plus an
# intercept; the residuals are what those exposures do not explain.
y = factors[[connect, peer]]
x = sm.add_constant(factors[fields])
result = sm.OLS(endog=y, exog=x).fit()

# Store each residual series next to its stock/date keys.
factor_connect = pd.concat(
    objs=[factors[['issue', 'date']], result.resid[connect]],
    axis='columns',
)
feather.write_dataframe(
    factor_connect,
    dirname_connect + '/N_connect_residual.feather',
)

factor_peer = pd.concat(
    objs=[factors[['issue', 'date']], result.resid[peer]],
    axis='columns',
)
feather.write_dataframe(
    factor_peer,
    dirname_peer + '/peer_without_posjump_residual.feather',
)