# 因子相关性分析

## 导入模块

In [1]:
import numpy as np
import pandas as pd
import feather
from matplotlib import pyplot as plt
from scipy import stats
import sunlandsdatasdk as sd

## 设置回测区间

In [2]:
# Date range as ISO 'YYYY-MM-DD' strings. In this notebook the two names are
# only referenced as the start_date/end_date arguments of the (commented-out)
# factor-exposure download.
start_date, end_date = '2019-08-01', '2024-12-31'

## 读入数据

### 读入风险因子

In [3]:
# Risk-factor column names: requested from the exposure API and later appended
# to the factor list used for the correlation tables.
fields = [
    'beta',
    'book_to_price',
    'earnings_yield',
    'growth',
    'leverage',
    'liquidity',
    'momentum',
    'size',
]

In [4]:
# One-off download step, intentionally left disabled: it authenticates with
# the data SDK, pulls exposures for the `fields` factors over
# [start_date, end_date] for the constituents of index '999998', renames
# 'order_book_id' to 'issue', and caches the result as a feather file.
# The next cell reads that cached file instead of hitting the API again.
# NOTE(review): credentials are masked as '*' here; supply real ones before
# re-enabling.
# sd.auth('*', '*')
# issues = sd.get_index_stocks('999998', weight=False)
# factor_exposure = sd.get_factor_exposure(
#     issues,
#     start_date=start_date, end_date=end_date,
#     factors=fields
# )

# risk_factors = (
#     factor_exposure[fields]
#         .reset_index()
#         .rename(columns={'order_book_id': 'issue'})
# )
# feather.write_dataframe(risk_factors, '../data/risk_factors_daily.feather')

In [5]:
# Load the risk-factor exposures from the local feather cache.
risk_factors_daily = feather.read_dataframe(
    '../data/risk_factors_daily.feather'
)

### 读入点度中心性因子、相对动量因子

In [6]:
# Both source files use the generic column names 'indus_factor' and
# 'neutral_factor'; each table gets its own suffix so the columns stay
# distinguishable once the tables are merged.

# Degree-centrality factor.
N_connect = (
    feather
        .read_dataframe('../data/N_connect/N_connect_0_1/neutral_N_connect.feather')
        .rename(columns={
            'indus_factor': 'indus_connect',
            'neutral_factor': 'neutral_connect',
        })
)

# Relative-momentum (peer return) factor.
relative_without_posjump = (
    feather
        .read_dataframe('../data/peer_ret/neutral_peer_without_posjump.feather')
        .rename(columns={
            'indus_factor': 'indus_peer',
            'neutral_factor': 'neutral_peer',
        })
)

In [7]:
# Build one table keyed by (issue, date): risk factors first, then the
# connectivity columns, then the peer-return columns. Inner joins keep only
# the (issue, date) pairs present in all three inputs.
factors = (
    risk_factors_daily
        .merge(
            N_connect[['issue', 'date', 'N_connect', 'indus_connect', 'neutral_connect']],
            how='inner',
            on=['issue', 'date'],
        )
        .merge(
            relative_without_posjump[['issue', 'date', 'peer_relative_ret', 'indus_peer', 'neutral_peer']],
            how='inner',
            on=['issue', 'date'],
        )
)

## 计算相关性

In [8]:
def get_corr(factors: pd.DataFrame, focus: str, factor_list: list) -> pd.Series:
    """Return the Spearman rank correlation of each listed factor with ``focus``.

    Args:
        factors: Frame holding at least the ``focus`` column and every column
            named in ``factor_list`` (typically one date's cross-section
            when used through ``groupby('date').apply``).
        focus: Name of the column every other factor is correlated against.
        factor_list: Names of the columns to report, in output order. Columns
            of ``factors`` that are not listed are ignored.

    Returns:
        A float Series indexed by ``factor_list`` whose values are the
        Spearman correlation coefficients with ``factors[focus]``. The
        p-values computed by scipy are discarded.
    """
    target = factors[focus]
    # Index [0] of spearmanr's result is the correlation coefficient.
    # NaN handling is left at scipy's default, so a column containing NaN
    # yields NaN for that coefficient rather than a pairwise-complete value.
    coefficients = [
        stats.spearmanr(factors[name], target)[0]
        for name in factor_list
    ]
    return pd.Series(coefficients, index=factor_list, dtype=float)

In [9]:
# Raw (non-neutralised) versions of the two factors under study.
f1, f2 = 'N_connect', 'peer_relative_ret'
factor_list = [f1, f2, *fields]

# For each focus factor: compute the per-date correlations with every factor
# in factor_list via get_corr, average them over all dates, and lay the
# result out as a single-row frame.
df_corr_connect, df_corr_peer = (
    factors
        .groupby('date')[factor_list]
        .apply(get_corr, focus=focus, factor_list=factor_list)
        .mean()
        .to_frame()
        .T
    for focus in (f1, f2)
)

# Row 0: correlations with f1; row 1: correlations with f2.
df_corr = pd.concat([df_corr_connect, df_corr_peer], ignore_index=True)
df_corr

Unnamed: 0,N_connect,peer_relative_ret,beta,book_to_price,earnings_yield,growth,leverage,liquidity,momentum,size
0,1.0,0.429678,0.11127,0.200229,0.094464,-0.001618,-0.019364,-0.153215,-0.181602,-0.087161
1,0.429678,1.0,-0.112829,0.243645,0.129954,-0.0274,0.097073,-0.256727,-0.188764,0.008491


In [10]:
# Neutralised versions of the two factors under study.
f1, f2 = 'neutral_connect', 'neutral_peer'
factor_list = [f1, f2, *fields]

# For each focus factor: compute the per-date correlations with every factor
# in factor_list via get_corr, average them over all dates, and lay the
# result out as a single-row frame.
df_corr_connect, df_corr_peer = (
    factors
        .groupby('date')[factor_list]
        .apply(get_corr, focus=focus, factor_list=factor_list)
        .mean()
        .to_frame()
        .T
    for focus in (f1, f2)
)

# Row 0: correlations with f1; row 1: correlations with f2.
df_corr = pd.concat([df_corr_connect, df_corr_peer], ignore_index=True)
df_corr

Unnamed: 0,neutral_connect,neutral_peer,beta,book_to_price,earnings_yield,growth,leverage,liquidity,momentum,size
0,1.0,0.435245,0.087737,0.200888,0.112309,0.010274,-0.002613,-0.16957,-0.147399,0.00206
1,0.435245,1.0,-0.050194,0.18273,0.081298,-0.019475,0.046282,-0.199396,-0.150696,0.000986
