# 跳跃关联动量因子

## 导入模块

In [97]:
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
import feather
import statsmodels.api as sm

## 读入日线数据

### 日线数据

In [2]:
price_1d = feather.read_dataframe('../data/StockPriceK1d_20241231.feather')
price_1d = price_1d[(price_1d['date'] >= '2019-01-01') & (price_1d['date'] <= '2024-12-31')]

## 计算过去 20 天收益率

In [25]:
%%time
price_1d['ret_plus'] = 1 + price_1d['ret']
price_1d['ret_20'] = (
    price_1d
        .groupby('issue')['ret_plus']
        .transform(lambda x: x.rolling(20).apply(np.prod, raw=True) - 1)
)
price_1d = price_1d.drop(columns='ret_plus')

CPU times: total: 26 s
Wall time: 26.4 s


## 跳跃关联动量

### 读入相关性 & 矩阵展平 & 稀疏化处理

In [113]:
def get_correlation(date: str):
    date_pd = pd.to_datetime(date)
    date_str = date_pd.strftime('%Y%m%d')
    corr = feather.read_dataframe(f'../data/corr/corr_num_{date_str}.feather')
    np.fill_diagonal(corr.values, 0)
    corr = (
        corr
            .stack()
            .rename('corr')
            .rename_axis(['issue_i', 'issue_j'])
    )
    corr = corr.reset_index()
    
    med = corr.loc[corr['corr'] > 0, 'corr'].median()
    corr.loc[corr['corr'] < med, 'corr'] = 0

    return corr

### 计算绝对动量

In [116]:
def peer_ret_calc(corr_ret: pd.DataFrame):
    nume = (corr_ret['corr'] * corr_ret['ret_20']).sum()
    deno = corr_ret['corr'].sum()
    if deno == 0:
        return np.nan
    peer_ret = nume / deno
    return peer_ret

def get_peer_abs_ret(date: str, corr: pd.DataFrame):
    date_pd = pd.to_datetime(date)
    prc_date = price_1d.loc[price_1d['date'] == date_pd, ['issue', 'ret_20']]
    corr_ret = pd.merge(
        corr,
        prc_date,
        left_on='issue_j',
        right_on='issue',
        how='left'
    )
    
    peer_ret = (
        corr_ret
            .groupby('issue_i')[['corr', 'ret_20']]
            .apply(peer_ret_calc)
    )
    peer_ret = peer_ret.dropna()
    peer_ret = peer_ret.rename('peer_ret')
    peer_ret = peer_ret.rename_axis(index = {'issue_i': 'issue'})
    peer_ret = peer_ret.to_frame().reset_index()
    
    peer_ret = pd.merge(
        peer_ret,
        prc_date,
        on='issue',
        how='left'
    )
    peer_ret = peer_ret.dropna(subset='ret_20')
    peer_ret['date'] = date_pd
    return peer_ret

### 计算相对动量

In [117]:
def get_peer_relative_ret(peer_ret: pd.DataFrame, plot=False):
    x = peer_ret['ret_20']
    x = sm.add_constant(x)
    y = peer_ret['peer_ret']
    result = sm.OLS(y, x).fit()
    if plot:
        plt.hist2d(y, result.resid, bins=30)
        plt.show()
    return result.resid

### 规模运算

In [119]:
date = '2019-08-30'
corr = get_correlation(date)
peer_ret = get_peer_abs_ret(date, corr)
peer_ret['peer_relative_ret'] = get_peer_relative_ret(peer_ret)
peer_ret = peer_ret[['date', 'issue', 'ret_20', 'peer_ret', 'peer_relative_ret']]
peer_ret

Unnamed: 0,date,issue,ret_20,peer_ret,peer_relative_ret
0,2019-08-30,000001,0.030568,0.017180,0.001995
1,2019-08-30,000002,-0.041816,0.015868,0.001312
2,2019-08-30,000004,0.058133,0.020375,0.004951
3,2019-08-30,000005,0.099291,0.012767,-0.003015
4,2019-08-30,000006,0.033138,0.011101,-0.004107
...,...,...,...,...,...
3660,2019-08-30,688066,-0.136311,0.038520,0.024783
3661,2019-08-30,688088,-0.190850,0.017652,0.004390
3663,2019-08-30,688122,-0.261429,0.011305,-0.001345
3666,2019-08-30,688333,-0.251020,0.018824,0.006084
