# 跳跃关联动量因子

## 导入模块

In [1]:
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
import feather
import statsmodels.api as sm
from tqdm.notebook import tqdm
import os
import sys

## 读入日线数据

### 日线数据

In [2]:
# Load the daily bar table and keep only the rows dated from 2019-01-01
# through 2024-12-31 (both bounds inclusive).
price_1d = (
    feather.read_dataframe('../data/StockPriceK1d_20241231.feather')
        .loc[lambda bars: bars['date'].between('2019-01-01', '2024-12-31')]
)

### 跳跃收益数据

In [3]:
# Attach the jump decomposition columns to the daily bars. The left join
# keeps every daily row; rows without a match in `jump` get missing values.
jump = feather.read_dataframe('../data/jump_bak/jump.feather')
jump_fields = ['ret_jump', 'ret_nojump', 'ret_posjump', 'ret_negjump']
price_1d = price_1d.merge(
    jump[['issue', 'date'] + jump_fields],
    how='left',
    on=['issue', 'date'],
)

In [4]:
# Blank the jump decomposition on days whose simple return lies strictly
# inside (-1%, +1%).
jump_cols = ['ret_jump', 'ret_nojump', 'ret_posjump', 'ret_negjump']
small_move = price_1d['ret'].abs() < 0.01
price_1d.loc[small_move, jump_cols] = np.nan

# Daily log return; it stands in for the non-jump component wherever that
# component is missing.
price_1d['log_ret'] = np.log(1 + price_1d['ret'])
price_1d['ret_nojump'] = price_1d['ret_nojump'].fillna(price_1d['log_ret'])

# Every remaining missing value in the table becomes 0 before the derived
# column is built.
price_1d = price_1d.fillna(0.)
price_1d['ret_without_posjump'] = price_1d['ret_nojump'].add(price_1d['ret_negjump'])

In [5]:
# Preview the merged daily table (the notebook renders it as the cell output).
price_1d

Unnamed: 0,date,issue,preclose,open,high,low,close,numTrades,volume,value,adj,ret,is_limit_buy,is_limit_sell,ret_jump,ret_nojump,ret_posjump,ret_negjump,log_ret,ret_without_posjump
0,2019-01-02,000001,9.38,9.39,9.42,9.16,9.19,25140.0,53938632.0,4.986951e+08,108.031388,-0.020256,0.0,0.0,0.000000,-0.020464,0.000000,0.0,-0.020464,-0.020464
1,2019-01-03,000001,9.19,9.18,9.33,9.15,9.28,19151.0,41553795.0,3.844577e+08,108.031388,0.009793,0.0,0.0,0.000000,0.009746,0.000000,0.0,0.009746,0.009746
2,2019-01-04,000001,9.28,9.24,9.82,9.22,9.75,59551.0,148115906.0,1.422150e+09,108.031388,0.050647,0.0,0.0,0.000000,0.049406,0.000000,0.0,0.049406,0.049406
3,2019-01-07,000001,9.75,9.84,9.85,9.63,9.74,34912.0,86568766.0,8.411664e+08,108.031388,-0.001026,0.0,0.0,0.000000,-0.001026,0.000000,0.0,-0.001026,-0.001026
4,2019-01-08,000001,9.74,9.73,9.74,9.62,9.66,21454.0,40238811.0,3.892478e+08,108.031388,-0.008214,0.0,0.0,0.000000,-0.008247,0.000000,0.0,-0.008247,-0.008247
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6503290,2024-12-25,689009,45.40,45.39,45.58,44.30,45.01,15943.0,6357913.0,2.858237e+08,1.007044,-0.008590,0.0,0.0,0.000000,-0.008627,0.000000,0.0,-0.008627,-0.008627
6503291,2024-12-26,689009,45.01,45.11,46.09,44.88,45.09,13807.0,6335058.0,2.883992e+08,1.007044,0.001777,0.0,0.0,0.000000,0.001776,0.000000,0.0,0.001776,0.001776
6503292,2024-12-27,689009,45.09,45.12,48.08,45.06,46.84,24529.0,13048008.0,6.154821e+08,1.007044,0.038811,0.0,0.0,0.023654,0.021885,0.023654,0.0,0.038077,0.021885
6503293,2024-12-30,689009,46.84,46.36,48.63,46.30,48.13,24853.0,13080542.0,6.244473e+08,1.007044,0.027541,0.0,0.0,0.000000,0.027168,0.000000,0.0,0.027168,0.027168


### 调仓日

In [6]:
# Rebalancing dates: for every calendar month end between start_date and
# end_date, take the last trading day on or before that month end.
start_date = '2019-08-01'
end_date = '2025-01-01'
trade_date = price_1d['date'].sort_values().unique()
mes = pd.date_range(start=start_date, end=end_date, freq='1ME')

adj_dates = []
for me in mes:
    trade_date_before = trade_date[trade_date <= me]
    if len(trade_date_before) == 0:
        # Fail with a readable message instead of a bare IndexError.
        raise ValueError(f'no trading day on or before month end {me.date()}')
    # Normalise to pd.Timestamp so later code can rely on .strftime().
    adj_dates.append(pd.Timestamp(trade_date_before[-1]))

# Explicit object array of pd.Timestamp: iterating it yields Timestamps
# (which have .strftime) rather than numpy datetime64 scalars (which do not),
# without depending on how np.append would coerce the element type.
adj_date = np.array(adj_dates, dtype=object)
df_adj = pd.DataFrame({'adj_date': adj_date})
feather.write_dataframe(df_adj, '../data/adj_date.feather')

## 计算过去 20 天收益率

In [7]:
%%time
# Trailing 20-row sums of each log-return component, computed per issue.
# NOTE(review): the windows follow row order, so this assumes price_1d is
# sorted by date within each issue — confirm against the input file.
ret_cols = ['log_ret', 'ret_nojump', 'ret_posjump', 'ret_negjump', 'ret_without_posjump']
ret_20_cols = [col + '_20' for col in ret_cols]
price_1d[ret_20_cols] = (
    price_1d
        .groupby('issue')[ret_cols]
        # Built-in rolling sum instead of apply(np.sum): same 20-row window and
        # same min_periods, but no Python call per window. Values can differ
        # from the per-window np.sum by floating-point rounding only.
        .transform(lambda x: x.rolling(20).sum())
)
# Convert the summed log return back to a simple 20-row return.
price_1d['ret_20'] = np.exp(price_1d['log_ret_20']) - 1

CPU times: total: 2min
Wall time: 2min 6s


## 跳跃关联动量

### 读入相关性 & 矩阵展平 & 稀疏化处理

In [8]:
def get_correlation(date, factor_type: str):
    """Load one correlation matrix and return it as a sparsified long table.

    Parameters
    ----------
    date
        Object with a ``strftime`` method; formatted as ``%Y%m%d`` to build
        the file name.
    factor_type : str
        Middle part of the file name ``corr_{factor_type}_{date}.feather``.

    Returns
    -------
    pd.DataFrame
        Columns ``issue_i``, ``issue_j`` and ``corr``. The matrix diagonal is
        set to 0, and every entry below the median of the strictly positive
        entries is set to 0.
        NOTE(review): ``issue_i`` comes from the stored frame's index —
        confirm the feather file keeps issue codes as the index.
    """
    date_str = date.strftime('%Y%m%d')
    corr = feather.read_dataframe(f'../data/corr/corr_{factor_type}_{date_str}.feather')

    # Zero the diagonal on an explicit writable copy. Writing through
    # `corr.values` only works when pandas hands back a writable view; it can
    # be a copy (the write is lost) or read-only (the write raises).
    values = corr.to_numpy(copy=True)
    np.fill_diagonal(values, 0)
    corr = pd.DataFrame(values, index=corr.index, columns=corr.columns)

    # Flatten the matrix to one row per (issue_i, issue_j) pair.
    corr = (
        corr
            .stack()
            .rename('corr')
            .rename_axis(['issue_i', 'issue_j'])
    )
    corr = corr.reset_index()

    # Sparsify: keep only entries at or above the median positive value.
    med = corr.loc[corr['corr'] > 0, 'corr'].median()
    corr.loc[corr['corr'] < med, 'corr'] = 0

    return corr

### 计算绝对动量

In [9]:
def peer_ret_calc(corr_ret: pd.DataFrame, ret_cols=None):
    """Return the ``corr``-weighted average of the given return columns.

    Parameters
    ----------
    corr_ret : pd.DataFrame
        Must contain a ``corr`` column (the weights) and every column named
        in ``ret_cols``.
    ret_cols : list of str, optional
        Columns to average. Defaults to ``['ret_20']``.

    Returns
    -------
    pd.Series or None
        One weighted average per entry of ``ret_cols``, or ``None`` when the
        weights sum to exactly 0.
    """
    if ret_cols is None:
        ret_cols = ['ret_20']

    # Check the denominator first so no work is done for zero-weight groups.
    deno = corr_ret['corr'].sum()
    if deno == 0:
        return None
    nume = corr_ret[ret_cols].mul(corr_ret['corr'], axis=0).sum()
    return nume / deno

def get_peer_abs_ret(date, corr: pd.DataFrame, price_1d: pd.DataFrame, ret_cols=None, peer_cols=None):
    """Compute each issue's correlation-weighted peer returns on one date.

    Parameters
    ----------
    date
        Value compared for equality against ``price_1d['date']``.
    corr : pd.DataFrame
        Long table with columns ``issue_i``, ``issue_j`` and ``corr``.
    price_1d : pd.DataFrame
        Must contain ``date``, ``issue``, ``ret_20`` and every column named
        in ``ret_cols``.
    ret_cols : list of str, optional
        Return columns to average over peers. Defaults to ``['ret_20']``.
    peer_cols : list of str, optional
        Output names, positionally matched to ``ret_cols``. When omitted,
        each name is derived by removing ``'_20'`` and replacing ``'ret'``
        with ``'peer'``.

    Returns
    -------
    pd.DataFrame
        Columns ``issue``, the peer columns, ``ret_20`` (the issue's own
        value on ``date``) and ``date``. Issues whose weights sum to 0, or
        that have no ``ret_20`` on ``date``, are dropped.
    """
    ret_cols = ['ret_20'] if ret_cols is None else list(ret_cols)

    on_date = price_1d.loc[price_1d['date'] == date]
    prc_date = on_date[['issue'] + ret_cols]
    corr_ret = pd.merge(
        corr,
        prc_date,
        left_on='issue_j',
        right_on='issue',
        how='left'
    )

    # Peers without a value on this date contribute a return of 0 but still
    # keep their weight in the denominator.
    weights = corr_ret['corr'].fillna(0.)
    weighted = corr_ret[ret_cols].fillna(0.).mul(weights, axis=0)

    # Weighted average per issue_i, done as two grouped sums instead of a
    # Python-level apply over every group.
    nume = weighted.groupby(corr_ret['issue_i']).sum()
    deno = weights.groupby(corr_ret['issue_i']).sum()
    has_weight = deno != 0
    peer_ret = nume.loc[has_weight].div(deno.loc[has_weight], axis=0)
    peer_ret = peer_ret.dropna()

    if peer_cols is not None:
        map_ret_peer = dict(zip(ret_cols, peer_cols))
    else:
        map_ret_peer = {ret: ret.replace('_20', '').replace('ret', 'peer') for ret in ret_cols}
    peer_ret = peer_ret.rename(columns=map_ret_peer)
    peer_ret = peer_ret.rename_axis('issue')
    peer_ret = peer_ret.reset_index()

    # Take the issue's own ret_20 from the date slice directly, so this works
    # even when 'ret_20' is not one of ret_cols.
    peer_ret = pd.merge(
        peer_ret,
        on_date[['issue', 'ret_20']],
        on='issue',
        how='left'
    )
    peer_ret = peer_ret.dropna(subset=['ret_20'])
    peer_ret['date'] = date
    return peer_ret

### 计算相对动量

In [10]:
def get_peer_relative_ret(peer_ret: pd.DataFrame, peer_cols=None, relative_cols=None, plot=False):
    """Regress peer returns on own return and return the residuals.

    Each column in ``peer_cols`` is regressed by OLS on a constant plus
    ``peer_ret['ret_20']``; the residuals are returned.

    Parameters
    ----------
    peer_ret : pd.DataFrame
        Must contain ``ret_20`` and every column named in ``peer_cols``.
    peer_cols : list of str, optional
        Dependent-variable columns. Defaults to ``['peer_ret']``.
    relative_cols : list of str, optional
        Output names, positionally matched to ``peer_cols``. When omitted,
        ``'peer'`` is replaced with ``'relative'`` in each name.
    plot : bool
        Unused; kept so existing callers keep working.

    Returns
    -------
    pd.DataFrame
        Residuals, one column per peer column, on the index of ``peer_ret``.
    """
    peer_cols = ['peer_ret'] if peer_cols is None else list(peer_cols)

    x = peer_ret['ret_20']
    x = sm.add_constant(x)
    y = peer_ret[peer_cols]
    result = sm.OLS(y, x).fit()

    relative_ret = result.resid
    # With a single dependent column the residuals may come back as a Series,
    # which has no columns to rename; normalise to a one-column frame.
    if isinstance(relative_ret, pd.Series):
        relative_ret = relative_ret.to_frame(name=peer_cols[0])

    if relative_cols is not None:
        map_peer_relative = dict(zip(peer_cols, relative_cols))
    else:
        map_peer_relative = {peer: peer.replace('peer', 'relative') for peer in peer_cols}
    relative_ret = relative_ret.rename(columns=map_peer_relative)
    return relative_ret

### 规模运算 & 保存

单次运行时间：10min 47s

In [25]:
def get_peer_ret_factor(factor_type: str, ret_cols=None, peer_cols=None, relative_cols=None):
    """Build the peer and relative return table over every rebalancing date.

    Reads the module-level ``adj_date`` (dates to iterate) and ``price_1d``
    (daily data). For each date it loads the correlation table, computes the
    peer returns, regresses them on own return, and stacks the results.

    Parameters
    ----------
    factor_type : str
        Passed to ``get_correlation`` to select the correlation file.
    ret_cols : list of str, optional
        Return columns to aggregate. Defaults to ``['ret_20']``.
    peer_cols : list of str, optional
        Peer column names; derived from ``ret_cols`` when omitted.
    relative_cols : list of str, optional
        Residual column names; derived from ``peer_cols`` when omitted.

    Returns
    -------
    pd.DataFrame or None
        Per-date frames concatenated row-wise, each keeping its own index, or
        ``None`` when ``adj_date`` is empty.
    """
    if ret_cols is None:
        ret_cols = ['ret_20']
    if peer_cols is None:
        peer_cols = [ret_col.replace('_20', '').replace('ret', 'peer') for ret_col in ret_cols]
    if relative_cols is None:
        relative_cols = [peer_col.replace('peer', 'relative') for peer_col in peer_cols]

    # Collect per-date frames and concatenate once; growing a frame inside
    # the loop re-copies all earlier rows on every iteration.
    frames = []
    for date in tqdm(adj_date):
        corr = get_correlation(date, factor_type)
        prd = get_peer_abs_ret(
            date, corr,
            price_1d=price_1d,
            ret_cols=ret_cols, peer_cols=peer_cols
        )

        rrd = get_peer_relative_ret(
            prd,
            peer_cols=peer_cols, relative_cols=relative_cols
        )
        frames.append(pd.concat([prd, rrd], axis=1))

    if not frames:
        return None
    return pd.concat(frames, axis=0)

In [26]:
# Input return columns and the peer_* / relative_* names derived from them.
ret_cols = ['ret_20', 'ret_nojump_20', 'ret_posjump_20', 'ret_negjump_20', 'ret_without_posjump_20']
peer_cols = [name.replace('_20', '').replace('ret', 'peer') for name in ret_cols]
relative_cols = [name.replace('peer', 'relative') for name in peer_cols]

# Build the factor table once per correlation type.
factor_kwargs = dict(ret_cols=ret_cols, peer_cols=peer_cols, relative_cols=relative_cols)
peer_ret_num = get_peer_ret_factor('num', **factor_kwargs)
peer_ret_size = get_peer_ret_factor('size', **factor_kwargs)

# Write one file per (peer column, correlation type) with unified column names.
dirname = '../data/peer_ret/'
os.makedirs(dirname, exist_ok=True)
outputs = (('_num.feather', peer_ret_num), ('_size.feather', peer_ret_size))
for peer, relative in zip(peer_cols, relative_cols):
    unified_names = {peer: 'peer_ret', relative: 'peer_relative_ret'}
    for suffix, table in outputs:
        partial = table[['date', 'issue', peer, relative]].rename(columns=unified_names)
        feather.write_dataframe(partial, dirname + peer + suffix)

  0%|          | 0/65 [00:00<?, ?it/s]

  0%|          | 0/65 [00:00<?, ?it/s]

In [27]:
# Render the two factor tables (built with the 'num' and 'size' correlation
# types) as the cell output.
display(peer_ret_num)
display(peer_ret_size)

Unnamed: 0,issue,peer,peer_nojump,peer_posjump,peer_negjump,peer_without_posjump,ret_20,date,relative,relative_nojump,relative_posjump,relative_negjump,relative_without_posjump
0,000001,0.012749,-0.011877,0.106327,-0.087510,-0.099387,0.030568,2019-08-30,-0.000183,0.000778,-0.005317,0.004210,0.004988
1,000002,0.011109,-0.011485,0.103024,-0.085781,-0.097266,-0.041816,2019-08-30,-0.001095,0.001191,-0.007777,0.005756,0.006946
2,000004,0.022247,-0.011866,0.128540,-0.101561,-0.113427,0.058133,2019-08-30,0.009037,0.000780,0.016575,-0.009771,-0.008990
3,000005,0.012220,-0.011293,0.106055,-0.087856,-0.099149,0.099291,2019-08-30,-0.001404,0.001342,-0.006389,0.004039,0.005380
4,000006,0.011112,-0.010939,0.105274,-0.088390,-0.099329,0.033138,2019-08-30,-0.001846,0.001715,-0.006400,0.003337,0.005052
...,...,...,...,...,...,...,...,...,...,...,...,...,...
5117,688799,-0.067454,-0.118520,0.130140,-0.087884,-0.206404,0.025465,2024-12-31,0.005496,0.008212,-0.012204,0.010565,0.018777
5118,688800,-0.071010,-0.128783,0.148146,-0.100150,-0.228933,0.124299,2024-12-31,0.000184,-0.002647,0.005376,-0.002511,-0.005158
5119,688819,-0.077978,-0.126466,0.135036,-0.096298,-0.222764,-0.056826,2024-12-31,-0.003566,0.000762,-0.006953,0.002825,0.003587
5120,688981,-0.078245,-0.131220,0.143609,-0.100687,-0.231906,0.084719,2024-12-31,-0.006348,-0.004845,0.001010,-0.002724,-0.007569


Unnamed: 0,issue,peer,peer_nojump,peer_posjump,peer_negjump,peer_without_posjump,ret_20,date,relative,relative_nojump,relative_posjump,relative_negjump,relative_without_posjump
0,000001,0.014476,-0.011743,0.107849,-0.087266,-0.099010,0.030568,2019-08-30,0.002264,0.000577,0.000035,0.001526,0.002102
1,000002,0.009836,-0.011757,0.101245,-0.084843,-0.096600,-0.041816,2019-08-30,-0.001469,0.000545,-0.005423,0.003673,0.004217
2,000004,0.018613,-0.009895,0.118337,-0.095605,-0.105501,0.058133,2019-08-30,0.006055,0.002431,0.010086,-0.006708,-0.004276
3,000005,0.012632,-0.012367,0.105666,-0.085611,-0.097978,0.099291,2019-08-30,-0.000441,-0.000030,-0.003237,0.003444,0.003414
4,000006,0.012528,-0.011875,0.103793,-0.084582,-0.096457,0.033138,2019-08-30,0.000284,0.000446,-0.004062,0.004220,0.004666
...,...,...,...,...,...,...,...,...,...,...,...,...,...
5117,688799,-0.068664,-0.117285,0.127274,-0.087303,-0.204588,0.025465,2024-12-31,0.002917,0.008117,-0.014833,0.010707,0.018824
5118,688800,-0.068054,-0.129946,0.154410,-0.102183,-0.232129,0.124299,2024-12-31,0.001031,-0.005313,0.011154,-0.004865,-0.010177
5119,688819,-0.072307,-0.120807,0.129901,-0.090588,-0.211395,-0.056826,2024-12-31,0.001353,0.005235,-0.011249,0.007997,0.013232
5120,688981,-0.072438,-0.120369,0.130066,-0.091073,-0.211441,0.084719,2024-12-31,-0.002353,0.004572,-0.012730,0.006523,0.011095
