# 跳跃关联动量因子

## 导入模块

In [1]:
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
import feather
import statsmodels.api as sm
from tqdm.notebook import tqdm
import os

## 读入日线数据

### 日线数据

In [2]:
price_1d = feather.read_dataframe('../data/StockPriceK1d_20241231.feather')
price_1d = price_1d[(price_1d['date'] >= '2019-01-01') & (price_1d['date'] <= '2024-12-31')]

### 跳跃收益数据

In [3]:
jump = feather.read_dataframe('../data/jump_bak/jump.feather')
price_1d = pd.merge(
    price_1d,
    jump[['issue', 'date', 'ret_jump', 'ret_nojump', 'ret_posjump', 'ret_negjump']],
    on=['issue', 'date'],
    how='left'
)
price_1d['log_ret'] = np.log(1 + price_1d['ret'])
price_1d['ret_nojump'] = price_1d['ret_nojump'].fillna(price_1d['log_ret'])
price_1d = price_1d.fillna(0.)
price_1d['ret_without_posjump'] = price_1d['ret_nojump'] + price_1d['ret_negjump']

In [4]:
price_1d

Unnamed: 0,date,issue,preclose,open,high,low,close,numTrades,volume,value,adj,ret,is_limit_buy,is_limit_sell,ret_jump,ret_nojump,ret_posjump,ret_negjump,log_ret,ret_without_posjump
0,2019-01-02,000001,9.38,9.39,9.42,9.16,9.19,25140.0,53938632.0,4.986951e+08,108.031388,-0.020256,0.0,0.0,0.000000,-0.020464,0.000000,0.00000,-0.020464,-0.020464
1,2019-01-03,000001,9.19,9.18,9.33,9.15,9.28,19151.0,41553795.0,3.844577e+08,108.031388,0.009793,0.0,0.0,0.019469,-0.014034,0.019469,0.00000,0.009746,-0.014034
2,2019-01-04,000001,9.28,9.24,9.82,9.22,9.75,59551.0,148115906.0,1.422150e+09,108.031388,0.050647,0.0,0.0,0.000000,0.049406,0.000000,0.00000,0.049406,0.049406
3,2019-01-07,000001,9.75,9.84,9.85,9.63,9.74,34912.0,86568766.0,8.411664e+08,108.031388,-0.001026,0.0,0.0,0.000000,-0.001026,0.000000,0.00000,-0.001026,-0.001026
4,2019-01-08,000001,9.74,9.73,9.74,9.62,9.66,21454.0,40238811.0,3.892478e+08,108.031388,-0.008214,0.0,0.0,0.000000,-0.008247,0.000000,0.00000,-0.008247,-0.008247
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6503290,2024-12-25,689009,45.40,45.39,45.58,44.30,45.01,15943.0,6357913.0,2.858237e+08,1.007044,-0.008590,0.0,0.0,-0.002425,-0.008888,0.006435,-0.00886,-0.008627,-0.017747
6503291,2024-12-26,689009,45.01,45.11,46.09,44.88,45.09,13807.0,6335058.0,2.883992e+08,1.007044,0.001777,0.0,0.0,0.000000,0.001776,0.000000,0.00000,0.001776,0.001776
6503292,2024-12-27,689009,45.09,45.12,48.08,45.06,46.84,24529.0,13048008.0,6.154821e+08,1.007044,0.038811,0.0,0.0,0.023654,0.021885,0.023654,0.00000,0.038077,0.021885
6503293,2024-12-30,689009,46.84,46.36,48.63,46.30,48.13,24853.0,13080542.0,6.244473e+08,1.007044,0.027541,0.0,0.0,0.000000,0.027168,0.000000,0.00000,0.027168,0.027168


### 调仓日

In [5]:
start_date = '2019-08-01'
end_date = '2025-01-01'
trade_date = price_1d['date'].sort_values().unique()
mes = pd.date_range(start=start_date, end=end_date, freq='1ME')
adj_date = np.array([], dtype=np.datetime64)
for me in mes:
    trade_date_before = trade_date[trade_date <= me]
    ad = trade_date_before[-1]
    adj_date = np.append(adj_date, ad)
df_adj = pd.DataFrame({'adj_date': adj_date})
feather.write_dataframe(df_adj, '../data/adj_date.feather')

## 计算过去 20 天收益率

In [7]:
%%time
ret_cols = ['log_ret', 'ret_nojump', 'ret_posjump', 'ret_negjump', 'ret_without_posjump']
ret_20_cols = [col + '_20' for col in ret_cols]
price_1d[ret_20_cols] = (
    price_1d
        .groupby('issue')[ret_cols]
        .transform(lambda x: x.rolling(20).apply(np.sum, raw=True))
)
price_1d['ret_20'] = np.exp(price_1d['log_ret_20']) - 1

CPU times: total: 2min 5s
Wall time: 2min 11s


In [8]:
price_1d.head(30)

Unnamed: 0,date,issue,preclose,open,high,low,close,numTrades,volume,value,...,ret_posjump,ret_negjump,log_ret,ret_without_posjump,log_ret_20,ret_nojump_20,ret_posjump_20,ret_negjump_20,ret_without_posjump_20,ret_20
0,2019-01-02,1,9.38,9.39,9.42,9.16,9.19,25140.0,53938632.0,498695100.0,...,0.0,0.0,-0.020464,-0.020464,,,,,,
1,2019-01-03,1,9.19,9.18,9.33,9.15,9.28,19151.0,41553795.0,384457700.0,...,0.019469,0.0,0.009746,-0.014034,,,,,,
2,2019-01-04,1,9.28,9.24,9.82,9.22,9.75,59551.0,148115906.0,1422150000.0,...,0.0,0.0,0.049406,0.049406,,,,,,
3,2019-01-07,1,9.75,9.84,9.85,9.63,9.74,34912.0,86568766.0,841166400.0,...,0.0,0.0,-0.001026,-0.001026,,,,,,
4,2019-01-08,1,9.74,9.73,9.74,9.62,9.66,21454.0,40238811.0,389247800.0,...,0.0,0.0,-0.008247,-0.008247,,,,,,
5,2019-01-09,1,9.66,9.74,10.08,9.7,9.94,50505.0,123348636.0,1229465000.0,...,0.0,0.0,0.028573,0.028573,,,,,,
6,2019-01-10,1,9.94,9.87,10.2,9.86,10.1,42926.0,107181766.0,1079711000.0,...,0.0,0.0,0.015968,0.015968,,,,,,
7,2019-01-11,1,10.1,10.11,10.22,10.05,10.2,27565.0,69636455.0,708001800.0,...,0.0,0.0,0.009852,0.009852,,,,,,
8,2019-01-14,1,10.2,10.22,10.25,10.07,10.11,24244.0,50044359.0,507862900.0,...,0.0,0.0,-0.008863,-0.008863,,,,,,
9,2019-01-15,1,10.11,10.11,10.28,10.09,10.24,26586.0,54216055.0,553027300.0,...,0.0,0.0,0.012777,0.012777,,,,,,


## 跳跃关联动量

### 读入相关性 & 矩阵展平 & 稀疏化处理

In [5]:
def get_correlation(date, factor_type: str):
    date_str = date.strftime('%Y%m%d')
    corr = feather.read_dataframe(f'../data/corr/corr_{factor_type}_{date_str}.feather')
    np.fill_diagonal(corr.values, 0)
    corr = (
        corr
            .stack()
            .rename('corr')
            .rename_axis(['issue_i', 'issue_j'])
    )
    corr = corr.reset_index()
    
    med = corr.loc[corr['corr'] > 0, 'corr'].median()
    corr.loc[corr['corr'] < med, 'corr'] = 0

    return corr

### 计算绝对动量

In [6]:
def peer_ret_calc(corr_ret: pd.DataFrame):
    nume = (corr_ret['corr'] * corr_ret['ret_20']).sum()
    deno = corr_ret['corr'].sum()
    if deno == 0:
        return np.nan
    peer_ret = nume / deno
    return peer_ret

def get_peer_abs_ret(date, corr: pd.DataFrame):
    prc_date = price_1d.loc[price_1d['date'] == date, ['issue', 'ret_20']]
    corr_ret = pd.merge(
        corr,
        prc_date,
        left_on='issue_j',
        right_on='issue',
        how='left'
    )
    
    peer_ret = (
        corr_ret
            .groupby('issue_i')[['corr', 'ret_20']]
            .apply(peer_ret_calc)
    )
    peer_ret = peer_ret.dropna()
    peer_ret = peer_ret.rename('peer_ret')
    peer_ret = peer_ret.rename_axis(index = {'issue_i': 'issue'})
    peer_ret = peer_ret.to_frame().reset_index()
    
    peer_ret = pd.merge(
        peer_ret,
        prc_date,
        on='issue',
        how='left'
    )
    peer_ret = peer_ret.dropna(subset='ret_20')
    peer_ret['date'] = date
    return peer_ret

### 计算相对动量

In [7]:
def get_peer_relative_ret(peer_ret: pd.DataFrame, plot=False):
    x = peer_ret['ret_20']
    x = sm.add_constant(x)
    y = peer_ret['peer_ret']
    result = sm.OLS(y, x).fit()
    if plot:
        plt.hist2d(y, result.resid, bins=30)
        plt.show()
    return result.resid

### 规模运算 & 保存

In [8]:
def get_peer_ret_factor(factor_type: str):
    peer_ret = None
    for date in tqdm(adj_date):
        corr = get_correlation(date, factor_type)
        prd = get_peer_abs_ret(date, corr)
        prd['peer_relative_ret'] = get_peer_relative_ret(prd)
        prd = prd[['date', 'issue', 'ret_20', 'peer_ret', 'peer_relative_ret']]
        peer_ret = pd.concat([peer_ret, prd])
    return peer_ret
peer_ret_num = get_peer_ret_factor('num')
peer_ret_size = get_peer_ret_factor('size')

  0%|          | 0/65 [00:00<?, ?it/s]

  0%|          | 0/65 [00:00<?, ?it/s]

In [9]:
os.makedirs('../data/peer_ret/', exist_ok=True)
feather.write_dataframe(peer_ret_num, '../data/peer_ret/peer_ret_num.feather')
feather.write_dataframe(peer_ret_size, '../data/peer_ret/peer_ret_size.feather')