# 跳跃关联动量因子

## 导入模块

In [1]:
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
import feather
import statsmodels.api as sm
from tqdm.notebook import tqdm
import os

## 读入日线数据

### 日线数据

In [2]:
# Load the daily price table and keep only rows whose date lies in
# [2019-01-01, 2024-12-31] (both ends inclusive).
price_1d = feather.read_dataframe('../data/StockPriceK1d_20241231.feather')
in_window = price_1d['date'].between('2019-01-01', '2024-12-31')
price_1d = price_1d[in_window]

### 调仓日

In [3]:
start_date = '2019-08-01'
end_date = '2025-01-01'

# Sorted, de-duplicated trading calendar taken from the daily price table.
# Missing dates are dropped so the binary search below sees a fully sorted index.
trade_date = pd.DatetimeIndex(price_1d['date'].dropna().unique()).sort_values()

# Calendar month-ends between start_date and end_date.
mes = pd.date_range(start=start_date, end=end_date, freq='1ME')

# For each month-end, find the position of the last trading day on or before it:
# searchsorted(side='right') gives the count of trading days <= month-end.
last_pos = trade_date.searchsorted(mes, side='right') - 1
if (last_pos < 0).any():
    # Without this guard a -1 position would silently wrap to the last trading day.
    raise ValueError('no trading day on or before the first month-end; check start_date')

# Kept as a DatetimeIndex so that iterating it yields pandas Timestamps, which
# get_correlation relies on for `.strftime`.
adj_date = trade_date[last_pos]

# Persist the rebalance dates to disk.
df_adj = pd.DataFrame({'adj_date': adj_date})
feather.write_dataframe(df_adj, '../data/adj_date.feather')

## 计算过去 20 天收益率

In [4]:
%%time
def _compound_20(gross: pd.Series) -> pd.Series:
    """Return the product of the trailing 20 gross returns, minus one."""
    return gross.rolling(20).apply(np.prod, raw=True) - 1


# Trailing 20-row compounded return per stock: prod(1 + ret) - 1.
# The window is counted in rows of each stock's group, in the table's row order.
gross_ret = price_1d['ret'] + 1
price_1d['ret_20'] = gross_ret.groupby(price_1d['issue']).transform(_compound_20)

CPU times: total: 26.5 s
Wall time: 27.2 s


## 跳跃关联动量

### 读入相关性 & 矩阵展平 & 稀疏化处理

In [5]:
def get_correlation(date, factor_type: str):
    """Load one date's correlation matrix and return it in sparsified long form.

    Parameters
    ----------
    date
        Object exposing ``strftime``; formatted as ``%Y%m%d`` to build the
        file name.
    factor_type
        Tag inserted into the file name (``corr_{factor_type}_{date}.feather``).

    Returns
    -------
    pd.DataFrame
        Columns ``issue_i``, ``issue_j`` and ``corr``. The diagonal of the
        stored matrix is zeroed, and every entry below the median of the
        strictly positive entries is set to 0.

    Notes
    -----
    Assumes the stored frame is a square, all-numeric matrix whose row and
    column labels are the stock identifiers -- TODO confirm against the
    writer of these files.
    """
    date_str = date.strftime('%Y%m%d')
    wide = feather.read_dataframe(f'../data/corr/corr_{factor_type}_{date_str}.feather')

    # Zero the diagonal on an explicit writable copy. `DataFrame.values` is not
    # guaranteed to be a writable view of the frame: it can be a copy (the fill
    # would be lost) or read-only under Copy-on-Write (the fill would raise).
    matrix = wide.to_numpy(copy=True)
    np.fill_diagonal(matrix, 0)
    wide = pd.DataFrame(matrix, index=wide.index, columns=wide.columns)

    # Flatten the matrix into one row per (issue_i, issue_j) pair.
    corr = (
        wide
            .stack()
            .rename('corr')
            .rename_axis(['issue_i', 'issue_j'])
            .reset_index()
    )

    # Sparsify: keep only links at or above the median positive correlation.
    med = corr.loc[corr['corr'] > 0, 'corr'].median()
    corr.loc[corr['corr'] < med, 'corr'] = 0

    return corr

### 计算绝对动量

In [6]:
def peer_ret_calc(corr_ret: pd.DataFrame):
    """Correlation-weighted average of peers' 20-day returns for one stock.

    Parameters
    ----------
    corr_ret
        Rows for a single stock's peers, with a ``corr`` column (weights) and
        a ``ret_20`` column (peer returns).

    Returns
    -------
    float
        ``sum(corr * ret_20) / sum(corr)`` taken over peers whose weight and
        return are both present, or NaN when that weight sum is zero.
    """
    weights = corr_ret['corr']
    returns = corr_ret['ret_20']

    # Only peers with an observed return may contribute to the denominator;
    # otherwise peers with a missing return drag the average toward zero.
    observed = weights.notna() & returns.notna()
    weights = weights[observed]
    returns = returns[observed]

    deno = weights.sum()
    if deno == 0:
        return np.nan
    return (weights * returns).sum() / deno

def get_peer_abs_ret(date, corr: pd.DataFrame):
    """Compute each stock's correlation-weighted peer return on ``date``.

    Parameters
    ----------
    date
        Value compared against ``price_1d['date']`` to select one day.
    corr
        Long-form pair table with ``issue_i``, ``issue_j`` and ``corr`` columns.

    Returns
    -------
    pd.DataFrame
        Columns ``issue``, ``peer_ret``, ``ret_20`` and ``date``. Stocks whose
        peer return is NaN, or that have no own ``ret_20`` on ``date``, are
        dropped.
    """
    # Cross-section of 20-day returns for the requested day (module-level price_1d).
    on_date = price_1d['date'] == date
    own_ret = price_1d.loc[on_date, ['issue', 'ret_20']]

    # Attach each peer's (issue_j) return to every pair.
    pairs = corr.merge(own_ret, left_on='issue_j', right_on='issue', how='left')

    # Aggregate the peers of each issue_i into one weighted return.
    weighted = pairs.groupby('issue_i')[['corr', 'ret_20']].apply(peer_ret_calc)
    result = (
        weighted
            .dropna()
            .rename_axis('issue')
            .reset_index(name='peer_ret')
    )

    # Bring in the stock's own return and keep only stocks that have one.
    result = result.merge(own_ret, on='issue', how='left').dropna(subset='ret_20')
    result['date'] = date
    return result

### 计算相对动量

In [7]:
def get_peer_relative_ret(peer_ret: pd.DataFrame, plot=False):
    """Residual of regressing ``peer_ret`` on the stock's own ``ret_20``.

    Parameters
    ----------
    peer_ret
        Frame with ``ret_20`` (regressor) and ``peer_ret`` (response) columns.
    plot
        When true, show a 2-D histogram of ``peer_ret`` against the residuals.

    Returns
    -------
    The ``resid`` attribute of the fitted OLS result (constant term added
    via ``sm.add_constant``).
    """
    design = sm.add_constant(peer_ret['ret_20'])
    response = peer_ret['peer_ret']
    residuals = sm.OLS(response, design).fit().resid

    if not plot:
        return residuals

    plt.hist2d(response, residuals, bins=30)
    plt.show()
    return residuals

### 批量运算 & 保存

In [8]:
def get_peer_ret_factor(factor_type: str):
    """Build the peer-return factor table across every rebalance date.

    For each date in the module-level ``adj_date`` this loads the correlation
    table for ``factor_type``, computes the weighted peer return and its
    regression residual, and stacks the per-date results.

    Parameters
    ----------
    factor_type
        Correlation variant passed through to ``get_correlation``.

    Returns
    -------
    pd.DataFrame or None
        Columns ``date``, ``issue``, ``ret_20``, ``peer_ret`` and
        ``peer_relative_ret``; ``None`` when ``adj_date`` is empty.
    """
    columns = ['date', 'issue', 'ret_20', 'peer_ret', 'peer_relative_ret']
    frames = []
    for date in tqdm(adj_date):
        corr = get_correlation(date, factor_type)
        prd = get_peer_abs_ret(date, corr)
        prd['peer_relative_ret'] = get_peer_relative_ret(prd)
        frames.append(prd[columns])

    if not frames:
        return None
    # Concatenate once: concatenating inside the loop re-copies all earlier
    # rows on every iteration.
    return pd.concat(frames)
# Build the factor table for both correlation variants, 'num' first, then 'size'.
peer_ret_num, peer_ret_size = (
    get_peer_ret_factor(variant) for variant in ('num', 'size')
)

  0%|          | 0/65 [00:00<?, ?it/s]

  0%|          | 0/65 [00:00<?, ?it/s]

In [9]:
# Make sure the output folder exists, then write one feather file per variant.
os.makedirs('../data/peer_ret/', exist_ok=True)
for frame, path in (
    (peer_ret_num, '../data/peer_ret/peer_ret_num.feather'),
    (peer_ret_size, '../data/peer_ret/peer_ret_size.feather'),
):
    feather.write_dataframe(frame, path)