# 检查跳跃关联度

## 导入模块

In [1]:
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
import feather

## 读入数据

### 日线

In [2]:
# Load the daily price table and keep only rows dated 2019-01-01..2024-12-31
# (both ends included) whose `ret` is above 0.01 or below -0.01.
price_1d = feather.read_dataframe('../data/StockPriceK1d_20241231.feather')
in_sample = price_1d['date'].between('2019-01-01', '2024-12-31')
big_move = price_1d['ret'].abs() > 0.01
price_1d = price_1d[in_sample & big_move]

### 跳跃

In [3]:
# Keep only the jump records whose (issue, date) pair is still present in the
# filtered price_1d table; the inner join drops every other row.
jump_raw = feather.read_dataframe('../data/jump/jump.feather')
key_cols = ['issue', 'date']
jump = jump_raw.merge(price_1d[key_cols], on=key_cols, how='inner')

### 调仓日和对应的构建关联度开始时间 (含调仓日在内向前数第 120 个交易日)

In [8]:
# Size of the look-back window in trading days, counting the rebalance day itself.
LOOKBACK_DAYS = 120

# Sorted unique dates that appear in the (already filtered) daily price table.
trade_date = price_1d['date'].sort_values().unique()
# Calendar month-ends covering the sample period.
mes = pd.date_range(start='2019-01-01', end='2024-12-31', freq='1ME')

# Accumulate in plain lists and convert once at the end: np.append allocates
# and fills a brand-new array on every call.
adj_dates = []
calc_start_dates = []
for me in mes:
    trade_date_before = trade_date[trade_date <= me]
    if len(trade_date_before) == 0:
        # No trading day on or before this month-end, so there is no
        # rebalance date to record (indexing [-1] would raise IndexError).
        continue
    # Rebalance day: last trading day on or before the month-end.
    adj_dates.append(trade_date_before[-1])
    # Window start: the LOOKBACK_DAYS-th trading day counting back from (and
    # including) the rebalance day, or the earliest day when history is shorter.
    window_len = min(len(trade_date_before), LOOKBACK_DAYS)
    calc_start_dates.append(trade_date_before[-window_len])

# dtype=object keeps each element exactly as trade_date yields it, so element
# methods such as .strftime remain available to later cells.
adj_date = np.array(adj_dates, dtype=object)
calc_start_date = np.array(calc_start_dates, dtype=object)

In [10]:
# Pick one rebalance date (position 7) together with its matching window start.
ad, csd = adj_date[7], calc_start_date[7]
# Compact YYYYMMDD rendering of the rebalance date.
ad_str = ad.strftime('%Y%m%d')
print(ad, csd)

2019-08-30 00:00:00 2019-03-11 00:00:00


### 跳跃关联度

In [11]:
# Read the two jump-correlation tables stored for the selected rebalance date.
corr_num_path = f'../data/corr/corr_num_{ad_str}.feather'
corr_size_path = f'../data/corr/corr_size_{ad_str}.feather'
corr_num, corr_size = map(feather.read_dataframe, (corr_num_path, corr_size_path))

## 检查 601921, 300556, 301486 在 2024-08-20 的关联跳跃

In [12]:
# Show the jump record of each of the three stocks on the same day.
on_check_date = jump['date'] == '2024-08-20'
for issue in ('601921', '300556', '301486'):
    display(jump[on_check_date & (jump['issue'] == issue)])

Unnamed: 0,issue,date,jump,jump_count,ret_jump,ret_nojump,ret_posjump,ret_negjump,year_mon
1473279,601921,2024-08-20,True,2,0.077015,0.018295,0.077015,0.0,202408


Unnamed: 0,issue,date,jump,jump_count,ret_jump,ret_nojump,ret_posjump,ret_negjump,year_mon
857894,300556,2024-08-20,True,2,0.067435,0.010274,0.067435,0.0,202408


Unnamed: 0,issue,date,jump,jump_count,ret_jump,ret_nojump,ret_posjump,ret_negjump,year_mon
1120096,301486,2024-08-20,True,2,0.077936,-0.007949,0.077936,0.0,202408


## 检查股票 000001 与 000002 在 2019-08-30 的跳跃关联度

In [13]:
# Stock pair under inspection.
issue_i, issue_j = '000001', '000002'
# Row mask selecting jumps dated inside [csd, ad], both ends included.
idx_date = jump['date'].between(csd, ad)

### 股票 i, j 的跳跃

In [14]:
# Jumps of stock i inside the window; copied so that columns added to jump_i
# later do not touch `jump`.
is_issue_i = jump['issue'].eq(issue_i)
jump_i = jump.loc[idx_date & is_issue_i].copy()
jump_i

Unnamed: 0,issue,date,jump,jump_count,ret_jump,ret_nojump,ret_posjump,ret_negjump,year_mon
11,1,2019-03-25,True,1,-0.015206,-0.02366492,0.0,-0.015206,201903
12,1,2019-03-27,True,1,0.011504,0.01137299,0.011504,0.0,201903
13,1,2019-03-28,True,1,-0.005703,-0.00730565,0.0,-0.005703,201903
14,1,2019-03-29,True,2,0.020121,0.02781167,0.020121,0.0,201903
15,1,2019-04-01,True,1,0.020062,0.007631676,0.020062,0.0,201904
16,1,2019-04-02,True,1,0.007559,0.006006024,0.007559,0.0,201904
17,1,2019-04-11,True,1,0.015896,-0.02983124,0.015896,0.0,201904
18,1,2019-04-15,True,2,0.040882,-0.0209621,0.040882,0.0,201904
19,1,2019-04-17,True,1,-0.011035,-0.00486619,0.0,-0.011035,201904
20,1,2019-04-24,True,1,0.02665,-0.0006922811,0.02665,0.0,201904


In [15]:
# Jumps of stock j inside the window, taken as an independent copy of the rows.
is_issue_j = jump['issue'].eq(issue_j)
jump_j = jump.loc[idx_date & is_issue_j].copy()
jump_j

Unnamed: 0,issue,date,jump,jump_count,ret_jump,ret_nojump,ret_posjump,ret_negjump,year_mon
418,2,2019-03-18,True,4,0.02403,0.021242,0.02403,0.0,201903
419,2,2019-03-20,True,1,0.015409,-0.002457,0.015409,0.0,201903
420,2,2019-03-25,True,1,-0.012063,-0.023861,0.0,-0.012063,201903
421,2,2019-03-29,True,3,0.022184,0.042354,0.022184,0.0,201903
422,2,2019-04-09,True,2,0.026902,0.03279,0.026902,0.0,201904
423,2,2019-04-10,True,1,-0.017476,-0.003959,0.0,-0.017476,201904
424,2,2019-04-22,True,5,-0.035522,-0.029101,0.0,-0.035522,201904
425,2,2019-04-26,True,1,-0.009916,-0.000688,0.0,-0.009916,201904
426,2,2019-04-30,True,2,-0.023809,0.007666,0.0,-0.023809,201904
427,2,2019-05-06,True,1,-0.030945,-0.032671,0.0,-0.030945,201905


### 以股票 i 为焦点的关联跳跃

| jump_date_i | direction | corr_jump_date_j |
| :---------: | :-------: | :--------------: |
| 3.29        | +         | 3.29             |
| 4.1         | +         |                  |
| 4.2         | -         |                  |
| 4.11        | +         | 4.11             |
| 4.15        | +         |                  |
| 4.30        | -         | 4.30             |
| 5.9         | +         | 5.8              |
| 5.10        | -         | 5.10             |
| 5.14        | +         |                  |
| 5.15        | +         |                  |
| 5.17        | -         | 5.17             |
| 5.22        | -         |                  |
| 5.30        | -         |                  |
| 6.11        | +         | 6.11             |
| 6.18        | +         |                  |
| 6.19        | -         |                  |
| 6.21        | -         |                  |
| 6.27        | +         |                  |
| 7.2         | +         | 7.3              |
| 7.3         | -         |                  |
| 7.16        | -         |                  |
| 7.22        | -         | 7.23             |
| 7.31        | -         | 7.31             |
| 8.2         | -         | 8.2              |
| 8.5         | -         | 8.5              |
| 8.7         | +         | 8.7              |
| 8.8         | +         | 8.8              |
| 8.12        | +         | 8.13             |
| 8.13        | +         | 8.13             |
| 8.21        | -         |                  |
| 8.23        | -         |                  |
| 8.26        | -         |                  |

In [10]:
# Stored correlation values for the pair (row label = stock i, column label = stock j).
pair = (issue_i, issue_j)
print(corr_num.loc[pair], corr_size.loc[pair])

0.5625 0.5936543984419171


In [11]:
# Jump dates of stock i, transcribed by hand from the table above
# (one source line per calendar month).
dates = np.array(
    [
        '2019-03-29',
        '2019-04-01', '2019-04-02', '2019-04-11', '2019-04-15', '2019-04-30',
        '2019-05-09', '2019-05-10', '2019-05-14', '2019-05-15', '2019-05-17', '2019-05-22', '2019-05-30',
        '2019-06-11', '2019-06-18', '2019-06-19', '2019-06-21', '2019-06-27',
        '2019-07-02', '2019-07-03', '2019-07-16', '2019-07-22', '2019-07-31',
        '2019-08-02', '2019-08-05', '2019-08-07', '2019-08-08', '2019-08-12', '2019-08-13',
        '2019-08-21', '2019-08-23', '2019-08-26',
    ],
    dtype=np.datetime64,
)

# Subset of the dates above whose table row has a non-empty corr_jump_date_j.
dates_corr = np.array(
    [
        '2019-03-29',
        '2019-04-11', '2019-04-30',
        '2019-05-09', '2019-05-10', '2019-05-17',
        '2019-06-11',
        '2019-07-02', '2019-07-22', '2019-07-31',
        '2019-08-02', '2019-08-05', '2019-08-07', '2019-08-08', '2019-08-12', '2019-08-13',
    ],
    dtype=np.datetime64,
)

In [12]:
# Hand-counted share of stock i's jump dates that have a correlated jump of stock j.
dates_corr.size / dates.size

0.5

In [13]:
# Weight every jump of stock i by the absolute value of its jump return, then
# take the share of that total weight falling on the hand-picked correlated dates.
jump_i['abs_ret_jump'] = jump_i['ret_jump'].abs()
on_corr_date = jump_i['date'].isin(dates_corr)
jump_i.loc[on_corr_date, 'abs_ret_jump'].sum() / jump_i['abs_ret_jump'].sum()

np.float64(0.524812275828415)