# 关联跳跃和跳跃关联度

## 导入模块

In [118]:
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
import feather
import sys
%load_ext line_profiler

## 读入数据

### 股价跳跃数据及处理

In [2]:
# Load the raw jump table and convert its `date` column to datetimes.
jump_path = '../data/jump/jump.feather'
jump = feather.read_dataframe(jump_path)
jump = jump.assign(date=pd.to_datetime(jump['date']))

In [3]:
# Keep only the rows whose jump return exceeds 1% in absolute value.
# `.copy()` detaches the filtered frame from the original, so adding the
# `sign` column below does not raise pandas' SettingWithCopyWarning.
jump = jump[jump['ret_jump'].abs() > 0.01].copy()
# Direction of each jump (+1 up, -1 down), computed on the whole column at once.
jump['sign'] = np.sign(jump['ret_jump'])

## 关联跳跃

### 前一日后一日跳跃方向

In [4]:
# Attach to every jump the sign of the same issue's jump one day earlier
# (`sign_yest`) and one day later (`sign_tomo`); 0 means no such jump row.
# NOTE(review): the shift is one *calendar* day, so a Friday/Monday pair is not
# treated as adjacent -- confirm this is intended rather than a trading-day shift.
one_day = pd.Timedelta('1d')
key_cols = ['issue', 'date']

# Moving a jump's date forward by one day lines it up with the following day,
# where it is that day's "yesterday" sign.
jump_plus1 = jump[key_cols + ['sign']].rename(columns={'sign': 'sign_yest'})
jump_plus1['date'] += one_day
jump = jump.merge(jump_plus1, on=key_cols, how='left')

# Moving a jump's date back by one day lines it up with the preceding day,
# where it is that day's "tomorrow" sign.
jump_minus1 = jump[key_cols + ['sign']].rename(columns={'sign': 'sign_tomo'})
jump_minus1['date'] -= one_day
jump = jump.merge(jump_minus1, on=key_cols, how='left')

# Left-merge misses come back as NaN; encode "no neighbouring jump" as 0.
jump = jump.fillna({'sign_yest': 0., 'sign_tomo': 0.})

### 关联跳跃矩阵

index: 每一次跳跃

column: 每一家公司

In [5]:
# Correlated-jump matrix: one row per jump event (same index as `jump`) and one
# boolean column per issue. Every cell starts as False; identify_corr sets the
# correlated cells to True.
# (A `global` statement is not needed here: names assigned at module level are
# already global.)
issues = jump['issue'].sort_values().unique()
jump_corr = pd.DataFrame(data=False, index=jump.index, columns=issues, dtype=bool)

### 找出一日内的关联跳跃

In [6]:
def identify_corr(jump_date):
    """Mark correlated jumps for one group of jump rows in the global ``jump_corr``.

    For each direction (+1, then -1), every row of ``jump_date`` whose ``sign``
    equals that direction gets ``True`` in the columns of all issues that have
    a row in ``jump_date`` with ``sign``, ``sign_yest`` or ``sign_tomo`` equal
    to the same direction.

    Parameters
    ----------
    jump_date : pandas.DataFrame
        Rows with the columns ``issue``, ``sign``, ``sign_yest`` and
        ``sign_tomo``. Its index labels must be row labels of ``jump_corr`` and
        its ``issue`` values must be column labels of ``jump_corr``.

    Returns
    -------
    None
        The result is written into ``jump_corr`` in place.
    """
    global jump_corr

    for direction in (1, -1):
        # Rows that jump in this direction.
        same_today = jump_date['sign'] == direction
        # Rows whose issue jumped this way on the row's date or a neighbouring day.
        same_nearby = (
            same_today
            | (jump_date['sign_yest'] == direction)
            | (jump_date['sign_tomo'] == direction)
        )
        event_rows = jump_date.index[same_today.to_numpy()]
        partner_issues = jump_date.loc[same_nearby, 'issue'].to_numpy()
        jump_corr.loc[event_rows, partner_issues] = True

### 关联跳跃 & 保存

In [7]:
%%time
# Call identify_corr once per distinct `date`; it fills jump_corr in place, so
# the value returned by apply is not used.
per_day_cols = ['issue', 'sign', 'sign_yest', 'sign_tomo']
jump.groupby('date')[per_day_cols].apply(identify_corr)

CPU times: total: 15.4 s
Wall time: 15.7 s


In [8]:
# %%time
# feather.write_dataframe(jump_corr, '../data/jump/jump_corr.feather')

CPU times: total: 2min 19s
Wall time: 39.2 s


## 跳跃关联度

计算每个调仓日（每个月末当日或之前的最后一个交易日），以及对应的关联度计算起始日：含调仓日在内向前数第 120 个交易日；若可用交易日不超过 120 个，则取最早的交易日。

In [80]:
# Distinct dates present in the jump table (ascending) and the month-end
# calendar that spans them.
trade_date = jump['date'].sort_values().unique()
mes = pd.date_range(start=trade_date[0], end=trade_date[-1], freq='1ME')

# adj_date[i]: last entry of trade_date on or before the i-th month end.
# calc_start_date[i]: 120th-from-last such entry, or the earliest entry when
# 120 or fewer are available.
adj_date = np.array([], dtype=np.datetime64)
calc_start_date = np.array([], dtype=np.datetime64)

for month_end in mes:
    dates_so_far = trade_date[trade_date <= month_end]
    ad = dates_so_far[-1]
    csd = dates_so_far[-120] if len(dates_so_far) > 120 else dates_so_far[0]
    adj_date = np.append(adj_date, ad)
    calc_start_date = np.append(calc_start_date, csd)

### 一家公司一个调仓日的跳跃关联度

In [180]:
def corr_calc(jump, jump_corr_date):
    """Compute the jump-correlation rows of one issue against every issue.

    Parameters
    ----------
    jump : pandas.DataFrame
        Jump rows of a single issue (one ``groupby('issue')`` group). Must have
        the columns ``issue``, ``date``, ``jump`` and ``ret_jump``; its index
        labels must be row labels of ``jump_corr_date``.
    jump_corr_date : pandas.DataFrame
        Boolean correlated-jump flags, one row per jump and one column per
        issue.

    Returns
    -------
    pandas.DataFrame
        Two rows (index 0 and 1) with one column per column of
        ``jump_corr_date`` plus ``issue``, ``date`` and ``type``:

        * ``type == 'num'``: share of this issue's jumps flagged as correlated
          with each column (flag count / non-null count of ``jump``).
        * ``type == 'size'``: the same share weighted by ``abs(ret_jump)``.

        ``issue`` and ``date`` are taken from the last row of ``jump``.
    """
    issue = jump['issue'].iloc[-1]
    date = jump['date'].iloc[-1]

    # Select this issue's flag rows once; both measures reuse them.
    flags = jump_corr_date.loc[jump.index]
    abs_ret_jump = jump['ret_jump'].abs()

    corr_num = flags.sum() / jump['jump'].count()
    # axis=0 aligns the weights with the rows (jump events), not the columns.
    corr_size = flags.mul(abs_ret_jump, axis=0).sum() / abs_ret_jump.sum()

    # Append the identifying labels to each per-issue Series in place.
    for series, kind in ((corr_num, 'num'), (corr_size, 'size')):
        series['issue'] = issue
        series['date'] = date
        series['type'] = kind

    return pd.concat([corr_num, corr_size], axis=1).T

In [190]:
%%time

# Work on the most recent rebalancing date and its window start.
ad = adj_date[-1]
csd = calc_start_date[-1]
issue = '000001'

# Rows of `jump` dated within [csd, ad]. jump_corr was built on jump's index,
# so the same boolean mask selects the matching rows of both frames.
in_window = (jump['date'] >= csd) & (jump['date'] <= ad)
jump_date = jump[in_window]
jump_corr_date = jump_corr[in_window]

# One corr_calc call per issue; each returns a 'num' row and a 'size' row.
corr = (
    jump_date
    .groupby('issue')[['issue', 'date', 'jump', 'ret_jump']]
    .apply(corr_calc, jump_corr_date=jump_corr_date)
    .reset_index(drop=True)
)
corr

CPU times: total: 1min 16s
Wall time: 1min 17s


Unnamed: 0,000001,000002,000004,000005,000006,000007,000008,000009,000010,000011,...,688793,688798,688799,688800,688819,688981,689009,issue,date,type
0,1.0,0.333333,0.666667,0.0,0.333333,0.083333,0.083333,0.166667,0.083333,0.416667,...,0.333333,0.416667,0.166667,0.5,0.416667,0.25,0.5,000001,2024-05-16 00:00:00,num
1,1.0,0.247887,0.589104,0.0,0.323013,0.071833,0.114413,0.191161,0.061355,0.372779,...,0.368316,0.420397,0.19816,0.431921,0.43474,0.247485,0.511814,000001,2024-05-16 00:00:00,size
2,0.125,1.0,0.40625,0.0,0.3125,0.21875,0.03125,0.28125,0.125,0.3125,...,0.21875,0.21875,0.125,0.25,0.1875,0.1875,0.34375,000002,2024-05-31 00:00:00,num
3,0.144848,1.0,0.401638,0.0,0.306632,0.251252,0.038801,0.330694,0.179939,0.354945,...,0.176012,0.242091,0.121821,0.26657,0.183185,0.192419,0.36427,000002,2024-05-31 00:00:00,size
4,0.090909,0.136364,1.0,0.0,0.181818,0.151515,0.045455,0.181818,0.257576,0.166667,...,0.409091,0.318182,0.257576,0.378788,0.257576,0.090909,0.378788,000004,2024-05-29 00:00:00,num
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
10223,0.19211,0.160499,0.50424,0.0,0.351352,0.172318,0.035229,0.34583,0.156474,0.232681,...,0.415122,0.407669,0.295591,0.396793,1.0,0.21584,0.421067,688819,2024-05-21 00:00:00,size
10224,0.142857,0.238095,0.333333,0.0,0.238095,0.238095,0.0,0.285714,0.095238,0.238095,...,0.380952,0.333333,0.285714,0.428571,0.333333,1.0,0.333333,688981,2024-05-30 00:00:00,num
10225,0.171304,0.241275,0.36179,0.0,0.28808,0.370795,0.0,0.270548,0.106046,0.279355,...,0.45301,0.359912,0.341969,0.428268,0.367323,1.0,0.371767,688981,2024-05-30 00:00:00,size
10226,0.098039,0.215686,0.529412,0.0,0.137255,0.196078,0.019608,0.254902,0.294118,0.176471,...,0.372549,0.411765,0.196078,0.392157,0.333333,0.117647,1.0,689009,2024-05-28 00:00:00,num
