# 关联跳跃和跳跃关联度

## 导入模块

In [1]:
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
import feather
import sys

## 读入数据

### 股价跳跃数据及处理

In [2]:
# Load the raw jump table and convert its `date` column with pd.to_datetime.
jump_path = '../data/jump/jump.feather'
jump = feather.read_dataframe(jump_path).assign(
    date=lambda frame: pd.to_datetime(frame['date'])
)

In [3]:
# Keep only the jumps whose return is above +1% or below -1%.
# The explicit .copy() detaches the filtered frame from the loaded one, so
# adding the `sign` column below does not raise pandas' SettingWithCopyWarning.
jump = jump[(jump['ret_jump'] > 0.01) | (jump['ret_jump'] < -0.01)].copy()

# Direction of each jump: +1.0 for a positive return, -1.0 for a negative one.
# np.sign is applied to the whole column at once.
jump['sign'] = np.sign(jump['ret_jump'])

## 关联跳跃

### 前一日、后一日跳跃方向（按自然日 ±1 天匹配）

In [4]:
# Attach to every jump the sign of the same issue's jump one day earlier
# (`sign_yest`) and one day later (`sign_tomo`); 0 when there is no such jump.
#
# NOTE(review): the shift is one *calendar* day (pd.Timedelta('1d')), so a jump
# on a Friday is never paired with one on the following Monday. Confirm whether
# adjacent trading days were intended.
merge_keys = ['issue', 'date']
one_day = pd.Timedelta('1d')

# Re-date each jump to the following day: after the merge, a row dated D
# receives the sign this issue had on D - 1.
jump_plus1 = (
    jump[['issue', 'date', 'sign']]
    .assign(date=lambda frame: frame['date'] + one_day)
    .rename(columns={'sign': 'sign_yest'})
)
jump = jump.merge(jump_plus1, on=merge_keys, how='left')

# Re-date each jump to the preceding day: after the merge, a row dated D
# receives the sign this issue had on D + 1.
jump_minus1 = (
    jump[['issue', 'date', 'sign']]
    .assign(date=lambda frame: frame['date'] - one_day)
    .rename(columns={'sign': 'sign_tomo'})
)
jump = jump.merge(jump_minus1, on=merge_keys, how='left')

# Rows without a match on the neighbouring day come out of the left merges as
# NaN; encode them as 0.
jump = jump.fillna({'sign_yest': 0., 'sign_tomo': 0.})

### 关联跳跃矩阵

index: 每一次跳跃

column: 每一家公司

In [5]:
# Sorted unique issue codes present in the jump table; they label the columns
# of the matrix below.
issues = jump['issue'].sort_values().unique()

# Boolean matrix with one row per row of `jump` (same index) and one column per
# issue, initialised to all-False. identify_corr() later sets entries to True.
jump_corr = pd.DataFrame(
    np.zeros((len(jump.index), len(issues)), dtype=bool),
    index=jump.index,
    columns=issues,
)

### 找出一日内的关联跳跃

In [6]:
def identify_corr(jump_date):
    """Flag correlated jumps for one group of jump rows in the global matrix.

    For each direction (+1, then -1) the rows of ``jump_date`` whose ``sign``
    equals that direction are selected. In the module-level ``jump_corr``
    frame, those rows are set to True in the column of every issue in
    ``jump_date`` whose ``sign``, ``sign_yest`` or ``sign_tomo`` equals the
    same direction.

    Parameters
    ----------
    jump_date : pandas.DataFrame
        Rows with the columns ``issue``, ``sign``, ``sign_yest`` and
        ``sign_tomo``. Its index labels must be row labels of ``jump_corr``
        and its ``issue`` values must be column labels of ``jump_corr``.

    Returns
    -------
    None
        The function works purely by mutating ``jump_corr`` in place.
    """
    global jump_corr

    today = jump_date['sign']
    yesterday = jump_date['sign_yest']
    tomorrow = jump_date['sign_tomo']

    # NOTE(review): only issues that have a row in `jump_date` can be flagged,
    # so an issue matching through sign_yest / sign_tomo must also have a row
    # of its own in this group. Confirm this is the intended definition.
    for direction in (1, -1):
        jumped_today = today == direction
        related = jumped_today | (yesterday == direction) | (tomorrow == direction)

        rows = jump_date.index[jumped_today.to_numpy()]
        cols = jump_date.loc[related, 'issue'].to_numpy()
        jump_corr.loc[rows, cols] = True

### 关联跳跃 & 保存

In [7]:
%%time
jump.groupby('date')[['issue', 'sign', 'sign_yest', 'sign_tomo']].apply(identify_corr)

CPU times: total: 15.4 s
Wall time: 15.7 s


In [8]:
# %%time
# feather.write_dataframe(jump_corr, '../data/jump/jump_corr.feather')
# Saving the correlated-jump matrix is currently disabled: uncomment the two
# lines above to write `jump_corr` to disk. The timing shown under this cell
# presumably comes from an earlier run made while the cell was still active.

CPU times: total: 2min 19s
Wall time: 39.2 s


## 跳跃关联度

计算每个调仓日（每个月末当天或之前最近的一个交易日）以及对应的关联度计算起始日：含调仓日在内向前数第 120 个交易日；历史不足 120 个交易日时取首个交易日。

In [80]:
# Length of the look-back window, counted in entries of `trade_date` and
# including the rebalance date itself.
CORR_WINDOW = 120

# Sorted unique dates that appear in the jump table.
trade_date = jump['date'].sort_values().unique()
# Calendar month-ends between the first and the last of those dates.
mes = pd.date_range(start=trade_date[0], end=trade_date[-1], freq='1ME')

# One binary search per month-end replaces the original loop, which re-scanned
# `trade_date` with a boolean mask and re-allocated both result arrays with
# np.append on every iteration.
trade_days = np.asarray(trade_date)
# n_before[i] = number of entries of trade_date that are <= mes[i]. It is at
# least 1, because every generated month-end is >= trade_date[0].
n_before = np.searchsorted(trade_days, mes.to_numpy(), side='right')

# Rebalance date: the last date on or before each month-end.
adj_date = trade_days[n_before - 1]
# Window start: the CORR_WINDOW-th date counting back from the rebalance date
# (inclusive), or the very first date when fewer are available.
calc_start_date = trade_days[np.maximum(n_before - CORR_WINDOW, 0)]

### 一家公司一个调仓日的跳跃关联度

In [94]:
%%time

ad = adj_date[0]
csd = calc_start_date[0]
issue = '000001'

corr_num = pd.DataFrame(columns=np.append(['issue', 'date'], issues))
corr_size = pd.DataFrame(columns=np.append(['issue', 'date'], issues))

idx_date = (jump['date'] >= csd) & (jump['date'] <= ad)
idx = (jump['issue'] == issue) & idx_date
jump_corr_date = jump_corr.loc[idx]

CPU times: total: 375 ms
Wall time: 371 ms


In [95]:
%%time
sum_num = jump.loc[idx, 'jump'].count()
corr_num.loc[0, issues] = jump_corr_date.loc[idx].sum() / sum_num
abs_ret_jump = jump.loc[idx, 'ret_jump'].apply(np.abs)
sum_size = abs_ret_jump.sum()
corr_size.loc[0, issues] = jump_corr_date.loc[idx].mul(abs_ret_jump, axis=0).sum() / sum_size
corr_num

CPU times: total: 15.6 ms
Wall time: 13.9 ms


Unnamed: 0,issue,date,000001,000002,000004,000005,000006,000007,000008,000009,...,688787,688788,688789,688793,688798,688799,688800,688819,688981,689009
0,,,1.0,0.2,0.2,0.0,0.2,0.2,0.0,0.2,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
