# 关联跳跃和跳跃关联度

## 导入模块

In [9]:
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
import feather
import sys

## 读入数据

### 股价跳跃数据及处理

In [2]:
# Load the raw jump table, then re-bind it with the `date` column parsed
# into pandas datetimes so date arithmetic works in the later cells.
jump = feather.read_dataframe('../data/jump/jump.feather')
jump = jump.assign(date=pd.to_datetime(jump['date']))

In [3]:
# Keep only jumps whose return is larger than 0.01 in absolute value
# (threshold is in whatever units `ret_jump` is stored in).
# `.copy()` makes the filtered frame independent of the loaded one, so adding
# a column below does not raise pandas' SettingWithCopyWarning.
jump = jump[(jump['ret_jump'] > 0.01) | (jump['ret_jump'] < -0.01)].copy()

# Direction of each jump: +1 for an upward jump, -1 for a downward jump.
# np.sign is applied to the whole column at once instead of through .apply.
jump['sign'] = np.sign(jump['ret_jump'])

## 关联跳跃

### 前一日后一日跳跃方向

In [4]:
# Make the cell re-runnable: if it was executed before, `jump` already has
# `sign_yest` / `sign_tomo`, and merging again would create suffixed
# duplicates (`sign_yest_x`, `sign_yest_y`) and break the fillna step below.
jump = jump.drop(columns=['sign_yest', 'sign_tomo'], errors='ignore')

# Shift every jump one day forward. After the left merge, a row dated d
# carries in `sign_yest` the sign of the same issue's jump dated d - 1
# (NaN when that issue had no jump on d - 1).
# NOTE(review): the shift is one *calendar* day, so a Friday jump is never
# matched to the following Monday -- confirm this is intended.
# NOTE(review): if (issue, date) is not unique in `jump`, these merges
# duplicate rows -- TODO confirm the key is unique.
jump_plus1 = (
    jump[['issue', 'date', 'sign']]
    .assign(date=lambda df: df['date'] + pd.Timedelta('1d'))
    .rename(columns={'sign': 'sign_yest'})
)
jump = jump.merge(jump_plus1, on=['issue', 'date'], how='left')

# Same construction shifted one day backward: `sign_tomo` is the sign of the
# same issue's jump dated d + 1.
jump_minus1 = (
    jump[['issue', 'date', 'sign']]
    .assign(date=lambda df: df['date'] - pd.Timedelta('1d'))
    .rename(columns={'sign': 'sign_tomo'})
)
jump = jump.merge(jump_minus1, on=['issue', 'date'], how='left')

# No jump on the neighbouring day -> direction 0.
jump = jump.fillna({'sign_yest': 0., 'sign_tomo': 0.})

### 关联跳跃矩阵

index：每一次跳跃（即 `jump` 的每一行）

column：每一家公司（`issue` 的全部取值）

In [5]:
# Sorted list of every distinct issue appearing in `jump`; these become the
# columns of the correlation matrix.
issues = jump['issue'].sort_values().unique()

# Dense boolean matrix: one row per jump (same index as `jump`), one column
# per issue, initialised to False. It is filled in place by `identify_corr`.
# A `global` statement is not needed here: names assigned at the top level of
# a cell are already module-level globals.
# NOTE: this allocates len(jump) * len(issues) booleans; the size check
# further down reported roughly 9.8 GB for the full data set.
jump_corr = pd.DataFrame(False, index=jump.index, columns=issues, dtype=bool)

### 找出一日内的关联跳跃

In [6]:
def identify_corr(jump_date):
    """Flag, in the global ``jump_corr`` matrix, the issues related to each jump.

    ``jump_date`` is a slice of ``jump`` (the caller passes one date's rows)
    with the columns ``issue``, ``sign``, ``sign_yest`` and ``sign_tomo``.
    For each direction (+1, then -1):

    * the rows to update are the jumps in ``jump_date`` with that ``sign``;
    * the columns to update are the issues of every row in ``jump_date``
      whose ``sign``, ``sign_yest`` or ``sign_tomo`` equals that direction.

    The selected cells of ``jump_corr`` are set to True in place; nothing is
    returned. Only issues that have a row in ``jump_date`` can be flagged,
    and each jump is flagged against its own issue.
    """
    global jump_corr

    for direction in (1, -1):
        jumped_today = jump_date['sign'].eq(direction)
        rows = jump_date.index[jumped_today]

        # Same direction today, on the previous day, or on the next day.
        related = (
            jumped_today
            | jump_date['sign_yest'].eq(direction)
            | jump_date['sign_tomo'].eq(direction)
        )
        cols = jump_date.loc[related, 'issue'].to_numpy()

        jump_corr.loc[rows, cols] = True

### 关联跳跃 & 保存

In [7]:
%%time
jump.groupby('date')[['issue', 'sign', 'sign_yest', 'sign_tomo']].apply(identify_corr)

CPU times: total: 14.7 s
Wall time: 14.7 s


In [8]:
%%time
feather.write_dataframe(jump_corr, '../data/jump/jump_corr.feather')

CPU times: total: 2min 28s
Wall time: 42.5 s


In [11]:
# In-memory size of the correlation matrix, reported in units of 1e6 bytes.
jump_corr_bytes = sys.getsizeof(jump_corr)
jump_corr_bytes / 1e6

9800.559878

## 跳跃频率关联度

## 跳跃幅度关联度