# 识别计算跳跃收益

## 导入模块

In [1]:
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
import feather
import math
from scipy import stats

## 读入测试数据

In [2]:
price_1m = feather.read_dataframe('../data/2024/StockPriceK1m_20240102.feather')
price_1m['date'] = pd.to_datetime(price_1m['date'].astype(str))
price_1d = feather.read_dataframe('../data/StockPriceK1d_20240630.feather')
price_1d['date'] = pd.to_datetime(price_1d['date'])

## 定义跳跃统计量

In [12]:
def mu(p: float):
    return (2 ** (p / 2)) * math.gamma((p + 1) / 2) / np.sqrt(np.pi)

def JS(ret, log_ret):
    n = len(ret)
    abs_log_ret = np.abs(log_ret)
    prod_log_ret = (abs_log_ret[:-5] * 
                    abs_log_ret[1:-4] *
                    abs_log_ret[2:-3] *
                    abs_log_ret[3:-2] *
                    abs_log_ret[4:-1] *
                    abs_log_ret[5:])
    sum_prod = prod_log_ret.sum()
    coef_Omega = (mu(6) / 9) * ((n ** 3) * (mu(1) ** -6) / (n - 5))
    Omega_SwV = coef_Omega * sum_prod
    
    SwV_N = 2 * (ret - log_ret).sum()
    
    if (Omega_SwV == 0 or SwV_N == 0):
        return np.nan

    prod_log_ret_V = abs_log_ret[:-1] * abs_log_ret[1]
    sum_prod_V = prod_log_ret_V.sum()
    coef_V = 1 / mu(1)
    V_01 = coef_V * sum_prod_V

    RV_N = (log_ret * log_ret).sum()
    
    js = n * (V_01 / np.sqrt(Omega_SwV)) * (1 - RV_N / SwV_N)
    return js

def pvalue(js: float):
    cdf = stats.norm.cdf(js, loc=0, scale=1)
    return 2 * min(cdf, 1 - cdf)

## 识别跳跃, 计算收益

In [22]:
%%time

def jump_identify(ret, log_ret):
    n = len(ret)
    jump = np.full(n, False, dtype=bool)
    med = np.median(ret)
    log_med = np.median(log_ret)
    ret_c = ret.copy()
    log_ret_c = log_ret.copy()
    js0 = JS(ret_c, log_ret_c)
    p = pvalue(js0)
    
    while (p < 0.05):
        js = np.zeros(n)
        for i in range(n):
            r = ret_c.copy()
            lr = log_ret_c.copy()
            r[i] = med
            lr[i] = log_med
            js[i] = JS(r, lr)
        js_diff = np.abs(js0) - np.abs(js)
        idx_max = np.argmax(js_diff)
        jump[idx_max] = True
        ret_c[idx_max] = med
        log_ret_c[idx_max] = log_med
        js0 = JS(ret_c, log_ret_c)
        p = pvalue(js0)

    return np.any(jump), log_ret[jump].sum()

def jump_identify_price(price_1m, price_1d, date, date_next):
    issues = price_1m['issue'].unique()
    issue = issues[0]
    prc = price_1m.copy()
    start_price = prc.loc[price_1m['time'] % 500 == 0, 'open'].to_numpy()
    end_price = prc.loc[price_1m['time'] % 500 == 400, 'close'].to_numpy()
    
    idx_d1 = (price_1d['issue'] == issue) &(price_1d['date'] == date)
    idx_d2 = (price_1d['issue'] == issue) &(price_1d['date'] == date_next)
    start_price = np.append(start_price, price_1d.loc[idx_d1, 'close'].to_numpy())
    end_price = np.append(end_price, price_1d.loc[idx_d2, 'open'].to_numpy())
    
    ret = (end_price - start_price) / start_price
    log_ret = np.log(1 + ret)

    print(issue, end=' ')
    flag_jump, ret_jump = jump_identify(ret, log_ret)
    df_jump = pd.DataFrame({'jump': [flag_jump], 'ret_jump': [ret_jump]})
    return df_jump
    # return jump_identify(ret, log_ret)

df_jump = (price_1m.iloc[:13920].groupby('issue')[['issue', 'time', 'open', 'close']].
           apply(jump_identify_price,
                 price_1d=price_1d,
                 date='2024-01-02',
                 date_next='2024-01-03'))
df_jump

000001 000002 000004 000005 000006 000007 000008 000009 000010 000011 000012 000014 000016 000017 000019 000020 000021 000023 000025 000026 000027 000028 000029 000030 000031 000032 000034 000035 000036 000037 000039 000040 000042 000045 000046 000048 000049 000050 000055 000056 000058 000059 000060 000061 000062 000063 000065 000066 000068 000069 000070 000078 000088 000089 000090 000096 000099 000100 CPU times: total: 1min 32s
Wall time: 1min 32s


Unnamed: 0_level_0,Unnamed: 1_level_0,jump,ret_jump
issue,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1,0,True,-0.005339
2,0,True,-0.002906
4,0,False,0.0
5,0,False,0.0
6,0,False,0.0
7,0,False,0.0
8,0,False,0.0
9,0,False,0.0
10,0,False,0.0
11,0,True,-0.002278


In [21]:
print(len(price_1m.loc[:13920, 'issue'].unique()))
print(len(price_1m['issue'].unique()))

59
5096
