# 识别跳跃并计算跳跃收益

## 导入模块

In [1]:
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
import feather
import math
from scipy import stats

## 读入测试数据

In [2]:
# Load the 1-minute bars (single-day file) and the daily bars, then parse both
# `date` columns into pandas datetimes.
price_1m = feather.read_dataframe('../data/2024/StockPriceK1m_20240102.feather')
# The minute file's dates are cast to str before parsing — presumably they are
# stored in a non-string form such as an integer YYYYMMDD; TODO confirm.
price_1m['date'] = pd.to_datetime(price_1m['date'].astype(str))
price_1d = feather.read_dataframe('../data/StockPriceK1d_20240630.feather')
price_1d['date'] = pd.to_datetime(price_1d['date'])

## 定义跳跃统计量

In [19]:
def mu(p: float):
    """Return 2**(p/2) * Gamma((p + 1) / 2) / sqrt(pi).

    This is the p-th absolute moment E|Z|**p of a standard normal variable Z,
    e.g. mu(2) == 1 and mu(1) == sqrt(2 / pi).
    """
    power_of_two = 2 ** (p / 2)
    gamma_term = math.gamma((p + 1) / 2)
    return power_of_two * gamma_term / np.sqrt(np.pi)

def Omega_SwV(log_ret):
    """Return the scaled sum of products of six consecutive absolute log returns.

    The result is
    ``(mu(6) / 9) * n**3 * mu(1)**-6 / (n - 5) * sum_i prod_{k=0..5} |r_{i+k}|``
    with ``n = len(log_ret)``. JS() uses its square root as a denominator.
    """
    magnitudes = np.abs(log_ret)

    # Multiply the six shifted views together, left to right, so that entry i
    # of the result is |r_i| * |r_{i+1}| * ... * |r_{i+5}|.
    rolling_prod = magnitudes[:-5]
    for shift in range(1, 6):
        # A stop of 0 would yield an empty slice, so the last shift (5) must
        # run through to the end of the array instead.
        stop = (shift - 5) or None
        rolling_prod = rolling_prod * magnitudes[shift:stop]
    total = rolling_prod.sum()

    n = len(log_ret)
    scale = (mu(6) / 9) * ((n ** 3) * (mu(1) ** -6) / (n - 5))
    return scale * total

def V_01(log_ret):
    """Return the scaled sum of products of adjacent absolute log returns.

    Computes ``(1 / mu(1)) * sum_i |r_i| * |r_{i+1}|`` over all consecutive
    pairs of ``log_ret``.

    Args:
        log_ret: 1-D array-like of log returns.

    Returns:
        The scaled sum as a float; 0.0 when there is no adjacent pair.
    """
    abs_log_ret = np.abs(log_ret)
    # Pair each return with its successor. The slice must be `[1:]`: indexing
    # with the scalar `[1]` would multiply every term by the second return only.
    prod_log_ret = abs_log_ret[:-1] * abs_log_ret[1:]
    sum_prod = prod_log_ret.sum()
    # NOTE(review): textbook bipower variation is normalised by mu(1) ** -2;
    # the 1 / mu(1) factor is kept unchanged here — confirm against the
    # reference formula this notebook follows.
    coef = 1 / mu(1)
    return coef * sum_prod

def RV_N(log_ret):
    """Return the realized variance: the sum of squared log returns."""
    squared = log_ret * log_ret
    return squared.sum()

def SwV_N(ret, log_ret):
    """Return twice the summed gap between simple returns and log returns."""
    gap = ret - log_ret
    return 2 * gap.sum()

def JS(ret, log_ret):
    """Return the jump test statistic for matching simple and log returns.

    The statistic is ``n * V_01 / sqrt(Omega_SwV) * (1 - RV_N / SwV_N)`` with
    ``n = len(ret)``. NaN is returned when Omega_SwV or SwV_N equals zero,
    because both appear as denominators.
    """
    n_obs = len(ret)
    omega = Omega_SwV(log_ret)
    swap_var = SwV_N(ret, log_ret)

    # Guard clause: the statistic is undefined for a vanishing denominator.
    if omega == 0 or swap_var == 0:
        return np.nan

    scaled_variation = V_01(log_ret) / np.sqrt(omega)
    variance_ratio = RV_N(log_ret) / swap_var
    return n_obs * scaled_variation * (1 - variance_ratio)

def pvalue(js: float):
    """Return the two-sided p-value of ``js`` under a standard normal law."""
    lower_tail = stats.norm.cdf(js)
    upper_tail = 1 - lower_tail
    # Pick the smaller tail (same selection rule as min(lower, upper)).
    smaller_tail = upper_tail if upper_tail < lower_tail else lower_tail
    return 2 * smaller_tail

## 识别跳跃, 计算收益

In [6]:
# Inspect the 1-minute rows of a single issue ('000005').
price_1m[price_1m['issue'] == '000005']

Unnamed: 0,time,issue,date,open,high,low,close,volume,value,num_trades
720,93000,000005,2024-01-02,1.08,1.10,1.08,1.10,876800.0,954553.0,75.0
721,93100,000005,2024-01-02,1.09,1.09,1.09,1.09,707100.0,772772.0,46.0
722,93200,000005,2024-01-02,1.09,1.09,1.08,1.08,298900.0,325575.0,25.0
723,93300,000005,2024-01-02,1.09,1.09,1.08,1.09,56100.0,61148.0,7.0
724,93400,000005,2024-01-02,1.09,1.09,1.09,1.09,372700.0,406243.0,17.0
...,...,...,...,...,...,...,...,...,...,...
955,145500,000005,2024-01-02,1.12,1.12,1.12,1.12,0.0,0.0,0.0
956,145600,000005,2024-01-02,1.12,1.12,1.12,1.12,1000.0,1120.0,1.0
957,145700,000005,2024-01-02,1.12,1.12,1.12,1.12,0.0,0.0,0.0
958,145800,000005,2024-01-02,1.12,1.12,1.12,1.12,0.0,0.0,0.0


In [25]:
%%time

def jump_identify(ret, log_ret):
    """Iteratively flag the returns that drive a significant jump statistic.

    While the two-sided p-value of the JS statistic is below 0.05, every
    observation is replaced in turn by the sample median; the observation
    whose replacement lowers ``|JS|`` the most is flagged as a jump and stays
    replaced for the following rounds.

    Args:
        ret: 1-D numpy array of simple returns.
        log_ret: 1-D numpy array of the matching log returns.

    Returns:
        A tuple ``(any_jump, jump_log_ret)``: whether at least one observation
        was flagged, and the sum of the original log returns at the flagged
        positions.
    """
    n = len(ret)
    jump = np.full(n, False, dtype=bool)
    # Medians of the ORIGINAL series are used as the neutral replacement value.
    med = np.median(ret)
    log_med = np.median(log_ret)
    ret_c = ret.copy()
    log_ret_c = log_ret.copy()
    js0 = JS(ret_c, log_ret_c)
    p = pvalue(js0)

    # A NaN p-value compares False here, so an undefined statistic ends the loop.
    while p < 0.05:
        js = np.zeros(n)
        for i in range(n):
            r = ret_c.copy()
            lr = log_ret_c.copy()
            r[i] = med
            lr[i] = log_med
            js[i] = JS(r, lr)
        js_diff = np.abs(js0) - np.abs(js)
        idx_max = np.argmax(js_diff)
        if jump[idx_max]:
            # The best candidate is already neutralised: replacing it again
            # changes neither the series nor the p-value, so without this
            # guard the loop would spin forever. Each completed round
            # therefore flags a new index, bounding the loop by n rounds.
            break
        jump[idx_max] = True
        ret_c[idx_max] = med
        log_ret_c[idx_max] = log_med
        js0 = JS(ret_c, log_ret_c)
        p = pvalue(js0)

    return np.any(jump), log_ret[jump].sum()

def jump_identify_price(price_1m, price_1d, date, date_next):
    """Build one issue's return series and run jump detection on it.

    Intraday returns go from the ``open`` of rows with ``time % 500 == 0`` to
    the ``close`` of rows with ``time % 500 == 400``. One extra return is
    appended, from the daily ``close`` on ``date`` to the daily ``open`` on
    ``date_next``.

    Args:
        price_1m: minute bars with ``issue``, ``time``, ``open`` and ``close``
            columns; the first ``issue`` value found is used.
            NOTE(review): ``time`` looks like an HHMMSS integer (e.g. 93000),
            which would make these 5-minute windows — confirm.
        price_1d: daily bars with ``issue``, ``date``, ``open`` and ``close``.
        date: day whose daily close starts the extra return.
        date_next: day whose daily open ends the extra return.

    Returns:
        Whatever ``jump_identify`` returns for the assembled series.
    """
    issue = price_1m['issue'].unique()[0]

    # Window boundaries inside the minute data.
    window_phase = price_1m['time'] % 500
    intraday_open = price_1m.loc[window_phase == 0, 'open'].to_numpy()
    intraday_close = price_1m.loc[window_phase == 400, 'close'].to_numpy()

    # Daily rows of this issue on the two requested dates.
    same_issue = price_1d['issue'] == issue
    on_date = same_issue & (price_1d['date'] == date)
    on_next_date = same_issue & (price_1d['date'] == date_next)
    close_on_date = price_1d.loc[on_date, 'close'].to_numpy()
    open_on_next = price_1d.loc[on_next_date, 'open'].to_numpy()

    start_price = np.append(intraday_open, close_on_date)
    end_price = np.append(intraday_close, open_on_next)

    ret = (end_price - start_price) / start_price
    log_ret = np.log(1 + ret)

    # Progress trace: one issue code per processed group.
    print(issue, end=' ')
    return jump_identify(ret, log_ret)

# Run the jump detection once per issue: each group of 1-minute bars is passed
# to jump_identify_price together with the daily bars and the two dates.
# Each group's result is the 2-tuple returned by jump_identify.
df_jump = (price_1m.groupby('issue')[['issue', 'time', 'open', 'close']].
           apply(jump_identify_price,
                 price_1d=price_1d,
                 date='2024-01-02',
                 date_next='2024-01-03'))
df_jump

000001 000002 000004 000005 000006 000007 000008 000009 000010 000011 000012 000014 000016 000017 000019 000020 000021 000023 000025 000026 000027 000028 000029 000030 000031 000032 000034 000035 000036 000037 000039 000040 000042 000045 000046 000048 000049 000050 000055 000056 000058 000059 000060 000061 000062 000063 000065 000066 000068 000069 000070 000078 000088 000089 000090 000096 000099 000100 000151 000153 000155 000156 000157 000158 000159 000166 000301 000333 000338 000400 000401 000402 000403 000404 000407 000408 000409 000410 000411 000413 000415 000416 000417 000419 000420 000421 000422 000423 000425 000426 000428 000429 000430 000488 000498 000501 000503 000504 000505 000506 000507 000509 000510 000513 000514 000516 000517 000518 000519 000520 000521 000523 000524 000525 000526 000528 000529 000530 000531 000532 000533 000534 000536 000537 000538 000539 000541 000543 000544 000545 000546 000547 000548 000550 000551 000552 000553 000554 000555 000557 000558 000559 000560

KeyboardInterrupt: 