# W 式切割

## 导入模块

In [1]:
import numpy as np
import pandas as pd
import feather
import os
from tqdm.notebook import tqdm
from numpy.lib.stride_tricks import as_strided as stride

## 读入数据

In [2]:
# Nominal start of the sample. NOTE(review): not referenced again in the visible cells —
# confirm whether the saved factor is meant to be trimmed to this date.
start_date = '2023-01-01'
# Lower bound actually used when filtering the daily bars; presumably one month earlier
# than start_date to leave warm-up history for the rolling window — TODO confirm.
start_date_redundancy = pd.to_datetime('2022-12-01')
# Upper bound used (inclusively) when filtering the daily bars.
end_date = '2023-12-31'

In [3]:
# Load the daily bars and keep only the rows inside [start_date_redundancy, end_date].
price_1d = feather.read_dataframe('../data/StockPriceK1d_20241231.feather')
in_sample = price_1d['date'].between(start_date_redundancy, end_date)
price_1d = price_1d.loc[in_sample].reset_index(drop=True)
price_1d

Unnamed: 0,date,issue,preclose,open,high,low,close,numTrades,volume,value,adj,ret,is_limit_buy,is_limit_sell
0,2022-12-01,000001,13.03,13.38,13.66,13.07,13.10,122323.0,200268883.0,2.658203e+09,113.9362,0.005372,0.0,0.0
1,2022-12-02,000001,13.10,13.14,13.15,12.69,12.90,104558.0,140420278.0,1.803659e+09,113.9362,-0.015267,0.0,0.0
2,2022-12-05,000001,12.90,13.09,13.57,13.01,13.53,157593.0,228762742.0,3.057232e+09,113.9362,0.048837,0.0,0.0
3,2022-12-06,000001,13.53,13.34,13.66,13.25,13.43,88809.0,125257891.0,1.680594e+09,113.9362,-0.007391,0.0,0.0
4,2022-12-07,000001,13.43,13.33,13.37,13.03,13.15,101201.0,137737690.0,1.815668e+09,113.9362,-0.020849,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1321239,2023-12-25,689009,31.06,30.85,31.20,30.06,30.08,5695.0,3591121.0,1.096494e+08,1.0000,-0.031552,0.0,0.0
1321240,2023-12-26,689009,30.08,30.14,30.25,26.00,27.85,13831.0,9042296.0,2.519455e+08,1.0000,-0.074136,0.0,0.0
1321241,2023-12-27,689009,27.85,27.90,28.89,27.18,28.89,13530.0,5488847.0,1.551564e+08,1.0000,0.037343,0.0,0.0
1321242,2023-12-28,689009,28.89,28.58,29.85,28.44,29.20,9638.0,5027247.0,1.472011e+08,1.0000,0.010730,0.0,0.0


## 获取成交额分位数

In [4]:
# Spot-check a single (date, issue) row of the daily bars.
price_1d.loc[
    price_1d['date'].eq('2022-12-01') & price_1d['issue'].eq('000564')
]

Unnamed: 0,date,issue,preclose,open,high,low,close,numTrades,volume,value,adj,ret,is_limit_buy,is_limit_sell
38682,2022-12-01,564,2.35,2.35,2.35,2.35,2.35,0.0,0.0,0.0,7.181602,0.0,1.0,1.0


In [5]:
def qcut_weight(price:pd.DataFrame, q:float) -> float:
    """Return the trade-count-weighted ``q``-quantile of ``value_avg``.

    Rows are ordered by ``value_avg`` ascending and each row is weighted by its
    ``num_trades``. The result is the smallest ``value_avg`` whose cumulative
    share of trades is at least ``q``.

    Parameters
    ----------
    price : pd.DataFrame
        Frame containing the columns ``value_avg`` and ``num_trades``.
    q : float
        Target cumulative share of trades.

    Returns
    -------
    float
        The weighted quantile, or ``np.nan`` when the trade counts sum to zero
        or when no row's cumulative share reaches ``q`` (e.g. ``q > 1``).
    """
    total_trades = price['num_trades'].sum()
    if total_trades == 0:
        return np.nan
    # sort_values places NaN value_avg rows last, so they are only reached
    # after every row with a usable value.
    ordered = price[['value_avg', 'num_trades']].sort_values('value_avg')
    cum_share = ordered['num_trades'].cumsum().div(total_trades)
    reached = ordered.loc[cum_share >= q, 'value_avg']
    if reached.empty:
        # Previously this case raised IndexError from .iloc[0].
        return np.nan
    return reached.iloc[0]

In [13]:
# Cumulative trade-share level passed to qcut_weight for every stock and day.
q = 13/16
# Suffix used in the saved factor file name; presumably encodes q = 13/16 —
# it is not derived from q, so keep the two in sync by hand.
label_2 = '_1316'
# Column of price_1d that the rolling factor computation reads as the "value" input.
value_col = 'value_q'

In [7]:
# For every trading day, read that day's minute bars, compute each stock's
# trade-weighted quantile of per-trade value, and store it in price_1d['value_q'].
price_1d['value_q'] = np.nan
trade_date = price_1d['date'].sort_values().unique()
# Iterate a DatetimeIndex so each element is a pd.Timestamp: depending on the pandas
# version, unique() may return a raw numpy array whose items lack .year / .strftime.
for date in tqdm(pd.DatetimeIndex(trade_date)):
    year = date.year
    date_str = date.strftime('%Y%m%d')
    price_1m = feather.read_dataframe(f'../data/StockPriceK1m/{year}/StockPriceK1m_{date_str}.feather')
    # Average value per trade for each minute bar.
    price_1m['value_avg'] = price_1m['value'].div(price_1m['num_trades'])
    value_q = (
        price_1m
            .groupby('issue')[['value_avg', 'num_trades']]
            .apply(qcut_weight, q=q)
    )
    # Build the date mask once and reuse it for both the read and the write.
    is_current_date = price_1d['date'] == date
    price_1d.loc[is_current_date, 'value_q'] = \
        price_1d.loc[is_current_date, 'issue'].map(value_q)

  0%|          | 0/264 [00:00<?, ?it/s]

## 理想反转因子

In [8]:
# Daily average value per trade: traded value divided by the number of trades.
price_1d['value_avg'] = price_1d['value'] / price_1d['numTrades']

自定义的多列 `rolling.apply`:基于 NumPy 步幅视图,对多列数据同时做滚动窗口计算

In [9]:
def roll_np(df: pd.DataFrame, apply_func: callable, window: int,
         return_col_num: int, **kwargs):
    """Apply ``apply_func`` to every length-``window`` block of consecutive rows.

    The frame is converted with ``reset_index().values``, so each window handed
    to ``apply_func`` is a 2-D array whose column 0 is the former index and
    whose remaining columns are the columns of ``df`` in order.

    Parameters
    ----------
    df : pd.DataFrame
        Input rows, already in the order the windows should follow.
    apply_func : callable
        Called as ``apply_func(window_values, **kwargs)``; its result is stored
        in one output row of width ``return_col_num``.
    window : int
        Number of consecutive rows per window; must be at least 1.
    return_col_num : int
        Number of columns in the output array.
    **kwargs
        Forwarded unchanged to ``apply_func``.

    Returns
    -------
    np.ndarray
        Array of shape ``(len(df), return_col_num)``. Row ``i`` holds the result
        for the window ending at row ``i``; the first ``window - 1`` rows (or
        all rows when ``len(df) < window``) are NaN.

    Raises
    ------
    ValueError
        If ``window`` is smaller than 1.
    """
    if window < 1:
        raise ValueError(f'window must be a positive integer, got {window}')

    v = df.reset_index().values
    dim0, dim1 = v.shape
    result_values = np.full((dim0, return_col_num), np.nan)
    if dim0 < window:
        # Not a single full window: previously this produced a negative
        # dimension error inside as_strided.
        return result_values

    stride0, stride1 = v.strides
    # Overlapping windows share memory with ``v``; expose them read-only so a
    # callback cannot silently modify neighbouring windows.
    stride_values = stride(
        v,
        shape=(dim0 - (window - 1), window, dim1),
        strides=(stride0, stride0, stride1),
        writeable=False,
    )
    # Window k covers rows k .. k + window - 1, so its result belongs to row k + window - 1.
    for idx, values in enumerate(stride_values, window - 1):
        result_values[idx, ] = apply_func(values, **kwargs)

    return result_values

In [18]:
def m_np(price) -> float:
    """Difference between the column-2 sums of the upper and lower halves of a window.

    Rows are ranked by column 1 (ascending). The first ``len(price) // 2`` rows
    form the lower half and the remaining rows the upper half; the return value
    is ``sum(upper column 2) - sum(lower column 2)``.
    """
    order = np.argsort(price[:, 1])
    ranked = price[order]
    half = len(ranked) // 2
    upper_sum = ranked[half:, 2].sum()
    lower_sum = ranked[:half, 2].sum()
    return upper_sum - lower_sum

def m_np_value(price) -> float:
    """Sum over the window of column 1 times column 2.

    NaN and infinite entries are replaced by 0 before multiplying.
    """
    cleaned = np.nan_to_num(price, nan=0, posinf=0, neginf=0)
    weight, ret = cleaned[:, 1], cleaned[:, 2]
    return np.sum(weight * ret)

def m_np_standard(price) -> float:
    """Sum of column 2 weighted by a standardised column 1.

    NaN and infinite entries are first replaced by 0. Column 1 is then centred
    on its median and divided by its standard deviation; the result is the sum
    of that standardised column times column 2. When the standard deviation is
    below 1e-8 the function returns 0 instead of dividing.
    """
    cleaned = np.nan_to_num(price, nan=0, posinf=0, neginf=0)
    value = cleaned[:, 1]
    scale = np.std(value)
    if scale < 1e-8:
        return 0
    # ``cleaned`` is a copy made by nan_to_num, so the caller's window is untouched.
    cleaned[:, 1] = (value - np.median(value)) / scale
    return np.sum(cleaned[:, 1] * cleaned[:, 2])

def m_np_rank(price) -> float:
    """Sum of column 2 weighted by the centred rank of column 1.

    NaN and infinite entries are replaced by 0. Each row's rank of column 1
    (0-based, ascending) is shifted down by ``len(price) // 2`` and multiplied
    by column 2; the products are summed.
    """
    cleaned = np.nan_to_num(price, nan=0, posinf=0, neginf=0)
    order = cleaned[:, 1].argsort()
    # argsort of an argsort gives each row's rank position.
    centred_rank = order.argsort() - len(price) // 2
    return np.sum(centred_rank * cleaned[:, 2])

# Window statistic that m_series_calc passes to roll_np; swap in another m_np_* variant here.
m_np_calc = m_np_standard
# Suffix used in the saved factor file name. It is not derived from m_np_calc,
# so change both lines together to keep the file name truthful.
label_1 = '_standard'

def m_series_calc(price:pd.DataFrame, d:int=20) -> pd.Series:
    """Compute the rolling factor for one stock's rows.

    Parameters
    ----------
    price : pd.DataFrame
        Rows of a single stock containing the column named by the module-level
        ``value_col`` and the column ``ret``, in the order the windows should follow.
    d : int, default 20
        Rolling window length in rows.

    Returns
    -------
    pd.Series
        Float Series named ``'m'`` with a fresh 0..n-1 index and one entry per
        input row. Rows without a full window are NaN; when the stock has
        fewer than ``d`` rows every entry is NaN.
    """
    if len(price) < d:
        # Same name and float dtype as the main branch, including for an empty group.
        return pd.Series(np.full(len(price), np.nan), name='m')
    # The statistic is chosen through the module-level ``m_np_calc``.
    rolled = roll_np(
        df=price[[value_col, 'ret']],
        apply_func=m_np_calc,
        window=d, return_col_num=1
    )
    return pd.Series(data=rolled.reshape(rolled.shape[0]), name='m')

tqdm.pandas()
# Order the inputs by (issue, date) explicitly: every rolling window then runs over
# consecutive dates, and the flattened result lines up row-for-row with m_input.
m_input = price_1d[['issue', 'date', value_col, 'ret']].sort_values(['issue', 'date'])
m_by_issue = (
    m_input
        .groupby('issue', sort=False)[[value_col, 'ret']]
        .progress_apply(m_series_calc)
)
# Write back by index label instead of by position, so the assignment does not rely on
# how price_1d happens to be ordered. ravel() handles both shapes groupby.apply can
# return (a stacked Series, or one row per issue when all groups have equal length).
# A length mismatch (e.g. rows dropped for a missing issue) raises here instead of
# silently shifting values onto the wrong rows.
price_1d['m'] = pd.Series(m_by_issue.to_numpy().ravel(), index=m_input.index)

  0%|          | 0/5157 [00:00<?, ?it/s]

## 保存

In [19]:
# Display the daily frame, which by now also carries the value_q, value_avg and m columns.
price_1d

Unnamed: 0,date,issue,preclose,open,high,low,close,numTrades,volume,value,adj,ret,is_limit_buy,is_limit_sell,value_q,value_avg,m
0,2022-12-01,000001,13.03,13.38,13.66,13.07,13.10,122323.0,200268883.0,2.658203e+09,113.9362,0.005372,0.0,0.0,29985.813880,21731.015649,
1,2022-12-02,000001,13.10,13.14,13.15,12.69,12.90,104558.0,140420278.0,1.803659e+09,113.9362,-0.015267,0.0,0.0,21727.312500,17250.318122,
2,2022-12-05,000001,12.90,13.09,13.57,13.01,13.53,157593.0,228762742.0,3.057232e+09,113.9362,0.048837,0.0,0.0,23950.856688,19399.541684,
3,2022-12-06,000001,13.53,13.34,13.66,13.25,13.43,88809.0,125257891.0,1.680594e+09,113.9362,-0.007391,0.0,0.0,24998.704255,18923.684711,
4,2022-12-07,000001,13.43,13.33,13.37,13.03,13.15,101201.0,137737690.0,1.815668e+09,113.9362,-0.020849,0.0,0.0,21907.143617,17941.202789,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1321239,2023-12-25,689009,31.06,30.85,31.20,30.06,30.08,5695.0,3591121.0,1.096494e+08,1.0000,-0.031552,0.0,0.0,26091.823529,19253.625637,-0.041557
1321240,2023-12-26,689009,30.08,30.14,30.25,26.00,27.85,13831.0,9042296.0,2.519455e+08,1.0000,-0.074136,0.0,0.0,23722.074468,18215.998409,-0.010428
1321241,2023-12-27,689009,27.85,27.90,28.89,27.18,28.89,13530.0,5488847.0,1.551564e+08,1.0000,0.037343,0.0,0.0,13780.593985,11467.581744,-0.037026
1321242,2023-12-28,689009,28.89,28.58,29.85,28.44,29.20,9638.0,5027247.0,1.472011e+08,1.0000,0.010730,0.0,0.0,19097.160000,15272.995746,-0.055056


In [20]:
# Keep only the row keys and the factor column, then write them to the factor directory.
# The file name carries the statistic label (label_1) and the quantile label (label_2).
factor_m = price_1d.loc[:, ['date', 'issue', 'm']]
os.makedirs('../data/factor_m_1d/', exist_ok=True)
feather.write_dataframe(
    factor_m,
    f'../data/factor_m_1d/factor_m{label_1}{label_2}_1d.feather',
)