# UMR

## 导入模块

In [1]:
import numpy as np
import pandas as pd
import feather
from matplotlib import pyplot as plt
import os
from tqdm.notebook import tqdm

## 读入数据

In [2]:
# Inclusive bounds of the sample window; the loaders below filter on these.
start_date, end_date = (
    pd.to_datetime(day) for day in ('2023-01-01', '2023-12-31')
)

### 日线数据

In [3]:
# Daily bars: read the full file, keep rows whose date lies inside
# [start_date, end_date] (both ends inclusive), and index by date so one
# trading day can be selected with .loc.
price_1d = (
    feather.read_dataframe('../data/StockPriceK1d_20241231.feather')
    .loc[lambda daily: daily['date'].between(start_date, end_date)]
    .set_index('date')
)
price_1d

Unnamed: 0_level_0,issue,preclose,open,high,low,close,numTrades,volume,value,adj,ret,is_limit_buy,is_limit_sell
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
2023-01-03,000001,13.16,13.20,13.85,13.05,13.77,153969.0,219412794.0,2.971547e+09,113.9362,0.046353,0.0,0.0
2023-01-04,000001,13.77,13.71,14.42,13.63,14.32,145553.0,218968253.0,3.110729e+09,113.9362,0.039942,0.0,0.0
2023-01-05,000001,14.32,14.40,14.74,14.37,14.48,110434.0,166542518.0,2.417272e+09,113.9362,0.011173,0.0,0.0
2023-01-06,000001,14.48,14.50,14.72,14.48,14.62,88527.0,119574471.0,1.747915e+09,113.9362,0.009669,0.0,0.0
2023-01-09,000001,14.62,14.75,14.88,14.52,14.80,85023.0,105765911.0,1.561368e+09,113.9362,0.012312,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
2023-12-25,689009,31.06,30.85,31.20,30.06,30.08,5695.0,3591121.0,1.096494e+08,1.0000,-0.031552,0.0,0.0
2023-12-26,689009,30.08,30.14,30.25,26.00,27.85,13831.0,9042296.0,2.519455e+08,1.0000,-0.074136,0.0,0.0
2023-12-27,689009,27.85,27.90,28.89,27.18,28.89,13530.0,5488847.0,1.551564e+08,1.0000,0.037343,0.0,0.0
2023-12-28,689009,28.89,28.58,29.85,28.44,29.20,9638.0,5027247.0,1.472011e+08,1.0000,0.010730,0.0,0.0


### 指数数据

In [4]:
# One-minute bars of index 000905.
hs500 = feather.read_dataframe('../data/IndexPriceK1m_000905.feather')
# Bar-over-bar return, computed before the date filter so the first kept bar
# still has a predecessor to compare against.
hs500['ret_index'] = hs500['close'].div(hs500['close'].shift(1)).sub(1)

hs500['date'] = pd.to_datetime(hs500['date'], format='ISO8601')
hs500 = hs500.loc[hs500['date'].between(start_date, end_date)].set_index('date')

# Relabel every bar one minute earlier. Plain "t - 100" is wrong on the hour
# (100000 - 100 = 99900), so those four labels are overridden explicitly.
trade_time = hs500['time']
map_trade_time = {t: t - 100 for t in trade_time}
map_trade_time.update({
    100000: 95900,
    110000: 105900,
    140000: 135900,
    150000: 145900,
})
hs500['time'] = hs500['time'].map(map_trade_time.get)

hs500

Unnamed: 0_level_0,issue,time,open,high,low,close,volume,total_turnover,ret_index
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2023-01-03,000905,93000,5861.5504,5866.7705,5848.1730,5866.7705,271447600.0,3.122681e+09,0.000393
2023-01-03,000905,93100,5867.2028,5869.3492,5864.7310,5868.4264,121567900.0,1.577402e+09,0.000282
2023-01-03,000905,93200,5868.7400,5873.2643,5868.5401,5869.3010,119083100.0,1.507961e+09,0.000149
2023-01-03,000905,93300,5868.8271,5872.4702,5867.7116,5869.2914,106960500.0,1.392936e+09,-0.000002
2023-01-03,000905,93400,5869.1081,5873.9350,5869.1081,5871.6361,97447000.0,1.279079e+09,0.000399
...,...,...,...,...,...,...,...,...,...
2023-12-29,000905,145500,5427.8942,5427.8942,5426.9078,5427.5558,72972100.0,8.617607e+08,-0.000099
2023-12-29,000905,145600,5427.4492,5429.2000,5427.4492,5428.5533,70521200.0,8.273674e+08,0.000184
2023-12-29,000905,145700,5430.6208,5430.6208,5430.5996,5430.5996,6651600.0,7.802966e+07,0.000377
2023-12-29,000905,145800,5430.5996,5430.5996,5430.5996,5430.5996,0.0,0.000000e+00,0.000000


### 分钟线测试数据

In [5]:
# Sample trading day used to try out the minute-bar loader in this cell.
date = pd.to_datetime('2023-01-03')

def price_1m_read(date:pd.Timestamp):
    """Load one day of one-minute stock bars and attach per-bar excess returns.

    Reads ``../data/StockPriceK1m/{year}/StockPriceK1m_{YYYYMMDD}.feather`` and
    uses two module-level frames: ``price_1d`` (daily bars indexed by date,
    source of ``preclose``) and ``hs500`` (index minute bars indexed by date,
    source of ``ret_index``).

    Args:
        date: Trading day to load. It must provide ``.year`` and
            ``.strftime`` (i.e. a ``pd.Timestamp``) and be present in the
            index of both ``price_1d`` and ``hs500``.

    Returns:
        DataFrame ordered by ``issue`` then ``time`` with columns ``date``,
        ``time``, ``issue``, ``high``, ``low``, ``close``, ``preclose``,
        ``close_prev``, ``ret``, ``ret_index`` and ``excess``.

    Raises:
        KeyError: If ``date`` is missing from ``price_1d`` or ``hs500``.
    """
    year = date.year
    date_str = date.strftime('%Y%m%d')
    price_1m = feather.read_dataframe(f'../data/StockPriceK1m/{year}/StockPriceK1m_{date_str}.feather')
    price_1m['date'] = pd.to_datetime(price_1m['date'], format='ISO8601')

    # List-style .loc always yields a DataFrame, even when a single row
    # matches; scalar .loc would return a Series there and break the merge.
    price_1m = pd.merge(
        price_1m[['date', 'time', 'issue', 'high', 'low', 'close']],
        price_1d.loc[[date], ['issue', 'preclose']],
        on='issue',
        how='left'
    )

    # groupby().shift(1) follows the stored row order, so enforce time order
    # within each issue before deriving the previous bar's close.
    price_1m = price_1m.sort_values(['issue', 'time']).reset_index(drop=True)
    # The first bar of each issue has no previous bar; fall back to the
    # daily preclose (aligned on the fresh RangeIndex).
    price_1m['close_prev'] = (
        price_1m
            .groupby('issue')['close']
            .shift(1).fillna(price_1m['preclose'])
    )
    price_1m['ret'] = price_1m['close'] / price_1m['close_prev'] - 1

    price_1m = pd.merge(
        price_1m,
        hs500.loc[[date], ['time', 'ret_index']],
        on='time',
        how='left'
    )
    # Excess return = stock bar return minus the index return of the same bar.
    price_1m['excess'] = price_1m['ret'] - price_1m['ret_index']

    return price_1m

# Load the sample day; the bare name on the last line displays the frame
# as the cell output.
price_1m = price_1m_read(date)
price_1m

Unnamed: 0,date,time,issue,high,low,close,preclose,close_prev,ret,ret_index,excess
0,2023-01-03,93000,000001,13.23,13.06,13.16,13.16,13.16,0.000000,0.000393,-0.000393
1,2023-01-03,93100,000001,13.18,13.11,13.14,13.16,13.16,-0.001520,0.000282,-0.001802
2,2023-01-03,93200,000001,13.17,13.11,13.15,13.16,13.14,0.000761,0.000149,0.000612
3,2023-01-03,93300,000001,13.19,13.16,13.19,13.16,13.15,0.003042,-0.000002,0.003043
4,2023-01-03,93400,000001,13.20,13.16,13.18,13.16,13.19,-0.000758,0.000399,-0.001158
...,...,...,...,...,...,...,...,...,...,...,...
1177195,2023-01-03,145500,689009,30.70,30.59,30.59,30.49,30.67,-0.002608,-0.000212,-0.002396
1177196,2023-01-03,145600,689009,30.69,30.59,30.69,30.49,30.59,0.003269,0.000039,0.003231
1177197,2023-01-03,145700,689009,30.69,30.69,30.69,30.49,30.69,0.000000,0.000179,-0.000179
1177198,2023-01-03,145800,689009,30.69,30.69,30.69,30.49,30.69,0.000000,0.000000,0.000000


## 计算风险指标

### 计算调整后风险系数

In [6]:
def risk_calc(r:pd.Series, d:int=10):
    """Return the trailing mean of ``r`` minus its current value.

    Args:
        r: Risk measure, one value per row, in the order it should be rolled.
        d: Rolling window length in rows. Shorter leading windows are
            averaged over the rows available (``min_periods=1``).

    Returns:
        Series indexed like ``r``; positive where the current value is below
        its trailing average.
    """
    trailing_mean = r.rolling(window=d, min_periods=1).mean()
    return trailing_mean.sub(r)

### 真实波动

In [7]:
def tr_calc(price_1m:pd.DataFrame):
    """Compute each bar's true range as a fraction of the previous close.

    The true range is the largest of ``high - low``, ``|high - close_prev|``
    and ``|low - close_prev|``; it is then divided by ``close_prev``.

    Args:
        price_1m: Bars with columns ``date``, ``time``, ``issue``, ``high``,
            ``low`` and ``close_prev``. The input frame is not modified.

    Returns:
        New DataFrame with columns ``date``, ``time``, ``issue`` and ``r``.
    """
    prev_close = price_1m['close_prev']
    candidates = pd.concat(
        [
            price_1m['high'] - price_1m['low'],
            (price_1m['high'] - prev_close).abs(),
            (price_1m['low'] - prev_close).abs(),
        ],
        axis=1,
    )
    out = price_1m[['date', 'time', 'issue']].copy()
    out['r'] = candidates.max(axis=1) / prev_close
    return out

## 计算动量反转因子

### 计算单日内反转因子

In [8]:
def rev_calc(date:np.datetime64, price_1m:pd.DataFrame, risk:pd.DataFrame=None, m:int=60, decay:bool=True):
    """Aggregate weighted, risk-scaled excess returns per issue over four intraday windows.

    For each window, every bar contributes ``weight * risk * excess`` and the
    contributions are summed per issue. Windows are selected on bar ``time``
    (inclusive bounds): 93000-102900, 103000-112900, 130000-135900 and
    135400-145300. Each result row is stamped with its window's end time.

    Weights are attached by bar time rather than by row position, so an issue
    that lacks some bars in a window is still handled (its missing bars simply
    contribute nothing) and the row order of ``price_1m`` does not matter.

    Args:
        date: Value written to the ``date`` column of the result.
        price_1m: Minute bars with columns ``time``, ``issue`` and ``excess``.
        risk: Optional frame with columns ``time``, ``issue`` and ``risk``,
            left-joined onto the bars. When ``None`` every bar gets risk -1.
        m: Number of most recent bars of each window that receive a
            non-zero weight; also sets the decay half-life to ``m / 2`` bars.
            With the default of 60 every bar of a 60-bar window is used.
        decay: If true, weights grow by a factor of two every ``m / 2`` bars
            towards the window end and are normalised to sum to one over
            ``m`` bars; otherwise each of the last ``m`` bars gets weight 1.

    Returns:
        DataFrame with columns ``issue``, ``rev``, ``time`` and ``date``,
        sorted by ``issue`` then ``time``.

    Raises:
        ValueError: If ``m`` is smaller than 1.
    """
    if m < 1:
        raise ValueError(f'm must be a positive number of bars, got {m}')

    # base_weight[-1] belongs to the newest bar of a window, base_weight[0]
    # to the bar m - 1 steps earlier.
    if decay:
        H = m / 2
        base_weight = 2 ** ((np.arange(m) - m) / H)
        base_weight = base_weight / base_weight.sum()
    else:
        base_weight = np.ones(m)

    ret_risk = price_1m[['time', 'issue', 'excess']].copy()
    if risk is None:
        ret_risk['risk'] = -1
    else:
        ret_risk = pd.merge(
            ret_risk,
            risk[['time', 'issue', 'risk']],
            on=['time', 'issue'],
            how='left'
        )
    ret_risk = ret_risk.sort_values(['issue', 'time'])

    start_time = np.array([93000, 103000, 130000, 135400])
    end_time = np.array([102900, 112900, 135900, 145300])
    window_frames = []
    for st, et in zip(start_time, end_time):
        rr = ret_risk[(ret_risk['time'] >= st) & (ret_risk['time'] <= et)].copy()

        # Align weights to the bar times observed in this window, newest bar
        # last. Bars older than the last m get no entry and end up at 0.
        bar_times = np.sort(rr['time'].unique())
        used = min(m, len(bar_times))
        time_weight = dict(zip(bar_times[len(bar_times) - used:], base_weight[m - used:]))
        rr['weight'] = rr['time'].map(time_weight).fillna(0.0)

        rr['rev'] = rr['weight'] * rr['risk'] * rr['excess']
        rev_time = (
            rr
                .groupby('issue')['rev']
                .sum().reset_index()
        )
        rev_time['time'] = et
        window_frames.append(rev_time)

    # Single concat instead of growing the frame inside the loop.
    rev = pd.concat(window_frames)
    rev['date'] = date
    rev = rev.sort_values(['issue', 'time']).reset_index(drop=True)
    return rev

### 遍历所有交易日

In [9]:
# Build the factor day by day: load the minute bars, derive the per-bar risk
# measure, turn it into a risk coefficient and write one feather file per day.
trade_date = price_1d.index.sort_values().unique()
label = 'tr'
risk_func = tr_calc
risk_prev = None  # previous day's raw risk measure, used to warm up the rolling window
os.makedirs(f'../data/factor_rev/{label}_rev/', exist_ok=True)
for date in tqdm(trade_date):
    year = date.year
    date_str = date.strftime('%Y%m%d')
    os.makedirs(f'../data/factor_rev/{label}_rev/{year}/', exist_ok=True)
    price_1m = price_1m_read(date)

    risk = risk_func(price_1m)
    # Put each issue's previous-day bars directly before its bars of today.
    # Rolling over the raw concatenation would mix different stocks inside
    # one window and attach the warm-up rows to the wrong issue.
    risk_2d = (
        pd.concat([risk_prev, risk], ignore_index=True)
        .sort_values(['issue', 'date', 'time'])
    )
    risk_prev = risk
    # Rolling statistic evaluated separately for every issue.
    risk_2d['risk'] = risk_2d.groupby('issue')['r'].transform(risk_calc)
    risk = risk_2d[risk_2d['date'] == date]

    rev = rev_calc(date, price_1m, risk=risk, decay=True)
    feather.write_dataframe(rev, f'../data/factor_rev/{label}_rev/{year}/{label}_rev_{date_str}.feather')
    # Release the large per-day frames before the next iteration loads more.
    del price_1m, risk, rev

  0%|          | 0/242 [00:00<?, ?it/s]

In [10]:
def datetime_calc(date:pd.Series, time:pd.Series):
    """Combine dates with HHMMSS-encoded integer times into timestamps.

    Args:
        date: Datetime values giving the calendar day of each row.
        time: Integers encoded as ``HHMMSS`` (e.g. ``93000`` for 09:30:00).

    Returns:
        Series of ``date`` shifted by the time of day decoded from ``time``.
    """
    hours, remainder = divmod(time, 10000)
    minutes, seconds = divmod(remainder, 100)
    offset = pd.to_timedelta(hours * 3600 + minutes * 60 + seconds, unit='s')
    return date + offset

# Stitch the per-day factor files into one frame, add a full timestamp column
# and save the combined result.
daily_frames = []
for date in trade_date:
    year = date.year
    date_str = date.strftime('%Y%m%d')
    daily_frames.append(
        feather.read_dataframe(f'../data/factor_rev/{label}_rev/{year}/{label}_rev_{date_str}.feather')
    )
# One concat at the end; concatenating inside the loop re-copies all earlier
# days on every iteration.
rev = pd.concat(daily_frames, ignore_index=True)
del daily_frames
rev['datetime'] = datetime_calc(rev['date'], rev['time'])
feather.write_dataframe(rev, f'../data/factor_rev/{label}_rev/{label}_rev.feather')