# UMR

## 导入模块

In [1]:
import numpy as np
import pandas as pd
import feather
from matplotlib import pyplot as plt
import os
from tqdm.notebook import tqdm

%load_ext line_profiler

## 读入数据

In [2]:
# Inclusive first and last calendar day of the sample used by the cells below.
start_date, end_date = (
    pd.to_datetime(day) for day in ('2023-01-01', '2023-12-31')
)

### 日线数据

In [3]:
# Daily bar table, restricted to [start_date, end_date] (both ends inclusive)
# and indexed by 'date' so that one day's rows can be pulled with .loc[date].
price_1d = feather.read_dataframe('../data/StockPriceK1d_20241231.feather')
in_sample = (price_1d['date'] >= start_date) & (price_1d['date'] <= end_date)
price_1d = price_1d[in_sample].set_index('date')

### 指数数据

In [4]:
# 1-minute index bars (the file name refers to index 000905).
hs500 = feather.read_dataframe('../data/IndexPriceK1m_000905.feather')

# Bar-over-bar simple return. It is computed before the date filter, so the
# first bar inside the range can still use a preceding bar if the file has one.
prev_index_close = hs500['close'].shift(1)
hs500['ret_index'] = hs500['close'] / prev_index_close - 1

hs500['date'] = pd.to_datetime(hs500['date'], format='ISO8601')
in_range = (hs500['date'] >= start_date) & (hs500['date'] <= end_date)
hs500 = hs500[in_range].set_index('date')

# Relabel every bar time (HHMMSS integer) one minute earlier. Subtracting 100
# is only valid inside an hour, so the four on-the-hour labels are overridden
# with the matching HH5900 value.
# NOTE(review): presumably this aligns the index labels with the stock minute
# bars they are later merged with on 'time' - confirm against the data files.
trade_time = hs500['time']
map_trade_time = {t: t - 100 for t in trade_time}
map_trade_time.update({
    100000: 95900,
    110000: 105900,
    140000: 135900,
    150000: 145900,
})
hs500['time'] = hs500['time'].apply(map_trade_time.get)

### 分钟线测试数据

In [5]:
# Single day used to try out the minute-bar functions in the cells below.
date = pd.to_datetime('2023-01-03')

def price_1m_read(date:np.datetime64):
    """Load one day's 1-minute stock bars and add return columns.

    Reads ``../data/StockPriceK1m/<year>/StockPriceK1m_<YYYYMMDD>.feather`` and
    relies on the notebook-level frames ``price_1d`` (indexed by date) and
    ``hs500`` (indexed by date, with ``time`` and ``ret_index`` columns).

    Parameters
    ----------
    date
        Day to load. The code calls ``date.year`` and ``date.strftime``, and
        uses the value as a ``.loc`` key on ``price_1d`` and ``hs500``.

    Returns
    -------
    pd.DataFrame
        Columns ``date, time, issue, high, low, close`` from the minute file
        plus ``preclose``, ``close_prev``, ``ret``, ``ret_index``, ``excess``.
    """
    yyyy = date.year
    yyyymmdd = date.strftime('%Y%m%d')
    bars = feather.read_dataframe(f'../data/StockPriceK1m/{yyyy}/StockPriceK1m_{yyyymmdd}.feather')
    bars['date'] = pd.to_datetime(bars['date'], format='ISO8601')

    # Attach the daily table's 'preclose' for this date to every bar of the issue.
    daily_preclose = price_1d.loc[date, ['issue', 'preclose']]
    bars = pd.merge(
        bars[['date', 'time', 'issue', 'high', 'low', 'close']],
        daily_preclose,
        on='issue',
        how='left'
    )

    # Previous bar's close within each issue, in the row order of the file;
    # the first bar of an issue has no predecessor and falls back to 'preclose'.
    lagged_close = bars.groupby('issue')['close'].shift(1)
    bars['close_prev'] = lagged_close.fillna(bars['preclose'])
    bars['ret'] = bars['close'] / bars['close_prev'] - 1

    # Bring in the index return of the same bar time and subtract it.
    # (Assign bars['ret'] to 'excess' instead to work with raw returns.)
    index_ret = hs500.loc[date, ['time', 'ret_index']]
    bars = pd.merge(
        bars,
        index_ret,
        on='time',
        how='left'
    )
    bars['excess'] = bars['ret'] - bars['ret_index']

    return bars

# Minute bars with return columns for the sample day, used by the cells below.
price_1m = price_1m_read(date)

## 计算风险指标

### 计算调整后风险系数

In [6]:
def risk_calc(r:pd.Series, d:int=10):
    """Return the trailing mean of ``r`` minus ``r`` itself.

    Parameters
    ----------
    r
        Series of per-bar values, in chronological order.
    d
        Rolling window length. Windows with fewer than ``d`` observations are
        still averaged (``min_periods=1``), so the first element is always 0.

    Returns
    -------
    pd.Series
        ``rolling_mean(r, d) - r``, on the index of ``r``.
    """
    trailing_mean = r.rolling(window=d, min_periods=1).mean()
    return trailing_mean - r

### 真实波动

In [7]:
def tr_calc(price_1m:pd.DataFrame):
    """Compute each bar's true range as a fraction of the previous close.

    The range of a bar is the largest of ``high - low``,
    ``|high - close_prev|`` and ``|low - close_prev|``; it is divided by
    ``close_prev`` and returned as column ``r``.

    Parameters
    ----------
    price_1m
        Frame with columns ``date, time, issue, high, low, close_prev``.
        It is not modified.

    Returns
    -------
    pd.DataFrame
        Columns ``date, time, issue, r``.
    """
    bars = price_1m.copy()
    prev_close = bars['close_prev']
    candidates = {
        'tr1': bars['high'] - bars['low'],
        'tr2': np.abs(bars['high'] - prev_close),
        'tr3': np.abs(bars['low'] - prev_close),
    }
    for column, values in candidates.items():
        bars[column] = values
    bars['r'] = bars[list(candidates)].max(axis=1) / prev_close
    return bars[['date', 'time', 'issue', 'r']]
# Range ratio 'r' of every bar of the sample day.
tr = tr_calc(price_1m)
# Run risk_calc on each issue's 'r' series separately and store it as 'risk'.
tr['risk'] = tr.groupby('issue')['r'].transform(risk_calc)

## 计算动量反转因子

### 计算单日内反转因子

In [8]:
def rev_calc(date:np.datetime64, price_1m:pd.DataFrame, risk:pd.DataFrame=None, m:int=60, decay:bool=True):
    """Sum risk-weighted excess returns per issue over four intraday windows.

    The windows are fixed (first bar - last bar, HHMMSS integers):
    93000-102900, 103000-112900, 130000-135900 and 135400-145300.  Inside a
    window every bar contributes ``weight * risk * excess``; contributions are
    summed per issue and the row is stamped with the window's last bar time.

    Parameters
    ----------
    date
        Value written unchanged into the ``date`` column of the result.
    price_1m
        Minute bars with at least the columns ``time``, ``issue``, ``excess``.
        Row order does not matter.
    risk
        Optional frame with columns ``time``, ``issue``, ``risk``; it is
        left-joined onto the bars.  When omitted every bar gets ``risk = -1``.
    m
        Look-back length in bars.  Per issue and window only the last ``m``
        bars are used, and the decay half-life is ``m / 2`` bars.  With the
        default of 60 a complete window is used in full.
    decay
        If true, the bar ``k`` positions before the window end (``k = 0`` is
        the last bar) gets weight ``2 ** (-(k + 1) / (m / 2))`` divided by the
        sum of all ``m`` such weights.  If false, every used bar has weight 1.

    Returns
    -------
    pd.DataFrame
        Columns ``issue``, ``rev``, ``time``, ``date``, sorted by issue and
        time, with a fresh ``RangeIndex``.

    Raises
    ------
    ValueError
        If ``m`` is smaller than 1.
    """
    if m < 1:
        raise ValueError('m must be at least 1')

    half_life = m / 2
    decay_weight = 2 ** ((np.arange(m) - m) / half_life)
    decay_weight = decay_weight / decay_weight.sum()

    ret_risk = price_1m[['time', 'issue', 'excess']].copy()
    if risk is None:
        ret_risk['risk'] = -1
    else:
        ret_risk = pd.merge(
            ret_risk,
            risk[['time', 'issue', 'risk']],
            on=['time', 'issue'],
            how='left'
        )
    # Chronological order within each issue is required for the positional
    # weights below, whichever branch produced ret_risk.
    ret_risk = ret_risk.sort_values(['issue', 'time'])

    start_time = np.array([93000, 103000, 130000, 135400])
    end_time = np.array([102900, 112900, 135900, 145300])
    rev_parts = []
    for st, et in zip(start_time, end_time):
        in_window = (ret_risk['time'] >= st) & (ret_risk['time'] <= et)
        rr = ret_risk[in_window.to_numpy()]

        # Position of each bar counted back from the issue's last bar in the
        # window.  Weights are looked up per row, so an issue with missing
        # bars keeps the heaviest weight on its most recent bar instead of
        # breaking a fixed-length weight vector.
        bars_from_end = rr.groupby('issue').cumcount(ascending=False).to_numpy()
        used = bars_from_end < m
        rr = rr[used].copy()
        if not decay:
            rr['weight'] = 1
        else:
            rr['weight'] = decay_weight[m - 1 - bars_from_end[used]]

        rr['rev'] = rr['weight'] * rr['risk'] * rr['excess']
        rev_time = (
            rr
                .groupby('issue')['rev']
                .sum().reset_index()
        )
        rev_time['time'] = et
        rev_parts.append(rev_time)

    # One concat for all windows instead of growing a frame inside the loop.
    rev = pd.concat(rev_parts)
    rev['date'] = date
    rev = rev.sort_values(['issue', 'time']).reset_index(drop=True)
    return rev

### 性能测试

In [9]:
# %lprun -f price_1m_read price_1m_read(date)

In [10]:
# %lprun -f tr_calc tr_calc(price_1m)

In [11]:
# %lprun -f rev_calc rev_calc(date, price_1m, tr)

### 遍历所有交易日

In [12]:
# Build and store the factor for every day present in the daily table.
trade_date = price_1d.index.sort_values().unique()
# 'label' names the output folder/files; 'risk_func' produces the per-bar 'r' column.
label = 'tr'
risk_func = tr_calc
# Previous day's risk_func output; None until the first day has been processed.
risk_prev = None
os.makedirs(f'../data/factor_rev/{label}_rev/', exist_ok=True)
for date in tqdm(trade_date):
    year = date.year
    date_str = date.strftime('%Y%m%d')
    os.makedirs(f'../data/factor_rev/{label}_rev/{year}/', exist_ok=True)
    price_1m = price_1m_read(date)
    
    risk = risk_func(price_1m)
    # Put the previous day's bars ahead of today's so that the rolling mean in
    # risk_calc can reach back across the day boundary for the first bars.
    risk_2d = pd.concat([risk_prev, risk])
    # Keep today's raw output for the next iteration before 'risk' is rebound.
    risk_prev = risk
    risk_2d['risk'] = risk_2d.groupby('issue')['r'].transform(risk_calc)
    # Only today's rows are passed on; the previous day was context only.
    risk = risk_2d[risk_2d['date'] == date]
    
    rev = rev_calc(date, price_1m, risk=risk, decay=True)
    feather.write_dataframe(rev, f'../data/factor_rev/{label}_rev/{year}/{label}_rev_{date_str}.feather')
    # Drop the day's frames before the next day is loaded.
    del price_1m, risk, rev

  0%|          | 0/242 [00:00<?, ?it/s]

In [13]:
def datetime_calc(date:pd.Series, time:pd.Series):
    """Combine a date column with an HHMMSS-encoded time column.

    Parameters
    ----------
    date
        Datetime values giving the day.
    time
        Clock time encoded as the number ``HHMMSS`` (e.g. ``93000`` for 09:30:00).

    Returns
    -------
    pd.Series
        ``date`` shifted forward by the decoded hours, minutes and seconds.
    """
    hours = time // 10000
    minutes = (time % 10000) // 100
    seconds = time % 100
    # Express the time of day as one second count and convert it in one step.
    seconds_into_day = hours * 3600 + minutes * 60 + seconds
    return date + pd.to_timedelta(seconds_into_day, 's')

# Stitch the per-day factor files into one table and add a full timestamp.
# The frames are collected first and concatenated once: calling pd.concat
# inside the loop copies all previously loaded rows again on every iteration.
rev_frames = []
for date in trade_date:
    year = date.year
    date_str = date.strftime('%Y%m%d')
    rev_daily = feather.read_dataframe(f'../data/factor_rev/{label}_rev/{year}/{label}_rev_{date_str}.feather')
    rev_frames.append(rev_daily)
# ignore_index gives the combined table a fresh 0..n-1 index straight away.
rev = pd.concat(rev_frames, ignore_index=True)
rev['datetime'] = datetime_calc(rev['date'], rev['time'])
feather.write_dataframe(rev, f'../data/factor_rev/{label}_rev/{label}_rev.feather')