# UMR

## 导入模块

In [1]:
import numpy as np
import pandas as pd
import feather
import os
from matplotlib import pyplot as plt

## 读入数据

In [2]:
# Analysis window. The "redundancy" start lies three months before the real
# start; the loading cells filter on it so that extra history is available
# ahead of start_date.
start_date_redundancy = pd.Timestamp('2022-10-01')
start_date = pd.Timestamp('2023-01-01')
end_date = pd.Timestamp('2023-12-31')

### 指数数据

In [3]:
# Load the index bars for 000905 (presumably 1-minute bars, judging by the
# file name) and reduce them to the bar-end times used by the 60-minute data.
hs500 = feather.read_dataframe('../data/IndexPriceK1m_000905.feather')

hs500['date'] = pd.to_datetime(hs500['date'], format='ISO8601')
# .copy() detaches the filtered rows, so the column assignments below write
# to an independent frame instead of a slice of the loaded one
# (avoids pandas' SettingWithCopyWarning / chained-assignment ambiguity).
in_window = (hs500['date'] >= start_date_redundancy) & (hs500['date'] <= end_date)
hs500 = hs500[in_window].copy()

# Relabel every timestamp to 100 less than its value. 'time' appears to be an
# HHMMSS integer, in which case this is "one minute earlier" — TODO confirm.
# The explicit overrides cover the hour boundaries, where a plain "- 100"
# would produce a non-existent clock value (e.g. 100000 - 100 = 99900).
trade_time = hs500['time'].unique()
map_trade_time = {t: t - 100 for t in trade_time}
map_trade_time[100000] = 95900
map_trade_time[110000] = 105900
map_trade_time[140000] = 135900
map_trade_time[150000] = 145900
hs500['time'] = hs500['time'].apply(map_trade_time.get)

# Keep only the rows whose relabelled time is one of these bar ends.
time_60m = [102900, 112900, 135900, 145300, 145900]
hs500 = hs500[hs500['time'].isin(time_60m)].copy()
# Simple return versus the previous kept row; the first row is NaN.
hs500['ret_index'] = hs500['close'] / hs500['close'].shift(1) - 1

### 60 分钟线数据

In [4]:
def normalize(ret: pd.Series) -> pd.Series:
    """Return the z-score of ``ret``: subtract its mean, divide by its std.

    Uses the pandas defaults for ``mean`` and ``std`` (NaNs are skipped and
    the standard deviation is the sample one).
    """
    centered = ret - ret.mean()
    return centered / ret.std()

In [5]:
# Load the cached 60-minute stock bars and restrict them to the analysis window.
# NOTE(review): 'date' is compared with Timestamps without any conversion, so
# the cache presumably stores it as a datetime column already — confirm.
price_60m = feather.read_dataframe('../data/StockPriceK60m_cache.feather')
in_window = (price_60m['date'] >= start_date_redundancy) & (price_60m['date'] <= end_date)
# .copy() detaches the filtered rows, so the column assignments below write
# to an independent frame instead of a slice of the loaded one
# (avoids pandas' SettingWithCopyWarning / chained-assignment ambiguity).
price_60m = price_60m[in_window].copy()

# Close of the preceding row of the same issue.
# Assumes rows are in chronological order within each issue — TODO confirm.
price_60m['close_prev'] = price_60m.groupby('issue')['close'].shift(1)
price_60m['ret'] = price_60m['close'] / price_60m['close_prev'] - 1

# Left join keeps every stock bar; bars without a matching index row get a
# NaN 'ret_index' (and therefore a NaN 'excess').
price_60m = pd.merge(
    price_60m,
    hs500[['date', 'time', 'ret_index']],
    on=['date', 'time'],
    how='left'
)
price_60m['excess'] = price_60m['ret'] - price_60m['ret_index']

# The 145900 bars are dropped only after the returns are computed, so the
# 'close_prev' of the row that followed each of them still refers to its close.
price_60m = price_60m[price_60m['time'] != 145900]

## 计算风险指标

### 计算调整后风险系数

In [6]:
def risk_calc(r: pd.Series, d: int = 10) -> pd.Series:
    """Return the trailing ``d``-row mean of ``r`` minus ``r`` itself.

    The window uses ``min_periods=1``, so the first rows are averaged over
    however many observations are available.
    """
    trailing_mean = r.rolling(window=d, min_periods=1).mean()
    return trailing_mean.sub(r)

def risk_calc_norm(r: pd.Series, d: int = 10) -> pd.Series:
    """Return (trailing mean of ``r`` minus ``r``) divided by the trailing std.

    Both statistics use the same ``d``-row window with ``min_periods=1``.
    The rolling std of a single observation is NaN, so the first element of
    the result is NaN.
    """
    window = r.rolling(d, min_periods=1)
    deviation = window.mean().sub(r)
    return deviation.div(window.std())

### 真实波动

In [7]:
def tr_calc(price_60m: pd.DataFrame) -> pd.DataFrame:
    """Return the true range of each bar relative to the previous close.

    The true range is the largest of ``high - low``, ``|high - close_prev|``
    and ``|low - close_prev|`` (NaN candidates are skipped by the row-wise
    max), divided by ``close_prev``.

    Reads columns ``datetime``, ``issue``, ``high``, ``low`` and
    ``close_prev``; returns a new frame with columns ``datetime``, ``issue``
    and ``r``. The input frame is not modified.
    """
    prev_close = price_60m['close_prev']
    candidates = pd.DataFrame({
        'tr1': price_60m['high'] - price_60m['low'],
        'tr2': (price_60m['high'] - prev_close).abs(),
        'tr3': (price_60m['low'] - prev_close).abs(),
    })
    out = price_60m[['datetime', 'issue']].copy()
    out['r'] = candidates.max(axis=1) / prev_close
    return out

### 平均单笔成交量（取平方根）

In [8]:
def avt_calc(price_60m: pd.DataFrame) -> pd.DataFrame:
    """Return the square root of volume per trade for each bar.

    Reads columns ``datetime``, ``issue``, ``volume`` and ``num_trades``;
    returns a new frame with columns ``datetime``, ``issue`` and ``r``, where
    ``r = sqrt(volume / num_trades)``. The input frame is not modified.
    """
    volume_per_trade = price_60m['volume'].div(price_60m['num_trades'])
    return price_60m[['datetime', 'issue']].assign(r=np.sqrt(volume_per_trade))

### 平均单笔成交额（取平方根）

In [9]:
def avaluet_calc(price_60m: pd.DataFrame) -> pd.DataFrame:
    """Return the square root of traded value per trade for each bar.

    Reads columns ``datetime``, ``issue``, ``value`` and ``num_trades``;
    returns a new frame with columns ``datetime``, ``issue`` and ``r``, where
    ``r = sqrt(value / num_trades)``. The input frame is not modified.
    """
    value_per_trade = price_60m['value'].div(price_60m['num_trades'])
    return price_60m[['datetime', 'issue']].assign(r=np.sqrt(value_per_trade))

## 计算动量反转因子

In [10]:
def remove_neg(risk: pd.Series, sigma: float = 0.) -> pd.Series:
    """Return a copy of ``risk`` with every value above ``-sigma * std`` set to 0.

    ``std`` is the standard deviation of ``risk`` itself. Values at or below
    the threshold, and NaNs, are left unchanged. The input is not modified.
    """
    threshold = -sigma * risk.std()
    above_threshold = risk > threshold
    return risk.mask(above_threshold, 0)

In [11]:
def umr_factor_gen(label: str) -> None:
    """Build the reversal factor selected by ``label`` and write it to feather.

    ``label`` is interpreted by substring:

    * ``'tr'``, ``'avt'`` or ``'avaluet'`` (checked in that order) selects
      ``tr_calc``, ``avt_calc`` or ``avaluet_calc`` as the raw risk measure.
    * ``'excess'`` z-scores ``excess`` across all rows sharing a ``datetime``.
    * ``'risk'`` z-scores ``risk`` across all rows sharing a ``datetime``.
    * ``'only'`` sets every positive ``risk`` to 0.

    Reads the module-level ``price_60m`` frame and writes the result, including
    the new ``rev`` column, to
    ``../data/factor_rev_60m/{label}_rev_60m.feather``.

    Raises:
        ValueError: if ``label`` contains none of the risk-measure keys.
            (Previously this surfaced as an ``UnboundLocalError`` on
            ``r_calc``.)
    """
    # Ordered dispatch: the first key found in the label wins, matching the
    # order of the original if/elif chain.
    risk_sources = (
        ('tr', tr_calc),
        ('avt', avt_calc),
        ('avaluet', avaluet_calc),
    )
    for key, calc in risk_sources:
        if key in label:
            r_calc = calc
            break
    else:
        raise ValueError(
            f"label {label!r} must contain one of 'tr', 'avt' or 'avaluet'"
        )

    risk = r_calc(price_60m)
    # Per issue: trailing mean of the raw measure minus its current value.
    risk['risk'] = risk.groupby('issue')['r'].transform(risk_calc)

    ret_risk = pd.merge(
        price_60m[['datetime', 'issue', 'excess']],
        risk[['datetime', 'issue', 'risk']],
        on=['datetime', 'issue'],
        how='outer'
    )
    # Alternative kept from the original: constant risk weight.
    # ret_risk = price_60m[['datetime', 'issue', 'excess']].copy()
    # ret_risk['risk'] = -1
    if 'excess' in label:
        ret_risk['excess'] = ret_risk.groupby('datetime')['excess'].transform(normalize)
    if 'risk' in label:
        ret_risk['risk'] = ret_risk.groupby('datetime')['risk'].transform(normalize)
    if 'only' in label:
        ret_risk.loc[ret_risk['risk'] > 0, 'risk'] = 0
        # Alternative kept from the original: threshold at one std instead of 0.
        # ret_risk['risk'] = ret_risk.groupby('datetime')['risk'].transform(remove_neg, sigma=1.0)

    def rev_calc(ret_risk: pd.DataFrame, weight: np.ndarray) -> pd.Series:
        """Weighted trailing sum of ``risk * excess`` over one issue's rows.

        Truncating the 'full' convolution to the input length makes it
        causal: output[t] = sum_k weight[k] * x[t - k].
        NOTE: a NaN in the product makes every output whose window covers it
        NaN as well, because NaN times any weight is NaN.
        """
        rev = np.convolve(ret_risk['risk'] * ret_risk['excess'], weight, mode='full')[:len(ret_risk)]
        return pd.Series(rev, index=ret_risk.index, name='rev')

    def rev_calc_nodecay(ret_risk: pd.DataFrame, m: int = 60) -> pd.Series:
        """Unweighted alternative: trailing ``m``-row sum of ``excess * risk``.

        Not used by default; see the commented-out call below.
        """
        rev = ret_risk['excess'].mul(ret_risk['risk']).rolling(m, min_periods=1).sum()
        return rev

    # Exponentially decaying weights over m rows: the weight halves every
    # H = m / 2 rows, and the weights are scaled to sum to 1.
    m = 60
    H = m / 2
    w = 2 ** (-np.arange(m) / H)
    w /= w.sum()
    # ret_risk['rev'] = ret_risk.groupby('issue', group_keys=False)[['excess', 'risk']].apply(rev_calc_nodecay)
    ret_risk['rev'] = ret_risk.groupby('issue', group_keys=False)[['excess', 'risk']].apply(rev_calc, weight=w)

    os.makedirs('../data/factor_rev_60m/', exist_ok=True)
    feather.write_dataframe(ret_risk, f'../data/factor_rev_60m/{label}_rev_60m.feather')

In [12]:
# Generate every variant of the chosen core risk measure: four normalisation
# choices, each with and without the '_only' suffix.
# The labels are built with plain Python string concatenation, so this does
# not depend on np.concat (NumPy >= 2.0 only) or on '+' being defined for
# NumPy string arrays.
core = 'avt'
suffixes = ['', '_excess_norm', '_risk_norm', '_excess_risk_norm']
labels = np.array(
    [core + suffix for suffix in suffixes]
    + [core + suffix + '_only' for suffix in suffixes]
)
for label in labels:
    umr_factor_gen(label)
    print(label)

avt
avt_excess_norm
avt_risk_norm
avt_excess_risk_norm
avt_only
avt_excess_norm_only
avt_risk_norm_only
avt_excess_risk_norm_only
