# UMR

## 导入模块

In [1]:
import numpy as np
import pandas as pd
import feather
import os

## 读入数据

In [2]:
# Sample window for the factor. The "redundancy" start is an earlier date used
# when filtering the raw data, so that extra history precedes `start_date`.
start_date = pd.Timestamp('2023-01-01')
start_date_redundancy = pd.Timestamp('2022-10-01')
end_date = pd.Timestamp('2023-12-31')

### 指数数据

In [3]:
# Index daily bars. NOTE(review): pickle files execute code on load, so this
# path must only ever point at a trusted file.
hs500 = pd.read_pickle('../data/IndexPriceK1d_000905.pkl')
hs500['date'] = pd.to_datetime(hs500['date'], format='ISO8601')

# Simple close-to-close return, computed on the full history *before* the
# window filter so the first in-window row still has a previous close.
# Relies on the rows already being in date order.
hs500['ret_index'] = hs500['close'].div(hs500['close'].shift(1)).sub(1)

# Keep the (inclusive) window and index by date so the returns can be looked
# up by date.
hs500 = (
    hs500
    .loc[hs500['date'].between(start_date_redundancy, end_date)]
    .set_index('date')
)
hs500

Unnamed: 0_level_0,open,high,low,close,value,volume,ret_index
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2022-10-10,5729.7548,5758.0415,5618.8038,5635.7109,9.732094e+10,9.746934e+09,-0.013712
2022-10-11,5642.4791,5703.2705,5606.9944,5684.5063,8.873310e+10,8.657147e+09,0.008658
2022-10-12,5680.3641,5820.4537,5594.2971,5820.4537,1.108164e+11,1.061018e+10,0.023915
2022-10-13,5789.6610,5876.4759,5780.1758,5830.3355,1.149222e+11,1.057223e+10,0.001698
2022-10-14,5866.8591,5996.0440,5866.8591,5973.4389,1.384630e+11,1.182122e+10,0.024545
...,...,...,...,...,...,...,...
2023-12-25,5301.5735,5339.6644,5288.4665,5315.3746,8.101815e+10,6.591877e+09,0.000694
2023-12-26,5312.7999,5313.9193,5238.0417,5258.2841,8.091036e+10,6.753201e+09,-0.010741
2023-12-27,5261.5699,5293.7973,5235.4559,5285.5301,8.425344e+10,7.539792e+09,0.005182
2023-12-28,5280.6904,5400.2364,5275.4028,5382.7966,1.268962e+11,1.076992e+10,0.018402


### 日线数据

In [4]:
# Stock daily bars, restricted to the (inclusive) sample window.
price_1d = feather.read_dataframe('../data/StockPriceK1d_20241231.feather')
# .copy() detaches the filtered rows from the loaded frame; without it the
# column assignments below operate on a slice and pandas may emit
# SettingWithCopyWarning.
price_1d = price_1d.loc[
    price_1d['date'].between(start_date_redundancy, end_date)
].copy()

# Look up the index return by date (hs500 is indexed by date); dates missing
# from hs500 yield NaN.
price_1d['ret_index'] = price_1d['date'].map(hs500['ret_index'])
# Excess return of each stock over the index on the same day.
price_1d['excess'] = price_1d['ret'] - price_1d['ret_index']
price_1d

Unnamed: 0,date,issue,preclose,open,high,low,close,numTrades,volume,value,adj,ret,is_limit_buy,is_limit_sell,ret_index,excess
5035,2022-10-10,000001,11.84,11.70,11.77,11.46,11.47,55618.0,96608018.0,1.119090e+09,113.9362,-0.031250,0.0,0.0,-0.013712,-0.017538
5036,2022-10-11,000001,11.47,11.54,11.58,11.41,11.48,39149.0,41525337.0,4.767492e+08,113.9362,0.000872,0.0,0.0,0.008658,-0.007786
5037,2022-10-12,000001,11.48,11.45,11.62,11.35,11.60,43996.0,55957243.0,6.417765e+08,113.9362,0.010453,0.0,0.0,0.023915,-0.013462
5038,2022-10-13,000001,11.60,11.51,11.54,11.31,11.34,57916.0,85261597.0,9.700217e+08,113.9362,-0.022414,0.0,0.0,0.001698,-0.024112
5039,2022-10-14,000001,11.34,11.45,11.63,11.40,11.53,62135.0,109606158.0,1.265487e+09,113.9362,0.016755,0.0,0.0,0.024545,-0.007790
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
14923890,2023-12-25,689009,31.06,30.85,31.20,30.06,30.08,5695.0,3591121.0,1.096494e+08,1.0000,-0.031552,0.0,0.0,0.000694,-0.032245
14923891,2023-12-26,689009,30.08,30.14,30.25,26.00,27.85,13831.0,9042296.0,2.519455e+08,1.0000,-0.074136,0.0,0.0,-0.010741,-0.063395
14923892,2023-12-27,689009,27.85,27.90,28.89,27.18,28.89,13530.0,5488847.0,1.551564e+08,1.0000,0.037343,0.0,0.0,0.005182,0.032161
14923893,2023-12-28,689009,28.89,28.58,29.85,28.44,29.20,9638.0,5027247.0,1.472011e+08,1.0000,0.010730,0.0,0.0,0.018402,-0.007672


## 计算风险指标

### 计算调整后风险系数

In [5]:
def risk_calc(r:pd.Series, d:int=10) -> pd.Series:
    """Return the trailing mean of ``r`` minus ``r`` itself.

    Parameters
    ----------
    r : pd.Series
        Raw risk proxy, taken in the order given.
    d : int, default 10
        Rolling window length in rows. The window includes the current row,
        and shorter windows at the start are allowed (``min_periods=1``).

    Returns
    -------
    pd.Series
        ``rolling_mean(r, d) - r``, aligned to the index of ``r``. Positive
        where the current value is below its recent average.
    """
    trailing_mean = r.rolling(window=d, min_periods=1).mean()
    return trailing_mean - r

### 真实波动

In [6]:
def tr_calc(price_1d:pd.DataFrame) -> pd.DataFrame:
    """True range relative to the previous close, per row.

    Parameters
    ----------
    price_1d : pd.DataFrame
        Must provide 'date', 'issue', 'high', 'low' and 'preclose'.

    Returns
    -------
    pd.DataFrame
        Columns 'date', 'issue', 'r', where ``r`` is
        ``max(high - low, |high - preclose|, |low - preclose|) / preclose``.
    """
    prev_close = price_1d['preclose']
    top, bottom = price_1d['high'], price_1d['low']

    # The three candidate ranges; the row-wise max skips NaN candidates.
    candidates = pd.DataFrame({
        'tr1': top - bottom,
        'tr2': (top - prev_close).abs(),
        'tr3': (bottom - prev_close).abs(),
    })
    true_range = candidates.max(axis=1)

    return price_1d[['date', 'issue']].assign(r=true_range / prev_close)

### 平均单笔成交量

In [7]:
def avt_calc(price_1d:pd.DataFrame) -> pd.DataFrame:
    """Square root of the average volume per trade, per row.

    Parameters
    ----------
    price_1d : pd.DataFrame
        Must provide 'date', 'issue', 'volume' and 'numTrades'.

    Returns
    -------
    pd.DataFrame
        Columns 'date', 'issue', 'r', where ``r = sqrt(volume / numTrades)``.
    """
    volume_per_trade = price_1d['volume'].div(price_1d['numTrades'])
    return price_1d[['date', 'issue']].assign(r=np.sqrt(volume_per_trade))

### 平均单笔成交额

In [8]:
def avaluet_calc(price_1d:pd.DataFrame) -> pd.DataFrame:
    """Square root of the average traded value per trade, per row.

    Parameters
    ----------
    price_1d : pd.DataFrame
        Must provide 'date', 'issue', 'value' and 'numTrades'.

    Returns
    -------
    pd.DataFrame
        Columns 'date', 'issue', 'r', where ``r = sqrt(value / numTrades)``.
    """
    value_per_trade = price_1d['value'].div(price_1d['numTrades'])
    return price_1d[['date', 'issue']].assign(r=np.sqrt(value_per_trade))

## 计算动量反转因子

In [9]:
# Choose the risk proxy. `label` names the output file and must be kept in
# step with `r_calc` by hand.
label = 'avaluet'
r_calc = avaluet_calc

# Raw proxy per (date, issue), then the adjusted coefficient computed within
# each issue. Relies on the rows of each issue already being in date order.
risk = r_calc(price_1d)
risk = risk.assign(risk=risk.groupby('issue')['r'].transform(risk_calc))

# Put excess return and risk coefficient side by side on (date, issue).
ret_risk = price_1d[['date', 'issue', 'excess']].merge(
    risk[['date', 'issue', 'risk']],
    how='outer',
    on=['date', 'issue'],
)
# Baseline kept for reference: a constant risk of -1 instead of the proxy.
# ret_risk = price_1d[['date', 'issue', 'excess']].copy()
# ret_risk['risk'] = -1

def rev_calc(ret_risk:pd.DataFrame, weight:np.ndarray) -> pd.Series:
    """Weighted trailing sum of ``risk * excess`` over the rows of one group.

    Parameters
    ----------
    ret_risk : pd.DataFrame
        Must provide 'risk' and 'excess'. Rows are used in the order given.
    weight : np.ndarray
        1-D kernel. ``weight[0]`` multiplies the current row and ``weight[k]``
        the row ``k`` positions earlier.

    Returns
    -------
    pd.Series
        Named 'rev', indexed like ``ret_risk``. Early rows use only the
        history that is available. An empty input yields an empty series.

    Notes
    -----
    A NaN in ``risk * excess`` makes every output whose window covers that
    row NaN as well.
    """
    n_rows = len(ret_risk)
    if n_rows == 0:
        # np.convolve raises on empty input; return an empty result instead.
        return pd.Series([], index=ret_risk.index, name='rev', dtype=float)

    signal = (ret_risk['risk'] * ret_risk['excess']).to_numpy()
    # Use the `weight` argument rather than a notebook-level global, so the
    # function does not depend on cell execution order.
    # 'full' convolution truncated to n_rows gives the causal (trailing) filter.
    rev = np.convolve(signal, weight, mode='full')[:n_rows]
    return pd.Series(rev, index=ret_risk.index, name='rev')

# Kernel length in rows, and the half-life of the decay (half the length).
m = 60
H = m / 2

# w[k] = 2 ** (-k / H): the weight halves every H rows back in time.
# Normalised so the weights sum to 1.
w = np.power(2, -np.arange(m) / H)
w = w / w.sum()

# Apply the weighted trailing sum separately within each issue. The
# per-group results keep the original row index, so the assignment aligns.
ret_risk['rev'] = (
    ret_risk
    .groupby('issue', group_keys=False)[['excess', 'risk']]
    .apply(rev_calc, weight=w)
)
# Unweighted alternative kept for reference (plain rolling sum over m rows,
# not grouped by issue):
# ret_risk['rev'] = ret_risk['excess'].mul(ret_risk['risk']).rolling(m, min_periods=1).sum()

In [10]:
# Make sure the output directory exists (no error if it already does).
os.makedirs(
    '../data/factor_rev_1d/',
    exist_ok=True,
)
# Persist the factor table; the file name carries the chosen risk label.
feather.write_dataframe(
    ret_risk,
    f'../data/factor_rev_1d/{label}_rev_1d.feather',
)