In [1]:
# 全局设置
import os
import datetime as dt

import numpy as np
import pandas as pd

import QuantStudio.api as QS
fd = QS.FactorDB.FactorTools
Factorize = QS.FactorDB.Factorize

In [2]:
# # 生成Demo数据
# TDB = QS.RiskDB.HDF5FRDB(sys_args={"主目录": "../Data/RiskData"})
# TDB.connect();

# SDB = QS.RiskDB.HDF5FRDB(sys_args={"主目录": "D:\\Data\\RiskData"})
# SDB.connect();

# # 生成Demo数据：BarraRiskData
# RT = SDB.getTable("BarraRiskData")
# CovDTs = RT.getDateTime()[-3:]
# IDs = RT.getID()[:500]
# FactorCov = RT.readFactorCov(dts=CovDTs)
# SpecificRisk = RT.readSpecificRisk(dts=CovDTs, ids=IDs)
# DTs = RT.getFactorReturnDateTime(start_dt=CovDTs[0]-dt.timedelta(31), end_dt=CovDTs[-1])
# FactorData = RT.readFactorData(dts=DTs, ids=IDs)
# FactorReturn = RT.readFactorReturn(dts=DTs)
# SpecificReturn = RT.readSpecificReturn(dts=DTs, ids=IDs)
# for iDT in DTs:
#     if iDT in CovDTs:
#         TDB.writeData(table_name="BarraRiskData", idt=iDT, 
#                       factor_data=FactorData.loc[:, iDT], factor_cov=FactorCov.loc[iDT], 
#                       specific_risk=SpecificRisk.loc[iDT], factor_ret=FactorReturn.loc[iDT], 
#                       specific_ret=SpecificReturn.loc[iDT])
#     else:
#         TDB.writeData(table_name="BarraRiskData", idt=iDT, 
#                       factor_data=FactorData.loc[:, iDT], factor_cov=None, 
#                       specific_risk=None, factor_ret=FactorReturn.loc[iDT], 
#                       specific_ret=SpecificReturn.loc[iDT])

# # 删除多余的表
# for iTableName in TDB.TableNames:
#     if iTableName not in ['BarraRiskData']:
#         TDB.deleteTable(iTableName)

# 风险库

In [2]:
# 创建风险库
# RDB = QS.RiskDB.HDF5FRDB(sys_args={"主目录": "../Data/RiskData"}).connect()
RDB = QS.RiskDB.HDF5FRDB(config_file="../config/HDF5FRDBConfig.json").connect()

print(RDB.Args)

{'名称': 'HDF5FRDB', '主目录': '../Data/RiskData'}


## 风险表列表

In [3]:
# 风险库中的所有风险表列表
RDB.TableNames

['BarraRiskData']

# 风险表

## 创建风险表

getTable(table_name, args={}):
* table_name: str, 风险表名
* args: dict, 指定的参数集
* 返回: 风险表对象

In [4]:
# 获取风险库中的某个风险表
RT = RDB.getTable("BarraRiskData", args={})
print(RT.Name)
print(RT.Args)

BarraRiskData
{'遍历模式': {'向前缓冲时点数': 12, '向后缓冲时点数': 0, '缓冲区大小': 300, '遍历时点': [], '遍历ID': []}}


## 元信息

getMetaData(key=None, args={}):
* key: str 或者 None, 元信息字段名
* args: dict, 临时修改的参数集
* 返回: 如果 key=None, Series(index=[字段]); 否则元信息值

In [5]:
# 读取风险表的所有元信息
print(RT.getMetaData())

Series([], dtype: float64)


In [6]:
# 读取风险表的某个元信息
print(RT.getMetaData(key="Description"))

None


## 时点序列

getDateTime(start_dt=None, end_dt=None):
* start_dt: datetime 或者 None, 起始时点
* end_dt: datetime 或者 None, 终止时点
* 返回: list(datetime)

In [7]:
# 获取风险表的时点序列
DTs = RT.getDateTime()
print(DTs[0], " - ", DTs[-1])

2018-07-31 00:00:00  -  2018-09-28 00:00:00


In [8]:
# 给定起始时点和截止时点, 获取风险表的时点序列
RT.getDateTime(start_dt=dt.datetime(2018,8,15), end_dt=dt.datetime(2018,12,1))

[datetime.datetime(2018, 8, 31, 0, 0), datetime.datetime(2018, 9, 28, 0, 0)]

## ID 序列

getID(idt=None):
* idt: datetime 或者 None, 目标时点
* 返回: list(str)

In [9]:
# 获取风险表的 ID 序列
IDs = RT.getID()
print(IDs[0], ", ..., ", IDs[-1])

000001.SZ , ...,  000990.SZ


In [10]:
# 给定目标因子,目标时点, 获取因子表中指定因子指定时点的 ID 序列
IDs = RT.getID(idt=dt.datetime(2018, 7, 31))
print(IDs[0], ", ..., ", IDs[-1])

000001.SZ , ...,  000990.SZ


## 读取数据

readCov(dts, ids=None): 读取风险数据(协方差矩阵)
* dts: list(datetime), 待读取的时点列表
* ids: list(str) 或者 None, 待读取的 ID 列表, None 表示所有 ID
* 返回: Panel(items=[时点], major_axis=[ID], minor_axis=[ID])

readCorr(dts, ids=None): 读取风险数据(相关系数矩阵)
* dts: list(datetime), 待读取的时点列表
* ids: list(str) 或者 None, 待读取的 ID 列表, None 表示所有 ID
* 返回: Panel(items=[时点], major_axis=[ID], minor_axis=[ID])

In [11]:
# 给定时点列表, ID 列表, 获取风险数据
DTs = RT.getDateTime()
Data = RT.readCov(dts=DTs, ids=["000001.SZ", "000002.SZ", "000058.SZ", "000990.SZ"])
# Data = RT.readCorr(dts=DTs, ids=["000001.SZ", "000002.SZ", "000058.SZ", "000990.SZ"])
print("三维数据 : ")
print(Data)

iDT = dt.datetime(2018, 7, 31)
print(f"时点切片 : {iDT}")
print(Data.loc[iDT])

iID = "000001.SZ"
print(f"ID 切片 : {iID}")
print(Data.loc[:, iID, iID])

三维数据 : 
<class 'QuantStudio.Tools.QSObjects.Panel'>
Dimensions: 3 (items) x 4 (major_axis) x 4 (minor_axis)
Items axis: 2018-07-31 00:00:00 to 2018-09-28 00:00:00
Major_axis axis: 000001.SZ to 000990.SZ
Minor_axis axis: 000001.SZ to 000990.SZ
时点切片 : 2018-07-31 00:00:00
           000001.SZ  000002.SZ  000058.SZ  000990.SZ
000001.SZ   0.007933   0.003463   0.003860   0.001773
000002.SZ   0.003463   0.013027   0.005020   0.001881
000058.SZ   0.003860   0.005020   0.012819   0.002399
000990.SZ   0.001773   0.001881   0.002399   0.006138
ID 切片 : 000001.SZ
2018-07-31    0.007933
2018-08-31    0.008083
2018-09-28    0.007616
dtype: float64


# 多因子风险表

## 因子列表

In [12]:
# 获取风险表中的所有因子列表
print(RT.FactorNames)

['Aerospace and Defense', 'Airlines', 'Automobiles and Components', 'Banks', 'Beta', 'Beverages', 'BookToPrice', 'Building Products', 'Chemicals', 'Commercial and Professional Services', 'Construction Materials', 'Construction and Engineering', 'Diversified Financial Services', 'Diversified Metals', 'EarningsYield', 'Electrical Equipment', 'Energy', 'Food Products', 'Food Staples Retail Household Personal Prod', 'Growth', 'Hardware and Semiconductors', 'Health', 'Hotels Restaurants and Leisure', 'Household Durables (non-Homebuilding)', 'Industrial Conglomerates', 'Industrial Machinery', 'Leisure Products Textiles Apparel and Luxury', 'Leverage', 'Liquidity', 'Marine', 'Market', 'Materials', 'Media', 'Momentum', 'NonlinearSize', 'Real Estate', 'ResidualVolatility', 'Retail', 'Road Rail and Transportation Infrastructure', 'Size', 'Software', 'Trading Companies and Distributors', 'Utilities']


## 时点序列

### 因子收益时点序列

getFactorReturnDateTime(start_dt=None, end_dt=None):
* start_dt: datetime 或者 None, 起始时点
* end_dt: datetime 或者 None, 终止时点
* 返回: list(datetime)

In [13]:
# 获取风险表因子收益的时点序列
DTs = RT.getFactorReturnDateTime(dt.datetime(2018,8,15), end_dt=dt.datetime(2018,12,1))
print(DTs[0], " - ", DTs[-1])

2018-08-15 00:00:00  -  2018-09-28 00:00:00


### 特异性收益时点序列

getSpecificReturnDateTime(start_dt=None, end_dt=None):
* start_dt: datetime 或者 None, 起始时点
* end_dt: datetime 或者 None, 终止时点
* 返回: list(datetime)

In [14]:
# 获取风险表特异性收益的时点序列
DTs = RT.getSpecificReturnDateTime(start_dt=dt.datetime(2018,8,15), end_dt=dt.datetime(2018,12,1))
print(DTs[0], " - ", DTs[-1])

2018-08-15 00:00:00  -  2018-09-28 00:00:00


## 读取数据

### 因子协方差阵

readFactorCov(dts):
* dts: list(datetime), 待读取的时点列表
* 返回: Panel(items=[时点], major_axis=[因子], minor_axis=[因子])

In [15]:
# 给定时点列表, 获取因子风险数据
DTs = RT.getDateTime()
Data = RT.readFactorCov(dts=DTs)
print("三维数据 : ")
print(Data)

iDT = dt.datetime(2018, 7, 31)
print(f"时点切片 : {iDT}")
print(Data.loc[iDT].iloc[:5, :5])

三维数据 : 
<class 'QuantStudio.Tools.QSObjects.Panel'>
Dimensions: 3 (items) x 43 (major_axis) x 43 (minor_axis)
Items axis: 2018-07-31 00:00:00 to 2018-09-28 00:00:00
Major_axis axis: Market to Utilities
Minor_axis axis: Market to Utilities
时点切片 : 2018-07-31 00:00:00
                      Market      Size      Beta  Momentum  ResidualVolatility
Market              0.002345 -0.000134  0.000511 -0.000201            0.000320
Size               -0.000134  0.000085 -0.000042  0.000043           -0.000042
Beta                0.000511 -0.000042  0.000198 -0.000059            0.000096
Momentum           -0.000201  0.000043 -0.000059  0.000101           -0.000025
ResidualVolatility  0.000320 -0.000042  0.000096 -0.000025            0.000079


### 特异性风险

readSpecificRisk(dts, ids=None):
* dts: list(datetime), 待读取的时点列表
* ids: list(str) 或者 None, 待读取的ID列表, None 表示所有 ID
* 返回: DataFrame(index=[时点], columns=[ID])

In [16]:
# 读取特异性风险
DTs = RT.getDateTime()
Data = RT.readSpecificRisk(dts=DTs)
print(Data.iloc[:, :5])

            000001.SZ  000002.SZ  000003.SZ  000004.SZ  000005.SZ
2018-07-31   0.061979   0.085968        NaN   0.101172   0.067750
2018-08-31   0.066042   0.086336        NaN   0.103598   0.064798
2018-09-28   0.063228   0.082736        NaN   0.099805   0.062178


### 因子数据

readFactorData(dts, ids=None):
* dts: list(datetime), 待读取的时点列表
* ids: list(str) 或者 None, 待读取的ID列表, None 表示所有 ID
* 返回: Panel(items=[因子], major_axis=[时点], minor_axis=[ID])

In [17]:
# 读取截面因子数据
DTs = RT.getFactorReturnDateTime()
Data = RT.readFactorData(dts=DTs)
print("三维数据 : ")
print(Data)

iDT = dt.datetime(2018, 7, 31)
print(f"时点切片 : {iDT}")
print(Data.loc[:, iDT].iloc[:5, :4])

三维数据 : 
<class 'QuantStudio.Tools.QSObjects.Panel'>
Dimensions: 43 (items) x 64 (major_axis) x 500 (minor_axis)
Items axis: Market to Utilities
Major_axis axis: 2018-07-02 00:00:00 to 2018-09-28 00:00:00
Minor_axis axis: 000001.SZ to 000990.SZ
时点切片 : 2018-07-31 00:00:00
           Market      Size      Beta  Momentum
000001.SZ     1.0  0.874437  0.421393 -0.633064
000002.SZ     1.0  1.362186  0.652622 -0.213705
000003.SZ     1.0       NaN       NaN       NaN
000004.SZ     1.0 -3.393355 -1.176095 -0.918994
000005.SZ     1.0 -3.199509  0.167393 -1.770191


### 因子收益率

readFactorReturn(dts):
* dts: list(datetime), 待读取的时点列表
* 返回: DataFrame(index=[时点], columns=[因子])

In [18]:
# 读取因子收益率
DTs = RT.getFactorReturnDateTime()
Data = RT.readFactorReturn(dts=DTs)
print(Data.iloc[:10, :5])

              Market      Size      Beta  Momentum  ResidualVolatility
2018-07-02 -0.021136 -0.003471 -0.003187  0.001284            0.002730
2018-07-03  0.003245 -0.002876  0.005124 -0.002341           -0.000578
2018-07-04 -0.013776  0.000824 -0.003769 -0.000280           -0.001183
2018-07-05 -0.015353  0.004563 -0.005968 -0.000334           -0.004394
2018-07-06  0.004116  0.001232  0.002294 -0.001037           -0.000923
2018-07-09  0.024431  0.001638  0.008059  0.003069            0.000564
2018-07-10  0.004891 -0.000890  0.003299  0.001223           -0.001186
2018-07-11 -0.017712  0.001920 -0.003708  0.003333           -0.003930
2018-07-12  0.025299  0.000598  0.006507  0.000165            0.001898
2018-07-13  0.003934  0.000280  0.003052  0.002448           -0.000169


### 特异性收益率

readSpecificReturn(dts, ids=None):
* dts: list(datetime), 待读取的时点列表
* ids: list(str) 或者 None, 待读取的ID列表, None 表示所有 ID
* 返回: DataFrame(index=[时点], columns=[ID])

In [19]:
# 读取特异性收益率
DTs = RT.getSpecificReturnDateTime()
Data = RT.readSpecificReturn(dts=DTs)
print(Data.iloc[:10, :5])

            000001.SZ  000002.SZ  000003.SZ  000004.SZ  000005.SZ
2018-07-02  -0.018507  -0.032144        NaN  -0.026324  -0.011967
2018-07-03  -0.006278   0.030201        NaN   0.012241  -0.003303
2018-07-04   0.004347   0.000288        NaN   0.002606   0.017238
2018-07-05   0.003699   0.013294        NaN   0.004203  -0.000689
2018-07-06  -0.001462  -0.000165        NaN  -0.001477   0.005163
2018-07-09   0.002846   0.001136        NaN  -0.027490   0.001318
2018-07-10  -0.008561  -0.006429        NaN  -0.012560  -0.001769
2018-07-11  -0.000336  -0.007929        NaN   0.015468  -0.003026
2018-07-12   0.000300  -0.009165        NaN   0.004866  -0.001185
2018-07-13   0.003241  -0.012005        NaN  -0.029130   0.015952


# 数据写入

## 风险库

writeData(self, table_name, idt, icov, **kwargs):
* table_name: str, 要存入的风险表名称;
* idt: datetime, 风险数据对应的时点, 
* icov: DataFrame, 风险数据

## 多因子风险库

writeData(self, table_name, idt, factor_data=None, factor_cov=None, specific_risk=None, factor_ret=None, specific_ret=None, **kwargs):
* table_name: str, 要存入的风险表名称;
* idt: datetime, 风险数据对应的时点, 
* factor_data: DataFrame, 因子数据,
* factor_cov: DataFrame, 因子风险数据,
* specific_risk: Series, 特异性风险数据,
* factor_ret: DataFrame, 因子收益数据,
* specific_ret: DataFrame, 特异性收益数据,

In [20]:
# 数据写入
RT = RDB.getTable("BarraRiskData")
CovDTs = RT.getDateTime()[-2:]
IDs = RT.getID()[:5]
FactorCov = RT.readFactorCov(dts=CovDTs)
print("待写入的因子风险数据 : ")
print(FactorCov.iloc[:4, :4])
SpecificRisk = RT.readSpecificRisk(dts=CovDTs, ids=IDs)
print("待写入的特异性风险数据 : ")
print(SpecificRisk)
DTs = RT.getFactorReturnDateTime(start_dt=CovDTs[0]-dt.timedelta(31), end_dt=CovDTs[-1])[:10]
FactorData = RT.readFactorData(dts=DTs, ids=IDs)
print("待写入的因子数据 : ")
print(FactorData)
FactorReturn = RT.readFactorReturn(dts=DTs)
print("待写入的因子收益数据 : ")
print(FactorReturn.iloc[:, :4])
SpecificReturn = RT.readSpecificReturn(dts=DTs, ids=IDs)
print("待写入的特异性收益数据 : ")
print(SpecificReturn)
for iDT in DTs:
    if iDT in CovDTs:
        RDB.writeData(table_name="TestTable", idt=iDT, 
                      factor_data=FactorData.loc[:, iDT], factor_cov=FactorCov.loc[iDT], 
                      specific_risk=SpecificRisk.loc[iDT], factor_ret=FactorReturn.loc[iDT], 
                      specific_ret=SpecificReturn.loc[iDT])
    else:
        RDB.writeData(table_name="TestTable", idt=iDT, 
                      factor_data=FactorData.loc[:, iDT], factor_cov=None, 
                      specific_risk=None, factor_ret=FactorReturn.loc[iDT], 
                      specific_ret=SpecificReturn.loc[iDT])

print("写入后的风险表 : ")
print(RDB.TableNames)

待写入的因子风险数据 : 
<class 'QuantStudio.Tools.QSObjects.Panel'>
Dimensions: 2 (items) x 4 (major_axis) x 43 (minor_axis)
Items axis: 2018-08-31 00:00:00 to 2018-09-28 00:00:00
Major_axis axis: Market to Momentum
Minor_axis axis: Market to Utilities
待写入的特异性风险数据 : 
            000001.SZ  000002.SZ  000003.SZ  000004.SZ  000005.SZ
2018-08-31   0.066042   0.086336        NaN   0.103598   0.064798
2018-09-28   0.063228   0.082736        NaN   0.099805   0.062178
待写入的因子数据 : 
<class 'QuantStudio.Tools.QSObjects.Panel'>
Dimensions: 43 (items) x 10 (major_axis) x 5 (minor_axis)
Items axis: Market to Utilities
Major_axis axis: 2018-07-31 00:00:00 to 2018-08-13 00:00:00
Minor_axis axis: 000001.SZ to 000005.SZ
待写入的因子收益数据 : 
              Market      Size      Beta  Momentum
2018-07-31  0.000289 -0.000465  0.000469  0.000139
2018-08-01 -0.017273 -0.000239 -0.002445  0.001304
2018-08-02 -0.020522  0.000783 -0.003595  0.003322
2018-08-03 -0.016425 -0.001145 -0.003346 -0.004359
2018-08-06 -0.020084 -0.00037

# 风险库其他操作

## 设置表的元信息

setTableMetaData(table_name, key=None, value=None, meta_data=None):
* table_name: str, 风险表名
* key: str 或者 None, 元信息字段名
* value: str 或者 None, 元信息字段值
* meta_data: dict 或者 None, 元信息集

In [21]:
# 设置表的元信息
TargetTable = "TestTable"
RT = RDB.getTable(TargetTable)
print("设置前的元信息 : ")
print(RT.getMetaData())
RDB.setTableMetaData(table_name=TargetTable, key="Description", value="这是一张测试表")
print("设置后的元信息 : ")
print(RT.getMetaData())

设置前的元信息 : 
Series([], dtype: float64)
设置后的元信息 : 
Description    这是一张测试表
dtype: object


## 重命名表

renameTable(old_table_name, new_table_name):
* old_table_name: str, 旧风险表名
* new_table_name: str, 新风险表名

In [22]:
# 重命名表
print("重命名前风险表 : ")
print(RDB.TableNames)
RDB.renameTable(old_table_name="TestTable", new_table_name="TestTable_New")
print("重命名后风险表 : ")
print(RDB.TableNames)

重命名前风险表 : 
['BarraRiskData', 'TestTable']
重命名后风险表 : 
['BarraRiskData', 'TestTable_New']


## 删除表

deleteTable(table_name):
* table_name: str, 风险表名

In [23]:
# 删除表
print("删除前风险表 : ")
print(RDB.TableNames)
RDB.deleteTable(table_name="TestTable_New")
print("删除后因子表 : ")
print(RDB.TableNames)

删除前风险表 : 
['BarraRiskData', 'TestTable_New']
删除后因子表 : 
['BarraRiskData']
