In [1]:
import os
# Process-level settings, set before the project imports in the following cells
# (presumably read by jdw/ultron at import time — TODO confirm).
# DB_URL is only given a fallback here, so a connection string already exported
# in the environment is respected instead of being overwritten.
# NOTE(review): the fallback embeds a plaintext password; keep it to a local
# development database and supply real credentials through the environment.
os.environ.setdefault(
    'DB_URL', 'mysql+mysqlconnector://ultron:123456@127.0.0.1/ultron')
os.environ['IGNORE_WARNINGS'] = '0'

In [2]:
import pandas as pd

In [3]:
from jdw import SurfaceAPI



/root/ultron/2023-01-04.log


In [4]:
from ultron.factor.data.processing import factor_processing
from ultron.factor.data.standardize import standardize
from ultron.factor.data.winsorize import winsorize_normal
from ultron.factor.fitness.metrics import Metrics

In [5]:
### Factors under study, stock universe and sample window
factor_columns = ['aiEtopZ180', 'aiDaPE60']
universe = 'zz500'
begin_date, end_date = '2021-01-01', '2022-08-31'

### Custom neutralisation: one risk style plus the industry columns
risk_styles = ['SIZE']

industry_styles = [
    'Bank', 'RealEstate', 'Health', 'Transportation',
    'Mining', 'NonFerMetal', 'HouseApp', 'LeiService',
    'MachiEquip', 'BuildDeco', 'CommeTrade', 'CONMAT',
    'Auto', 'Textile', 'FoodBever', 'Electronics',
    'Computer', 'LightIndus', 'Utilities', 'Telecom',
    'AgriForest', 'CHEM', 'Media', 'IronSteel',
    'NonBankFinan', 'ELECEQP', 'AERODEF', 'Conglomerates',
]

# Risk styles first, industries after; later cells select these columns from
# the risk-model frame in this order.
neutralized_styles = [*risk_styles, *industry_styles]

#### 提取因子

In [6]:
# Pull the columns listed in `factor_columns` for the chosen universe over
# [begin_date, end_date].
factors_data = SurfaceAPI.StkFactors().universe_fetch(
    universe=SurfaceAPI.StkUniverse(universe),
    start_date=begin_date,
    end_date=end_date,
    columns=factor_columns,
)
factors_data.head()

Unnamed: 0,trade_date,code,aiDaPE60,aiEtopZ180
64500,2021-01-04,8,-1.6828,1.2471
64501,2021-01-04,9,-0.0575,0.7943
64502,2021-01-04,12,0.0224,-0.156
64503,2021-01-04,21,0.287,4.1238
64504,2021-01-04,27,-1.3536,-1.2306


#### 提取收益率

In [7]:
### Fetch the returns already stored in the database
# Note: `yields_data` is rebound by the fetch_returns cell that follows, so
# this result is only used for the preview below.
yields_data = SurfaceAPI.StkYields().universe_fetch(
    universe=SurfaceAPI.StkUniverse(universe),
    start_date=begin_date,
    end_date=end_date,
    name='ret_f1r_cc',
)
yields_data.head()

Unnamed: 0,trade_date,code,nxt1_ret
0,2022-06-28,9,-0.066553
1,2022-06-29,9,0.030937
2,2022-06-30,9,0.032025
3,2022-07-01,9,-0.006425
4,2022-07-04,9,-0.007269


In [8]:
# horizon selects the cumulative-return window; offset shifts the date
yields_data = SurfaceAPI.StkYields().fetch_returns(
    universe=SurfaceAPI.StkUniverse(universe),
    start_date=begin_date,
    end_date=end_date,
    horizon=1,
    offset=0,
)
yields_data.head()

Unnamed: 0,trade_date,code,nxt1_ret
0,2021-01-04,8,0.061516
117,2021-01-04,9,-0.035333
519,2021-01-04,12,-0.057864
921,2021-01-04,21,-0.007708
1323,2021-01-04,27,-0.014486


#### 风格因子

In [9]:
# Risk-model frame for the universe over the sample window; the style and
# industry columns used for neutralisation are selected from it later.
risk_data = SurfaceAPI.RiskModel().universe_risk(
    universe=SurfaceAPI.StkUniverse(universe),
    start_date=begin_date,
    end_date=end_date,
)
risk_data.head()

Unnamed: 0,trade_date,code,srisk,BETA,MOMENTUM,SIZE,EARNYILD,RESVOL,GROWTH,BTOP,...,Telecom,AgriForest,CHEM,Media,IronSteel,NonBankFinan,ELECEQP,AERODEF,Conglomerates,COUNTRY
9000,2021-12-31,9,43.108,0.855,0.953,-0.348,-0.658,2.092,0.255,-0.739,...,0,0,0,0,0,0,0,0,1,1
9001,2021-12-31,12,37.03,0.374,0.497,-0.526,0.662,0.462,-0.198,-0.198,...,0,0,0,0,0,0,0,0,0,1
9002,2021-12-31,21,24.791,-0.947,-0.865,-0.713,-0.351,-0.488,-0.859,-0.169,...,0,0,0,0,0,0,0,0,0,1
9003,2021-12-31,27,27.388,-1.236,0.029,-0.317,1.135,0.721,0.198,0.935,...,0,0,0,0,0,0,0,0,0,1
9004,2021-12-31,28,18.25,-1.469,-1.115,-1.109,1.116,-1.346,-0.434,1.603,...,0,0,0,0,0,0,0,0,0,1


#### 提取行业信息

In [10]:
# Industry membership per (trade_date, code), requested with category 'sw'
# and level '1'.
industry = SurfaceAPI.StkIndustry().universe_fetch(
    universe=SurfaceAPI.StkUniverse(universe),
    start_date=begin_date,
    end_date=end_date,
    category='sw',
    level='1',
)
industry.head()

Unnamed: 0,trade_date,code,industry_code,industry
0,2021-01-04,8,1030309,机械设备
1,2021-01-04,9,1030328,综合
2,2021-01-04,12,1030306,建筑材料
3,2021-01-04,21,1030312,电子
4,2021-01-04,27,1030318,公用事业


In [11]:
# Attach the industry columns to each factor row; `how='inner'` is the merge
# default, spelled out so it is clear that unmatched rows are dropped.
total_data = factors_data.merge(
    industry,
    how='inner',
    on=['trade_date', 'code'],
)
total_data.head()

Unnamed: 0,trade_date,code,aiDaPE60,aiEtopZ180,industry_code,industry
0,2021-01-04,8,-1.6828,1.2471,1030309,机械设备
1,2021-01-04,9,-0.0575,0.7943,1030328,综合
2,2021-01-04,12,0.0224,-0.156,1030306,建筑材料
3,2021-01-04,21,0.287,4.1238,1030312,电子
4,2021-01-04,27,-1.3536,-1.2306,1030318,公用事业


##### 空值处理方式--行业中位数

In [12]:
def industry_median(factors_data, columns=None):
    """Fill missing factor values with the same-day industry median.

    For every factor column, a NaN is replaced by the median of that column
    over the rows sharing the same ``trade_date`` and ``industry_code``.
    Values that are still missing afterwards (every value in the group was
    NaN) are set to 0.

    Parameters
    ----------
    factors_data : pandas.DataFrame
        Must contain ``trade_date``, ``code``, ``industry_code`` and every
        factor column. The frame is not modified.
    columns : sequence of str, optional
        Factor columns to fill. When omitted, the notebook-level
        ``factor_columns`` list is used, so existing
        ``industry_median(df)`` calls behave as before.

    Returns
    -------
    pandas.DataFrame
        Columns ``trade_date``, ``code``, ``industry_code`` followed by the
        factor columns, rows in input order with a fresh 0..n-1 index.
    """
    cols = list(factor_columns if columns is None else columns)
    group_keys = ['trade_date', 'industry_code']

    # Work on a re-indexed copy so the alignment below is unambiguous even
    # when the caller's index contains duplicates.
    filled = factors_data[['trade_date', 'code', 'industry_code'] +
                          cols].reset_index(drop=True)

    # Only the factor columns are aggregated; including `code` in the median
    # is wasted work and fails when `code` is not numeric.
    medians = filled.groupby(group_keys)[cols].transform('median')

    # First pass: industry median; second pass: 0 for groups with no data.
    filled[cols] = filled[cols].fillna(medians).fillna(0)
    return filled

In [13]:
# Replace the raw factor frame with its NaN-filled version; `total_data`
# carries the industry_code column that industry_median groups on.
factors_data = industry_median(factors_data=total_data)
factors_data.head()

Unnamed: 0,trade_date,code,industry_code,aiEtopZ180,aiDaPE60
0,2021-01-04,8,1030309,1.2471,-1.6828
1,2021-01-04,9,1030328,0.7943,-0.0575
2,2021-01-04,12,1030306,-0.156,0.0224
3,2021-01-04,21,1030312,4.1238,0.287
4,2021-01-04,27,1030318,-1.2306,-1.3536


In [14]:
# Join the filled factors with the neutralisation columns of the risk model
# on (trade_date, code).
total_data = pd.merge(
    factors_data,
    risk_data[['trade_date', 'code', *neutralized_styles]],
    on=['trade_date', 'code'],
)

total_data.head()

Unnamed: 0,trade_date,code,industry_code,aiEtopZ180,aiDaPE60,SIZE,Bank,RealEstate,Health,Transportation,...,Utilities,Telecom,AgriForest,CHEM,Media,IronSteel,NonBankFinan,ELECEQP,AERODEF,Conglomerates
0,2021-12-31,9,1030328,1.205,0.2961,-0.348,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1
1,2021-12-31,12,1030306,-1.2135,-0.208,-0.526,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,2021-12-31,21,1030312,-0.958,-0.0749,-0.713,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,2021-12-31,27,1030318,-0.2918,-0.0125,-0.317,0,0,0,0,...,1,0,0,0,0,0,0,0,0,0
4,2021-12-31,28,1030317,-3.1514,-0.1026,-1.109,0,0,1,0,...,0,0,0,0,0,0,0,0,0,0


In [22]:
%%time
new_factors = factor_processing(total_data[factor_columns].values,
                 pre_process=[winsorize_normal,standardize],
                 risk_factors=total_data[neutralized_styles].values.astype(float),
                 post_process=[standardize], groups=total_data['trade_date'].values)
neufactors_data = pd.DataFrame(new_factors, columns=factor_columns, 
                 index=total_data.set_index(['trade_date','code']).index)
neufactors_data = neufactors_data.reset_index()

CPU times: user 956 ms, sys: 2.98 s, total: 3.93 s
Wall time: 233 ms


In [23]:
# Evaluate every factor, raw and neutralised, over the grid of frequency
# and holding-period values. Results are appended in the order
# factor -> freq -> hold -> (raw, neutralised).
res = []
freq = [252, 52, 12]
hold = [1, 3, 5, 7, 10]

# Wide layout: one row per trade_date, one column per code.
returns = yields_data.set_index(['trade_date', 'code'])['nxt1_ret'].unstack()

for factor in factor_columns:
    # Same wide layout for the raw and the neutralised factor values.
    dt, neu_dt = (
        frame.set_index(['trade_date', 'code'])[factor].unstack()
        for frame in (factors_data, neufactors_data))
    for f in freq:
        for h in hold:
            # Both fits finish before either result is stored.
            res.extend([
                Metrics(returns=returns, factors=panel, hold=h,
                        freq=f).fit_metrics()
                for panel in (dt, neu_dt)
            ])

factor fit metrics::100.0%