In [1]:
import pandas as pd
import numpy as np
import datetime as dt
import os
import pickle,itertools,sys,pdb,sqlalchemy
from sqlalchemy import select, and_, or_, MetaData, delete
from PyFin.api import advanceDateByCalendar, bizDatesList, makeSchedule
from PyFin.api import isBizDay
from alphamind.data.dbmodel.models import *
from model import *
from ultron.factor.settlement.weighted import Weighted
import warnings
# Suppress every warning for the rest of the session.
# NOTE(review): this blanket filter also hides deprecation warnings raised by
# the libraries used in this notebook — confirm that is intended.
warnings.filterwarnings("ignore")

In [2]:
# Plot configuration for figures rendered in this notebook.
import seaborn
# Apply seaborn's 'whitegrid' style globally.
seaborn.set_style('whitegrid')
# IPython magic: render matplotlib figures inline in the cell output.
%matplotlib inline

In [3]:
# Database engine used by the pd.read_sql calls in this notebook.
# The connection URL can be supplied through the ALPHA_DB_URL environment
# variable so that credentials do not have to be stored in the notebook.
# When the variable is unset, the original local development URL is used,
# so existing runs behave exactly as before.
engine = sqlalchemy.create_engine(
    os.environ.get(
        'ALPHA_DB_URL',
        'postgresql+psycopg2://alpha:alpha@127.0.0.1:8889/alpha',
    )
)

In [4]:
# Sample window for the database queries in this notebook; the queries
# compare trade_date against these bounds with >= and <= (both inclusive).
begin = dt.datetime(year=2018, month=1, day=1)
end = dt.datetime(year=2019, month=8, day=28)

In [5]:
# Column names of the style exposures; they are combined with the industry
# names below and used to select columns from the risk exposure table.
risk_styles = [
    'BETA', 'MOMENTUM', 'SIZE', 'EARNYILD', 'RESVOL',
    'GROWTH', 'BTOP', 'LEVERAGE', 'LIQUIDTY', 'SIZENL',
]

# Column names of the industry exposures.
industry_styles = [
    'Bank', 'RealEstate', 'Health', 'Transportation',
    'Mining', 'NonFerMetal', 'HouseApp', 'LeiService',
    'MachiEquip', 'BuildDeco', 'CommeTrade', 'CONMAT',
    'Auto', 'Textile', 'FoodBever', 'Electronics',
    'Computer', 'LightIndus', 'Utilities', 'Telecom',
    'AgriForest', 'CHEM', 'Media', 'IronSteel',
    'NonBankFinan', 'ELECEQP', 'AERODEF', 'Conglomerates',
]

In [6]:
# Universe table: the hs300 / zz500 / zz1000 columns for every
# (trade_date, code) row whose trade_date lies in [begin, end].
universe_columns = [
    Universe.trade_date,
    Universe.code,
    Universe.hs300,
    Universe.zz500,
    Universe.zz1000,
]
universe_window = and_(
    Universe.trade_date >= begin,
    Universe.trade_date <= end,
)
query = select(universe_columns).where(universe_window)

# Load, then key the frame by (trade_date, code) in sorted order.
univ_df = (
    pd.read_sql(query, engine)
    .set_index(['trade_date', 'code'])
    .sort_index()
)

In [7]:
# Risk exposure table: every RiskExposure column for rows whose trade_date
# lies in [begin, end].
risk_window = and_(
    RiskExposure.trade_date >= begin,
    RiskExposure.trade_date <= end,
)
query = select([RiskExposure]).where(risk_window)

# Load, then key the frame by (trade_date, code) in sorted order.
risk_total_df = (
    pd.read_sql(query, engine)
    .set_index(['trade_date', 'code'])
    .sort_index()
)

In [8]:
# Factor table: four columns of Experimental (CFinc1, ivr_day, roe_q,
# idl_mtm_20) for rows whose trade_date lies in [begin, end].
factor_columns = [
    Experimental.trade_date,
    Experimental.code,
    Experimental.CFinc1,
    Experimental.ivr_day,
    Experimental.roe_q,
    Experimental.idl_mtm_20,
]
factor_window = and_(
    Experimental.trade_date >= begin,
    Experimental.trade_date <= end,
)
query = select(factor_columns).where(factor_window)

# Load, then key the frame by (trade_date, code) in sorted order.
factor_total_df = (
    pd.read_sql(query, engine)
    .set_index(['trade_date', 'code'])
    .sort_index()
)

In [9]:
# Market table: prices, adjustment factor and chgPct for rows whose
# trade_date lies in [begin, end].
market_columns = [
    Market.trade_date,
    Market.code,
    Market.accumAdjFactor,
    Market.closePrice,
    Market.openPrice,
    Market.chgPct,
]
market_window = and_(
    Market.trade_date >= begin,
    Market.trade_date <= end,
)
query = select(market_columns).where(market_window)
mkt_raw = pd.read_sql(query, engine)

# Keep only codes below 700000, ordered by date then code.
# NOTE(review): the 700000 cut-off presumably removes instruments outside
# the intended stock universe — confirm against the code numbering scheme.
mkt_df = (
    mkt_raw.loc[mkt_raw['code'] < 700000]
    .sort_values(by=['trade_date', 'code'])
)

# Scale close and open prices by accumAdjFactor (presumably the cumulative
# adjustment factor, which would make prices comparable across dates).
adj_factor = mkt_df['accumAdjFactor']
for price_col in ('closePrice', 'openPrice'):
    mkt_df[price_col] = mkt_df[price_col].mul(adj_factor)

In [17]:
# Sampling step (in business days) between the dates kept in date_list.
horizon = 5
#universe = 'zz500'

# Exposure columns selected from the risk table: styles followed by industries.
neu_styles = risk_styles + industry_styles

# Every `horizon`-th business day on the 'China.SSE' calendar between begin
# and end; the slice stops before the final business day.
all_biz_dates = bizDatesList('China.SSE', begin, end)
date_list = all_biz_dates[:-1:horizon]

In [21]:
# --- universe -------------------------------------------------------------
# Rows for the sampled dates. Values that are not > 0 are replaced by NaN;
# the rows themselves are kept.
univ_on_dates = univ_df.loc[date_list]
univ_se = univ_on_dates.where(univ_on_dates > 0)

# --- risk exposure --------------------------------------------------------
# Align exposures to the universe rows, keep the style/industry columns plus
# COUNTRY, and drop rows with any missing exposure.
risk_columns = neu_styles + ['COUNTRY']
risk_df = risk_total_df.reindex(univ_se.index)[risk_columns].dropna()
# Restrict the universe to the rows that still have complete exposures.
univ_se = univ_se.loc[risk_df.index]

# --- forward return -------------------------------------------------------
# Built from the closePrice column of mkt_df, pivoted to a
# (trade_date x code) table.
close_se = mkt_df.set_index(['trade_date', 'code'])['closePrice']
price_tb = close_se.unstack()
# Return from each row's close to the close `horizon` rows later.
return_tb = price_tb.shift(-horizon).div(price_tb).sub(1.0)

# Back to long form, aligned to the universe rows.
return_se = return_tb.stack().reindex(univ_se.index)

# --- factors --------------------------------------------------------------
factor_se = factor_total_df.reindex(univ_se.index)

In [22]:
# Join factors, forward returns, universe columns and risk exposures into
# one flat frame keyed by (trade_date, code). All merges use the default
# inner join.
merge_keys = ['trade_date', 'code']

factor_ret = factor_se.reset_index().merge(return_se.reset_index(), on=merge_keys)
# The values column of return_se comes out of reset_index() labelled 0;
# it is renamed to 'ret' once the universe columns have been joined.
with_universe = factor_ret.merge(univ_se, on=merge_keys).rename(columns={0: 'ret'})
total_data = with_universe.merge(risk_df.reset_index(), on=merge_keys)

In [23]:
# Persist the merged frame as a pickle file; the relative path resolves
# against the current working directory.
output_path = 'total_data.pkl'
total_data.to_pickle(output_path)