## 准备数据

In [1]:
from zipline.pipeline.factors import CustomFactor
from zipline.pipeline.data import CNEquityPricing

import numpy as np
import pandas as pd
import scipy.stats as stats

from zipline.research import get_pricing

In [2]:
import alphalens as al

from zipline.pipeline.builtin import QTradableStocksUS

from zipline.research import run_pipeline
from zipline.pipeline import Pipeline
from zipline.pipeline.builtin import Sector 
from pathlib import Path

In [3]:
# Inclusive analysis window shared by every pipeline run and price query below.
start_date, end_date = '2018-01-01', '2020-05-31'

In [4]:
def _slope(ts, x=None):
    if x is None:
        x = np.arange(len(ts))
    log_ts = np.log(ts)
    slope, intercept, r_value, p_value, std_err = stats.linregress(x, log_ts)
    return slope


class MyFactor(CustomFactor):
    """
    12-month momentum factor.

    For every column of the close-price window (252 rows), fit a linear
    regression of the log close price against the row index and use the
    fitted slope as the factor value.
    """
    inputs = [CNEquityPricing.close]
    window_length = 252

    def compute(self, today, assets, out, close):
        # One regressor shared by all columns: 0 .. number_of_rows - 1.
        day_index = np.arange(close.shape[0])
        # axis=0 hands each column of `close` to _slope as one series.
        out[:] = np.apply_along_axis(_slope, 0, close, day_index)

In [5]:
# Universe filter reused below as the factor mask and as the pipeline screen.
# NOTE(review): the class name says "US" while the pricing dataset used in this
# notebook is CNEquityPricing — confirm this is the intended universe.
universe = QTradableStocksUS()

In [6]:
# Pickle files (relative to the working directory) used to cache the pipeline
# output, the price table and the Alphalens factor table between runs.
factors_fp, prices_fp, factor_data_fp = map(
    Path, ('factors.pkl', 'prices.pkl', 'factor_data.pkl')
)

In [7]:
# Run the factor pipeline only when no cached result exists; the result is
# stripped of rows containing NaN and written to factors_fp.
if not factors_fp.exists():
    factor_columns = {
        'MyFactor': MyFactor(mask=universe),
        # optional, useful to compute individual sector statistics
        'Sector': Sector(mask=universe),
    }
    pipe = Pipeline(columns=factor_columns, screen=universe)
    # chunksize is optional
    run_pipeline(pipe, start_date, end_date).dropna().to_pickle(str(factors_fp))

# Always read back from the cache so both code paths yield the same table.
factors = pd.read_pickle(str(factors_fp))
print(factors.head())

                                        MyFactor  Sector
datetime                  asset                         
2018-01-02 00:00:00+00:00 平安银行(000001)  0.001793     103
                          万 科Ａ(000002)  0.001976     104
                          国农科技(000004) -0.002881     206
                          世纪星源(000005) -0.002053     310
                          深振业Ａ(000006)  0.000601     104


In [8]:
# Assets taken from the second level of the factor index; used as the asset
# argument of the price query below.
# NOTE(review): MultiIndex.levels may still list assets whose rows were removed
# by dropna(); if only assets with factor values are wanted, confirm whether
# index.get_level_values(1).unique() should be used instead.
asset_list = factors.index.levels[1].unique()

In [9]:
# Query open prices for asset_list only when no cached copy exists, then load
# the cached table so both code paths yield the same frame.
if not prices_fp.exists():
    from zipline.research import get_pricing
    get_pricing(
        asset_list,
        start_date=start_date,
        end_date=end_date,
        fields='open',
    ).to_pickle(str(prices_fp))
prices = pd.read_pickle(str(prices_fp))
prices.head()

Unnamed: 0_level_0,平安银行(000001),万 科Ａ(000002),国农科技(000004),世纪星源(000005),深振业Ａ(000006),全新好(000007),神州高铁(000008),中国宝安(000009),*ST美丽(000010),深物业A(000011),...,嘉元科技(688388),普门科技(688389),华润微(688396),赛特新材(688398),硕世生物(688399),金科环境(688466),奥特维(688516),吉贝尔(688566),凌志软件(688588),金博股份(688598)
open,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2018-01-02 00:00:00+00:00,12.783,29.101,22.29,4.15,9.226,16.66,8.611,6.013,5.38,15.314,...,,,,,,,,,,
2018-01-03 00:00:00+00:00,13.147,30.072,22.42,4.35,9.226,16.66,8.631,6.054,5.47,15.478,...,,,,,,,,,,
2018-01-04 00:00:00+00:00,12.755,30.313,23.8,4.27,9.226,16.66,8.582,6.22,5.55,15.605,...,,,,,,,,,,
2018-01-05 00:00:00+00:00,12.649,30.516,23.23,4.26,9.226,16.66,8.542,6.327,5.48,15.614,...,,,,,,,,,,
2018-01-08 00:00:00+00:00,12.688,32.487,23.11,4.29,9.226,16.66,8.404,6.245,5.63,16.123,...,,,,,,,,,,


## 格式化数据

In [10]:
# Work on a copy: assigning the fallback label directly into
# Sector.SECTOR_NAMES would mutate the class-level dict shared by every other
# user of Sector in this kernel.
sector_labels = dict(Sector.SECTOR_NAMES)
sector_labels[-1] = "未知"  # no dataset is perfect, better handle the unexpected
print(sector_labels)

{101: '基本材料', 102: '主要消费', 103: '金融服务', 104: '房地产', 205: '可选消费', 206: '医疗保健', 207: '公用事业', 308: '通讯服务', 309: '能源', 310: '工业领域', 311: '工程技术', -1: '未知'}


In [11]:
# Build the Alphalens input table (factor values, forward returns, sector group
# and quantile) only when no cached copy exists, then load it from the cache.
if not factor_data_fp.exists():
    clean_kwargs = dict(
        factor=factors["MyFactor"],
        prices=prices,
        groupby=factors["Sector"],     # optional, useful to compute group statistics (e.g. sector)
        groupby_labels=sector_labels,  # optional, use labels instead of numeric group information
        quantiles=5,
        periods=(1, 5, 10),
    )
    al.utils.get_clean_factor_and_forward_returns(**clean_kwargs).to_pickle(str(factor_data_fp))
factor_data = pd.read_pickle(str(factor_data_fp))

In [12]:
# Preview the first rows: forward-return columns (1D, 5D, 10D), factor, group
# and factor_quantile.
factor_data.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,1D,5D,10D,factor,group,factor_quantile
date,asset,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2018-01-02 00:00:00+00:00,平安银行(000001),0.028475,-0.029179,0.06141,0.001793,金融服务,5
2018-01-02 00:00:00+00:00,万 科Ａ(000002),0.033367,0.132916,0.160579,0.001976,房地产,5
2018-01-02 00:00:00+00:00,国农科技(000004),0.005832,0.024675,-0.044863,-0.002881,医疗保健,1
2018-01-02 00:00:00+00:00,世纪星源(000005),0.048193,0.048193,0.016867,-0.002053,工业领域,1
2018-01-02 00:00:00+00:00,深振业Ａ(000006),0.0,0.0,0.0,0.000601,房地产,5


## tears

### create_summary_tear_sheet

In [13]:
# al.tears.create_summary_tear_sheet(factor_data)

### create_returns_tear_sheet

In [14]:
# al.tears.create_returns_tear_sheet(factor_data, by_group=True)

### create_information_tear_sheet

In [15]:
# al.tears.create_information_tear_sheet(factor_data, by_group=False)

### create_turnover_tear_sheet

In [16]:
# al.tears.create_turnover_tear_sheet(factor_data)

### create_full_tear_sheet

In [17]:
# al.tears.create_full_tear_sheet(factor_data, by_group=True)

In [18]:
class HistoricalClose(CustomFactor):
    """Output the first row of the close-price window for every asset.

    With the default two-row window this is presumably the close of the
    session before the most recent one — TODO confirm against zipline's
    window ordering.
    """
    inputs = [CNEquityPricing.close]
    window_length = 2

    def compute(self, today, assets, out, close):
        first_row = close[0]
        out[:] = first_row

# Event filter: the latest close is below 5 while the HistoricalClose value
# (first row of a two-row close window) is at or above 5.
latest_below_5 = CNEquityPricing.close.latest < 5
earlier_at_least_5 = HistoricalClose(mask=universe, window_length=2) >= 5
event_mask = latest_below_5 & earlier_at_least_5

In [19]:
# Pipeline restricted to rows where the event filter is true; it outputs the
# event flag itself plus the sector code of each asset.
event_columns = {
    'my_event': event_mask,
    'Sector': Sector(mask=universe),
}
pipe = Pipeline(columns=event_columns, screen=event_mask)

events = run_pipeline(pipe, start_date, end_date)



In [20]:
# Overwrite the event flag with a random score drawn uniformly from
# [-0.1, 0.1). A dedicated, seeded generator keeps the scores — and every
# quantile and plot derived from them — identical across re-runs.
# Alternative kept for reference:
#   events[:] = -1.0  # as this is negative Alphalens will trade the event with short positions
event_rng = np.random.RandomState(42)
events.loc[:, 'my_event'] = event_rng.uniform(-0.1, 0.1, len(events))
events.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,my_event,Sector
datetime,asset,Unnamed: 2_level_1,Unnamed: 3_level_1
2018-01-04 00:00:00+00:00,金科股份(000656),0.05639,104
2018-01-04 00:00:00+00:00,中原高速(600020),-0.097632,310
2018-01-05 00:00:00+00:00,唐山港(601000),0.011955,310
2018-01-05 00:00:00+00:00,大智慧(601519),-0.064485,103
2018-01-08 00:00:00+00:00,山东墨龙(002490),-0.063315,309


In [21]:
# Rebinds asset_list to the assets in the second level of the events index;
# used as the asset argument of the price query below.
asset_list = events.index.levels[1].unique()

In [22]:
# Open prices for the event assets over the analysis window.
# NOTE(review): this rebinds `prices`; the frame loaded from prices_fp earlier
# in the notebook is no longer reachable under this name.
prices = get_pricing(asset_list, start_date=start_date,
                     end_date=end_date, fields='open')

In [23]:
# Combine the event scores with forward returns computed from the open prices
# over 1 to 5 periods, grouped by sector label; filter_zscore is disabled.
event_data = al.utils.get_clean_factor_and_forward_returns(
    factor=events['my_event'],
    prices=prices,
    # quantiles=None,
    groupby=events["Sector"],
    groupby_labels=sector_labels,
    periods=(1, 2, 3, 4, 5),
    filter_zscore=None,
)

Dropped 1.3% entries from factor data: 0.8% in forward returns computation and 0.4% in binning phase (set max_loss=0 to see potentially suppressed Exceptions).
max_loss is 35.0%, not exceeded: OK!


In [24]:
# Preview the first rows: forward-return columns (1D to 5D), factor, group and
# factor_quantile.
event_data.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,1D,2D,3D,4D,5D,factor,group,factor_quantile
date,asset,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2018-01-04 00:00:00+00:00,金科股份(000656),0.010174,0.046575,0.046575,0.046575,0.024192,0.05639,房地产,5.0
2018-01-04 00:00:00+00:00,中原高速(600020),0.001927,0.00985,0.019914,0.00985,0.003854,-0.097632,工业领域,1.0
2018-01-05 00:00:00+00:00,唐山港(601000),-0.007934,0.0,-0.020246,-0.028181,-0.012038,0.011955,工业领域,5.0
2018-01-05 00:00:00+00:00,大智慧(601519),0.01217,0.002028,0.018256,0.014199,-0.004057,-0.064485,金融服务,1.0
2018-01-08 00:00:00+00:00,山东墨龙(002490),-0.008081,-0.010101,-0.038384,-0.040404,-0.046465,-0.063315,能源,1.0


### create_event_returns_tear_sheet

In [25]:
# factor_data was built from the factor-universe prices cached in prices_fp,
# but by this point the name `prices` has been rebound to the event assets
# only. Pass the cached factor-universe prices so the assets in factor_data
# and the price table match.
al.tears.create_event_returns_tear_sheet(factor_data, pd.read_pickle(str(prices_fp)))

### create_event_study_tear_sheet

In [26]:
# Event-study tear sheet for the event table, using the event assets' prices.
study_options = dict(avgretplot=(3, 10), rate_of_ret=False, n_bars=100)
al.tears.create_event_study_tear_sheet(event_data, prices, **study_options)