In [1]:
import warnings
import os 
from pathlib import Path
import pandas as pd 
from logbook import Logger, StderrHandler, INFO, WARNING
import numpy as np 

from zipline import run_algorithm
from zipline.api import (attach_pipeline, pipeline_output,
                         date_rules, time_rules, record,
                         schedule_function, commission, slippage,
                         set_slippage, set_commission, set_max_leverage,
                         order_target, order_target_percent,
                         get_open_orders, cancel_order, set_benchmark, symbol,
                         set_long_only
                        )
from zipline.data import bundles
from zipline.utils.run_algo import load_extensions
from zipline.pipeline import Pipeline, CustomFactor
from zipline.pipeline.data import Column, DataSet
from zipline.pipeline.domain import KR_EQUITIES
from zipline.pipeline.filters import StaticAssets
from zipline.pipeline.factors import AverageDollarVolume, Returns
from zipline.pipeline.loaders import KREquityPricingLoader
from zipline.pipeline.loaders.frame import DataFrameLoader
from trading_calendars import get_calendar

import pyfolio as pf
from pyfolio.plotting import plot_rolling_returns, plot_rolling_sharpe
from pyfolio.timeseries import forecast_cone_bootstrap

In [2]:
warnings.filterwarnings('ignore')

## Load Zipline Extension

In [3]:
os.environ['ZIPLINE_ROOT'] = 'C:/Users/PC/.zipline/'

In [4]:
load_extensions(default=True,
                extensions=[],
                strict=True,
                environ=None)

In [5]:
log_handler = StderrHandler(format_string='[{record.time:%Y-%m-%d %H:%M:%S.%f}]: ' +
                            '{record.level_name}: {record.func_name}: {record.message}',
                            level=WARNING)
log_handler.push_application()
log = Logger('Algorithm')

## Algo Params

In [6]:
N_LONGS = 20
VOL_SCREEN = 650

## Load DataReader Bundles

In [7]:
bundle_data = bundles.load('fnguide')

In [8]:
idx = pd.IndexSlice

In [9]:
def load_factor(bundle):
    with pd.HDFStore('../factor.h5') as store: 
        factor = (store['value/per']
                   .loc[idx['2000':'2021', :], :]
                  )
        factor = factor[factor['Ratio'] > 0]
    tickers = factor.index.get_level_values('ticker').unique().tolist()
    
    assets = []
    for ticker in tickers:
        try:
            asset = bundle.asset_finder.lookup_symbol(ticker, as_of_date=None)
            assets.append(asset)
        except Exception as e:
            print(e)
    factor_sids = pd.Int64Index([asset.sid for asset in assets])
    ticker_map = dict(zip(tickers, factor_sids))
    return (factor
            .unstack('ticker')
            .rename(columns=ticker_map)
            .Ratio
            .tz_localize('UTC')
            .fillna(method='ffill')
           ), assets

In [12]:
per, assets = load_factor(bundle_data)

In [13]:
per 

ticker,1,4,5,9,11,20,21,27,30,31,...,1144,1143,1101,1145,530,1133,1148,1149,1150,1140
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2000-01-03 00:00:00+00:00,96.124528,8.285322,2.043615,9.026088,49.674788,5.970759,21.102467,8.218231,10.119501,16.016901,...,,,,,,,,,,
2000-01-04 00:00:00+00:00,110.526282,7.788314,2.145796,9.806240,54.531879,6.293135,23.126054,8.134625,11.269223,16.255959,...,,,,,,,,,,
2000-01-05 00:00:00+00:00,117.224534,7.589387,2.467671,9.624114,55.635763,6.221395,23.993570,7.858763,10.686340,16.016901,...,,,,,,,,,,
2000-01-06 00:00:00+00:00,101.818811,7.920725,2.388475,8.993712,52.544887,6.057029,22.259463,7.399308,10.524213,16.064713,...,,,,,,,,,,
2000-01-07 00:00:00+00:00,117.347626,7.556128,2.401248,9.413980,53.869548,6.428442,23.126054,7.376335,10.200443,16.686264,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2021-07-20 00:00:00+00:00,14.632114,39.046867,20.261431,153.051118,42.855627,5407.435812,422.709294,16.299440,8.593907,91.352912,...,42.063884,31.501148,74.286287,1059.416041,934.948579,292.864594,92.302177,24.749677,34.915518,75.608147
2021-07-21 00:00:00+00:00,14.533917,38.753338,20.770274,152.800600,42.736254,5407.435812,416.515948,16.183851,8.631195,91.352912,...,41.739328,30.837198,76.321737,1033.449973,948.856571,284.653437,91.612319,25.000518,34.974219,75.911789
2021-07-22 00:00:00+00:00,14.484801,39.340503,20.862796,154.053083,43.810623,5407.435812,416.515948,16.183851,8.631195,91.352912,...,41.804239,31.870036,87.854756,1043.836364,948.856571,284.653437,89.956673,25.167747,35.267726,75.304497
2021-07-23 00:00:00+00:00,14.337488,39.487214,21.186610,154.303565,43.691250,5407.435812,416.515948,16.183851,8.640520,91.352912,...,41.739328,31.870036,85.480214,1049.029649,933.402919,287.390490,89.404745,25.084137,35.267726,74.089912


## Define Custom Dataset

In [16]:
class PERData(DataSet):
    ratio = Column(dtype=float)
    domain = KR_EQUITIES

## Define Pipeline Loaders

In [17]:
per_loader = {PERData.ratio: DataFrameLoader(PERData.ratio, per)}

## Pipeline & Setup

In [18]:
class PER(CustomFactor):
    inputs = [PERData.ratio]
    window_length = 1
    
    def compute(self, today, assets, out, per):
        out[:] = per

## Create Pipeline

In [37]:
def compute_factors():
    per = PER()
    dollar_volume = AverageDollarVolume(window_length=30)
    return Pipeline(columns={
        'longs': per.bottom(N_LONGS),
        'ranking': per.rank(ascending=False),
        },
        screen=dollar_volume.top(VOL_SCREEN)
    )

## Initialize Algorithm

In [38]:
def exec_trades(data, assets, target_percent):
    for asset in assets:
        if data.can_trade(asset) and not get_open_orders(asset):
            order_target_percent(asset, target_percent)

In [39]:
def rebalance(context, data):
    print(context.datetime)
    factor_data = pipeline_output('factor_pipeline')
    record(factor_data=factor_data.ranking)

    assets = factor_data.index
    record(prices=data.current(assets, 'price'))

    longs = assets[factor_data.longs]
    divest = set(context.portfolio.positions.keys()) - set(longs)

    # 보유 주식 처분
    exec_trades(data, assets=divest, target_percent=0)
    # 새롭게 리밸런싱
    exec_trades(data, assets=longs, target_percent=1 / N_LONGS)

In [40]:
def initialize(context):
    """
    Called once at the start of the algorithm.
    """
    context.universe = assets
    context.longs = 0
    set_long_only()
    set_benchmark(symbol('kospi'))
    set_slippage(slippage.FixedSlippage(spread=0.01))
    set_commission(commission.PerShare(cost=0.002, min_trade_cost=0))

    attach_pipeline(pipeline=compute_factors(), name='factor_pipeline')
    schedule_function(rebalance,
                      date_rules.month_end(),
                      time_rules.market_close())       

In [41]:
start = pd.Timestamp('2000-01-01', tz='utc')
end = pd.Timestamp('2021-07-01', tz='utc')

## Start BackTest

In [42]:
results = run_algorithm(start=start,
                        end=end,
                        initialize=initialize,
                        capital_base=1e6,
                        data_frequency='daily',
                        bundle='fnguide',
                        trading_calendar=get_calendar('XKRX'),
                        custom_loader=per_loader
                       )

2000-01-31 06:30:00+00:00
2000-02-29 06:30:00+00:00
2000-03-31 06:30:00+00:00
2000-04-28 06:30:00+00:00
2000-05-31 06:30:00+00:00
2000-06-30 06:30:00+00:00
2000-07-31 06:30:00+00:00
2000-08-31 06:30:00+00:00
2000-09-29 06:30:00+00:00
2000-10-31 06:30:00+00:00
2000-11-30 06:30:00+00:00
2000-12-26 06:30:00+00:00
2001-01-31 06:30:00+00:00
2001-02-28 06:30:00+00:00
2001-03-30 06:30:00+00:00
2001-04-30 06:30:00+00:00
2001-05-31 06:30:00+00:00
2001-06-29 06:30:00+00:00
2001-07-31 06:30:00+00:00
2001-08-31 06:30:00+00:00
2001-09-28 06:30:00+00:00
2001-10-31 06:30:00+00:00
2001-11-30 06:30:00+00:00
2001-12-28 06:30:00+00:00
2002-01-31 06:30:00+00:00
2002-02-28 06:30:00+00:00
2002-03-29 06:30:00+00:00
2002-04-30 06:30:00+00:00
2002-05-31 06:30:00+00:00
2002-06-28 06:30:00+00:00
2002-07-31 06:30:00+00:00
2002-08-30 06:30:00+00:00
2002-09-30 06:30:00+00:00
2002-10-31 06:30:00+00:00
2002-11-29 06:30:00+00:00
2002-12-30 06:30:00+00:00
2003-01-30 06:30:00+00:00
2003-02-28 06:30:00+00:00
2003-03-31 0

## PyFolio Analysis

In [43]:
returns, positions, transactions = pf.utils.extract_rets_pos_txn_from_zipline(results)

In [44]:
benchmark_period_returns = results['benchmark_period_return']
benchmark = np.exp(np.log(benchmark_period_returns + 1.0).diff()) - 1.0

## Tear Sheets

In [None]:
pf.create_full_tear_sheet(returns, 
                          positions=positions, 
                          transactions=transactions,
                          benchmark_rets=benchmark,
                          round_trips=True)

Start date,2000-01-04,2000-01-04
End date,2021-07-01,2021-07-01
Total months,252,252
Unnamed: 0_level_3,Backtest,Unnamed: 2_level_3
Annual return,7.619%,
Cumulative returns,369.718%,
Annual volatility,22.69%,
Sharpe ratio,0.44,
Calmar ratio,0.13,
Stability,0.82,
Max drawdown,-58.112%,
Omega ratio,1.08,
Sortino ratio,0.60,
Skew,-0.60,


Worst drawdown periods,Net drawdown in %,Peak date,Valley date,Recovery date,Duration
0,58.11,2000-08-30,2004-05-20,2006-04-03,1459
1,57.09,2007-07-24,2008-10-27,2010-04-16,714
2,46.0,2016-07-11,2020-03-23,2020-11-24,1142
3,34.37,2011-08-02,2012-12-11,2015-05-13,987
4,29.31,2000-03-27,2000-05-23,2000-08-24,109
