In [1]:
import warnings
warnings.filterwarnings('ignore')
from collections import defaultdict
from time import time

import numpy as np
import pandas as pd
import pandas_datareader.data as web
from logbook import Logger, StderrHandler, INFO

import matplotlib.pyplot as plt
import seaborn as sns

from zipline import run_algorithm
from zipline.api import (attach_pipeline, pipeline_output, record, schedule_function, time_rules, date_rules,
                          set_slippage, set_commission, order_target, order_target_percent)


from zipline.finance import commission, slippage
from zipline.data import bundles
from zipline.utils.run_algo import load_extensions
from zipline.pipeline import Pipeline, CustomFactor
from zipline.pipeline.data import Column, DataSet
from zipline.pipeline.domain import US_EQUITIES
from zipline.pipeline.filters import StaticAssets
from zipline.pipeline.loaders.frame import DataFrameLoader

import pyfolio as pf
from pyfolio.plotting import plot_rolling_returns, plot_rolling_sharpe
from pyfolio.timeseries import forecast_cone_bootstrap

# Notebook-wide plotting/display options and a fixed NumPy seed.
sns.set_style('whitegrid')
# Do not wrap wide DataFrame reprs across multiple lines.
pd.set_option('display.expand_frame_repr', False)
np.random.seed(42)

import zipline
# Display the installed Zipline version as the cell output.
zipline.__version__

'2.4'

In [2]:
# Load Zipline extensions (default=True, no extra extension files, strict mode)
# before the bundle is loaded further down in this cell.
load_extensions(default=True, extensions=[], strict=True, environ=None)

# Route log records at INFO level and above to stderr with a timestamped format,
# and install the handler application-wide.
log_handler = StderrHandler(format_string='[{record.time:%Y-%m-%d %H:%M:%S.%f}]: ' +
                            '{record.level_name}: {record.func_name}: {record.message}', level=INFO)
log_handler.push_application()
log = Logger('Algorithm')

# Strategy parameters:
#   N_LONGS / N_SHORTS - how many assets the pipeline selects for each side.
#   MIN_POSITIONS      - `rebalance` only re-weights when BOTH sides hold more names than this.
N_LONGS = 20
N_SHORTS = 20
MIN_POSITIONS = 10
# Bundle handle used to resolve tickers to Zipline assets in `load_predictions`.
bundle_data = bundles.load('quandl')

Please ensure a ZIPLINE_ROOT environment variable is defined and accessible (or alter the script and manually set the path


NameError: name 'exit' is not defined

In [3]:
def load_predictions(bundle):
    """Load the stored model predictions and re-key them by Zipline asset id.

    Reads the ``predicted`` column from ``data/backtest.h5`` (key ``'data'``),
    drops rows without a prediction, resolves every ticker found in the
    ``ticker`` index level through ``bundle.asset_finder.lookup_symbols`` and
    pivots the result so that rows are dates and columns are sids.

    Parameters
    ----------
    bundle : object
        Must expose ``asset_finder.lookup_symbols(symbols, as_of_date=...)``
        returning one asset (with a ``sid`` attribute) per requested symbol,
        in the same order.

    Returns
    -------
    tuple
        ``(predictions, assets)`` where ``predictions`` is a DataFrame with a
        UTC-localized date index and one column per sid, and ``assets`` is the
        list returned by the asset finder.
    """
    predictions = pd.read_hdf('data/backtest.h5', 'data')[['predicted']].dropna()
    # Address the level by name so this lookup agrees with `unstack('ticker')` below.
    tickers = predictions.index.get_level_values('ticker').unique().tolist()

    assets = bundle.asset_finder.lookup_symbols(tickers, as_of_date=None)
    # `pd.Int64Index` was removed in pandas 2.0; a plain int64 Index is equivalent.
    predicted_sids = pd.Index([asset.sid for asset in assets], dtype='int64')
    ticker_map = dict(zip(tickers, predicted_sids))

    # Pivot tickers into columns, swap ticker labels for sids, then drop the
    # outer 'predicted' column level and make the date index tz-aware (UTC).
    by_sid = (predictions
              .unstack('ticker')
              .rename(columns=ticker_map)
              .predicted
              .tz_localize('UTC'))
    return by_sid, assets

# Resolve the predictions against the loaded bundle; `assets` is reused later for the
# pipeline screen and stored on the algorithm context.
predictions, assets = load_predictions(bundle_data)

In [4]:
# Preview the last rows of the sid-keyed prediction frame.
# NOTE(review): the saved output lists 2017-08-07 and 2017-04-14 after 2017-11-29, so the
# date index is apparently not in chronological order — confirm whether it should be sorted.
predictions.tail()

ticker,8,12,45,71,85,98,136,138,145,149,...,216,1685,946,811,2713,2955,2411,2530,1417,1701
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2017-11-27 00:00:00+00:00,-0.001536,-0.000587,-0.000383,-0.002232,-0.002283,,,-0.000937,0.000599,-0.001685,...,,,,,,,,,,
2017-11-28 00:00:00+00:00,-0.001629,-0.001062,-0.000375,-0.00177,-0.002164,-0.000509,,0.000761,-0.000837,-0.001586,...,,,,,,,,,,
2017-11-29 00:00:00+00:00,-0.002439,-0.00099,-0.002492,-0.002022,-0.001521,-0.00082,,-0.001654,-0.000373,-0.001052,...,,,,,,,,,,
2017-08-07 00:00:00+00:00,,0.00066,,,-0.000532,,,-0.001345,-0.002795,0.000444,...,,,,,,,,,,
2017-04-14 00:00:00+00:00,,,,,,,,,,,...,,,,,,,,,,


In [6]:
class SignalData(DataSet):
    """Pipeline dataset that exposes the model predictions as a single column."""
    # Float-valued column through which the predictions enter the pipeline.
    predictions = Column(dtype=float)
    # Bind the dataset to the US equities domain.
    domain = US_EQUITIES

# Map the dataset column to an in-memory loader backed by the `predictions` frame;
# this dict is handed to `run_algorithm` as `custom_loader`.
signal_loader = {SignalData.predictions: DataFrameLoader(SignalData.predictions, predictions)}

In [8]:
class MLSignal(CustomFactor):
    """Factor that forwards the stored model prediction unchanged.

    The only input is ``SignalData.predictions`` with a one-day window, so the
    factor value for a day is simply that day's prediction for each asset.
    """
    inputs = [SignalData.predictions]
    window_length = 1

    def compute(self, today, assets, out, preds):
        """Write the prediction row for the current day into ``out``."""
        # Zipline passes each input as a (window_length, n_assets) window; with
        # window_length == 1 its last row is the only row.
        out[:] = preds[-1]

def compute_signals():
    """Build the pipeline that flags long and short candidates.

    Returns a ``Pipeline`` with two boolean columns, restricted to the assets
    that have predictions:

    * ``'longs'``  - the ``N_LONGS`` highest signals among strictly positive ones.
    * ``'shorts'`` - the ``N_SHORTS`` lowest signals among strictly negative ones.
    """
    signal = MLSignal()
    long_filter = signal.top(N_LONGS, mask=signal > 0)
    short_filter = signal.bottom(N_SHORTS, mask=signal < 0)
    return Pipeline(
        columns={'longs': long_filter, 'shorts': short_filter},
        screen=StaticAssets(assets),
    )

In [9]:
def initialize(context):
    """Configure the algorithm once before the backtest starts.

    Copies the strategy parameters and asset universe onto ``context``, sets
    zero-spread slippage and zero commission, schedules the daily rebalance
    (1h30 after the open) and the daily recording (at the close), and attaches
    the signal pipeline under the name ``'signals'``.
    """
    # Strategy parameters and universe.
    context.n_longs, context.n_shorts = N_LONGS, N_SHORTS
    context.min_positions = MIN_POSITIONS
    context.universe = assets

    # Frictionless execution model.
    set_slippage(slippage.FixedSlippage(spread=0.00))
    set_commission(commission.PerShare(cost=0, min_trade_cost=0))

    # Daily jobs, registered in this order: trade first, then record.
    daily_jobs = (
        (rebalance, time_rules.market_open(hours=1, minutes=30)),
        (record_vars, time_rules.market_close()),
    )
    for job, when in daily_jobs:
        schedule_function(job, date_rules.every_day(), when)

    attach_pipeline(compute_signals(), 'signals')

# Called before the market opens; caches the current pipeline values as trade signals.
def before_trading_start(context, data):
    """Translate the current pipeline output into one signed signal per asset.

    Stores a Series on ``context.trades``, indexed by asset, whose value is
    ``1`` for a long candidate, ``-1`` for a short candidate and ``0`` for
    neither.

    The previous implementation stacked the long and short flags with
    ``Series.append`` (removed in pandas 2.0) and de-duplicated
    ``(asset, value)`` pairs. That left every selected asset in the result
    twice — once with its +/-1 signal and once with ``0`` — so ``rebalance``
    issued both a close-out order and a target-weight order for the same
    asset. It also collapsed a single-row result to a scalar via ``squeeze``.

    Parameters
    ----------
    context : algorithm context; ``context.trades`` is (re)assigned here.
    data : unused.
    """
    output = pipeline_output('signals')
    longs = output['longs'].astype(int)
    shorts = output['shorts'].astype(int)
    # The pipeline masks longs with `signal > 0` and shorts with `signal < 0`,
    # so an asset is never flagged on both sides and the difference is in {-1, 0, 1}.
    context.trades = longs - shorts

In [10]:
# Place the day's orders: close flat names, then weight shorts and longs equally.
def rebalance(context, data):
    """Turn ``context.trades`` into orders.

    Every asset whose signal is zero is ordered to a target of 0 shares. The
    remaining assets are grouped by signal value; the sizes of the ``1`` and
    ``-1`` groups are stored on ``context.longs`` / ``context.shorts``. Only if
    both counts exceed ``context.min_positions`` are the shorts (first) and
    then the longs ordered to equal target weights of ``-1/shorts`` and
    ``1/longs`` respectively.
    """
    by_signal = {}
    for asset, signal in context.trades.items():
        if signal:
            by_signal.setdefault(signal, []).append(asset)
            continue
        order_target(asset, 0)

    long_book = by_signal.get(1, [])
    short_book = by_signal.get(-1, [])
    context.longs = len(long_book)
    context.shorts = len(short_book)

    # Guard clause: skip re-weighting unless both sides are large enough.
    if context.longs <= context.min_positions or context.shorts <= context.min_positions:
        return

    for asset in short_book:
        order_target_percent(asset, -1 / context.shorts)
    for asset in long_book:
        order_target_percent(asset, 1 / context.longs)

In [11]:
# Record diagnostic variables at the end of each day
def record_vars(context, data):
    """Record account leverage and the current long/short counts.

    Reads ``context.account.leverage``, ``context.longs`` and
    ``context.shorts`` and passes them to ``record`` under the names
    ``leverage``, ``longs`` and ``shorts``.
    """
    snapshot = {
        'leverage': context.account.leverage,
        'longs': context.longs,
        'shorts': context.shorts,
    }
    record(**snapshot)

In [12]:
# Derive the backtest window from the prediction dates: first prediction date through
# one day after the last, both converted to tz-naive timestamps.
dates = predictions.index.get_level_values('date')
start_date = dates.min().replace(tzinfo=None)
end_date = (dates.max() + pd.DateOffset(1)).replace(tzinfo=None)
# Display the window and the first few dates as a sanity check.
start_date, end_date, dates[:5]

(Timestamp('2014-12-09 00:00:00'),
 Timestamp('2017-11-30 00:00:00'),
 DatetimeIndex(['2014-12-09 00:00:00+00:00', '2014-12-10 00:00:00+00:00',
                '2014-12-11 00:00:00+00:00', '2014-12-12 00:00:00+00:00',
                '2014-12-15 00:00:00+00:00'],
               dtype='datetime64[ns, UTC]', name='date', freq=None))

In [13]:
# Run the daily backtest over the prediction window on the 'quandl' bundle and time it.
# `rebalance` and `record_vars` are not passed here; `initialize` schedules them.
start = time()
results = run_algorithm(start=start_date, end=end_date, initialize=initialize,
                       before_trading_start=before_trading_start, capital_base=1e6,
                       data_frequency='daily', bundle='quandl',
                       custom_loader=signal_loader) # NOTE(review): original note read "need to modify zipline" — presumably `custom_loader` required a patched Zipline; confirm for the installed version

print('Duration: {:.2f}s'.format(time() - start))

Duration: 11.39s


In [14]:
# Convert the Zipline results into pyfolio inputs and download the S&P 500 from FRED
# (2014-2018) as a daily-return benchmark localized to UTC.
# NOTE(review): the saved output of this cell is
# "AttributeError: 'DataFrame' object has no attribute 'amount'" — presumably the backtest
# recorded no positions; confirm before relying on the cells that follow.
returns, positions, transactions = pf.utils.extract_rets_pos_txn_from_zipline(results)
benchmark = web.DataReader('SP500', 'fred', '2014', '2018').squeeze()
benchmark = benchmark.pct_change().tz_localize('UTC')
# Passed to pyfolio as `live_start_date` in the plotting and tear-sheet cells.
LIVE_DATE = '2017-01-01'

AttributeError: 'DataFrame' object has no attribute 'amount'

In [None]:
# Two-panel figure: cumulative returns vs. the benchmark (left) and a 63-day rolling
# Sharpe ratio (right). Returns from LIVE_DATE onward are treated as the live period,
# with a 2-std bootstrap forecast cone.
fig, axes = plt.subplots(ncols=2, figsize=(16, 5))
plot_rolling_returns(returns, factor_returns=benchmark, live_start_date=LIVE_DATE, logy=False, cone_std=2,
                     legend_loc='best', volatility_match=False, cone_function=forecast_cone_bootstrap, ax=axes[0])
plot_rolling_sharpe(returns, ax=axes[1], rolling_window=63)
axes[0].set_title('Cumulative Returns - In and Out-of-Sample')
axes[1].set_title('Rolling Sharpe Ratio (3 Months)')

In [None]:
# NOTE(review): this cell repeats the earlier pyfolio-input cell statement for statement
# (same extraction, same FRED download, same LIVE_DATE); it looks redundant — confirm
# whether one of the two copies can be dropped.
returns, positions, transactions = pf.utils.extract_rets_pos_txn_from_zipline(results)
benchmark = web.DataReader('SP500', 'fred', '2014', '2018').squeeze()
benchmark = benchmark.pct_change().tz_localize('UTC')
LIVE_DATE = '2017-01-01'

In [None]:
# Full pyfolio tear sheet for the backtest, benchmarked against the S&P 500 returns,
# with the live period starting at LIVE_DATE and round-trip analysis enabled.
pf.create_full_tear_sheet(returns, positions=positions, transactions=transactions, benchmark_rets=benchmark,
                          live_start_date=LIVE_DATE,  round_trips=True)