In [1]:
import pandas as pd
import numpy as np 
import itertools
import statsmodels.api as sm
from statsmodels import regression, stats

In [2]:
from quantopian.pipeline import Pipeline
from quantopian.pipeline.data import Fundamentals
from quantopian.pipeline.factors import Returns, Latest
from quantopian.pipeline.filters import QTradableStocksUS
from quantopian.research import run_pipeline

In [3]:
# Rolling window start/end dates for exposure analysis
start_date = '2016-02-01'
end_date = '2019-02-01'

In [4]:
# Risk-free market daily returns proxy (US treasury bill ETF)
risk_free_returns = get_pricing('BIL', fields='price', start_date=start_date,
                                end_date=end_date).pct_change()[1:]
# Overall market daily returns proxy (S&P 500 ETF)
market_returns = get_pricing('SPY', fields='price', start_date=start_date,
                             end_date=end_date).pct_change()[1:]

In [5]:
# Function to acquire relevant market data from quantopian API

def make_pipeline():
    pipe = Pipeline()
    
    # 36-month historic data
    universe = QTradableStocksUS()

    # Daily returns on equities
    returns = Returns(window_length=2)
   
    # Fundamentals data
    market_cap = Latest([Fundamentals.market_cap])
    book_to_price = ( 1 / Latest([Fundamentals.pb_ratio]) )

    # Rank and group equities based on market cap and book:price data for use in
    # factor mimicking portfolios
    bigmcap = market_cap.rank(mask=universe).top(1000)
    smallmcap = market_cap.rank(mask=universe).bottom(1000)
    
    highbpratio = book_to_price.rank(mask=universe).top(1000)
    lowbpratio = book_to_price.rank(mask=universe).bottom(1000)
    
    # Dataframe for first regression
    universe = QTradableStocksUS() & (bigmcap | smallmcap | highbpratio | lowbpratio)
    
    pipe = Pipeline(
    columns = {
            'market_cap':market_cap,
            'bigmcap':bigmcap,
            'smallmcap':smallmcap,
            'book_to_price':book_to_price,
            'highbpratio':highbpratio,
            'lowbpratio':lowbpratio,
            'returns':returns
        },
        screen=universe
    )
    return pipe

pipe = make_pipeline()

results = run_pipeline(pipe, start_date=start_date, end_date=end_date) 



In [6]:
# Average daily returns of grouped sub-portfolios

returns_bigmcap = results[results.bigmcap]['returns'].groupby(level=0).mean()
returns_smallmcap = results[results.smallmcap]['returns'].groupby(level=0).mean()

returns_highbpratio = results[results.highbpratio]['returns'].groupby(level=0).mean()
returns_lowbpratio = results[results.lowbpratio]['returns'].groupby(level=0).mean()

# Constructed Fama French factors used for regession
EXMKT = market_returns - risk_free_returns 
BMS = returns_bigmcap - returns_smallmcap  
HML = returns_highbpratio - returns_lowbpratio 
    
# Spreading the above factor returns data over all eequities for each day, to
# calculate correlations between individual equities and factor returns
data = results[['returns']].set_index(results.index)
asset_list_sizes = [group[1].size for group in data.groupby(level=0)]

EXMKT_column = [[EXMKT.loc[group[0]]]*size if group[0] in EXMKT.index else [None]*size \
    for group, size in zip(data.groupby(level=0), asset_list_sizes)]
data['EXMKT'] = list(itertools.chain(*EXMKT_column))

BMS_column = [[BMS.loc[group[0]]] * size for group, size \
                in zip(data.groupby(level=0), asset_list_sizes)]
data['BMS'] = list(itertools.chain(*BMS_column))

HML_column = [[HML.loc[group[0]]] * size for group, size \
                in zip(data.groupby(level=0), asset_list_sizes)]
data['HML'] = list(itertools.chain(*HML_column))

In [7]:
# 36-month regression

# Clean data of NaN's at head and tail of EXMKT column
# since returns are calculated by the difference in consecutive daily market open prices
data = data.dropna()

# Appending column of 1's to data to get an intercept estimate
data = sm.add_constant(data)

# Gather list of assets from pipeline
assets = data.index.levels[1].unique()

# Defining variables to put into regression function
Y = [data.xs(asset, level=1)['returns'] for asset in assets]
X = [data.xs(asset, level=1)[['EXMKT', 'BMS', 'HML', 'const']] for asset in assets]

# Regression: estimating betas for hypothesis function...
# returns = beta0*const + beta1*EXMKT + beta2*BMS + beta3*HML + residuals
reg_results  = [regression.linear_model.OLS(y, x).fit().params \
                for y, x in zip(Y, X) if not(x.empty or y.empty)]
indices = [asset for y, x, asset in zip(Y, X, assets) if not(x.empty or y.empty)]

betas = pd.DataFrame(reg_results, index=indices)

# Constant removed to compensate for mis-specifying asset pricing function
del betas['const']

# Format betas dataframe to facilitate multiplication later on
betas['returns'] = np.ones(len(betas))

In [8]:
# Second pipeline to get past year residual returns data
start_date = '2018-02-01'
end_date = '2019-02-01'

# Risk-free market returns proxy (US treasury bill ETF)
risk_free_returns = get_pricing('BIL', fields='price', start_date=start_date,
                                end_date=end_date).pct_change()[1:]

# Overall market returns proxy (S&P 500 ETF)
market_returns = get_pricing('SPY', start_date=start_date, end_date=end_date,
                             fields='price').pct_change()[1:]

def make_pipeline():
    pipe = Pipeline()
    
    universe = QTradableStocksUS()
    lookback = 253
    
    # Get fundamentals and returns data
    market_cap = Latest([Fundamentals.market_cap])
    book_to_price = Latest([Fundamentals.pb_ratio])
    returns = Returns(window_length=2)
    
    market_cap_rank = market_cap.rank(mask=QTradableStocksUS())
    book_to_price_rank = book_to_price.rank(mask=QTradableStocksUS())
    
    # Group equities
    bigmcap = market_cap_rank.top(1000)
    smallmcap = market_cap_rank.bottom(1000)
    
    highbpratio = book_to_price_rank.top(1000)
    lowbpratio = book_to_price_rank.bottom(1000)
    
    universe = QTradableStocksUS() & (bigmcap | smallmcap | highbpratio | lowbpratio )
    
    pipe = Pipeline(
        columns = {
            'market cap':market_cap,
            'market_cap_rank':market_cap_rank,
            'book_to_price':book_to_price,
            'book_to_price_rank':book_to_price_rank,
            'returns':returns,
            'bigmcap':bigmcap,
            'smallmcap':smallmcap,
            'highbpratio':highbpratio,
            'lowbpratio':lowbpratio,
        },
        screen=universe
    )
    return pipe

pipe = make_pipeline()

results = run_pipeline(pipe, start_date, end_date) 

# Average daily returns of constructed factor sub-portfolios

returns_bigmcap = results[results.bigmcap]['returns'].groupby(level=0).mean()
returns_smallmcap = results[results.smallmcap]['returns'].groupby(level=0).mean()

returns_highbpratio = results[results.highbpratio]['returns'].groupby(level=0).mean()
returns_lowbpratio = results[results.lowbpratio]['returns'].groupby(level=0).mean()

# Final factors used for Carhart 4-factor model Long/Short portfolio returns
EXMKT = market_returns - risk_free_returns
BMS = returns_bigmcap - returns_smallmcap
HML = returns_highbpratio - returns_lowbpratio

# Set of functions that spread the factor L/S portfolios data over all assets for each day
data = results[['returns']].set_index(results.index)
asset_list_sizes = [group[1].size for group in data.groupby(level=0)]

EXMKT_column = [[EXMKT.loc[group[0]]]*size if group[0] in EXMKT.index else [None]*size \
    for group, size in zip(data.groupby(level=0), asset_list_sizes)]

data['EXMKT'] = list(itertools.chain(*EXMKT_column))
    
BMS_column = [[BMS.loc[group[0]]] * size for group, size \
             in zip(data.groupby(level=0), asset_list_sizes)]
data['BMS'] = list(itertools.chain(*BMS_column))

HML_column = [[HML.loc[group[0]]] * size for group, size \
             in zip(data.groupby(level=0), asset_list_sizes)]
data['HML'] = list(itertools.chain(*HML_column))



In [9]:
# Momentum factor used is cumulative returns for each equity
momentum_results = results.returns.groupby(level=1).cumsum()
momentum_results = momentum_results[end_date]

In [10]:
# Multiply factor returns dataframe columns by betas
# to get residual returns for each asset
residual_returns_data = data.multiply(betas, axis=0, level=1)

# Finding expected returns by summing residual returns columns
residual_returns_data['expected_returns'] = (residual_returns_data['BMS']
                                            + residual_returns_data['EXMKT']
                                            + residual_returns_data['HML']
                                            )

residual_returns_data['residual_returns'] = (residual_returns_data['returns']\
                                            - residual_returns_data['expected_returns'])

In [11]:
# Cumulative sum of residual returns over year for each equity
total_residual_returns = residual_returns_data['residual_returns'].groupby(level=1).cumsum(axis=0)

# Z-scoring total residual returns to scale the metric for later computation
residual_variance = total_residual_returns[end_date].var()
residual_returns_mean = total_residual_returns[end_date].mean()
res_returns_zscore = ( total_residual_returns[end_date] - residual_returns_mean ) /\
( residual_variance )

In [13]:
# Z-scoring past years momentum results to scale the metric
momentum_results_zscore = ( momentum_results - momentum_results.mean() )\
/ momentum_results.std()

# A basic real number evaluation metric for value. This can be...
# altered depending on what style of portfolio you would like to build
overall_value_factor = (2 * momentum_results_zscore) - res_returns_zscore

OVF_rank = overall_value_factor.sort_values(ascending=False)

print 'Longs:', OVF_rank.head(20)
print 'Shorts:', OVF_rank.tail(20)

Longs: Equity(15591 [SSRM])    3.209253
Equity(40607 [AG])      3.010627
Equity(42366 [PVG])     2.915546
Equity(15789 [DNR])     2.707219
Equity(4664 [SM])       2.686425
Equity(25707 [WLL])     2.597349
Equity(25714 [AUY])     2.581855
Equity(44884 [BTG])     2.551379
Equity(24827 [RCII])    2.447493
Equity(2621 [VAL])      2.438310
Equity(13083 [PAAS])    2.373644
Equity(6392 [RDC])      2.319060
Equity(27437 [WPM])     2.318351
Equity(9189 [KGC])      2.315751
Equity(42385 [REGI])    2.262949
Equity(22091 [ATTU])    2.226594
Equity(9038 [RIG])      2.222293
Equity(5607 [OGE])      2.217170
Equity(7244 [SWN])      2.211500
Equity(34560 [CZZ])     2.201479
dtype: float64
Shorts: Equity(33949 [FOLD])   -4.211837
Equity(42689 [PBYI])   -4.288356
Equity(25972 [DVAX])   -4.339967
Equity(45430 [FPRX])   -4.345682
Equity(45799 [KPTI])   -4.351084
Equity(49000 [BPMC])   -4.395700
Equity(46053 [ITCI])   -4.408193
Equity(48547 [ONCE])   -4.582854
Equity(49470 [CTMX])   -4.622977
Equity(13984 