In [19]:
import warnings
warnings.filterwarnings('ignore')

%matplotlib inline

from datetime import datetime
import pandas as pd
import pandas_datareader.data as web

# replaces pyfinance.ols.PandasRollingOLS (no longer maintained)
from statsmodels.regression.rolling import RollingOLS
import statsmodels.api as sm

import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np

In [2]:
sns.set_style('whitegrid')
idx = pd.IndexSlice

In [4]:
DATA_STORE = '../data/assets.h5'
START = 2000
END = 2018
with pd.HDFStore(DATA_STORE) as store:
    prices = (store['quandl/wiki/prices']
              .loc[idx[str(START):str(END), :], 'adj_close']
              .unstack('ticker'))
    stocks = store['us_equities/stocks'].loc[:, ['marketcap', 'ipoyear', 'sector']]

In [5]:
prices.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 4706 entries, 2000-01-03 to 2018-03-27
Columns: 3199 entries, A to ZUMZ
dtypes: float64(3199)
memory usage: 114.9 MB


In [21]:
stocks

Unnamed: 0_level_0,marketcap,ipoyear,sector
ticker,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
PIH,4.309000e+07,2014.0,Finance
PIHPP,,,Finance
TURN,7.033000e+07,,Finance
FLWS,8.333900e+08,1999.0,Consumer Services
FCCY,1.781400e+08,,Finance
...,...,...,...
ZOES,1.974000e+08,2014.0,Consumer Services
ZTS,4.165000e+10,2013.0,Health Care
ZTO,1.434000e+10,2016.0,Transportation
ZUO,3.040000e+09,2018.0,Technology


In [24]:
stocks = stocks[~stocks.index.duplicated()]
stocks.index.name = 'ticker'

In [28]:
shared = prices.columns.intersection(stocks.index)

In [30]:
shared

Index(['A', 'AA', 'AAL', 'AAMC', 'AAN', 'AAOI', 'AAON', 'AAP', 'AAPL', 'AAT',
       ...
       'ZEN', 'ZEUS', 'ZGNX', 'ZION', 'ZIOP', 'ZIXI', 'ZNGA', 'ZOES', 'ZTS',
       'ZUMZ'],
      dtype='object', name='ticker', length=2412)

In [38]:
stocks = stocks.loc[shared, :]
stocks.info()

<class 'pandas.core.frame.DataFrame'>
Index: 2412 entries, A to ZUMZ
Data columns (total 3 columns):
 #   Column     Non-Null Count  Dtype  
---  ------     --------------  -----  
 0   marketcap  2407 non-null   float64
 1   ipoyear    1065 non-null   float64
 2   sector     2372 non-null   object 
dtypes: float64(2), object(1)
memory usage: 139.9+ KB


In [33]:
prices = prices.loc[:, shared]
prices.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 4706 entries, 2000-01-03 to 2018-03-27
Columns: 2412 entries, A to ZUMZ
dtypes: float64(2412)
memory usage: 86.6 MB


In [39]:
assert prices.shape[1] == stocks.shape[0]

### Monthly return

In [68]:
monthly_prices = prices.resample('M').last()

## compute returns for multiple historical periods

In [69]:
outlier_cutoff = 0.01
data = pd.DataFrame()
lags = [1, 2, 3, 6, 9, 12]
for lag in lags:
    data[f'return_{lag}m'] = (monthly_prices
                             .pct_change(lag)
                             .stack()
                             .pipe(lambda x: x.clip(lower=x.quantile(outlier_cutoff),
                                                   upper = x.quantile(1-outlier_cutoff)))
                             .add(1)
                             .pow(1/lag)
                             .sub(1)
                             )

In [74]:
data = data.swaplevel().dropna()

In [75]:
data

Unnamed: 0_level_0,Unnamed: 1_level_0,return_1m,return_2m,return_3m,return_6m,return_9m,return_12m
ticker,date,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
A,2001-01-31,-0.003653,0.022360,0.056104,0.049811,-0.052500,-0.015988
AAN,2001-01-31,0.116071,0.008439,-0.015687,-0.009856,-0.002912,-0.009572
AAON,2001-01-31,0.300170,0.034296,0.028670,-0.006238,0.010159,0.031027
AAPL,2001-01-31,0.430943,0.144685,0.033941,-0.132736,-0.110514,-0.070339
ABAX,2001-01-31,-0.073762,-0.042573,-0.044914,-0.003738,-0.028353,-0.031993
...,...,...,...,...,...,...,...
ZIXI,2018-03-31,0.083951,0.018737,0.000760,-0.017817,-0.028409,-0.007585
ZNGA,2018-03-31,0.069364,0.015912,-0.025652,-0.003559,0.001818,0.021989
ZOES,2018-03-31,0.000000,0.005420,-0.038117,0.027701,0.025046,-0.017982
ZTS,2018-03-31,0.012367,0.032888,0.043517,0.042527,0.030657,0.036608


In [76]:
for lag in [2,3,6,9,12]:
    data[f'momentum_{lag}'] = data[f'return_{lag}m'].sub(data[f'return_1m'])

In [77]:
data

Unnamed: 0_level_0,Unnamed: 1_level_0,return_1m,return_2m,return_3m,return_6m,return_9m,return_12m,momentum_2,momentum_3,momentum_6,momentum_9,momentum_12
ticker,date,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
A,2001-01-31,-0.003653,0.022360,0.056104,0.049811,-0.052500,-0.015988,0.026013,0.059757,0.053464,-0.048847,-0.012335
AAN,2001-01-31,0.116071,0.008439,-0.015687,-0.009856,-0.002912,-0.009572,-0.107632,-0.131759,-0.125927,-0.118983,-0.125643
AAON,2001-01-31,0.300170,0.034296,0.028670,-0.006238,0.010159,0.031027,-0.265874,-0.271499,-0.306408,-0.290010,-0.269143
AAPL,2001-01-31,0.430943,0.144685,0.033941,-0.132736,-0.110514,-0.070339,-0.286258,-0.397002,-0.563679,-0.541456,-0.501281
ABAX,2001-01-31,-0.073762,-0.042573,-0.044914,-0.003738,-0.028353,-0.031993,0.031189,0.028848,0.070024,0.045410,0.041769
...,...,...,...,...,...,...,...,...,...,...,...,...
ZIXI,2018-03-31,0.083951,0.018737,0.000760,-0.017817,-0.028409,-0.007585,-0.065214,-0.083190,-0.101767,-0.112359,-0.091536
ZNGA,2018-03-31,0.069364,0.015912,-0.025652,-0.003559,0.001818,0.021989,-0.053452,-0.095017,-0.072923,-0.067546,-0.047375
ZOES,2018-03-31,0.000000,0.005420,-0.038117,0.027701,0.025046,-0.017982,0.005420,-0.038117,0.027701,0.025046,-0.017982
ZTS,2018-03-31,0.012367,0.032888,0.043517,0.042527,0.030657,0.036608,0.020521,0.031150,0.030160,0.018290,0.024241
