In [1]:
import datetime as dt
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import statsmodels.api as sm
import pandas.io.data as web

The pandas.io.data module is moved to a separate package (pandas-datareader) and will be removed from pandas in a future version.
After installing the pandas-datareader package (https://github.com/pydata/pandas-datareader), you can change the import ``from pandas.io import data, wb`` to ``from pandas_datareader import data, wb``.


In [2]:
# step 1: Range Selection
st = dt.datetime(2000,1,1)
en = dt.datetime(2013,1,1)
sp500_tickers_lil = ['AA','AAPL','ABC','ABT','ADBE','ADI','ADM','ADP','ADSK','AEE']
sp500_tickers_joey = ['AA','AAPL','ABC','ABT','ADBE','ADI','ADM','ADP','ADSK','AEE','AEP','AES','AET','AFL','AGN','AIG','AIV','AIZ','AKAM','ALL','AMAT','AMGN','AMP','AMT','AMZN','AN','ANTM','AON','APA','APC','APD','AVB','AVY','AXP','AZO','BA','BAC','BAX','BBBY','BBT','BBY','BCR','BDX','BEN','BHI','BIIB','BK','BLL','BMY','BRCM','BSX','BXP','C','CA','CAG','CAH','CAT','CB','CBG','CBS','CCE','CCL','CELG','CHK','CHRW','CI','CINF','CL','CLX','CMA','CMCSA','CME','CMI','CMS','CNP','CNX','COF','COH','COL','COP','COST','CPB','CSCO','CSX','CTAS','CTL','CTSH','CTXS','CVS','CVX','D','DD','DE','DFS','DGX','DHI','DHR','DIS','DOV','DOW','DRI','DTE','DUK','DVN','EA','EBAY','ECL','ED','EFX','EIX','EL','EMC','EMN','EMR','EOG','EQR','ESRX','ETFC','ETN','ETR','EXC','EXPD','EXPE','F','FCX','FDX','FE','FIS','FISV','FITB','FLR','FOXA','FTR','GAS','GD','GE','GILD','GIS','GLW','GME','GOOGL','GPC','GPS','GS','GT','GWW','HAL','HAR','HAS','HBAN','HD','HES','HIG','HOG','HON','HOT','HPQ','HRB','HST','HSY','HUM','IBM','ICE','IFF','INTC','INTU','IP','IPG','ITW','JCI','JEC','JNJ','JNPR','JPM','JWN','K','KEY','KIM','KLAC','KMB','KO','KR','KSS','L','LB','LEG','LEN','LH','LLL','LLTC','LLY','LM','LMT','LNC','LOW','LUK','LUV','M','MAR','MAS','MAT','MCD','MCHP','MCK','MCO','MDLZ','MDT','MET','MHFI','MKC','MMC','MMM','MO','MON','MRK','MRO','MS','MSFT','MSI','MTB','MU','MUR','MYL','NBL','NEE','NEM','NI','NKE','NOC','NOV','NSC','NTAP','NTRS','NUE','NVDA','NWL','OMC','ORCL','OXY','PAYX','PBI','PCAR','PCG','PCL','PCP','PDCO','PEG','PEP','PFE','PFG','PG','PGR','PH','PHM','PKI','PLD','PNC','PNW','POM','PPG','PPL','PRU','PSA','PX','QCOM','R','RAI','RF','RHI','RL','ROK','RRC','RTN','SBUX','SCHW','SE','SEE','SHW','SLB','SNA','SNDK','SO','SPG','SPLS','SRE','STI','STJ','STT','STZ','SWK','SYK','SYMC','SYY','T','TAP','TDC','TE','TGT','THC','TIF','TJX','TMK','TMO','TROW','TRV','TSN','TSO','TSS','TWX','TXN','TXT','UNH','UNM','UNP','UPS','USB','UTX','VAR','VFC','VIAB','VLO','VMC','VNO','VRSN','VZ','WAT','WBA','WFC','WFM','WHR','WM','WMB','WMT','WU','WY','WYN','XEL','XL','XLNX','XOM','XRX','YHOO','YUM','ZBH','ZION']

#options
jan = True

In [3]:
regr_coefs = []
k = 0

for ticker in sp500_tickers_joey:
    try:
        data = web.get_data_yahoo(ticker, start=st, end=en)
        k += 1
    except:
        print(ticker, "error")
        continue
        
    
    mdata = data.resample("M").pct_change().dropna()
    Y = mdata["Close"].values - np.mean(mdata["Close"].values)
    if jan:
        X = np.zeros([len(mdata["Close"].values),16]) # 1 constant + 12*mo + 1 2y + 1 3y + January
        X[0::12,-1] = 1
    else:
        X = np.zeros([len(mdata["Close"].values),15]) # 1 constant + 12*mo + 1 2y + 1 3y
    
    # constant
    X[:,-2] = 1
    
    X = pd.DataFrame(X)
    
    # monthly lags
    for x in range(12):
        X[x] = pd.Series(mdata["Close"]).shift(1+x).values
    
    # 24m lags
    X[12] = pd.Series(mdata["Close"]).shift(24).values
    
    # 36m lags
    X[13] = pd.Series(mdata["Close"]).shift(36).values
    
    # truncate
    X = X.ix[36:]
    Y = pd.DataFrame(Y).ix[36:]
    
    
    #print("Ticker", ticker, "Regr. Start Date", mdata["Close"].index[36],"ndata",len(Y))
        
    result = sm.OLS(Y,X).fit()
    #print(result.summary())
    regr_coefs.append(result.params)
    
    '''
    fig = plt.figure(figsize=(12,8))
    ax1 = fig.add_subplot(211)
    fig = sm.graphics.tsa.plot_acf(Y, lags=40, ax=ax1)
    ax2 = fig.add_subplot(212)
    fig = sm.graphics.tsa.plot_pacf(Y, lags=40, ax=ax2)
    '''
    
#print("done")
avg_coefs = np.mean(np.array(regr_coefs),axis=0)
labels = ["1m","2m","3m","4m","5m","6m","7m","8m","9m","10m","11m","12m","24m","36m","const","Jan"]
for x in zip(labels, avg_coefs):
    print(x)

('1m', 0.25298567255645865)
('2m', -0.12886536167593118)
('3m', 0.019203996926985011)
('4m', 0.028682508989952348)
('5m', -0.0066884204332014778)
('6m', -0.10153536136157984)
('7m', 0.025687100756060847)
('8m', -0.010705398522536751)
('9m', -0.032641524657629657)
('10m', -0.042552908843307105)
('11m', 0.0042183541030977626)
('12m', -0.044232746890859702)
('24m', -0.030952224152683578)
('36m', -0.014879969341961664)
('const', 0.0028323358461219473)
('Jan', -0.0060362359252851142)


In [4]:
print(list(avg_coefs))

[0.25298567255645865, -0.12886536167593118, 0.019203996926985011, 0.028682508989952348, -0.0066884204332014778, -0.10153536136157984, 0.025687100756060847, -0.010705398522536751, -0.032641524657629657, -0.042552908843307105, 0.0042183541030977626, -0.044232746890859702, -0.030952224152683578, -0.014879969341961664, 0.0028323358461219473, -0.0060362359252851142]


In [16]:
# Out of Sample Data
dataset = sp500_tickers_joey[:]

# take list of stocks
st = dt.datetime(2010,1,1)
en = dt.datetime(2016,1,1)
#ours
#regr_coef = np.array([0.25298567255645865, -0.12886536167593118, 0.019203996926985011, 0.028682508989952348, -0.0066884204332014778, -0.10153536136157984, 0.025687100756060847, -0.010705398522536751, -0.032641524657629657, -0.042552908843307105, 0.0042183541030977626, -0.044232746890859702, -0.030952224152683578, -0.014879969341961664, 0.0028323358461219473, -0.0060362359252851142])
# jegadeesh
regr_coef = np.array([-.0923,-.0073,.0208,.0154,.0148,.0205,.0087,.0065,.0178,.0151,.0224,.0339,.0171,.0187,-.0033,.0135])
#regr_coef = -np.zeros(16)

exp_ret = np.zeros([12*3,len(dataset)])
act_ret = np.zeros([12*3,len(dataset)])

t = 0

for ticker in dataset:
    # calculate returns
    print(ticker)
    
    try:
        data = web.get_data_yahoo(ticker, start=st, end=en)
    except:
        print(ticker, "error")
        continue
    
    mdata = data.resample("M").pct_change().dropna()
    
    adj_close = mdata["Adj Close"]
      
    k = 0
    for i in range(len(adj_close)):
        if adj_close.index[i] > dt.datetime(2013,1,1):
            #print(adj_close.index[i])
            x_row = np.zeros(16) # 1-12 months, 24m, 36m, 1 constant, 1 january
            x_row[0:12] = adj_close[i-12:i].values[::-1]
            x_row[12] = adj_close[i-24]
            x_row[13] = adj_close[i-36]
            x_row[14] = 1
            if adj_close.index[i].month == 1:
                x_row[15] = 1
                
            exp_ret[k,t] = np.dot(x_row, regr_coef)
            act_ret[k,t] = adj_close[i]
            k += 1
    
    t += 1  # count ticker    

exp_ret_df = pd.DataFrame(data=exp_ret, index=pd.date_range(start=dt.datetime(2013,1,1), end=dt.datetime(2016,1,1),freq='M'),columns = dataset)
act_ret_df = pd.DataFrame(data=act_ret, index=pd.date_range(start=dt.datetime(2013,1,1), end=dt.datetime(2016,1,1),freq='M'),columns = dataset)
#print(exp_ret_df)

dates = pd.date_range(start=dt.datetime(2013,1,1), end=dt.datetime(2016,1,1),freq='M')
frets = []
for index, row in exp_ret_df.iterrows():
    sorted_row = row.order(ascending=False)
    pret = 0
    for x in range(30):
        #print(sorted_row[x], act_ret_df.loc[index,sorted_row.index[x]])
        pret += act_ret_df.loc[index,sorted_row.index[x]]
    pret = 1.0+pret/30.0
    frets.append(pret)
    print("date", index)
    #print(sorted_row[:5].index.values)
    print("period return", pret)
    print("-------")
print("avg return", np.prod(frets)-1)

AA
AAPL
ABC
ABT
ADBE
ADI
ADM
ADP
ADSK
AEE
AEP
AES
AET
AFL
AGN
AIG
AIV
AIZ
AKAM
ALL
AMAT
AMGN
AMP
AMT
AMZN
AN
ANTM
AON
APA
APC
APD
AVB
AVY
AXP
AZO
BA
BAC
BAX
BBBY
BBT
BBY
BCR
BDX
BEN
BHI
BIIB
BK
BLL
BMY
BRCM
BSX
BXP
C
CA
CAG
CAH
CAT
CB
CBG
CBS
CCE
CCL
CELG
CHK
CHRW
CI
CINF
CL
CLX
CMA
CMCSA
CME
CMI
CMS
CNP
CNX
COF
COH
COL
COP
COST
CPB
CSCO
CSX
CTAS
CTL
CTSH
CTXS
CVS
CVX
D
DD
DE
DFS
DGX
DHI
DHR
DIS
DOV
DOW
DRI
DTE
DUK
DVN
EA
EBAY
ECL
ED
EFX
EIX
EL
EMC
EMN
EMR
EOG
EQR
ESRX
ETFC
ETN
ETR
EXC
EXPD
EXPE
F
FCX
FDX
FE
FIS
FISV
FITB
FLR
FOXA
FTR
GAS
GD
GE
GILD
GIS
GLW
GME
GOOGL
GPC
GPS
GS
GT
GWW
HAL
HAR
HAS
HBAN
HD
HES
HIG
HOG
HON
HOT
HPQ
HRB
HST
HSY
HUM
IBM
ICE
IFF
INTC
INTU
IP
IPG
ITW
JCI
JEC
JNJ
JNPR
JPM
JWN
K
KEY
KIM
KLAC
KMB
KO
KR
KSS
L
LB
LEG
LEN
LH
LLL
LLTC
LLY
LM
LMT
LNC
LOW
LUK
LUV
M
MAR
MAS
MAT
MCD
MCHP
MCK
MCO
MDLZ
MDT
MET
MHFI
MKC
MMC
MMM
MO
MON
MRK
MRO
MS
MSFT
MSI
MTB
MU
MUR
MYL
NBL
NEE
NEM
NI
NKE
NOC
NOV
NSC
NTAP
NTRS
NUE
NVDA
NWL
OMC
ORCL
OXY
PAYX
PBI
PCAR
PCG
PCL
PCP
PDCO
PEG
PEP




In [None]:
plt.plot(np.cumprod(frets)-1)
plt.title('Cumulative Returns')
plt.xlabel('Months after 1/1/2013')
plt.ylabel('Return %')
plt.show()

In [17]:
#ourfrets = frets[:]
jegfrets = frets[:]
#mktfrets = frets[:]
#jegfretsaug = frets[:]

In [None]:
plt.plot(np.cumprod(ourfrets)-1)
plt.plot(np.cumprod(jegfrets)-1)
plt.plot(np.cumprod(mktfrets)-1)
plt.title('Cumulative Returns')
plt.xlabel('Months after 1/1/2013')
plt.ylabel('Return')
plt.show()

In [None]:
# Autocorrelation

data = web.get_data_yahoo("AA", start=st, end=en)
mdata = data.resample("M").pct_change().dropna()
Y = mdata["Close"].values - np.mean(mdata["Close"].values)

fig = plt.figure(figsize=(12,8))
ax1 = fig.add_subplot(211)
fig = sm.graphics.tsa.plot_acf(Y, lags=40, ax=ax1)
ax2 = fig.add_subplot(212)
fig = sm.graphics.tsa.plot_pacf(Y, lags=40, ax=ax2)
plt.show()