# The Old Way
## Import Packages

In [2]:
import datetime as dt
import pandas as pd
import numpy as np
from pandas_datareader import data
import statsmodels.formula.api as sm

## Load Universe
I created a CSV listing all stocks included in SPDR’s sector funds as of January 24th, 2016. It is loaded as a pandas dataframe. In order to make calls to Yahoo, all periods are replaced with hyphens.

We also pull all sector tickers and SPY as a proxy for the market as a whole.

In [3]:
# Load the universe of symbols; the 'symbol' column becomes the index.
universe = pd.read_csv('inputs/universe.csv', index_col='symbol')
# Replace periods with hyphens so the symbols can be used in calls to Yahoo.
# A list is built explicitly because on Python 3 a bare map() is a one-shot
# iterator, which cannot be assigned as a DataFrame index.
universe.index = [symbol.replace('.', '-') for symbol in universe.index]

## Pull Price History

Price history is pulled from Yahoo and concatenated into a single multi-index dataframe. Stocks were pulled from January 1st, 2010 through December 31st, 2015.

In [4]:
def pull_universe(uni, sdate, edate):
    '''Download price history for every symbol in a universe.

    Each symbol in ``uni.index`` is requested through ``data.DataReader``
    with ``data_source='yahoo'``. Every downloaded frame is tagged with a
    ``symbol`` column and the matching ``sector`` value from ``uni``, and all
    frames are stacked with a single ``pd.concat``.

    Parameters
    ----------
    uni : DataFrame indexed by symbol, with a ``sector`` column.
    sdate, edate : start and end of the history window; passed unchanged to
        ``data.DataReader`` as ``start`` and ``end``.

    Returns
    -------
    DataFrame holding the stacked price frames plus ``symbol`` and
    ``sector`` columns. If nothing could be retrieved (empty universe or
    every download failed) an empty frame with just those two columns is
    returned.

    Notes
    -----
    A symbol whose download raises ``IOError`` is skipped (best effort).
    This applies to the first symbol as well as the rest. One warning
    listing the skipped symbols is issued at the end.
    '''
    import warnings

    frames = []
    skipped = []
    for tic in uni.index:
        # Only the download is guarded; tagging a frame cannot raise IOError.
        try:
            df_new = data.DataReader(tic, data_source='yahoo', start=sdate, end=edate)
        except IOError:
            skipped.append(tic)
            continue
        df_new['symbol'] = tic
        df_new['sector'] = uni.loc[tic, 'sector']
        frames.append(df_new)

    if skipped:
        warnings.warn('No price history retrieved for: ' + ', '.join(map(str, skipped)))

    # pd.concat refuses an empty list, so handle "nothing retrieved" here.
    if not frames:
        return pd.DataFrame(columns=['symbol', 'sector'])

    # One concat at the end instead of re-copying the growing frame per symbol.
    return pd.concat(frames)

In [5]:
# Full run: every symbol in the universe, 2010-01-01 through 2015-12-31.
# For a quick trial, pass uni=universe.iloc[:14, ] instead, which covers the
# first 3 securities plus all sectors.
prices = pull_universe(
    uni=universe,
    sdate=dt.datetime(2010, 1, 1),
    edate=dt.datetime(2015, 12, 31),
)

In [6]:
# Show the first few rows to sanity-check what was downloaded.
prices.head()

Unnamed: 0_level_0,Open,High,Low,Close,Volume,Adj Close,symbol,sector
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2010-01-04,26.450001,26.780001,26.450001,26.67,8465900,22.542255,XLP,
2010-01-05,26.690001,26.719999,26.540001,26.68,11805400,22.550708,XLP,
2010-01-06,26.65,26.690001,26.540001,26.66,9267000,22.533803,XLP,
2010-01-07,26.74,26.74,26.51,26.66,6951500,22.533803,XLP,
2010-01-08,26.6,26.6,26.42,26.57,5437400,22.457732,XLP,


## Adjust and Calculate Returns

In [7]:
def adjust_prices(df):
    '''Return a dividend/split-adjusted copy of a raw price frame.

    The adjustment factor for a row is the previous row's
    ``Adj Close / Close`` within the same ``symbol``. ``Open``, ``High``,
    ``Low`` and ``Close`` are multiplied by that factor; ``Volume`` is passed
    through unchanged.

    Parameters
    ----------
    df : DataFrame with ``sector``, ``symbol``, ``Open``, ``High``, ``Low``,
        ``Close``, ``Volume`` and ``Adj Close`` columns. It is not modified.

    Returns
    -------
    New DataFrame with columns ``sector``, ``symbol``, ``open``, ``high``,
    ``low``, ``close``, ``volume``. The first row of every symbol has NaN
    prices because no previous row exists to take the factor from.
    '''
    by_symbol = df.groupby('symbol')
    # NOTE(review): the factor is lagged one row (shift(1)), so each day is
    # scaled by the prior day's ratio. Kept as in the original; confirm this
    # lag is intended rather than using the same-day ratio.
    adj_factor = by_symbol['Adj Close'].shift(1) / by_symbol['Close'].shift(1)

    # Work on a copy so the caller's frame is left untouched and the column
    # rename below does not act on a view of it.
    adjusted = df[['sector', 'symbol', 'Open', 'High', 'Low', 'Close', 'Volume']].copy()
    for column in ('Open', 'High', 'Low', 'Close'):
        adjusted[column] = adjusted[column] * adj_factor

    adjusted.columns = ['sector', 'symbol', 'open', 'high', 'low', 'close', 'volume']
    return adjusted

def add_returns(df):
    '''Add log-return columns to an adjusted price frame.

    Parameters
    ----------
    df : DataFrame with ``symbol``, ``open`` and ``close`` columns. The new
        columns are added to this frame in place.

    Returns
    -------
    The same DataFrame, with three extra columns:

    * ``ret_cc`` - log(close / previous close of the same symbol)
    * ``ret_oc`` - log(close / open)
    * ``ret_co`` - log(open / previous close of the same symbol)

    ``ret_cc`` and ``ret_co`` are NaN on the first row of each symbol.
    '''
    # Keep the previous close in a local Series rather than a temporary
    # column; this removes the need for a positional-axis drop(), which
    # pandas 2.0 no longer accepts.
    last_close = df.groupby('symbol')['close'].shift(1)
    df['ret_cc'] = np.log(df['close'] / last_close)
    df['ret_oc'] = np.log(df['close'] / df['open'])
    df['ret_co'] = np.log(df['open'] / last_close)
    return df

# Order matters: adjust_prices renames the columns to lower case, and
# add_returns reads those lower-case 'open' / 'close' columns.
prices = adjust_prices(prices)
prices = add_returns(prices)

## Beta Lags
In its simplest form, the capital asset pricing model (CAPM) predicts that a security's expected return over a given period of time is equal to the return of the market times a beta parameter (usually set by linear regression analysis). Modifications of this model include:

1. Allowing for "excess return" (alpha)
1. Utilizing "excess return," i.e., subtracting the risk free rate from all returns.
1. Adding additional factors for the return of a stock's industry.
1. Considering financial indicators of the health of the target company (price book ratios, market cap size, etc).

When valuing small companies, many professional valuators believe that a "beta lag" exists. That is, they believe that last period's market return impacts this period's security return. This effect is claimed to be more prevalent in smaller, less liquid securities.

If this is true, it should be trivial to develop strategies which take advantage of this effect.

In [8]:
def make_analysis_df(tic, history, stocks, market='SPY'):
    '''Line up a security's returns with its sector and market returns.

    Parameters
    ----------
    tic : symbol of the security to analyse.
    history : DataFrame with ``symbol`` and ``ret_cc`` columns whose index
        is named ``Date`` (the merge key used below).
    stocks : DataFrame indexed by symbol with a ``sector`` column; the value
        for ``tic`` is the symbol whose returns are used as the sector series.
    market : symbol whose returns are used as the market series. Defaults to
        ``'SPY'``.

    Returns
    -------
    DataFrame with columns ``Date``, ``security_return``, ``sector_return``
    and ``market_return``. Rows follow the security's dates; sector and
    market values are left-joined, so dates they lack come back as NaN.
    '''
    def _returns_for(symbol, column):
        # Pull one symbol's close-to-close returns as a (Date, column) frame.
        selected = history.loc[history['symbol'] == symbol, 'ret_cc']
        return selected.to_frame(column).reset_index()

    sector = stocks.loc[tic, 'sector']
    analysis_df = _returns_for(tic, 'security_return')
    for symbol, column in ((sector, 'sector_return'), (market, 'market_return')):
        analysis_df = pd.merge(left=analysis_df, right=_returns_for(symbol, column),
                               on='Date', how='left')
    return analysis_df


def simple_beta(stocks, history):
    '''Regress each stock's returns on its sector and market returns.

    For every symbol in ``history`` that has a non-null ``sector``, an OLS
    model ``security_return ~ sector_return + market_return`` is fitted on
    the frame built by ``make_analysis_df``.

    Parameters
    ----------
    stocks : DataFrame indexed by symbol with a ``sector`` column. The
        columns ``alpha``, ``beta_sector`` and ``beta_market`` are added to
        it in place (initialised to NaN, then filled per symbol).
    history : DataFrame with ``symbol``, ``sector`` and ``ret_cc`` columns.

    Returns
    -------
    ``stocks``, the same object that was passed in, with the three
    coefficient columns populated for every analysed symbol.
    '''
    has_sector = pd.notnull(history['sector'])
    # unique() drops duplicates while keeping first-seen order, so the loop
    # runs in the same order on every execution.
    to_analyze = history.loc[has_sector, 'symbol'].unique()

    for column in ('alpha', 'beta_sector', 'beta_market'):
        stocks[column] = np.nan

    for ticker in to_analyze:
        analysis_df = make_analysis_df(ticker, history, stocks)
        fit = sm.ols(formula='security_return ~ sector_return + market_return',
                     data=analysis_df).fit()
        # params is indexed by term label, so positional access must go
        # through .iloc; plain [0] on a labelled Series is deprecated.
        # Positions follow the formula: intercept, sector term, market term.
        stocks.loc[ticker, 'alpha'] = fit.params.iloc[0]
        stocks.loc[ticker, 'beta_sector'] = fit.params.iloc[1]
        stocks.loc[ticker, 'beta_market'] = fit.params.iloc[2]

    return stocks

# Fit the regression for every stock. simple_beta adds the coefficient
# columns to `universe` itself and returns that same frame.
test_beta = simple_beta(universe, prices)

In [9]:
# Save the fitted coefficients; the path is relative to the working directory.
test_beta.to_csv('simple_beta_results.csv')

# Let's do that all again with the package
## Import Packages

In [10]:
import wmcm.functions as wmf

In [11]:
# NOTE(review): greg() is defined in wmcm.functions, which is not shown
# here; judging by the recorded output it prints a message. Confirm this
# call is still wanted.
wmf.greg()

Fuck off!
