In [177]:
import pandas as pd
import numpy as np
import sys

In [178]:
sys.path.append('/Users/christianjunge/OneDrive/AC297Capstone/AC297_beta/work/Friedman')

In [179]:
import Quandl

In [180]:
from get_beta import get_beta

## Copied in from Haosu's notebook:

In [181]:
#Global Variables
SECTORS = ['Basic Industries', 'Capital Goods', 'Consumer Durables', 'Consumer Non-Durables','Consumer Services', 
           'Energy', 'Finance', 'Health Care', 'Miscellaneous', 'Public Utilities', 'Technology', 'Transportation', 'n/a']
MARKETS = ['NYSE', 'NASDAQ', 'AMEX']

In [182]:
def getStockUniverse(directory = '../../data/', NYSE = True, NASDAQ = True, AMEX = True):
    """
    Reads in the stored stock data, including symbol, Name, Cap, Sector, Industry from NYSE, NASDAQ and AMEX
    
    Returns
    -------
    A pandas frame concatenating all the stock universe from NYSE, NASDAQ and AMEX
    """
    nyse = pd.read_csv(directory+'companylist_nyse.csv')
    nyse['Market'] = 'NYSE'
    nasdaq = pd.read_csv(directory+'companylist_nasdaq.csv')
    nasdaq['Market'] = 'NASDAQ'
    amex = pd.read_csv(directory+'companylist_amex.csv')
    amex['Market'] = 'AMEX'
    
    allstocks = pd.concat([nyse,nasdaq,amex])
    
    # The following parses the market cap into more readable fashion
    # eg. $1.15B = 1150, n/a = -1
    def parseMarketCap(allstocks):
        cap = allstocks['MarketCap'].values
        def computeCap(s):
            if s=='n/a':
                return -1
            elif s[-1:]=='B':
                return float(s[1:-1])*1000
            elif s[-1:]=='M':
                return float(s[1:-1])
        allstocks['MarketCap'] = map(computeCap, cap)
    
    allstocks = allstocks.drop('Unnamed: 8', axis = 1)
    parseMarketCap(allstocks)
    
    allstocks = allstocks.drop_duplicates('Name')    #Drop duplicates, eg. GOOG instead of GOOGL

    return allstocks.sort_values(by = 'MarketCap', ascending = False).reset_index().drop('index', axis=1)

In [183]:
def getSamplePortfolio(stock_universe, n = 10, capThreshold = 2000,
                       sector = None, descendingByCap = False, market = ['NYSE', 'NASDAQ']):
    """
    Get a sample portfolio from the stock universe, preferably a generated one from getStockUniverse.
    With specified parameters.
    
    Parameters
    ----------
    stock_universe : pandas frame with symbols, Cap and Sector
    n : number of samples
    capThreshold : sample should be larger than this threshold, initially set ot 2000
    sector : list of specific sectors to sample from, default to None with no preference
    DescendingByCap: Sample the largest caps, otherwise random sampling.
    market : specific market to sample from, NYSE, NASDAQ and AMEX, default is NYSE and NASDAQ.
    
    Returns
    ------
    A list of symbols
    """
    
    reduced_universe = stock_universe
    
    if capThreshold != None:# and line['MarketCap'].value <capThreshold:
        reduced_universe = reduced_universe[reduced_universe['MarketCap']>=capThreshold]
        
    if sector != None:
        reduced_universe = reduced_universe[map(lambda x: x in sector, reduced_universe['Sector'].values)]
        
    if market != None:
        reduced_universe = reduced_universe[map(lambda x: x in market, reduced_universe['Market'].values)]
    
    if len(reduced_universe)<n:
        print '*******Warning: insufficient candidates, reduce number of samples'
        n = len(reduced_universe)
    
    if descendingByCap:
        # getting rid of redundant quotes, eg. goog and googl
        reduced_universe = reduced_universe.sort_values(by = 'MarketCap', ascending=False)[:n]

    else:
        reduced_universe = reduced_universe.sample(n)
    
    print 'Top 5 rows of selected portfolio:'
    print reduced_universe[['Symbol', 'Name', 'MarketCap', 'Sector', 'industry', 'Market']].head()
    
    return reduced_universe.Symbol.values

In [184]:
def getStocks(symbols, trim_start="2005-01-01", trim_end="2015-12-31"):
    """
    Paramters
    ---------
    symbols: list of string symbols. eg. ['AAPL', 'KCG']
    
    Returns
    -------
    a dictionary of pandas data frames containing all the close prices.
    """
    print symbols
    dfs = {}
    for symbol in symbols:
        flag = False
        #Get data from either Yahoo or Google 
        for source in ['YAHOO/', 'WIKI/', 'GOOG/NYSE_', 'GOOG/NASDAQ_', 'GOOG/AMEX_']:
            try:
                data = Quandl.get(source+symbol,authtoken='c2365v55yoZrWKxbVxwK',
                                  trim_start = trim_start, trim_end = trim_end)
                flag = True
                break
            except:
                pass
        
        if not flag:
            print 'Retrieving ' + symbol + ' unsuccessful. - No symbol. Suggesting:******'
            print Quandl.search(symbol)
            break
            
        else:
            try:
                data = data[['Adjusted Close']]
                data.columns = ['Close']
            except:
                try:
                    data = data[['Close']]
                except:
                    print 'Retrieving ' + symbol + ' unsuccessful. - No close data.'
                    data = None

        dfs[symbol] = data
        
    return dfs

In [185]:
stock_universe = getStockUniverse()

In [186]:
techgiants = getSamplePortfolio(stock_universe, sector = ['Technology'], descendingByCap = True)


Top 5 rows of selected portfolio:
   Symbol                   Name  MarketCap      Sector  \
0    AAPL             Apple Inc.     537330  Technology   
1    GOOG          Alphabet Inc.     485310  Technology   
2    MSFT  Microsoft Corporation     405750  Technology   
4      FB         Facebook, Inc.     307170  Technology   
25   ORCL     Oracle Corporation     155280  Technology   

                                           industry  Market  
0                            Computer Manufacturing  NASDAQ  
1   Computer Software: Programming, Data Processing  NASDAQ  
2           Computer Software: Prepackaged Software  NASDAQ  
4   Computer Software: Programming, Data Processing  NASDAQ  
25          Computer Software: Prepackaged Software    NYSE  


In [187]:
techgiants_p = getStocks(techgiants)

['AAPL' 'GOOG' 'MSFT' 'FB' 'ORCL' 'INTC' 'CSCO' 'IBM' 'TSM' 'DCM']


## It would be better to return the portfolio as a data frame rather than a dictionary of separate data frames.



In [188]:
techgiants_p['AAPL'].values

array([[   4.186585],
       [   4.229582],
       [   4.266626],
       ..., 
       [ 108.153132],
       [ 106.740798],
       [ 104.691918]])

In [189]:
techgiants_p.keys()

['GOOG', 'TSM', 'IBM', 'INTC', 'AAPL', 'CSCO', 'ORCL', 'FB', 'DCM', 'MSFT']

In [190]:
ba = techgiants_p['AAPL']
ba = ba.Close
ba.index

DatetimeIndex(['2005-01-03', '2005-01-04', '2005-01-05', '2005-01-06',
               '2005-01-07', '2005-01-10', '2005-01-11', '2005-01-12',
               '2005-01-13', '2005-01-14',
               ...
               '2015-12-17', '2015-12-18', '2015-12-21', '2015-12-22',
               '2015-12-23', '2015-12-24', '2015-12-28', '2015-12-29',
               '2015-12-30', '2015-12-31'],
              dtype='datetime64[ns]', name=u'Date', length=2769, freq=None)

In [191]:
for key in techgiants_p.keys():
    print len(techgiants_p[key])

2769
2784
2769
2769
2769
2769
2769
911
2784
2769


### This is a problem in this method: the returns are all different sizes.  This will make these hard to work with.

### For the purpose of this development, I'll just use the techgiants that have matching length as a market to hedge against.  We will have to build in ways to deal with missing data.  

In [192]:
market_names=[]
for key in techgiants_p.keys():
    if len(techgiants_p[key])==2769: 
        market_names.append(key)

In [193]:
market_names

['GOOG', 'IBM', 'INTC', 'AAPL', 'CSCO', 'ORCL', 'MSFT']

In [194]:
market = np.zeros((len(techgiants_p['AAPL']),len(market_names)))
for i,key in enumerate(market_names):
    market[:,i] = techgiants_p[key].values.flatten()
    

In [195]:
market

array([[ 101.253921,   79.099823,   16.725715, ...,   16.975329,
          12.453647,   20.887241],
       [  97.15301 ,   78.250155,   16.392216, ...,   16.307562,
          12.128609,   20.965354],
       [  96.658507,   78.088317,   16.232715, ...,   16.316349,
          12.165756,   20.918487],
       ..., 
       [ 776.599976,  138.366653,   35.130791, ...,   27.555284,
          36.887906,   56.14687 ],
       [ 771.      ,  137.9311  ,   34.684721, ...,   27.317139,
          36.768396,   55.908583],
       [ 758.880005,  136.22849 ,   34.149431, ...,   26.949999,
          36.379998,   55.084498]])

### I'll just use a simple unweighted mean of these stocks as an example hedging market.

In [196]:
market = np.mean(market, axis=1)

In [197]:
market = pd.DataFrame(market,index = techgiants_p['AAPL'].Close.index)

In [198]:
portfolio = pd.DataFrame(techgiants_p['AAPL'].values.flatten(), index = techgiants_p['AAPL'].Close.index)

In [218]:
beta = get_beta(portfolio, market)

In [219]:
betas = beta[1]

### Notice that betas is shorter than the market and portfolios by the length of the regression window.  So below, I pass in the time after the 60 day window.  

In [220]:
len(market.ix[60:,:])

2709

In [221]:
betas = pd.DataFrame(betas, index = portfolio.index[60:])

In [222]:
len(betas)

2709

In [223]:
def beta_hedging(betas, market, portfolio):
    """
    Performs beta-hedging, given a portfolio, a market, and beta forecasts.
    
    Parameters:
    -----------
    betas: Pandas Series
        Beta forecasts matching length and frequency of the provided market and portfolio.
    market: Pandas Series
        Asset values of the market that is used to hedge the portfolio.
    portfolio: Pandas Series
        Asset values of the portfolio that is being traded.
    
    Returns:
    --------
    returns: Pandas Series
        Daily returns of the hedged portfolio
    """
    assert len(betas) == len(market) == len(portfolio), "Inputs must be same size"
    
    betasvals = betas.values.flatten()
    marketvals = market.values.flatten()
    portfoliovals = portfolio.values.flatten()
    
    length = len(betas)
    datestamps = betas.index
    returns = np.zeros(length-1)
    
    for day in range(length - 1):
        # Today and tomorrow's prices
        portfolio_buyprice = portfoliovals[day]
        portfolio_sellprice = portfoliovals[day+1]
        market_buyprice = marketvals[day]
        market_sellprice = marketvals[day+1]
        
        # Ratio used to correct for difference in price between market and portfolio
        price_ratio = portfolio_buyprice/market_buyprice
        
        # Daily returns of the portfolio and the hedge
        portfolio_price_change = portfolio_sellprice - portfolio_buyprice
        hedge_price_change = betasvals[i]*price_ratio*(market_sellprice-market_buyprice)
        
        # Daily returns are the difference between the portfolio and the hedge
        returns[day] = portfolio_price_change - hedge_price_change
    
    return pd.DataFrame(returns, datestamps[1:])
    

### Now we can compare the returns from the hedged and unhedged portfolio

In [224]:
hedged_returns = beta_hedging(betas, market.ix[60:,:], portfolio.ix[60:,:])

In [225]:
portfolio_returns = np.diff(portfolio.values.flatten())

### Total returns

In [226]:
np.sum(hedged_returns)

0    109.712116
dtype: float64

In [227]:
np.sum(portfolio_returns)

100.50533299999999

### Volatility of returns

In [228]:
np.std(hedged_returns)

0    1.035375
dtype: float64

In [229]:
np.std(portfolio_returns)

0.96525082598780876