In [7]:
import os

import numpy as np
import pandas as pd
import Quandl

In [133]:
# Global constants

# Sector labels; 'n/a' is listed as its own entry.
SECTORS = [
    'Basic Industries',
    'Capital Goods',
    'Consumer Durables',
    'Consumer Non-Durables',
    'Consumer Services',
    'Energy',
    'Finance',
    'Health Care',
    'Miscellaneous',
    'Public Utilities',
    'Technology',
    'Transportation',
    'n/a',
]

# Exchange names.
MARKETS = [
    'NYSE',
    'NASDAQ',
    'AMEX',
]

In [129]:
def getStockUniverse(directory = '../../data/', NYSE = True, NASDAQ = True, AMEX = True):
    """
    Reads in the stored stock data, including Symbol, Name, MarketCap, Sector and industry,
    from the NYSE, NASDAQ and AMEX company lists.

    Parameters
    ----------
    directory : folder holding companylist_nyse.csv, companylist_nasdaq.csv and
        companylist_amex.csv (a trailing path separator is optional)
    NYSE, NASDAQ, AMEX : set to False to leave that exchange out of the universe

    Returns
    -------
    A pandas frame concatenating the selected exchanges, with:
      * a 'Market' column naming the exchange each row was read from,
      * 'MarketCap' parsed into a number of millions (eg. $1.15B = 1150), or -1 when the
        value is missing or not in the "$<number>B" / "$<number>M" form,
      * one row per company Name (first occurrence kept),
      * rows sorted by MarketCap in descending order and re-indexed from 0.

    Raises
    ------
    ValueError : if NYSE, NASDAQ and AMEX are all False.
    """
    listings = [('NYSE', 'companylist_nyse.csv', NYSE),
                ('NASDAQ', 'companylist_nasdaq.csv', NASDAQ),
                ('AMEX', 'companylist_amex.csv', AMEX)]

    frames = []
    for market, filename, wanted in listings:
        if not wanted:
            continue
        # os.path.join copes with a directory given with or without a trailing separator
        frame = pd.read_csv(os.path.join(directory, filename))
        frame['Market'] = market
        frames.append(frame)

    if not frames:
        raise ValueError('Select at least one of NYSE, NASDAQ or AMEX')

    def computeCap(s):
        # Parses the market cap into a more readable number of millions,
        # eg. $1.15B = 1150, $12.5M = 12.5, anything else (n/a, NaN, unknown suffix) = -1
        try:
            suffix = s[-1:]
            amount = float(s[1:-1])    # strip the leading '$' and the trailing unit
        except (TypeError, ValueError):
            # TypeError: non-string cell (pandas may read "n/a" as NaN); ValueError: not a number
            return -1
        if suffix == 'B':
            return amount * 1000
        if suffix == 'M':
            return amount
        return -1

    allstocks = pd.concat(frames)
    # Some exports carry an empty trailing column; drop it only when it is present
    allstocks = allstocks.drop('Unnamed: 8', axis = 1, errors = 'ignore')
    # A list (not a lazy map object) so the assignment is positional on Python 2 and 3 alike
    allstocks['MarketCap'] = [computeCap(s) for s in allstocks['MarketCap'].values]

    allstocks = allstocks.drop_duplicates(subset = 'Name')    #Drop duplicates, eg. GOOG instead of GOOGL

    return allstocks.sort_values(by = 'MarketCap', ascending = False).reset_index(drop = True)

In [103]:
def getSamplePortfolio(stock_universe, n = 10, capThreshold = 2000,
                       sector = None, descendingByCap = False, market = ['NYSE', 'NASDAQ'],
                       random_state = None):
    """
    Get a sample portfolio from the stock universe, preferably one generated by getStockUniverse,
    filtered by the given parameters.

    Parameters
    ----------
    stock_universe : pandas frame with Symbol, Name, MarketCap, Sector, industry and Market columns
    n : number of samples; reduced (with a printed warning) when fewer candidates remain
    capThreshold : keep rows whose MarketCap is at least this value, initially set to 2000;
        None disables the filter
    sector : sector name or list of sector names to sample from, default None for no preference
    descendingByCap : take the n largest caps, otherwise sample n rows at random
    market : market name or list of market names to sample from (NYSE, NASDAQ, AMEX),
        default is NYSE and NASDAQ; None disables the filter
    random_state : seed (or numpy RandomState) forwarded to DataFrame.sample so a random
        portfolio can be reproduced; ignored when descendingByCap is True

    Returns
    -------
    A numpy array of symbols
    """
    reduced_universe = stock_universe

    if capThreshold is not None:
        reduced_universe = reduced_universe[reduced_universe['MarketCap'] >= capThreshold]

    if sector is not None:
        if isinstance(sector, str):
            # a bare name would otherwise be matched character-by-character / as a substring
            sector = [sector]
        reduced_universe = reduced_universe[reduced_universe['Sector'].isin(sector)]

    if market is not None:
        if isinstance(market, str):
            market = [market]
        reduced_universe = reduced_universe[reduced_universe['Market'].isin(market)]

    if len(reduced_universe) < n:
        print('*******Warning: insufficient candidates, reduce number of samples')
        n = len(reduced_universe)

    if descendingByCap:
        # largest n companies by market cap
        reduced_universe = reduced_universe.sort_values(by = 'MarketCap', ascending = False)[:n]
    else:
        reduced_universe = reduced_universe.sample(n, random_state = random_state)

    print('Top 5 rows of selected portfolio:')
    print(reduced_universe[['Symbol', 'Name', 'MarketCap', 'Sector', 'industry', 'Market']].head())

    return reduced_universe.Symbol.values

In [173]:
def getStocks(symbols, trim_start="2005-01-01", trim_end="2015-12-31", authtoken=None):
    """
    Download a close-price series for each symbol from Quandl.

    Each symbol is looked up under several dataset prefixes in turn (YAHOO, WIKI, then GOOG for
    NYSE / NASDAQ / AMEX); the first one that answers is used.

    Parameters
    ----------
    symbols : list of string symbols. eg. ['AAPL', 'KCG']
    trim_start, trim_end : date range (YYYY-MM-DD strings) forwarded to Quandl.get
    authtoken : Quandl API token; when None it is read from the QUANDL_AUTHTOKEN environment
        variable, and when that is unset too the request is sent without a token.
        The token is deliberately not stored in the notebook.

    Returns
    -------
    a dictionary mapping symbol to a single-column ('Close') pandas data frame.
    The 'Adjusted Close' column is used when present, otherwise 'Close'.
    A symbol whose data has neither column maps to None; a symbol that no source
    could provide is reported and left out of the dictionary.
    """
    if authtoken is None:
        authtoken = os.environ.get('QUANDL_AUTHTOKEN')
    query = {'trim_start': trim_start, 'trim_end': trim_end}
    if authtoken:
        query['authtoken'] = authtoken

    print(symbols)
    dfs = {}
    for symbol in symbols:
        data = None
        #Get data from either Yahoo or Google
        for source in ['YAHOO/', 'WIKI/', 'GOOG/NYSE_', 'GOOG/NASDAQ_', 'GOOG/AMEX_']:
            try:
                data = Quandl.get(source + symbol, **query)
                break
            except Exception:
                # this source does not have the symbol (or the request failed): try the next.
                # Exception rather than a bare except so Ctrl-C still interrupts the download.
                continue

        if data is None:
            print('Retrieving ' + symbol + ' unsuccessful. - No symbol. Suggesting:******')
            print(Quandl.search(symbol))
            # skip this symbol only; the remaining symbols are still fetched
            continue

        if 'Adjusted Close' in data.columns:
            data = data[['Adjusted Close']].rename(columns={'Adjusted Close': 'Close'})
        elif 'Close' in data.columns:
            data = data[['Close']]
        else:
            print('Retrieving ' + symbol + ' unsuccessful. - No close data.')
            data = None

        dfs[symbol] = data

    return dfs

---

### Example usage

In [130]:
# Load the stock universe with the default arguments (company lists read from '../../data/').
stock_universe = getStockUniverse()

In [131]:
# Preview only the first rows instead of rendering the entire universe.
stock_universe.head(10)

Unnamed: 0,Symbol,Name,LastSale,MarketCap,IPOyear,Sector,industry,Summary Quote,Market
0,AAPL,Apple Inc.,96.91,537330,1980,Technology,Computer Manufacturing,http://www.nasdaq.com/symbol/aapl,NASDAQ
1,GOOG,Alphabet Inc.,705.07,485310,2004,Technology,"Computer Software: Programming, Data Processing",http://www.nasdaq.com/symbol/goog,NASDAQ
2,MSFT,Microsoft Corporation,51.3,405750,1986,Technology,Computer Software: Prepackaged Software,http://www.nasdaq.com/symbol/msft,NASDAQ
3,XOM,Exxon Mobil Corporation,81.75,340320,,Energy,Integrated oil Companies,http://www.nasdaq.com/symbol/xom,NYSE
4,FB,"Facebook, Inc.",107.92,307170,2012,Technology,"Computer Software: Programming, Data Processing",http://www.nasdaq.com/symbol/fb,NASDAQ
5,GE,General Electric Company,29.39,297110,,Energy,Consumer Electronics/Appliances,http://www.nasdaq.com/symbol/ge,NYSE
6,JNJ,Johnson & Johnson,105.78,292690,,Health Care,Major Pharmaceuticals,http://www.nasdaq.com/symbol/jnj,NYSE
7,AMZN,"Amazon.com, Inc.",555.23,261430,1997,Consumer Services,Catalog/Specialty Distribution,http://www.nasdaq.com/symbol/amzn,NASDAQ
8,WFC,Wells Fargo & Company,48.07,245530,,Finance,Major Banks,http://www.nasdaq.com/symbol/wfc,NYSE
9,T,AT&T Inc.,37.13,228390,,Public Utilities,Telecommunications Equipment,http://www.nasdaq.com/symbol/t,NYSE


In [160]:
# Largest Technology companies by market cap; n, capThreshold and market keep their defaults.
techgiants = getSamplePortfolio(stock_universe, sector = ['Technology'], descendingByCap = True)

Top 5 rows of selected portfolio:
   Symbol                   Name  MarketCap      Sector  \
0    AAPL             Apple Inc.     537330  Technology   
1    GOOG          Alphabet Inc.     485310  Technology   
2    MSFT  Microsoft Corporation     405750  Technology   
4      FB         Facebook, Inc.     307170  Technology   
25   ORCL     Oracle Corporation     155280  Technology   

                                           industry  Market  
0                            Computer Manufacturing  NASDAQ  
1   Computer Software: Programming, Data Processing  NASDAQ  
2           Computer Software: Prepackaged Software  NASDAQ  
4   Computer Software: Programming, Data Processing  NASDAQ  
25          Computer Software: Prepackaged Software    NYSE  


In [146]:
# Inspect the selected symbols.
techgiants

array(['AAPL', 'GOOG', 'MSFT', 'FB', 'ORCL', 'INTC', 'CSCO', 'IBM', 'TSM',
       'DCM'], dtype=object)

In [174]:
# Download close prices for every selected symbol over the default date range.
techgiants_p = getStocks(techgiants)

['AAPL' 'GOOG' 'MSFT' 'FB' 'ORCL' 'INTC' 'CSCO' 'IBM' 'TSM' 'DCM']


In [176]:
# Preview the first rows of one price series rather than the full history.
techgiants_p['ORCL'].head(10)

Unnamed: 0_level_0,Close
Date,Unnamed: 1_level_1
2005-01-03,12.453647
2005-01-04,12.128609
2005-01-05,12.165756
2005-01-06,12.277198
2005-01-07,12.379353
2005-01-10,12.249337
2005-01-11,12.258624
2005-01-12,12.518655
2005-01-13,12.518655
2005-01-14,12.657958
