### Lesson outline

Here's an overview of what you'll learn to do in this lesson. Documentation links are for reference.
#### Read in multiple stocks:

- Create an empty [pandas.DataFrame](http://pandas.pydata.org/pandas-docs/stable/generated/pandas.DataFrame.html) with dates as index: [pandas.date_range](http://pandas.pydata.org/pandas-docs/stable/generated/pandas.date_range.html)
- Drop missing date rows: [pandas.DataFrame.dropna](http://pandas.pydata.org/pandas-docs/stable/generated/pandas.DataFrame.dropna.html)
- Incrementally join data for each stock: [pandas.DataFrame.join](http://pandas.pydata.org/pandas-docs/stable/generated/pandas.DataFrame.join.html)

#### Manipulate stock data:

- [Index and select data](http://pandas.pydata.org/pandas-docs/stable/indexing.html) by row (dates) and column (symbols)
- Plot multiple stocks at once (still using [pandas.DataFrame.plot](http://pandas.pydata.org/pandas-docs/stable/generated/pandas.DataFrame.plot.html))
- Carry out arithmetic operations across stocks


In [40]:
"""Build a dataframe in pandas and filter it by a range of date"""
import pandas as pd

def test_run():
    """Print SPY adjusted closing prices for a short, fixed date window.

    Builds an empty DataFrame indexed by every calendar day from
    2010-01-22 to 2010-01-26, reads the 'Date' and 'Adj Close' columns
    from data/SPY.csv, and inner-joins the two so that only dates present
    in both indexes are kept. The result is printed; nothing is returned.
    """
    # Define date range
    start_date = '2010-01-22'
    end_date = '2010-01-26'
    dates = pd.date_range(start_date, end_date)

    # Create an empty dataframe indexed by those dates
    df1 = pd.DataFrame(index=dates)

    # Read SPY data into temporary dataframe
    dfSPY = pd.read_csv('data/SPY.csv', index_col='Date', parse_dates=True,
                        usecols=['Date', 'Adj Close'], na_values=['nan'])

    # Join the two dataframes using DataFrame.join(), with how='inner'.
    # The two-step alternative is a default (left) join followed by
    # df1.dropna() to discard the dates that have no SPY row.
    df1 = df1.join(dfSPY, how='inner')

    # Parenthesised form works on both Python 2 and Python 3.
    print(df1)
    
# Run the demo only when this code is executed directly, not when imported.
if __name__ == "__main__":
    test_run()    

            Adj Close
2010-01-26  94.730543
2010-01-25  95.129189
2010-01-22  94.643882


In [48]:
"""Build a dataframe in pandas with 3 symbols and filter it by a range of date"""
import pandas as pd

def test_run():
    """Print adjusted closes for SPY, GOOG, IBM and GLD over a fixed window.

    Starts from an empty DataFrame indexed by every calendar day from
    2010-01-22 to 2010-01-26, inner-joins SPY from data/SPY.csv so only
    dates present in both indexes remain, then joins each further symbol
    as its own column. The SPY column keeps its CSV name 'Adj Close'; the
    other columns are renamed to their ticker. Nothing is returned.
    """
    # Define date range
    start_date = '2010-01-22'
    end_date = '2010-01-26'
    dates = pd.date_range(start_date, end_date)

    # Create an empty dataframe indexed by those dates
    df1 = pd.DataFrame(index=dates)

    # Read SPY data into temporary dataframe
    dfSPY = pd.read_csv('data/SPY.csv', index_col='Date', parse_dates=True,
                        usecols=['Date', 'Adj Close'], na_values=['nan'])

    # Join the two dataframes using DataFrame.join(), with how='inner'
    df1 = df1.join(dfSPY, how='inner')

    # Read in more stocks
    symbols = ['GOOG', 'IBM', 'GLD']
    for symbol in symbols:
        df_temp = pd.read_csv('data/{}.csv'.format(symbol), index_col='Date',
                              parse_dates=True, usecols=['Date', 'Adj Close'],
                              na_values=['nan'])
        # Every CSV calls its price column 'Adj Close'; rename it to the
        # ticker so the join does not hit a column-name clash.
        df_temp = df_temp.rename(columns={'Adj Close': symbol})
        df1 = df1.join(df_temp)

    # Parenthesised form works on both Python 2 and Python 3.
    print(df1)
    
# Run the demo only when this code is executed directly, not when imported.
if __name__ == "__main__":
    test_run() 

            Adj Close        GOOG         IBM         GLD
2010-01-26  94.730543  270.939526  105.769566  107.559998
2010-01-25  95.129189  269.730740  106.080779  107.480003
2010-01-22  94.643882  274.730736  105.559289  107.169998


---
## Quiz: Utility functions for reading data

In [89]:
"""Utility functions"""

import os
import pandas as pd

def symbol_to_path(symbol, base_dir="data"):
    """Build the path of the CSV file that holds data for a ticker symbol.

    The file name is the symbol's string form followed by ".csv", placed
    inside base_dir.
    """
    csv_name = str(symbol) + ".csv"
    return os.path.join(base_dir, csv_name)


def get_data(symbols, dates):
    """Read stock data (adjusted close) for given symbols from CSV files.

    Args:
        symbols: sequence of ticker symbols. 'SPY' is added at the front
            as a reference if it is absent; the caller's sequence is
            left unmodified.
        dates: index for the resulting DataFrame (e.g. the output of
            pandas.date_range).

    Returns:
        A DataFrame indexed by dates with one column per symbol, each
        named after its ticker. Rows with no SPY value are dropped.
    """
    df = pd.DataFrame(index=dates)
    if 'SPY' not in symbols:  # add SPY for reference, if absent
        # Build a new list instead of inserting in place so the caller's
        # list is not changed as a side effect.
        symbols = ['SPY'] + list(symbols)

    for symbol in symbols:
        # symbol_to_path already returns the finished path, so it is used
        # as-is (re-formatting it would break on paths containing braces).
        df_temp = pd.read_csv(symbol_to_path(symbol), index_col='Date',
                              parse_dates=True, usecols=['Date', 'Adj Close'],
                              na_values=['nan'])
        # Rename to prevent clash: every CSV names its column 'Adj Close'
        df_temp = df_temp.rename(columns={'Adj Close': symbol})
        df = df.join(df_temp)
        if symbol == 'SPY':  # Drop dates SPY did not trade
            df = df.dropna(subset=['SPY'])

    return df


def test_run():
    """Fetch GOOG, IBM and GLD (plus SPY) for a fixed window and print them.

    Calls get_data with the calendar days from 2010-01-22 to 2010-01-26
    and prints the DataFrame it returns. Nothing is returned.
    """
    # Define a date range
    dates = pd.date_range('2010-01-22', '2010-01-26')

    # Choose stock symbols to read
    symbols = ['GOOG', 'IBM', 'GLD']

    # Get stock data
    df = get_data(symbols, dates)

    # Parenthesised form works on both Python 2 and Python 3.
    print(df)


# Run the demo only when this code is executed directly, not when imported.
if __name__ == "__main__":
    test_run()

                  SPY        GOOG         IBM         GLD
2010-01-22  94.643882  274.730736  105.559289  107.169998
2010-01-25  95.129189  269.730740  106.080779  107.480003
2010-01-26  94.730543  270.939526  105.769566  107.559998


---