## From
https://chrisconlan.com/download-historical-stock-data-google-r-python/

In [8]:
# Make each cell echo the value of every top-level expression it contains,
# not only the last one (later cells rely on this to show several results).
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

## Demonstrates how to use a web request to fetch data from a URL into
## a pandas DataFrame

In [1]:
import pandas as pd
import io
import requests
import time
 

In [2]:
def google_stocks(symbol, startdate = (1, 1, 2005), enddate = None):
    """Download historical prices for ``symbol`` into a pandas DataFrame.

    Builds a Google Finance "historical" CSV URL, fetches it with
    ``requests`` and parses the response body with ``pandas.read_csv``.

    Parameters
    ----------
    symbol : str
        Ticker symbol; inserted verbatim into the query string.
    startdate : indexable of three items, optional
        Date parts in (month, day, year) order, matching the ``%m+%d+%Y``
        format used for the default end date. Defaults to ``(1, 1, 2005)``.
    enddate : indexable of three items, optional
        Same layout as ``startdate``. When omitted (or falsy) today's date
        is used.

    Returns
    -------
    pandas.DataFrame
        The parsed CSV, one row per line of the response.

    Raises
    ------
    requests.HTTPError
        If the server answers with a 4xx/5xx status, instead of silently
        trying to parse an error page as CSV.

    Notes
    -----
    NOTE(review): the outputs recorded in this notebook show the same rows
    with and without ``enddate`` -- confirm that the service still honours
    this date format before relying on the date filter.
    """

    def _join_date(parts):
        # Index explicitly so that a too-short sequence still raises.
        return '+'.join(str(parts[i]) for i in range(3))

    start_param = _join_date(startdate)
    end_param = _join_date(enddate) if enddate else time.strftime("%m+%d+%Y")

    stock_url = ("http://www.google.com/finance/historical?q=" + symbol +
                 "&startdate=" + start_param +
                 "&enddate=" + end_param + "&output=csv")

    response = requests.get(stock_url)
    response.raise_for_status()

    # The payload starts with a UTF-8 byte-order mark; decoding as plain
    # 'utf-8' leaves it glued to the first header ('\ufeffDate'), which makes
    # frame['Date'] fail. 'utf-8-sig' strips the BOM when present and is
    # otherwise identical to 'utf-8'.
    csv_text = response.content.decode('utf-8-sig')

    return pd.read_csv(io.StringIO(csv_text))
 
 

In [9]:
# Fetch AAPL history with the default date range, then show the frame's
# column labels and its row index (both are echoed as cell output).
apple_data = google_stocks('AAPL')
apple_data.columns
apple_data.index

Index(['﻿Date', 'Open', 'High', 'Low', 'Close', 'Volume'], dtype='object')

RangeIndex(start=0, stop=250, step=1)

In [4]:
# Fetch AAPL history and print the whole frame as plain text.
# NOTE(review): this downloads the same data as the earlier AAPL cell and
# prints every row; reusing `apple_data` and ending the cell with
# `apple_data.head()` would avoid the extra request and the long output.
apple_data = google_stocks('AAPL')
print(apple_data)
 

         ﻿Date    Open    High     Low   Close    Volume
0     8-Nov-17  174.66  176.24  174.33  176.24  24409527
1     7-Nov-17  173.91  175.25  173.60  174.81  24361485
2     6-Nov-17  172.36  174.99  171.72  174.25  35026306
3     3-Nov-17  174.00  174.26  171.12  172.50  59398631
4     2-Nov-17  166.60  168.50  165.28  168.11  41393373
5     1-Nov-17  169.87  169.94  165.61  166.89  33637762
6    31-Oct-17  167.90  169.65  166.94  169.04  36046828
7    30-Oct-17  163.89  168.07  163.72  166.72  44700772
8    27-Oct-17  159.29  163.60  158.70  163.05  44454160
9    26-Oct-17  157.23  157.83  156.78  157.41  17000469
10   25-Oct-17  156.91  157.55  155.27  156.41  21207098
11   24-Oct-17  156.29  157.42  156.20  157.10  17757230
12   23-Oct-17  156.89  157.69  155.50  156.17  21984327
13   20-Oct-17  156.61  157.75  155.96  156.25  23974146
14   19-Oct-17  156.75  157.08  155.02  155.98  42584166
15   18-Oct-17  160.42  160.71  159.60  159.76  16374164
16   17-Oct-17  159.78  160.87 

In [10]:
# Request AAPL history with an explicit end date of (month=1, day=1, year=2006).
# NOTE(review): the recorded output below still begins at 8-Nov-17, so the
# end date does not appear to have been applied -- verify the date format
# expected by the service.
apple_truncated = google_stocks('AAPL', enddate = (1, 1, 2006))
print(apple_truncated)

         ﻿Date    Open    High     Low   Close    Volume
0     8-Nov-17  174.66  176.24  174.33  176.24  24409527
1     7-Nov-17  173.91  175.25  173.60  174.81  24361485
2     6-Nov-17  172.36  174.99  171.72  174.25  35026306
3     3-Nov-17  174.00  174.26  171.12  172.50  59398631
4     2-Nov-17  166.60  168.50  165.28  168.11  41393373
5     1-Nov-17  169.87  169.94  165.61  166.89  33637762
6    31-Oct-17  167.90  169.65  166.94  169.04  36046828
7    30-Oct-17  163.89  168.07  163.72  166.72  44700772
8    27-Oct-17  159.29  163.60  158.70  163.05  44454160
9    26-Oct-17  157.23  157.83  156.78  157.41  17000469
10   25-Oct-17  156.91  157.55  155.27  156.41  21207098
11   24-Oct-17  156.29  157.42  156.20  157.10  17757230
12   23-Oct-17  156.89  157.69  155.50  156.17  21984327
13   20-Oct-17  156.61  157.75  155.96  156.25  23974146
14   19-Oct-17  156.75  157.08  155.02  155.98  42584166
15   18-Oct-17  160.42  160.71  159.60  159.76  16374164
16   17-Oct-17  159.78  160.87 

## Use Quandl

See https://www.quandl.com/tools/python

In [41]:
import quandl

# Load the Quandl API key from a local file so that the secret itself never
# appears in the notebook (do not paste the key into code or comments).
with open("quandl_apkey.txt", "r") as keyfile:
    key = keyfile.read()

# Drop surrounding whitespace such as the trailing newline most editors add.
quandl_key = key.strip()
quandl.ApiConfig.api_key = quandl_key

In [42]:
# Download the "FRED/GDP" dataset from Quandl and preview its first rows.
mydata = quandl.get("FRED/GDP")

mydata.head()

Unnamed: 0_level_0,Value
Date,Unnamed: 1_level_1
1947-01-01,243.08
1947-04-01,246.267
1947-07-01,250.115
1947-10-01,260.309
1948-01-01,266.173


In [51]:
# Query the ZACKS/FC table for AAPL and MSFT, keeping only the `ticker` and
# `per_end_date` columns and only rows whose per_end_date is >= 2015-01-01.
# The result is named `zacks_fc` rather than `data` so that it is not
# silently replaced by the `data` module imported from pandas_datareader
# later in this notebook.
zacks_fc = quandl.get_table(
    'ZACKS/FC',
    paginate=True,
    ticker=['AAPL', 'MSFT'],
    per_end_date={'gte': '2015-01-01'},
    qopts={'columns': ['ticker', 'per_end_date']},
)

# Number of rows returned per ticker, followed by a preview of the table.
zacks_fc['ticker'].value_counts()
zacks_fc.head()

MSFT    14
AAPL    14
Name: ticker, dtype: int64

Unnamed: 0_level_0,ticker,per_end_date
None,Unnamed: 1_level_1,Unnamed: 2_level_1
0,AAPL,2015-09-30
1,AAPL,2016-09-30
2,AAPL,2017-09-30
3,AAPL,2015-03-31
4,AAPL,2015-06-30


## Use Datareader
### N.B.: uses `Panel`, which is deprecated (and removed in pandas 0.25 and later)

In [11]:
from pandas_datareader import data
import pandas as pd


# Define the instruments to download. We would like to see Apple, Microsoft and the S&P500 index.
tickers = ['AAPL', 'MSFT', 'SPY']

# Define which online source one should use
data_source = 'google'

# We would like all available data from 01/01/2010 until 12/31/2016.
# NOTE(review): the recorded output below starts at 2016-11-11, so the source
# seems to have returned less than the requested range -- verify.
start_date = '2010-01-01'
end_date = '2016-12-31'

# Use pandas_datareader.data.DataReader to load the desired data. As simple as that.
panel_data = data.DataReader(tickers, data_source, start_date, end_date)

In [17]:
# Show which container DataReader returned, then preview the closing prices.
# Plain item access replaces the removed `.ix` indexer; it selects the
# 'Close' item of a Panel and also works if `panel_data` is a DataFrame
# with a top-level 'Close' column.
type(panel_data)
panel_data['Close'].head()

pandas.core.panel.Panel

Unnamed: 0_level_0,AAPL,MSFT,SPY
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2016-11-11,108.43,59.02,216.42
2016-11-14,105.71,58.12,216.59
2016-11-15,107.11,58.87,218.28
2016-11-16,109.99,59.65,217.87
2016-11-17,109.95,60.64,218.99


In [19]:
# Select the closing prices: one column per ticker, indexed by date.
# (Item access is used because the `.ix` indexer has been removed from pandas.)
close = panel_data['Close']

# Every business day (Monday-Friday) between start_date and end_date.
all_weekdays = pd.date_range(start=start_date, end=end_date, freq='B')

# Align the prices to that full business-day calendar. Days without a quote
# (for example market holidays) become rows of NaN instead of being absent.
close = close.reindex(all_weekdays)

close.tail(10)

Unnamed: 0,AAPL,MSFT,SPY
2016-12-19,116.64,63.62,225.53
2016-12-20,116.95,63.54,226.4
2016-12-21,117.06,63.54,225.77
2016-12-22,116.29,63.55,225.38
2016-12-23,116.52,63.24,225.71
2016-12-26,,,
2016-12-27,117.26,63.28,226.27
2016-12-28,116.76,62.99,224.4
2016-12-29,116.73,62.9,224.35
2016-12-30,115.82,62.14,223.53


In [21]:
# Flatten the panel into a single DataFrame and preview it. In the recorded
# output the rows are indexed by (Date, minor) and the columns are
# Open/High/Low/Close/Volume.
df = panel_data.to_frame()
df.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Open,High,Low,Close,Volume
Date,minor,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2016-11-11,AAPL,107.12,108.87,106.55,108.43,34143898.0
2016-11-11,MSFT,58.23,59.12,58.01,59.02,38767843.0
2016-11-11,SPY,216.08,216.7,215.32,216.42,100552732.0
2016-11-14,AAPL,107.71,107.81,104.08,105.71,51175504.0
2016-11-14,MSFT,59.02,59.08,57.28,58.12,41328422.0


## Fama French

In [23]:
from pandas_datareader.famafrench import get_available_datasets

import pandas_datareader.data as web

# Count the datasets that the Fama-French reader can provide.
len(get_available_datasets())
# Download the "5_Industry_Portfolios" dataset; `ds` is indexed by table
# number and also carries a text description under the 'DESCR' key.
ds = web.DataReader("5_Industry_Portfolios", "famafrench")

# Print the description, which lists what each numbered table contains.
print(ds['DESCR'])

262

5 Industry Portfolios
---------------------

This file was created by CMPT_IND_RETS using the 201709 CRSP database. It contains value- and equal-weighted returns for 5 industry portfolios. The portfolios are constructed at the end of June. The annual returns are from January to December. Missing data are indicated by -99.99 or -999. Copyright 2017 Kenneth R. French

  0 : Average Value Weighted Returns -- Monthly (93 rows x 5 cols)
  1 : Average Equal Weighted Returns -- Monthly (93 rows x 5 cols)
  2 : Average Value Weighted Returns -- Annual (7 rows x 5 cols)
  3 : Average Equal Weighted Returns -- Annual (7 rows x 5 cols)
  4 : Number of Firms in Portfolios (93 rows x 5 cols)
  5 : Average Firm Size (93 rows x 5 cols)
  6 : Sum of BE / Sum of ME (8 rows x 5 cols)
  7 : Value-Weighted Average of BE/ME (8 rows x 5 cols)


In [26]:
# Inspect the container returned by the Fama-French reader and list its keys.
type(ds)
ds.keys()

dict

dict_keys([0, 1, 2, 3, 4, 5, 6, 7, 'DESCR'])

In [29]:
# Look at table 4 ("Number of Firms in Portfolios" per the description
# printed earlier): show its type, then preview its first rows.
type(ds[4])
ds[4].head()

pandas.core.frame.DataFrame

Unnamed: 0_level_0,Cnsmr,Manuf,HiTec,Hlth,Other
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2010-01,622,737,830,467,1232
2010-02,620,734,821,464,1221
2010-03,614,729,818,458,1215
2010-04,614,726,807,458,1203
2010-05,611,723,804,457,1195


## NASDAQ Symbols

In [31]:
from pandas_datareader.nasdaq_trader import get_nasdaq_symbols
# Download the NASDAQ symbol directory and print the record for IBM.
# `.loc` performs the label lookup that the removed `.ix` indexer used to do.
symbols = get_nasdaq_symbols()
print(symbols.loc['IBM'])

Nasdaq Traded                                                    True
Security Name       International Business Machines Corporation Co...
Listing Exchange                                                    N
Market Category                                                      
ETF                                                             False
Round Lot Size                                                    100
Test Issue                                                      False
Financial Status                                                  NaN
CQS Symbol                                                        IBM
NASDAQ Symbol                                                     IBM
NextShares                                                      False
Name: IBM, dtype: object
