In [55]:
import time
import datetime as dt
import yaml
import pandas as pd
import yfinance as yf
pd.options.display.max_columns = None
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

from yahoofinancials import YahooFinancials

from alpha_vantage.timeseries import TimeSeries

import requests
from bs4 import BeautifulSoup

# 1. yfinance API
**Intraday data**:  
Only 7 days worth of 1m granularity data are allowed to be fetched per request.
The requested range must be within the last 30 days.  
The requested range must be within the last 60 days.

## Single Stock

In [2]:
# begin with the nearest business day, end exclusive since hard-coded date "2020-12-07" means "2020-12-07 00:00:00"
# start and end could be string or datetime
data_1m = yf.download("MSFT", start="2020-12-04", end="2020-12-07", interval="1m")
data1_5m = yf.download("MSFT", start="2020-11-04", end="2020-12-07", interval="5m")
data_1m
data1_5m

[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed


Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume
Datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2020-12-04 09:30:00-05:00,214.139999,214.139999,214.108200,214.112793,214.112793,542030
2020-12-04 09:31:00-05:00,214.089996,214.550003,214.020004,214.490005,214.490005,85270
2020-12-04 09:32:00-05:00,214.500000,214.509995,214.119995,214.429993,214.429993,45654
2020-12-04 09:33:00-05:00,214.419998,214.500000,214.160004,214.300003,214.300003,53164
2020-12-04 09:34:00-05:00,214.339996,214.339996,214.330002,214.330002,214.330002,47535
...,...,...,...,...,...,...
2020-12-04 15:55:00-05:00,214.000000,214.050003,213.929993,214.039993,214.039993,145986
2020-12-04 15:56:00-05:00,214.039993,214.169998,214.030197,214.154999,214.154999,163482
2020-12-04 15:57:00-05:00,214.160004,214.300003,214.119995,214.259995,214.259995,232628
2020-12-04 15:58:00-05:00,214.270004,214.300003,214.160004,214.205002,214.205002,187070


Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume
Datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2020-11-04 09:30:00-05:00,213.679993,215.029999,212.850006,214.996506,214.996506,3343572
2020-11-04 09:35:00-05:00,215.000000,215.130005,212.660004,213.100006,213.100006,1174529
2020-11-04 09:40:00-05:00,213.139999,213.969894,212.418503,213.645004,213.645004,743188
2020-11-04 09:45:00-05:00,213.669998,213.860001,212.759995,212.970001,212.970001,683280
2020-11-04 09:50:00-05:00,213.009995,213.300003,212.440002,213.152603,213.152603,599086
...,...,...,...,...,...,...
2020-12-04 15:35:00-05:00,213.610001,213.610001,213.350006,213.360001,213.360001,202480
2020-12-04 15:40:00-05:00,213.350006,213.755005,213.291901,213.630005,213.630005,284395
2020-12-04 15:45:00-05:00,213.619995,213.634995,213.429993,213.630005,213.630005,206046
2020-12-04 15:50:00-05:00,213.639999,214.199997,213.440002,214.000000,214.000000,669698


# Multiple Stocks

In [3]:
stocks = ["AMZN", "MSFT", "INTC", "GOOG"]
start = dt.datetime.today() - dt.timedelta(30)
end = dt.datetime.today()
close_prices = pd.DataFrame()

for ticker in stocks:
    close_prices[ticker] = yf.download(ticker, start, end)["Adj Close"]
close_prices

[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed


Unnamed: 0_level_0,AMZN,MSFT,INTC,GOOG
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2020-11-09,3143.73999,217.819733,45.599998,1763.0
2020-11-10,3035.02002,210.459,45.439999,1740.390015
2020-11-11,3137.389893,215.984543,46.349998,1752.709961
2020-11-12,3110.280029,214.877441,44.950001,1749.839966
2020-11-13,3128.810059,215.944641,45.459999,1777.02002
2020-11-16,3131.060059,216.662766,46.189999,1781.380005
2020-11-17,3135.659912,213.900009,45.529999,1770.150024
2020-11-18,3105.459961,211.080002,45.060001,1746.780029
2020-11-19,3117.02002,212.419998,45.619999,1763.920044
2020-11-20,3099.399902,210.389999,45.389999,1742.189941


In [4]:
# if you want to store entire price info, dict is recommended
ohlcv_data = {}

for ticker in stocks:
    ohlcv_data[ticker] = yf.download(ticker, start, end)
ohlcv_data

[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed


{'AMZN':                    Open         High          Low        Close    Adj Close  \
 Date                                                                          
 2020-11-09  3231.030029  3289.000000  3112.110107  3143.739990  3143.739990   
 2020-11-10  3095.020020  3114.000000  3019.479980  3035.020020  3035.020020   
 2020-11-11  3061.780029  3139.149902  3050.000000  3137.389893  3137.389893   
 2020-11-12  3159.949951  3175.879883  3086.050049  3110.280029  3110.280029   
 2020-11-13  3122.000000  3141.719971  3085.389893  3128.810059  3128.810059   
 2020-11-16  3093.199951  3142.699951  3072.689941  3131.060059  3131.060059   
 2020-11-17  3183.540039  3189.250000  3135.260010  3135.659912  3135.659912   
 2020-11-18  3134.000000  3140.000000  3105.100098  3105.459961  3105.459961   
 2020-11-19  3105.310059  3125.000000  3080.919922  3117.020020  3117.020020   
 2020-11-20  3117.020020  3132.889893  3098.050049  3099.399902  3099.399902   
 2020-11-23  3116.699951  3139.7

# 2. yahoofinancials: based on web scraping

In [5]:
# an YahooFinancials object
yahoo_financials = YahooFinancials("AAPL")

# "daily" is the smallest unit
# start and end should be string type
historical_stock_prices = yahoo_financials.get_historical_price_data("2020-05-01", "2020-09-15", "daily")
# JSON Data
historical_stock_prices
type(historical_stock_prices)

{'AAPL': {'eventsData': {'dividends': {'2020-05-08': {'amount': 0.82,
     'date': 1588944600,
     'formatted_date': '2020-05-08'},
    '2020-08-07': {'amount': 0.205,
     'date': 1596807000,
     'formatted_date': '2020-08-07'}},
   'splits': {'2020-08-31': {'date': 1598880600,
     'numerator': 4,
     'denominator': 1,
     'splitRatio': '4:1',
     'formatted_date': '2020-08-31'}}},
  'firstTradeDate': {'formatted_date': '1980-12-12', 'date': 345479400},
  'currency': 'USD',
  'instrumentType': 'EQUITY',
  'timeZone': {'gmtOffset': -18000},
  'prices': [{'date': 1588339800,
    'high': 74.75,
    'low': 71.4625015258789,
    'open': 71.5625,
    'close': 72.26750183105469,
    'volume': 60154200,
    'adjclose': 71.23554229736328,
    'formatted_date': '2020-05-01'},
   {'date': 1588599000,
    'high': 73.42250061035156,
    'low': 71.58000183105469,
    'open': 72.2925033569336,
    'close': 73.29000091552734,
    'volume': 33392000,
    'adjclose': 72.24344635009766,
    'forma

dict

In [25]:
stocks = ["AMZN", "MSFT", "INTC", "GOOG", "CSCO", "FB"]
start = (dt.date.today() - dt.timedelta(365)).strftime("%Y-%m-%d")
end = (dt.date.today()).strftime("%Y-%m-%d")
close_prices = pd.DataFrame()
for ticker in stocks:
    yahoo_financials = YahooFinancials(ticker)
    data = yahoo_financials.get_historical_price_data(start, end, "daily")
    ohlv = data[ticker]["prices"]
    temp = pd.DataFrame(ohlv)[["formatted_date", "adjclose"]]
    temp.set_index("formatted_date", inplace=True)
    temp.dropna(inplace=True)
    close_prices[ticker] = temp["adjclose"]
close_prices

Unnamed: 0_level_0,AMZN,MSFT,INTC,GOOG,CSCO,FB
formatted_date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2019-12-09,1749.510010,149.773361,55.158466,1343.560059,42.441444,201.339996
2019-12-10,1739.209961,149.545792,55.217014,1344.660034,42.634796,200.869995
2019-12-11,1748.719971,150.109772,55.685368,1345.020020,42.808815,202.259995
2019-12-12,1760.329956,151.633667,56.153725,1350.270020,44.152630,196.750000
2019-12-13,1760.939941,152.910126,56.387901,1347.829956,43.794926,194.110001
...,...,...,...,...,...,...
2020-11-30,3168.040039,214.070007,48.349998,1760.739990,43.020000,276.970001
2020-12-01,3220.080078,216.210007,49.560001,1798.099976,43.540001,286.549988
2020-12-02,3203.530029,215.369995,49.900002,1827.949951,43.889999,287.519989
2020-12-03,3186.729980,214.240005,50.990002,1826.770020,44.110001,281.850006


# 3. Alpha Vantage API  
**5 calls per min and 500 calls per day**


In [18]:
with open("api_key.yaml", 'r') as stream:
    try:
        api_key = yaml.safe_load(stream)
    except yaml.YAMLError as exc:
        print(exc)
ts = TimeSeries(key=api_key, output_format="pandas")
data, meta_data = ts.get_intraday(symbol='MSFT',interval='1min', outputsize='full')
data.columns = ["open", "high", "low", "close", "volume"]
data
meta_data

Unnamed: 0_level_0,open,high,low,close,volume
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2020-12-04 20:00:00,214.0299,214.0299,214.0299,214.0299,240.0
2020-12-04 19:50:00,214.0400,214.0400,214.0400,214.0400,525.0
2020-12-04 19:49:00,213.9600,213.9600,213.9600,213.9600,260.0
2020-12-04 19:41:00,214.0300,214.0300,214.0000,214.0000,2184.0
2020-12-04 19:26:00,214.0000,214.0000,214.0000,214.0000,710.0
...,...,...,...,...,...
2020-11-23 04:29:00,211.3000,211.3000,211.3000,211.3000,110.0
2020-11-23 04:23:00,211.3100,211.3100,211.3100,211.3100,376.0
2020-11-23 04:22:00,211.3000,211.3000,211.3000,211.3000,724.0
2020-11-23 04:09:00,211.3500,211.3500,211.3500,211.3500,225.0


{'1. Information': 'Intraday (1min) open, high, low, close prices and volume',
 '2. Symbol': 'MSFT',
 '3. Last Refreshed': '2020-12-04 20:00:00',
 '4. Interval': '1min',
 '5. Output Size': 'Full size',
 '6. Time Zone': 'US/Eastern'}

In [53]:
# have more than 5 stocks in one loop
stocks = ["AMZN", "MSFT", "INTC", "GOOG", "CSCO",
          "FB", "BA", "MMM", "XOM", "NKE", "AAPL"]
ts = TimeSeries(key=api_key, output_format="pandas")
close_prices = pd.DataFrame()
api_call_amt = 0
for ticker in stocks:
    print(ticker)
    api_call_amt += 1
    if api_call_amt == 1:
        start_time = time.time()
    data = ts.get_intraday(symbol=ticker,interval='1min', outputsize='full')[0]
    data.columns = ["open", "high", "low", "close", "volume"]
    close_prices[ticker] = data["close"]
    if api_call_amt == 5:
        api_call_amt = 0
        time.sleep(60 - (time.time() - start_time) % 60)
        
close_prices = close_prices[::-1]
close_prices

AMZN
MSFT
INTC
GOOG
CSCO
FB
BA
MMM
XOM
NKE
AAPL


Unnamed: 0_level_0,AMZN,MSFT,INTC,GOOG,CSCO,FB,BA,MMM,XOM,NKE,AAPL
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
2020-11-23 05:12:00,3114.00,211.16,,,,,204.64,,,,
2020-11-23 05:33:00,3114.00,,,,,,204.39,,,,117.50
2020-11-23 06:10:00,3113.00,,,,,,,,37.79,,
2020-11-23 07:01:00,3110.11,210.75,45.56,,41.06,270.96,203.43,173.65,37.64,133.0234,117.54
2020-11-23 07:02:00,3110.43,210.70,45.56,,41.04,271.30,203.15,,37.64,,117.54
...,...,...,...,...,...,...,...,...,...,...,...
2020-12-04 19:11:00,3159.27,,,,,,,,,,122.00
2020-12-04 19:12:00,3160.00,,,,,,232.55,,,,122.01
2020-12-04 19:31:00,3159.02,,,,,279.25,232.30,,,,
2020-12-04 19:57:00,3159.20,,,,,,232.00,,,,122.13


# 4. Web Scraping

In [59]:
url = "https://ca.finance.yahoo.com/quote/MSFT/financials?p=MSFT"
page = requests.get(url)

# entire html
page_content = page.content  # not recommended

soup = BeautifulSoup(page_content, "html.parser")
tbl = soup.find_all("table", {"class", "calls W(100%) Pos(r) Bd(0) Pt(0) list-options"})
tbl

[<table class="calls W(100%) Pos(r) Bd(0) Pt(0) list-options" data-reactid="53"><thead data-reactid="54"><tr class="C($tertiaryColor)" data-reactid="55"><th class="Va(b) Py(4px) Fw(400) Fz(xs) Ta(start) Pstart(10px) C($tertiaryColor) Cur(p)" data-reactid="56"><span data-reactid="57">Contract Name</span><!-- react-text: 58 --><!-- /react-text --></th><th class="Va(b) Py(4px) Fw(400) Fz(xs) Ta(end) Pstart(7px) C($tertiaryColor) Cur(p)" data-reactid="59"><span data-reactid="60">Last Trade Date</span><!-- react-text: 61 --><!-- /react-text --></th><th class="Va(b) Py(4px) Fw(400) Fz(xs) Ta(c) Pstart(7px) C($tertiaryColor) C($primaryColor) Fw(500)! Cur(p)" data-reactid="62"><span data-reactid="63">Strike</span><svg class="Va(m)! W(14px) H(14px) Fill($primaryColor)! Stk($primaryColor)! Pt(1px) Pos(a) Cur(p)" data-icon="caret-up" data-reactid="64" height="48" style="fill:#000;stroke:#000;stroke-width:0;vertical-align:bottom;" viewbox="0 0 48 48" width="48"><path d="M24.21 16.03L11.48 28.76c-.