In [1]:
import yfinance as yf
# Print the signature and parameter documentation of Ticker.history to see
# which arguments it accepts (period vs. interval, start/end, ...).
help(yf.Ticker.history)



Help on function history in module yfinance.base:

history(self, period='1mo', interval='1d', start=None, end=None, prepost=False, actions=True, auto_adjust=True, back_adjust=False, proxy=None, rounding=False, tz=None, **kwargs)
    :Parameters:
        period : str
            Valid periods: 1d,5d,1mo,3mo,6mo,1y,2y,5y,10y,ytd,max
            Either Use period parameter or use start and end
        interval : str
            Valid intervals: 1m,2m,5m,15m,30m,60m,90m,1h,1d,5d,1wk,1mo,3mo
            Intraday data cannot extend last 60 days
        start: str
            Download start date string (YYYY-MM-DD) or _datetime.
            Default is 1900-01-01
        end: str
            Download end date string (YYYY-MM-DD) or _datetime.
            Default is now
        prepost : bool
            Include Pre and Post market data in results?
            Default is False
        auto_adjust: bool
            Adjust all OHLC automatically? Default is True
        back_adjust: bool
        

In [2]:
# libraries
from pandas_datareader import data as pdr
import yfinance as yf

def download_raw_stock_data(filepath, tickers, start, end, period = '1d'):
    """
    Download the price history of a ticker and save it as a CSV file.

    :Parameters:
        filepath: str
            Path of the CSV file to write. Missing parent directories
            are created.
        tickers : str
            Ticker symbol to download, e.g. 'AAPL'. It is passed
            unchanged to ``yf.Ticker``.
        start: str
            The date to start gathering the data, e.g. '2010-1-1'.
        end: str
            The date to end gathering the data, e.g. '2020-1-25'.
        period: str
            The frequency at which to gather the data. It is forwarded as
            the ``interval`` argument of ``Ticker.history``; common options
            include '1d' (daily), '1wk' (weekly) and '1mo' (monthly).
            Default is '1d'.

    :Returns:
        None. The downloaded frame is written to ``filepath``.
    """
    import os

    # to_csv cannot write into a directory that does not exist yet, so make
    # sure the parent directory is there (only for path-like targets).
    if isinstance(filepath, (str, os.PathLike)):
        parent_dir = os.path.dirname(os.fspath(filepath))
        if parent_dir:
            os.makedirs(parent_dir, exist_ok=True)

    # Handle for the requested ticker symbol.
    ticker_data = yf.Ticker(tickers)

    # Ticker.history uses `period` for the overall date range (an alternative
    # to start/end) and `interval` for the sampling frequency. The frequency
    # requested by the caller therefore has to be passed as `interval`.
    history_df = ticker_data.history(interval=period, start=start, end=end)
    history_df.to_csv(filepath)

In [None]:
# Display name (also used as the CSV file name) -> ticker symbol to download.
dict_tickers = {
    'Apple': 'AAPL',
    'Microsoft': 'MSFT',
    'Google': 'GOOG',
    'Bitcoin': 'BTC-USD',
    # NOTE(review): Meta Platforms moved from 'FB' to 'META' in 2022 —
    # confirm that 'FB' still resolves before re-running the download.
    'Facebook': 'FB',
    'Walmart': 'WMT',
    'Amazon': 'AMZN',
    'CVS': 'CVS',
    'Berkshire': 'BRK-B',
    'ExxonMobil': 'XOM',
    'AtandT': 'T',
    'Costco': 'COST',
    'Walgreens': 'WBA',
    'Kroger': 'KR',
    'JPMorgan': 'JPM',
    'Verizon': 'VZ',
    'FordMotor': 'F',
    'GeneralMotors': 'GM',
    'Dell': 'DELL',
    'BankOfAmerica': 'BAC',
    'Target': 'TGT',
    'GeneralElectric': 'GE',
    'JohnsonandJohnson': 'JNJ',
    'Nvidia': 'NVDA',
    'Intel': 'INTC',
}

# Download settings shared by every ticker.
period = '1d'
start='2000-1-1'
end='2021-8-31'

# Write one CSV per ticker, named after the dictionary key.
for tickerName, ticker in dict_tickers.items():
    filepath = f"raw-stock-data/data-2000-2021/{tickerName}.csv"
    download_raw_stock_data(filepath, ticker, start, end, period)


In [4]:
# Show every display name, one per line (iterating a dict yields its keys).
for tickerName in dict_tickers:
    print(tickerName)

Apple
Microsoft
Google
Bitcoin
Facebook
Walmart
Amazon
CVS
Berkshire
ExxonMobil
AtandT
Costco
Walgreens
Kroger
JPMorgan
Verizon
FordMotor
GeneralMotors
Dell
BankOfAmerica
Target
GeneralElectric
JohnsonandJohnson
Nvidia
Intel


In [5]:
import pandas as pd
# Report how many rows each ticker's CSV contains.
for tickerName in dict_tickers:
    csv_path = f"raw-stock-data/{tickerName}.csv"
    df = pd.read_csv(csv_path)
    row_count = len(df)
    print(f"{tickerName} size: {row_count}")

Apple size: 10014
Microsoft size: 8688
Google size: 4036
Bitcoin size: 2175
Facebook size: 2084
Walmart size: 12088
Amazon size: 5862
CVS size: 11985
Berkshire size: 6119
ExxonMobil size: 12778
AtandT size: 9270
Costco size: 8607
Walgreens size: 10202
Kroger size: 12778
JPMorgan size: 10202
Verizon size: 9270
FordMotor size: 12168
GeneralMotors size: 2461
Dell size: 1016
BankOfAmerica size: 11947
Target size: 11986
GeneralElectric size: 12779
JohnsonandJohnson size: 12780
Nvidia size: 5438
Intel size: 10201


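### Clearly, ExxonMobil and Kroger (12,778 rows each) have among the most data; GeneralElectric (12,779) and JohnsonandJohnson (12,780) have slightly more.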

In [6]:
# NOTE(review): `df` is whatever frame was read last before this cell runs.
# The saved output shows 10014 rows starting 1980-12-12, which matches the
# Apple row count rather than the last ticker of the size loop — confirm
# which ticker is intended and load it explicitly.

# Index the frame by date. The guard keeps the cell re-runnable: once 'Date'
# is the index it is no longer a column, so a second set_index('Date') would
# raise KeyError.
if df.index.name != 'Date':
    df.set_index('Date', inplace=True)

# Independent copy holding only the opening and closing prices.
df1 = df[['Open', 'Close']].copy()
df1.head(5)

Unnamed: 0_level_0,Open,Close
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
1980-12-12,0.1006,0.1006
1980-12-15,0.095789,0.095352
1980-12-16,0.08879,0.088353
1980-12-17,0.09054,0.09054
1980-12-18,0.093165,0.093165


In [7]:
# Summarize df1: index range, column dtypes, non-null counts and memory usage.
df1.info()

<class 'pandas.core.frame.DataFrame'>
Index: 10014 entries, 1980-12-12 to 2020-08-28
Data columns (total 2 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   Open    10014 non-null  float64
 1   Close   10014 non-null  float64
dtypes: float64(2)
memory usage: 234.7+ KB


In [8]:
# Display df1 as the cell output.
df1

Unnamed: 0_level_0,Open,Close
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
1980-12-12,0.100600,0.100600
1980-12-15,0.095789,0.095352
1980-12-16,0.088790,0.088353
1980-12-17,0.090540,0.090540
1980-12-18,0.093165,0.093165
...,...,...
2020-08-24,127.875024,125.053177
2020-08-25,123.900603,124.027283
2020-08-26,125.373618,125.713928
2020-08-27,126.329969,124.211098
