# Loading Ticker Data and save to data folder

In [1]:
import yfinance as yf
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

yahoo_download takes a ticker and returns a dataframe of:
- Daily close price
- Daily volume
- Daily HML

In [2]:
def yahoo_download(ticker_name, period = '1y'):
    """
    yahoo_download downloads the daily financial data for a specific ticker 
    and stores it in a Pandas dataframe.
    
    To download discrete data (dividends, number of shares as times goes on)
    see documentation at:    https://pypi.org/project/yfinance/
    """
    try:
        ticker = yf.Ticker(ticker_name)
        df = ticker.history(period = period)[["Open", "High", "Low", "Close", "Volume"]]
        df["HML"] = df["High"] - df["Low"]
    except Exception as e:
        print(f"Failed to download data for {ticker}: {e}")
        return None
    
    return df[["Close", "Volume", "HML"]]

In [3]:
SP500_tickers = pd.read_html('https://en.wikipedia.org/wiki/List_of_S%26P_500_companies')[0]['Symbol'].tolist()
SP500_data = {}

for ticker in SP500_tickers:
    try:
        SP500_data[ticker] = pd.read_pickle(f"./data/stocks/{ticker}_raw.pkl")
    except FileNotFoundError:
        df = yahoo_download(ticker)
        # gets rid of tickers with any missing values
        if df.isnull().values.any() or df.empty:
            print(f"Ticker: {ticker} has null values, and I will skip it")
        else:
            df.to_pickle(f"./data/stocks/{ticker}_raw.pkl")
            SP500_data[ticker] = df

BRK.B: No data found, symbol may be delisted


Ticker: BRK.B has null values, and I will skip it


BF.B: No price data found, symbol may be delisted (period=1y)


Ticker: BF.B has null values, and I will skip it


Some tickers - VLTO, KVUE, and GEHC have only been added to the S&P500 in the last few months, and some of their data is missing. We can manually remove them from our analysis.

The main difference between the GOOG and GOOGL stock ticker symbols is that GOOG shares have no voting rights, while GOOGL shares do. We will keep them in for now...

In [4]:
bad_ticks = ["VLTO", "KVUE", "GEHC"]

for tick in bad_ticks:
    SP500_data.pop(tick, None)

# Saving Some Data

In [5]:
closing_prices = {tick : SP500_data[tick]["Close"] for tick in SP500_data.keys()}
closing_prices = pd.DataFrame.from_dict(closing_prices)
closing_prices.to_pickle(f"./data/stocks/closing_prices.pkl")

volumes = {tick : SP500_data[tick]["Volume"] for tick in SP500_data.keys()}
volumes = pd.DataFrame.from_dict(volumes)
volumes.to_pickle(f"./data/stocks/volumes.pkl")

HMLs = {tick : SP500_data[tick]["HML"] for tick in SP500_data.keys()}
HMLs = pd.DataFrame.from_dict(HMLs)
HMLs.to_pickle(f"./data/stocks/HMLs.pkl")