In [2]:
import yfinance as yf
import pandas as pd
import numpy as np
import os
from datetime import datetime
from dateutil.relativedelta import relativedelta


In [3]:
def get_sp500_data():
    """
    Fetch S&P 500 tickers and industry data from Wikipedia.

    Returns:
        dict: A dictionary mapping ticker symbols (str) to their industry data.
    """
    url = "https://en.wikipedia.org/wiki/List_of_S%26P_500_companies"
    sp500 = pd.read_html(url)[0]

    # Create dictionary with requested fields
    ticker_data_dict = {
        t.replace(".", "-"): {
            "ticker": t.replace(".", "-"),
            "name": n,
            "industry": ind,
            "sub_industry": sub,
        }
        for t, n, ind, sub in zip(
            sp500["Symbol"],
            sp500["Security"],
            sp500["GICS Sector"],
            sp500["GICS Sub-Industry"],
        )
    }

    return ticker_data_dict

In [4]:
def fetch_ticker_data(ticker: str, start_date: str, end_date: str) -> pd.DataFrame:
    """
    Download historical stock data from Yahoo Finance.

    Args:
        ticker (str): Stock ticker symbol.
        start_date (str): Start date for the data (YYYY-MM-DD).
        end_date (str): End date for the data (YYYY-MM-DD).

    Returns:
        pd.DataFrame: A DataFrame containing historical stock data for the given ticker.
    """
    data = yf.download(tickers=ticker, start=start_date, end=end_date, interval="1d", threads=True, multi_level_index=False)
    data = data.reset_index()
    # data.columns = [col[0] for col in data.columns]
    data['Ticker'] = ticker

    return data

In [5]:
# get S&P 500 data
data = get_sp500_data()

# check first ticker symbol
first_ticker = list(data.keys())[0]
print(f"Stock: {first_ticker}")
print(data[first_ticker])


# find today and look back dates
today = datetime.today()
look_back = today - relativedelta(months=6)

# convert to strings
today = today.strftime("%Y-%m-%d")
look_back = look_back.strftime("%Y-%m-%d")

# fetch stock data for first ticker
df = fetch_ticker_data(ticker=first_ticker, start_date=look_back, end_date=today)

Stock: MMM
{'ticker': 'MMM', 'name': '3M', 'industry': 'Industrials', 'sub_industry': 'Industrial Conglomerates'}
YF.download() has changed argument auto_adjust default to True


[*********************100%***********************]  1 of 1 completed


In [6]:
df.head()

Unnamed: 0,Date,Close,High,Low,Open,Volume,Ticker
0,2024-09-16,133.240646,133.903829,132.468582,132.61705,3069300,MMM
1,2024-09-17,132.211227,133.062464,130.964035,132.725927,3969000,MMM
2,2024-09-18,132.003357,132.913996,130.419627,131.904368,2960900,MMM
3,2024-09-19,132.47847,133.943425,131.963771,133.032771,2691500,MMM
4,2024-09-20,133.399017,133.577179,131.419363,131.656912,11579600,MMM


In [28]:
data

{'MMM': {'ticker': 'MMM',
  'name': '3M',
  'industry': 'Industrials',
  'sub_industry': 'Industrial Conglomerates'},
 'AOS': {'ticker': 'AOS',
  'name': 'A. O. Smith',
  'industry': 'Industrials',
  'sub_industry': 'Building Products'},
 'ABT': {'ticker': 'ABT',
  'name': 'Abbott Laboratories',
  'industry': 'Health Care',
  'sub_industry': 'Health Care Equipment'},
 'ABBV': {'ticker': 'ABBV',
  'name': 'AbbVie',
  'industry': 'Health Care',
  'sub_industry': 'Biotechnology'},
 'ACN': {'ticker': 'ACN',
  'name': 'Accenture',
  'industry': 'Information Technology',
  'sub_industry': 'IT Consulting & Other Services'},
 'ADBE': {'ticker': 'ADBE',
  'name': 'Adobe Inc.',
  'industry': 'Information Technology',
  'sub_industry': 'Application Software'},
 'AMD': {'ticker': 'AMD',
  'name': 'Advanced Micro Devices',
  'industry': 'Information Technology',
  'sub_industry': 'Semiconductors'},
 'AES': {'ticker': 'AES',
  'name': 'AES Corporation',
  'industry': 'Utilities',
  'sub_industry': '

In [None]:
ticker_data_dict_df = pd.DataFrame(data).T.reset_index(drop=True)
ticker_data_dict_df

Unnamed: 0,ticker,name,industry,sub_industry
0,MMM,3M,Industrials,Industrial Conglomerates
1,AOS,A. O. Smith,Industrials,Building Products
2,ABT,Abbott Laboratories,Health Care,Health Care Equipment
3,ABBV,AbbVie,Health Care,Biotechnology
4,ACN,Accenture,Information Technology,IT Consulting & Other Services
...,...,...,...,...
498,XYL,Xylem Inc.,Industrials,Industrial Machinery & Supplies & Components
499,YUM,Yum! Brands,Consumer Discretionary,Restaurants
500,ZBRA,Zebra Technologies,Information Technology,Electronic Equipment & Instruments
501,ZBH,Zimmer Biomet,Health Care,Health Care Equipment


In [49]:
combined_df = pd.merge(ticker_data_dict_df, df, how="right", left_on="ticker", right_on="Ticker")

In [52]:
combined_df.drop(columns=['Ticker']).head()

Unnamed: 0,ticker,name,industry,sub_industry,Date,Close,High,Low,Open,Volume
0,MMM,3M,Industrials,Industrial Conglomerates,2024-09-16,133.240646,133.903829,132.468582,132.61705,3069300
1,MMM,3M,Industrials,Industrial Conglomerates,2024-09-17,132.211227,133.062464,130.964035,132.725927,3969000
2,MMM,3M,Industrials,Industrial Conglomerates,2024-09-18,132.003357,132.913996,130.419627,131.904368,2960900
3,MMM,3M,Industrials,Industrial Conglomerates,2024-09-19,132.47847,133.943425,131.963771,133.032771,2691500
4,MMM,3M,Industrials,Industrial Conglomerates,2024-09-20,133.399017,133.577179,131.419363,131.656912,11579600


In [None]:
def combine_stock_data(ticker: str, ticker_data_dict: dict, ticker_df: pd.DataFrame) -> pd.DataFrame:
    """
    Combine historical ticker data with industry and subindustry.

    Args:
        ticker (str): Stock ticker symbol.
        ticker_data_dict (dict): Dictionary of Ticker data. This includes Industry and sub-industry.
        ticker_df (pd.DataFrame): DataFrame of historical ticker data.
    """
    ticker_data_dict_df = pd.DataFrame(ticker_data_dict, index=[0])

    combined_df = pd.merge(ticker_df, ticker_data_dict_df, how="left", left_on=ticker)

    return combined_df

In [31]:
full_df = combine_stock_data(first_ticker, data, df)

KeyError: 'MMM'