# Get list of components in an index
The constituent lists are available on Wikipedia.

In [1]:
import time
from pathlib import Path

import pandas as pd
import yfinance as yf

## Download tickers list by index

### S&P500

In [2]:
# The constituents table is the first table parsed from the page.
sp500_url = 'https://en.wikipedia.org/wiki/List_of_S%26P_500_companies'
SPX_tickers = pd.read_html(sp500_url)[0]
SPX_tickers

Unnamed: 0,Symbol,Security,GICS Sector,GICS Sub-Industry,Headquarters Location,Date added,CIK,Founded
0,MMM,3M,Industrials,Industrial Conglomerates,"Saint Paul, Minnesota",1957-03-04,66740,1902
1,AOS,A. O. Smith,Industrials,Building Products,"Milwaukee, Wisconsin",2017-07-26,91142,1916
2,ABT,Abbott,Health Care,Health Care Equipment,"North Chicago, Illinois",1957-03-04,1800,1888
3,ABBV,AbbVie,Health Care,Pharmaceuticals,"North Chicago, Illinois",2012-12-31,1551152,2013 (1888)
4,ACN,Accenture,Information Technology,IT Consulting & Other Services,"Dublin, Ireland",2011-07-06,1467373,1989
...,...,...,...,...,...,...,...,...
498,YUM,Yum! Brands,Consumer Discretionary,Restaurants,"Louisville, Kentucky",1997-10-06,1041061,1997
499,ZBRA,Zebra Technologies,Information Technology,Electronic Equipment & Instruments,"Lincolnshire, Illinois",2019-12-23,877212,1969
500,ZBH,Zimmer Biomet,Health Care,Health Care Equipment,"Warsaw, Indiana",2001-08-07,1136869,1927
501,ZION,Zions Bancorporation,Financials,Regional Banks,"Salt Lake City, Utah",2001-06-22,109380,1873


In [3]:
# Number of S&P 500 constituents per GICS sector, largest first.
SPX_tickers.value_counts(subset=['GICS Sector'])

GICS Sector           
Industrials               75
Financials                72
Health Care               65
Information Technology    65
Consumer Discretionary    52
Consumer Staples          38
Real Estate               31
Utilities                 30
Materials                 29
Communication Services    23
Energy                    23
Name: count, dtype: int64

### Nasdaq-100

In [4]:
# The page contains several tables, so a fixed position such as [4] silently
# picks the wrong one as soon as a table is added or removed. Select the
# constituents table by the columns this notebook relies on instead.
ndx_tables = pd.read_html('https://en.wikipedia.org/wiki/Nasdaq-100')
NDX_tickers = next(
    (
        table for table in ndx_tables
        if {'Ticker', 'GICS Sector'}.issubset(table.columns)
    ),
    None,
)
if NDX_tickers is None:
    raise ValueError(
        "No table with 'Ticker' and 'GICS Sector' columns found on the Nasdaq-100 page"
    )
NDX_tickers

Unnamed: 0,Company,Ticker,GICS Sector,GICS Sub-Industry
0,Adobe Inc.,ADBE,Information Technology,Application Software
1,ADP,ADP,Industrials,Human Resource & Employment Services
2,Airbnb,ABNB,Consumer Discretionary,"Hotels, Resorts & Cruise Lines"
3,Align Technology,ALGN,Health Care,Health Care Supplies
4,Alphabet Inc. (Class A),GOOGL,Communication Services,Interactive Media & Services
...,...,...,...,...
96,Warner Bros. Discovery,WBD,Communication Services,Broadcasting
97,"Workday, Inc.",WDAY,Information Technology,Application Software
98,Xcel Energy,XEL,Utilities,Multi-Utilities
99,Zoom Video Communications,ZM,Information Technology,Application Software


In [5]:
# Number of Nasdaq-100 constituents per GICS sector, largest first.
NDX_tickers.value_counts(subset=['GICS Sector'])

GICS Sector           
Information Technology    36
Consumer Discretionary    14
Health Care               14
Communication Services    11
Industrials               10
Consumer Staples           8
Utilities                  4
Energy                     2
Financials                 1
Real Estate                1
Name: count, dtype: int64

## Get tickers info

In [31]:
# Replace '.' by '-' because of Yahoo Finance.
# regex=False is explicit on purpose: interpreted as a regular expression,
# '.' matches every character and would wipe out the symbols.
ticker_list = (
    SPX_tickers["Symbol"]
    .sort_values()
    .str.replace('.', '-', regex=False)
    .unique()
)
ticker_list

array(['A', 'AAL', 'AAPL', 'ABBV', 'ABNB', 'ABT', 'ACGL', 'ACN', 'ADBE',
       'ADI', 'ADM', 'ADP', 'ADSK', 'AEE', 'AEP', 'AES', 'AFL', 'AIG',
       'AIZ', 'AJG', 'AKAM', 'ALB', 'ALGN', 'ALK', 'ALL', 'ALLE', 'AMAT',
       'AMCR', 'AMD', 'AME', 'AMGN', 'AMP', 'AMT', 'AMZN', 'ANET', 'ANSS',
       'AON', 'AOS', 'APA', 'APD', 'APH', 'APTV', 'ARE', 'ATO', 'ATVI',
       'AVB', 'AVGO', 'AVY', 'AWK', 'AXON', 'AXP', 'AZO', 'BA', 'BAC',
       'BALL', 'BAX', 'BBWI', 'BBY', 'BDX', 'BEN', 'BF-B', 'BG', 'BIIB',
       'BIO', 'BK', 'BKNG', 'BKR', 'BLK', 'BMY', 'BR', 'BRK-B', 'BRO',
       'BSX', 'BWA', 'BX', 'BXP', 'C', 'CAG', 'CAH', 'CARR', 'CAT', 'CB',
       'CBOE', 'CBRE', 'CCI', 'CCL', 'CDAY', 'CDNS', 'CDW', 'CE', 'CEG',
       'CF', 'CFG', 'CHD', 'CHRW', 'CHTR', 'CI', 'CINF', 'CL', 'CLX',
       'CMA', 'CMCSA', 'CME', 'CMG', 'CMI', 'CMS', 'CNC', 'CNP', 'COF',
       'COO', 'COP', 'COR', 'COST', 'CPB', 'CPRT', 'CPT', 'CRL', 'CRM',
       'CSCO', 'CSGP', 'CSX', 'CTAS', 'CTLT', 'CTRA', 'CTSH

### Filter ticker info

In [32]:
# Keys kept from each ticker's info record; every other key is discarded.
required_info = [
    'currentPrice', 'industry', 'marketCap', 'previousClose',
    'sector', 'shortName', 'symbol',
]


def get_ticker_info(_ticker):
    """Return the `required_info` fields of one ticker as a dict.

    Looks up `_ticker` through `yf.Ticker(...).info` and keeps only the keys
    listed in `required_info`. A key absent from the info record maps to None.
    """
    info = yf.Ticker(_ticker).info
    selected = {}
    for field in required_info:
        selected[field] = info.get(field)
    return selected

# Quick check of the helper on a single symbol.
get_ticker_info("MSFT")

{'currentPrice': 318.955,
 'industry': 'Software - Infrastructure',
 'marketCap': 2369758953472,
 'previousClose': 313.39,
 'sector': 'Technology',
 'shortName': 'Microsoft Corporation',
 'symbol': 'MSFT'}

### download info for each ticker

In [10]:
# Gather one record per ticker and build the DataFrame once at the end.
# The records live in their own list so `all_info_required` only ever names
# the resulting DataFrame.
ticker_records = []

for t in ticker_list:
    ticker_records.append(get_ticker_info(t))
    # One-second pause between requests (presumably to avoid rate limiting).
    time.sleep(1)

all_info_required = pd.DataFrame(ticker_records)
all_info_required

Unnamed: 0,currentPrice,industry,marketCap,previousClose,sector,shortName,symbol
0,111.67,Diagnostics & Research,32673189888,110.26,Healthcare,"Agilent Technologies, Inc.",A
1,12.73,Airlines,8317297664,12.29,Industrials,"American Airlines Group, Inc.",AAL
2,173.66,Consumer Electronics,2715035107328,172.40,Technology,Apple Inc.,AAPL
3,147.69,Drug Manufacturers - General,260680237056,147.43,Healthcare,AbbVie Inc.,ABBV
4,127.41,Travel Services,86917578752,127.73,Consumer Cyclical,"Airbnb, Inc.",ABNB
...,...,...,...,...,...,...,...
495,123.53,Restaurants,34614464512,122.99,Consumer Cyclical,"Yum! Brands, Inc.",YUM
496,109.69,Medical Devices,23085905920,110.32,Healthcare,"Zimmer Biomet Holdings, Inc.",ZBH
497,230.94,Communication Equipment,11856090112,230.33,Technology,Zebra Technologies Corporation,ZBRA
498,33.33,Banks - Regional,4937672704,32.92,Financial Services,Zions Bancorporation N.A.,ZION


#### Hammer in failed info calls
Retry the tickers whose info call failed above; this may not be required if the API works.

In [34]:
# Two symbols are fetched again; the third record is entered by hand.
hammer = pd.DataFrame(
    [get_ticker_info(symbol) for symbol in ("BRK-B", "BF-B")]
    + [
        {
            'currentPrice': 265.03,
            'industry': 'Farm & Heavy Construction Machinery',
            'marketCap': 136847171584,
            'previousClose': 268.97,
            'sector': 'Industrials',
            'shortName': 'Caterpillar Inc',
            'symbol': 'CAT',
        }
    ]
)
hammer

Unnamed: 0,currentPrice,industry,marketCap,previousClose,sector,shortName,symbol
0,343.69,Insurance - Diversified,750409285632,343.04,Financial Services,Berkshire Hathaway Inc. New,BRK-B
1,57.73,Beverages - Wineries & Distilleries,27744575488,57.75,Consumer Defensive,Brown Forman Inc,BF-B
2,265.03,Farm & Heavy Construction Machinery,136847171584,268.97,Industrials,Caterpillar Inc,CAT


## Save info

In [40]:
# The symbols retried in `hammer` (BRK-B, BF-B, CAT) are also part of
# `ticker_list`, so a plain concat can carry two rows for them. A lookup that
# returned no data also leaves a row whose fields, `symbol` included, are None,
# because get_ticker_info reads every key with .get().
# Drop rows without a symbol and keep the last row per symbol so the
# re-fetched values from `hammer` win.
output = (
    pd.concat([all_info_required, hammer], ignore_index=True)
    .dropna(subset=["symbol"])
    .drop_duplicates(subset="symbol", keep="last")
    .reset_index(drop=True)
)
output

Unnamed: 0,currentPrice,industry,marketCap,previousClose,sector,shortName,symbol
0,111.67,Diagnostics & Research,32673189888,110.26,Healthcare,"Agilent Technologies, Inc.",A
1,12.73,Airlines,8317297664,12.29,Industrials,"American Airlines Group, Inc.",AAL
2,173.66,Consumer Electronics,2715035107328,172.40,Technology,Apple Inc.,AAPL
3,147.69,Drug Manufacturers - General,260680237056,147.43,Healthcare,AbbVie Inc.,ABBV
4,127.41,Travel Services,86917578752,127.73,Consumer Cyclical,"Airbnb, Inc.",ABNB
...,...,...,...,...,...,...,...
498,33.33,Banks - Regional,4937672704,32.92,Financial Services,Zions Bancorporation N.A.,ZION
499,170.99,Drug Manufacturers - Specialty & Generic,78709604352,170.33,Healthcare,Zoetis Inc.,ZTS
500,343.69,Insurance - Diversified,750409285632,343.04,Financial Services,Berkshire Hathaway Inc. New,BRK-B
501,57.73,Beverages - Wineries & Distilleries,27744575488,57.75,Consumer Defensive,Brown Forman Inc,BF-B


In [51]:
# Two levels above the current working directory, then data/trading.
data_path = Path().resolve().parent.parent.joinpath("data", "trading")

In [50]:
# to_parquet does not create missing folders, so make sure the target exists.
data_path.mkdir(parents=True, exist_ok=True)
output.to_parquet(data_path/"sp500_stocks_info_20231004.parquet")