<a href="https://colab.research.google.com/github/MManuelG/trading_testing/blob/main/ticker_extractor_py.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [9]:
"""
FILENAME: ticker_extractor.py
AUTHOR: Manu M
CREATED: 2025-07-04
UPDATED: -
DESCRIPTION: Scrapes the S&P 100 ticker symbols from the Wikipedia article with BeautifulSoup, checks each one against yfinance, and writes the scraped tickers to tickers.txt.
"""

############# IMPORT
import requests
from bs4 import BeautifulSoup
import yfinance as yf

############# S&P 100 Wikipedia-Site
url = "https://en.wikipedia.org/wiki/S%26P_100"

############# Get & parse HTML
# Wikimedia asks automated clients to identify themselves with a descriptive
# User-Agent; requests' generic default may be rejected.
request_headers = {
    "User-Agent": "ticker_extractor/1.0 (https://github.com/MManuelG/trading_testing)"
}
# The timeout keeps a stalled connection from hanging the script forever, and
# raise_for_status() stops us from parsing an HTTP error page as if it were
# the article.
response = requests.get(url, headers=request_headers, timeout=30)
response.raise_for_status()
soup = BeautifulSoup(response.text, "html.parser")

############# Find Table with tickers
table = soup.find("table", {"class": "wikitable sortable"})
if table is None:
    # Fail here with a clear message instead of an AttributeError on None
    # further down when the rows are read.
    raise RuntimeError(
        f"No table with class 'wikitable sortable' found at {url}; "
        "the page layout may have changed."
    )

############# extract
# Skip the first (header) row, then take the first <td> of every row that has
# data cells. yfinance spells share classes with "-" where the table uses "."
# (e.g. "BRK.B" -> "BRK-B"), so the symbol is normalised on the way in.
tickers = [
    cells[0].text.strip().replace(".", "-")
    for cells in (tr.find_all("td") for tr in table.find_all("tr")[1:])
    if cells
]
# One count per collected ticker.
counter = len(tickers)



############# validate tickers!
# A ticker counts as valid when the info mapping yfinance returns for it
# contains a 'shortName' entry.
valid_tickers = []
for ticker in tickers:
    try:
        data = yf.Ticker(ticker).info
        is_valid = "shortName" in data
    except Exception:
        # Best-effort check: any lookup failure just means the ticker is not
        # added to valid_tickers. Catching Exception (rather than a bare
        # except) lets Ctrl-C / a kernel interrupt still stop the loop.
        is_valid = False

    if is_valid:
        valid_tickers.append(ticker)

valid_counter = len(valid_tickers)

print(f"Total Tickers scraped: {counter} (via len(tickerlist): {len(tickers)})")
print(f"Valid Tickers scraped: {valid_counter} (via len(tickerlist): {len(valid_tickers)})")

# Scraped tickers that did not pass validation: de-duplicated, and kept in
# scraped order so the printed list is the same on every run (a plain set
# difference would come out in arbitrary order).
validated = set(valid_tickers)
invalid_tickers = [t for t in dict.fromkeys(tickers) if t not in validated]

if invalid_tickers:  # print only if there are any invalid ones!
    print("Invalid tickers:", invalid_tickers)

# Write one ticker per line into "tickers.txt".
# NOTE(review): every scraped ticker is written, including any that failed
# validation -- confirm whether only valid_tickers should be written instead.
with open("tickers.txt", "w", encoding="utf-8") as f:
    f.writelines(f"{ticker}\n" for ticker in tickers)

Total Tickers scraped: 101 (via len(tickerlist): 101)
Valid Tickers scraped: 101 (via len(tickerlist): 101)
