In [11]:
from io import StringIO

import pandas as pd
import requests
import yahoo_fin.stock_info as si
from bs4 import BeautifulSoup

## Get the list of companies in the S&P 500 

In [7]:
# URL of the S&P 500 Wikipedia page
url = 'https://en.wikipedia.org/wiki/List_of_S%26P_500_companies'

# Define headers to mimic a browser request
headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3'}

# Send a GET request to the URL and store the response.
# The timeout keeps the cell from hanging indefinitely on a stalled connection.
response = requests.get(url, headers=headers, timeout=30)

# Check if the request was successful
if response.status_code == 200:
    # Parse the HTML content of the response using BeautifulSoup
    soup = BeautifulSoup(response.content, 'html.parser')

    # Let pandas extract every <table> on the page.
    # - The markup is wrapped in StringIO because passing a literal HTML
    #   string to read_html is deprecated since pandas 2.1.
    # - read_html raises ValueError (rather than returning an empty list)
    #   when no table is found, so that case is mapped to an empty list
    #   here to keep the "could not find table" branch below reachable.
    try:
        tables = pd.read_html(StringIO(str(soup)))
    except ValueError:
        tables = []

    # Check if any tables were found
    if len(tables) > 0:
        # The first table in the list should be the S&P 500 table
        sp500_table = tables[0]

        # Extract the company names from the table
        companies = sp500_table['Security'].tolist()

        # Print the list of companies
        print(companies)

    else:
        print("Error: Could not find table")

else:
    print(f"Error: Request failed with status code {response.status_code}")

['3M', 'A. O. Smith', 'Abbott', 'AbbVie', 'Accenture', 'Activision Blizzard', 'ADM', 'Adobe Inc.', 'ADP', 'Advance Auto Parts', 'AES Corporation', 'Aflac', 'Agilent Technologies', 'Air Products and Chemicals', 'Akamai', 'Alaska Air Group', 'Albemarle Corporation', 'Alexandria Real Estate Equities', 'Align Technology', 'Allegion', 'Alliant Energy', 'Allstate', 'Alphabet Inc. (Class A)', 'Alphabet Inc. (Class C)', 'Altria', 'Amazon', 'Amcor', 'AMD', 'Ameren', 'American Airlines Group', 'American Electric Power', 'American Express', 'American International Group', 'American Tower', 'American Water Works', 'Ameriprise Financial', 'AmerisourceBergen', 'Ametek', 'Amgen', 'Amphenol', 'Analog Devices', 'Ansys', 'Aon', 'APA Corporation', 'Apple Inc.', 'Applied Materials', 'Aptiv', 'Arch Capital Group', 'Arista Networks', 'Arthur J. Gallagher & Co.', 'Assurant', 'AT&T', 'Atmos Energy', 'Autodesk', 'AutoZone', 'AvalonBay Communities', 'Avery Dennison', 'Baker Hughes', 'Ball Corporation', 'Bank of

In [9]:
# Preview the first five rows of the scraped constituents table
sp500_table.head(n=5)

Unnamed: 0,Symbol,Security,GICS Sector,GICS Sub-Industry,Headquarters Location,Date added,CIK,Founded
0,MMM,3M,Industrials,Industrial Conglomerates,"Saint Paul, Minnesota",1957-03-04,66740,1902
1,AOS,A. O. Smith,Industrials,Building Products,"Milwaukee, Wisconsin",2017-07-26,91142,1916
2,ABT,Abbott,Health Care,Health Care Equipment,"North Chicago, Illinois",1957-03-04,1800,1888
3,ABBV,AbbVie,Health Care,Pharmaceuticals,"North Chicago, Illinois",2012-12-31,1551152,2013 (1888)
4,ACN,Accenture,Information Technology,IT Consulting & Other Services,"Dublin, Ireland",2011-07-06,1467373,1989


## Get the top institutional holders for any stock

In [17]:
def get_shareholders_distribution(stock_symbol):
    """Fetch the top institutional holders from a stock's Yahoo Finance holders page.

    Downloads ``https://finance.yahoo.com/quote/<stock_symbol>/holders``, parses
    all HTML tables with pandas, and treats the table at index 1 as the
    institutional-holders table (NOTE(review): this index is an assumption
    about the page layout -- confirm if the page structure changes).

    Parameters
    ----------
    stock_symbol : str
        Ticker symbol interpolated into the URL, e.g. ``"AAPL"``.

    Returns
    -------
    list of dict or None
        At most ten records with the keys 'Holder', 'Shares', 'Date Reported',
        '% Out' and 'Value'. ``None`` is returned (after printing an error
        message) when the request does not return status 200 or when the
        expected table is not present in the page.

    Raises
    ------
    KeyError
        If the selected table does not contain the expected columns.

    Notes
    -----
    The resulting list is also printed before it is returned.
    """
    url = f"https://finance.yahoo.com/quote/{stock_symbol}/holders"
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3'}
    # The timeout prevents the call from blocking forever on a stalled connection.
    response = requests.get(url, headers=headers, timeout=30)

    if response.status_code != 200:
        print(f"Error: Request failed with status code {response.status_code}")
        return None

    # Parse the tables using Pandas. The markup is wrapped in StringIO because
    # passing a literal HTML string to read_html is deprecated since pandas 2.1.
    # read_html raises ValueError when the page contains no table at all.
    try:
        tables = pd.read_html(StringIO(response.text))
    except ValueError:
        tables = []

    # Guard the hard-coded index so a missing table is reported in the same
    # print-and-return-None style as a failed request instead of an IndexError.
    if len(tables) < 2:
        print("Error: Could not find institutional holders table")
        return None

    # Get the institutional holders table
    institutional_holders = tables[1]

    # Extract the top 10 shareholders, keeping only the columns of interest
    columns = ['Holder', 'Shares', 'Date Reported', '% Out', 'Value']
    top_shareholders = institutional_holders.head(10)[columns]

    # Convert the DataFrame to a list of dictionaries (one per holder)
    shareholders = top_shareholders.to_dict('records')

    print(shareholders)
    return shareholders

# Example usage: look up the top institutional holders for a single ticker
stock_symbol = "AAPL"  # Replace with the stock symbol you want to check
top_shareholders = get_shareholders_distribution(stock_symbol)

# A None result (failed lookup) or an empty list prints nothing;
# otherwise each holder record is printed on its own line.
for shareholder in top_shareholders or []:
    print(shareholder)

[{'Holder': 'Vanguard Group, Inc. (The)', 'Shares': 1278250538, 'Date Reported': 'Dec 30, 2022', '% Out': '8.08%', 'Value': 216893541925}, {'Holder': 'Blackrock Inc.', 'Shares': 1029208322, 'Date Reported': 'Dec 30, 2022', '% Out': '6.50%', 'Value': 174636060538}, {'Holder': 'Berkshire Hathaway, Inc', 'Shares': 895136175, 'Date Reported': 'Dec 30, 2022', '% Out': '5.66%', 'Value': 151886699617}, {'Holder': 'State Street Corporation', 'Shares': 586857405, 'Date Reported': 'Dec 30, 2022', '% Out': '3.71%', 'Value': 99577960182}, {'Holder': 'FMR, LLC', 'Shares': 321162411, 'Date Reported': 'Dec 30, 2022', '% Out': '2.03%', 'Value': 54494835546}, {'Holder': 'Geode Capital Management, LLC', 'Shares': 282749817, 'Date Reported': 'Dec 30, 2022', '% Out': '1.79%', 'Value': 47976986877}, {'Holder': 'Price (T.Rowe) Associates Inc', 'Shares': 226281368, 'Date Reported': 'Dec 30, 2022', '% Out': '1.43%', 'Value': 38395420864}, {'Holder': 'Morgan Stanley', 'Shares': 208655323, 'Date Reported': 'Dec

In [14]:
# Display the current value of top_shareholders.
# NOTE(review): the saved output of this cell uses the keys 'name' and 'shares'
# and carries execution count 14, while the cell defining
# get_shareholders_distribution carries execution count 17 and returns records
# keyed 'Holder', 'Shares', 'Date Reported', '% Out', 'Value'. The saved output
# therefore appears to predate the current function -- re-run to refresh it.
top_shareholders

[{'name': 'Vanguard Group, Inc. (The)', 'shares': '8.08%%'},
 {'name': 'Blackrock Inc.', 'shares': '6.50%%'},
 {'name': 'Berkshire Hathaway, Inc', 'shares': '5.66%%'},
 {'name': 'State Street Corporation', 'shares': '3.71%%'},
 {'name': 'FMR, LLC', 'shares': '2.03%%'},
 {'name': 'Geode Capital Management, LLC', 'shares': '1.79%%'},
 {'name': 'Price (T.Rowe) Associates Inc', 'shares': '1.43%%'},
 {'name': 'Morgan Stanley', 'shares': '1.32%%'},
 {'name': 'Northern Trust Corporation', 'shares': '1.10%%'},
 {'name': 'Norges Bank Investment Management', 'shares': '1.06%%'}]