# Equal-Weight S&P 500 Index Fund Web Scraping

# Extracting S&P 500 tickers

In [1]:
# Importing relevant libraries
from bs4 import BeautifulSoup
import pandas as pd
from pandas_datareader import data as pdr
import requests
import yfinance as yf

In [2]:
# Download the Wikipedia list of S&P 500 constituents and locate the constituents table.
# - HTTPS is requested directly instead of relying on a redirect from plain HTTP.
# - The timeout keeps the cell from hanging indefinitely on a stalled connection.
# - raise_for_status() surfaces HTTP errors here rather than as a confusing failure
#   in a later cell.
# Note: `html` holds the requests Response object; the markup itself is `html.text`.
html = requests.get(
    'https://en.wikipedia.org/wiki/List_of_S%26P_500_companies',
    timeout=30,
)
html.raise_for_status()
soup = BeautifulSoup(html.text, 'lxml')
table = soup.find('table', {'id': 'constituents'})
if table is None:
    # soup.find returns None when nothing matches; fail with a clear message instead of
    # letting a later cell crash on an attribute lookup on None.
    raise ValueError("Could not find a table with id='constituents' on the page")

In [3]:
table.prettify()

'<table class="wikitable sortable" id="constituents">\n <tbody>\n  <tr>\n   <th>\n    <a href="/wiki/Ticker_symbol" title="Ticker symbol">\n     Symbol\n    </a>\n   </th>\n   <th>\n    Security\n   </th>\n   <th>\n    <a href="/wiki/SEC_filing" title="SEC filing">\n     SEC filings\n    </a>\n   </th>\n   <th>\n    <a href="/wiki/Global_Industry_Classification_Standard" title="Global Industry Classification Standard">\n     GICS\n    </a>\n    Sector\n   </th>\n   <th>\n    GICS Sub-Industry\n   </th>\n   <th>\n    Headquarters Location\n   </th>\n   <th>\n    Date first added\n   </th>\n   <th>\n    <a href="/wiki/Central_Index_Key" title="Central Index Key">\n     CIK\n    </a>\n   </th>\n   <th>\n    Founded\n   </th>\n  </tr>\n  <tr>\n   <td>\n    <a class="external text" href="https://www.nyse.com/quote/XNYS:MMM" rel="nofollow">\n     MMM\n    </a>\n   </td>\n   <td>\n    <a href="/wiki/3M" title="3M">\n     3M\n    </a>\n   </td>\n   <td>\n    <a class="external text" href="http

In [4]:
# Pull the ticker symbol out of the first cell of every data row
# (the first <tr> is the header row and is skipped).
# strip() removes surrounding whitespace whatever its length; the previous ticker[:-1]
# assumed exactly one trailing newline and would otherwise cut off the symbol's last letter.
tickers = []
for row in table.find_all('tr')[1:]:
    cells = row.find_all('td')
    if not cells:
        # Rows without <td> cells (e.g. an extra header row) carry no ticker.
        continue
    ticker = cells[0].text.strip()
    tickers.append(ticker)

In [5]:
# Wrap the scraped symbols in a named pandas Series for the cells below.
ticker_data = pd.Series(tickers, name='Tickers')

In [6]:
print(ticker_data)

0       MMM
1       AOS
2       ABT
3      ABBV
4      ABMD
       ... 
498     YUM
499    ZBRA
500     ZBH
501    ZION
502     ZTS
Name: Tickers, Length: 503, dtype: object


In [7]:
len(ticker_data)

503

In [8]:
ticker_data

0       MMM
1       AOS
2       ABT
3      ABBV
4      ABMD
       ... 
498     YUM
499    ZBRA
500     ZBH
501    ZION
502     ZTS
Name: Tickers, Length: 503, dtype: object

In [9]:
# Remove pandas' row-display cap so frames and series shown below are rendered in full
# instead of being truncated with "...".
# NOTE(review): this applies to every later cell; the ~500-row outputs will be long.
pd.set_option('display.max_rows', None)

# Extracting financial data for each constituent

## Building out a base case before iteration

In [18]:
# Base case: set up a yfinance handle for a single ticker before looping over all of them.
# NOTE(review): pdr_override() presumably patches pandas_datareader, but `pdr` is not used
# in any visible cell — confirm whether this call is still needed.
yf.pdr_override()
# Take the first scraped symbol and wrap it in a yfinance Ticker object;
# its data is read through `test.info` in the following cells.
t= ticker_data[0]
test = yf.Ticker(t)

In [19]:
type(test.info)

dict

In [20]:
test.info

{'zip': '55144-1000',
 'sector': 'Industrials',
 'fullTimeEmployees': 95000,
 'longBusinessSummary': '3M Company operates as a diversified technology company worldwide. It operates through four segments: Safety and Industrial; Transportation and Electronics; Health Care; and Consumer. The Safety and Industrial segment offers industrial abrasives and finishing for metalworking applications; autobody repair solutions; closure systems for personal hygiene products, masking, and packaging materials; electrical products and materials for construction and maintenance, power distribution, and electrical original equipment manufacturers; structural adhesives and tapes; respiratory, hearing, eye, and fall protection solutions; and natural and color-coated mineral granules for shingles. The Transportation and Electronics segment provides ceramic solutions; attachment tapes, films, sound, and temperature management for transportation vehicles; premium large format graphic films for advertising an

In [21]:
# Turn the info dict into a single-column frame: with orient='index' each dict key
# becomes a row label and its value lands in the one column (labelled 0).
full_info = pd.DataFrame.from_dict(test.info, orient='index')

In [22]:
full_info

Unnamed: 0,0
zip,55144-1000
sector,Industrials
fullTimeEmployees,95000
longBusinessSummary,3M Company operates as a diversified technolog...
city,Saint Paul
phone,651 733 1110
state,MN
country,United States
companyOfficers,[]
website,https://www.3m.com


In [23]:
# Look up the 'currentPrice' row by label; .loc returns that row as a Series,
# so the value itself is read from column label 0.
current_price = full_info.loc['currentPrice'] # Current stock price
current_price[0]

147.41

In [24]:
# Look up the 'marketCap' row by label; .loc returns that row as a Series,
# so the value itself is read from column label 0.
mark_cap = full_info.loc['marketCap']# Current market capitalization (equity value)
mark_cap[0]

83965321216

In [30]:
# Build a one-row frame for the base-case ticker.
# DataFrame.append was deprecated in pandas 1.4 and removed in pandas 2.0, so the row is
# handed straight to the DataFrame constructor instead of being appended to an empty frame.
my_columns = ['Ticker', 'Price','Market Capitalization', 'Number Of Shares to Buy']
test_dataframe = pd.DataFrame(
    [[t, current_price[0], mark_cap[0], 'N/A']],  # 'N/A': share count is not computed yet
    columns=my_columns,
)

In [31]:
test_dataframe

Unnamed: 0,Ticker,Price,Market Capitalization,Number Of Shares to Buy
0,MMM,147.41,83965321216,


## Iterating through all constituents

In [32]:
# Column layout for the per-constituent results table, and an empty frame using it.
my_columns = [
    'Ticker',
    'Price',
    'Market Capitalization',
    'Number Of Shares to Buy',
]
final_dataframe = pd.DataFrame(columns=my_columns)

In [33]:
final_dataframe

Unnamed: 0,Ticker,Price,Market Capitalization,Number Of Shares to Buy


In [35]:
# Collect the current price and market capitalization of every constituent
# into `final_dataframe` (one row per ticker, in the order of `ticker_data`).
my_columns = ['Ticker', 'Price','Market Capitalization', 'Number Of Shares to Buy']

# Rows are gathered in a plain list and turned into a frame once at the end:
# DataFrame.append was removed in pandas 2.0, and growing a frame row by row
# re-copies the whole frame on every iteration.
rows = []
for ticker in ticker_data:
    # Yahoo Finance writes share classes with a dash (e.g. BRK-B) where the Wikipedia
    # table uses a dot (BRK.B). Query with the Yahoo spelling, but keep the scraped
    # symbol in the output table.
    stock_info = yf.Ticker(ticker.replace('.', '-')).info

    rows.append({
        'Ticker': ticker,
        # A missing field falls back to numeric 0 (previously the string '0'),
        # so the column is not forced to mixed str/number content.
        'Price': stock_info.get('currentPrice', 0),
        'Market Capitalization': stock_info.get('marketCap', 0),
        # Placeholder: the share count is not computed in this notebook section.
        'Number Of Shares to Buy': 'N/A',
    })

final_dataframe = pd.DataFrame(rows, columns=my_columns)

In [36]:
final_dataframe

Unnamed: 0,Ticker,Price,Market Capitalization,Number Of Shares to Buy
0,MMM,147.41,83965321216.0,
1,AOS,60.6,9359548416.0,
2,ABT,109.25,191320784896.0,
3,ABBV,138.04,244068515840.0,
4,ABMD,291.5,13299920896.0,
5,ACN,309.35,195835871232.0,
6,ATVI,80.49,62967889920.0,
7,ADM,82.76,46522212352.0,
8,ADBE,433.43,202845241344.0,
9,ADP,247.2,102715801600.0,


In [37]:
# Persist the collected table as CSV in the current working directory.
# The frame's index is written as well (to_csv default), so the file gets a leading
# unnamed column.
final_dataframe.to_csv('s&p500_data.csv')