# S&P 500 index fund equal weight distribution

In this notebook the goal is to buy up all the stocks of the S&P 500 in equal amounts. The real challenge is to retrieve data from an API, use dataframes to represent it and to store it into an Excel file. All of this have been achived in the code below and can be seen in the attached Excel file!

To begin with we import all the needed libraries.

In [2]:
import numpy as np 
import pandas as pd
import requests
import xlsxwriter
import math

Importing all the ticker symbols for all the companies in the S&P 500.

In [9]:
snpdata = pd.read_csv('sp500_companies.csv')
ticker_list = sorted(snpdata['Symbol'].tolist())
print(ticker_list)
print(len(ticker_list))


['A', 'AAPL', 'ABBV', 'ABNB', 'ABT', 'ACGL', 'ACN', 'ADBE', 'ADI', 'ADM', 'ADP', 'ADSK', 'AEE', 'AEP', 'AES', 'AFL', 'AIG', 'AIZ', 'AJG', 'AKAM', 'ALB', 'ALGN', 'ALL', 'ALLE', 'AMAT', 'AMCR', 'AMD', 'AME', 'AMGN', 'AMP', 'AMT', 'AMTM', 'AMZN', 'ANET', 'ANSS', 'AON', 'AOS', 'APA', 'APD', 'APH', 'APTV', 'ARE', 'ATO', 'AVB', 'AVGO', 'AVY', 'AWK', 'AXON', 'AXP', 'AZO', 'BA', 'BAC', 'BALL', 'BAX', 'BBY', 'BDX', 'BEN', 'BF-B', 'BG', 'BIIB', 'BK', 'BKNG', 'BKR', 'BLDR', 'BLK', 'BMY', 'BR', 'BRK-B', 'BRO', 'BSX', 'BWA', 'BX', 'BXP', 'C', 'CAG', 'CAH', 'CARR', 'CAT', 'CB', 'CBOE', 'CBRE', 'CCI', 'CCL', 'CDNS', 'CDW', 'CE', 'CEG', 'CF', 'CFG', 'CHD', 'CHRW', 'CHTR', 'CI', 'CINF', 'CL', 'CLX', 'CMCSA', 'CME', 'CMG', 'CMI', 'CMS', 'CNC', 'CNP', 'COF', 'COO', 'COP', 'COR', 'COST', 'CPAY', 'CPB', 'CPRT', 'CPT', 'CRL', 'CRM', 'CRWD', 'CSCO', 'CSGP', 'CSX', 'CTAS', 'CTRA', 'CTSH', 'CTVA', 'CVS', 'CVX', 'CZR', 'D', 'DAL', 'DAY', 'DD', 'DE', 'DECK', 'DELL', 'DFS', 'DG', 'DGX', 'DHI', 'DHR', 'DIS', 'DLR'

Retrieving the API key and set up a test case by getting data for the AAPL stock

In [6]:

symbol = 'AAPL'
from secret import FMP_API_1
base_url = f'https://financialmodelingprep.com/api/v3/quote/'
ext =  f'{base_url}{symbol}?apikey={FMP_API_1}'


response = requests.get(ext)
data = response.json()

if data: 
    quote = data[0]  
    for key, value in quote.items():
        print(f"{key}: {value}")
else:
    print("No data found for the symbol.")

data = data[0]


No data found for the symbol.


IndexError: list index out of range

## Iterator 

The function fetch_stock_data() this works by going through each stock ticker and getting back its data and saving them to the data list.

The function fetch_stock_data_batch() is similar to the previous function but makes use of the batch function to retrieve data in order to reduce the amount of API calls.

In [63]:

def fetch_stock_data(symbols,api):
    stock_data = []

    for symbol in symbols:
        ext = f'{base_url}{symbol}?apikey={api}'
        
        response = requests.get(ext)
        data = response.json()

        if data:
            stock_data.append(data[0])
        else:
            print(f"No data found for symbol: {symbol}")
    
    return stock_data

In [None]:
# all_data = []
# selected_symbols1 = ticker_list[:200]
# selected_symbols2 = ticker_list[200:400]
# selected_symbols3 = ticker_list[400:506]

# sec1 = fetch_stock_data(selected_symbols1,FMP_API_1)
# sec2 = fetch_stock_data(selected_symbols2,FMP_API_1)
# sec3 = fetch_stock_data(selected_symbols3,FMP_API_1)
# # print(type(sec1))
# # print(sec1)

# for sec in [sec1,sec2,sec3]:
#     for stock in sec:
#         all_data.append(stock)

# print(all_data)

# for stock in all_data:
#     print("\nStock Data:")
#     for key, value in stock.items():
#         print(f"{key}: {value}")

In [64]:
def fetch_stock_data_batch(symbols,api):
    tickers = ','.join(symbols)
    ext = f'{base_url}{tickers}?apikey={api}'
    
    response = requests.get(ext)
    data = response.json()
    
    if data:
        return data
    else:
        print(f"No data found for symbols: {symbols}")
        return []


In [None]:
all_data = []
selected_symbols1 = ticker_list[:200]
selected_symbols2 = ticker_list[200:400]
selected_symbols3 = ticker_list[400:503]

sec1 = fetch_stock_data_batch(selected_symbols1,FMP_API_1)
sec2 = fetch_stock_data_batch(selected_symbols2,FMP_API_1)
sec3 = fetch_stock_data_batch(selected_symbols3,FMP_API_1)

for sec in [sec1,sec2,sec3]:
    for stock in sec:
        all_data.append(stock)

print(all_data)

for stock in all_data:
    print("\nStock Data:")
    for key, value in stock.items():
        print(f"{key}: {value}")

### Saving data 
The functions below are meant to save and retrieve data in order to skip calling the APIs over and over. 

In [5]:
import json
def save_to_json(data, filename):
    with open(filename, 'w') as json_file:
        json.dump(data, json_file, indent=4)
    print(f"Data saved to {filename}")

def load_from_json(filename):
    with open(filename, 'r') as json_file:
        data = json.load(json_file)
    print(f"Data retrieved from {filename}")
    return data


In [6]:
# save_to_json(all_data,"All_Data_29th")
re_data= load_from_json('All_Data_29th')
print(len(re_data))
print(re_data)
# for stock in re_data:
#     print(stock)

Data retrieved from All_Data_29th
502
[{'symbol': 'A', 'name': 'Agilent Technologies, Inc.', 'price': 135.54, 'changesPercentage': -0.2135, 'change': -0.29, 'dayLow': 134.59, 'dayHigh': 136.2221, 'yearHigh': 155.35, 'yearLow': 124.16, 'marketCap': 38709546300, 'priceAvg50': 135.4428, 'priceAvg200': 138.48125, 'exchange': 'NYSE', 'volume': 594109, 'avgVolume': 1723390, 'open': 134.62, 'previousClose': 135.83, 'eps': 4.42, 'pe': 30.67, 'earningsAnnouncement': '2025-02-25T10:59:00.000+0000', 'sharesOutstanding': 285595000, 'timestamp': 1735333202}, {'symbol': 'AAPL', 'name': 'Apple Inc.', 'price': 255.59, 'changesPercentage': -1.3242, 'change': -3.43, 'dayLow': 253.06, 'dayHigh': 258.7, 'yearHigh': 260.1, 'yearLow': 164.08, 'marketCap': 3863447322000, 'priceAvg50': 236.4174, 'priceAvg200': 212.20125, 'exchange': 'NASDAQ', 'volume': 42170447, 'avgVolume': 44152275, 'open': 257.9, 'previousClose': 259.02, 'eps': 6.07, 'pe': 42.11, 'earningsAnnouncement': '2025-01-30T21:00:00.000+0000', 'sha

## Visual representation

Using pandas the data is displayed in a table form which tends to be a lot clearer.

In [7]:
columns = ['Symbol', 'Name', 'Price', 'Day Low', 'Day High', 'Market Cap','Number of shares to buy']

new_dataframe = pd.DataFrame(columns=columns)

for stock in re_data:

    new_row = pd.Series(
        [
            stock['symbol'],         
            stock['name'],           
            stock['price'],          
            stock['dayLow'],         
            stock['dayHigh'],        
            stock['marketCap'],
            "NOS"        
        ],
        index=columns
    )
    # Append the row to the DataFrame
    new_dataframe = pd.concat([new_dataframe, new_row.to_frame().T], ignore_index=True)


In [67]:
new_dataframe

Unnamed: 0,Symbol,Name,Price,Day Low,Day High,Market Cap,Number of shares to buy
0,A,"Agilent Technologies, Inc.",135.54,134.59,136.2221,38709546300,NOS
1,AAPL,Apple Inc.,255.59,253.06,258.7,3863447322000,NOS
2,ABBV,AbbVie Inc.,178.01,177.29,179.66,314568591400,NOS
3,ABNB,"Airbnb, Inc.",133.385,132.16,134.5,84562310136,NOS
4,ABT,Abbott Laboratories,114.99,114.51,115.95,199445555400,NOS
...,...,...,...,...,...,...,...
497,XYL,Xylem Inc.,117.53,117.18,119.095,28553325850,NOS
498,YUM,"Yum! Brands, Inc.",135.23,134.29,136.47,37738906560,NOS
499,ZBH,"Zimmer Biomet Holdings, Inc.",106.87,106.26,107.63,21275038380,NOS
500,ZBRA,Zebra Technologies Corporation,389.07,387.01,394.7,20068230600,NOS


## Number of shares to buy
Consider that one has a sum of money that they want to invest into the s&p 500 and this person wants to get an idea of how many shares of each stock they will get. To accomplish this we split the investment on all the possible stocks and then determine how many whole stocks one could buy with that amount.

In [13]:
savings = 1000000
cut_per_stock = savings / len(ticker_list)
for stock_idx in range(len(new_dataframe.index)):
    new_dataframe.loc[stock_idx, 'Number of shares to buy'] = math.floor(cut_per_stock/new_dataframe.loc[stock_idx, 'Price'])

new_dataframe



Unnamed: 0,Symbol,Name,Price,Day Low,Day High,Market Cap,Number of shares to buy
0,A,"Agilent Technologies, Inc.",135.54,134.59,136.2221,38709546300,14
1,AAPL,Apple Inc.,255.59,253.06,258.7,3863447322000,7
2,ABBV,AbbVie Inc.,178.01,177.29,179.66,314568591400,11
3,ABNB,"Airbnb, Inc.",133.385,132.16,134.5,84562310136,14
4,ABT,Abbott Laboratories,114.99,114.51,115.95,199445555400,17
...,...,...,...,...,...,...,...
497,XYL,Xylem Inc.,117.53,117.18,119.095,28553325850,16
498,YUM,"Yum! Brands, Inc.",135.23,134.29,136.47,37738906560,14
499,ZBH,"Zimmer Biomet Holdings, Inc.",106.87,106.26,107.63,21275038380,18
500,ZBRA,Zebra Technologies Corporation,389.07,387.01,394.7,20068230600,5


In [14]:
with pd.ExcelWriter('shares_ratio.xlsx', engine='xlsxwriter') as excel_writer:
    new_dataframe.to_excel(excel_writer, sheet_name='Shares Ratiosss', index=False)
    workbook = excel_writer.book
    worksheet = excel_writer.sheets['Shares Ratiosss']

    header_format = workbook.add_format({'bold': True, 'bg_color': 'gray', 'font_color': 'white', 'align': 'center'})
    currency_format = workbook.add_format({'num_format': '$#,##0.00', 'align': 'right'})
    general_format = workbook.add_format({'bg_color': 'gray', 'font_color': 'white', 'align': 'center'})


    for col_num, value in enumerate(new_dataframe.columns.values):
        worksheet.write(0, col_num, value, header_format)


    for row_num in range(1, len(new_dataframe) + 1):
        for col_num in range(len(new_dataframe.columns)):
            worksheet.write(row_num, col_num, new_dataframe.iloc[row_num - 1, col_num], general_format)


    for col_idx in [2, 3, 4, 5]:  
        worksheet.set_column(col_idx, col_idx, 15, currency_format)  
