In [7]:
# Install the notebook's third-party dependencies into the running kernel's environment.
# openpyxl is never imported directly below; it is listed because pandas relies on it
# to read the .xlsx files.
%pip install pandas numpy matplotlib seaborn statsmodels requests openpyxl

Collecting openpyxl
  Downloading openpyxl-3.1.5-py2.py3-none-any.whl.metadata (2.5 kB)
Collecting et-xmlfile (from openpyxl)
  Downloading et_xmlfile-2.0.0-py3-none-any.whl.metadata (2.7 kB)
Downloading openpyxl-3.1.5-py2.py3-none-any.whl (250 kB)
Downloading et_xmlfile-2.0.0-py3-none-any.whl (18 kB)
Installing collected packages: et-xmlfile, openpyxl
Successfully installed et-xmlfile-2.0.0 openpyxl-3.1.5
Note: you may need to restart the kernel to use updated packages.


## Getting daily ETF holdings data from State Street Global Advisors

In [10]:
# getting all imports squared away
import requests as req
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import os
import json
from datetime import datetime


# Tickers of the ETFs whose daily holdings workbooks are downloaded.
etf_list = ['XLI', 'XLK', 'XLE', 'XLB']
# The download URL embeds the ticker in lowercase.
ssga = [etf.lower() for etf in etf_list]

# Downloaded workbooks are kept in this subfolder; create it if it is missing.
subfolder = 'SSGA Data'
os.makedirs(subfolder, exist_ok=True)

# Raw holdings frames, one per ETF, in the same order as `ssga`.
ssga_df_list = []

# Date stamp used in the saved file names. Computed once so every file written
# by a single run carries the same date, even if the run crosses midnight.
current_date = datetime.now().strftime('%m-%d-%Y')

# Download each ETF's workbook, save it to disk, then load it into a DataFrame.
for etf in ssga:
    url = f'https://www.ssga.com/library-content/products/fund-data/etfs/us/holdings-daily-us-en-{etf}.xlsx'
    # A timeout keeps the cell from hanging forever on a stalled connection.
    response = req.get(url, timeout=30)
    # Fail loudly on HTTP errors instead of saving an error page as an .xlsx
    # file and hitting a confusing parse error in read_excel below.
    response.raise_for_status()
    file_path = os.path.join(subfolder, f'{etf}-{current_date}.xlsx')
    with open(file_path, 'wb') as file:
        file.write(response.content)
    # The first 4 rows are skipped and the next row is used as the header;
    # presumably those rows hold fund metadata -- confirm against the workbook.
    df = pd.read_excel(file_path, skiprows=4, header=0, usecols="A:H")
    ssga_df_list.append(df)


In [14]:
# Disclaimer text that marks the start of the footer in the 'Name' column.
_SSGA_DISCLAIMER = "Past performance is not a reliable indicator of future performance. Investment return and principal value will fluctuate, so you may have a gain or loss when shares are sold. Current performance may be higher or lower than that quoted. All results are historical and assume the reinvestment of dividends and capital gains. Visit www.ssga.com for most recent month-end performance. "


def _trim_ssga_footer(holdings):
    """Return `holdings` without the disclaimer footer and trailing blank row.

    Rows from the first one whose 'Name' equals the disclaimer text (compared
    after stripping surrounding whitespace) to the end are removed. If the last
    remaining row is entirely NaN, it is removed as well. The input frame is
    not modified; a sliced frame is returned.
    """
    # Compare on stripped text so a change in trailing whitespace in the
    # workbook does not silently disable the trim.
    is_disclaimer = holdings['Name'].astype(str).str.strip() == _SSGA_DISCLAIMER.strip()
    if is_disclaimer.any():
        # Use the row position (not the index label) so the slice is correct
        # regardless of the frame's index.
        first_pos = int(is_disclaimer.to_numpy().argmax())
        holdings = holdings.iloc[:first_pos]

    # Guard against an empty frame before inspecting the last row.
    if not holdings.empty and holdings.iloc[-1].isna().all():
        holdings = holdings.iloc[:-1]

    return holdings


# Cleaned frames are collected in a new list so the raw downloads in
# `ssga_df_list` stay untouched and this cell can be re-run safely.
ssga_clean_df_list = []
for df in ssga_df_list:
    df = _trim_ssga_footer(df)
    ssga_clean_df_list.append(df)

    print(df.tail())

                             Name  Ticker Identifier    SEDOL    Weight  \
76              SMITH (A.O.) CORP     AOS  831865209  2816023  0.186860   
77   HUNTINGTON INGALLS INDUSTRIE     HII  446413106  B40SSC9  0.164458   
78  SSI US GOV MONEY MARKET CLASS       -  924QSGII3        -  0.026875   
79                      US DOLLAR       -  999USDZ92        -  0.003086   
80        XAI EMINI INDUSTR MAR25  AIXH25  ADI2SGVK5        - -0.000239   

   Sector  Shares Held Local Currency  
76      -    617461.00            USD  
77      -    204036.00            USD  
78      -   5931422.87            USD  
79      -    680999.90            USD  
80      -     10000.00            USD  
                             Name  Ticker Identifier    SEDOL    Weight  \
67         SKYWORKS SOLUTIONS INC    SWKS  83088M102  2961053  0.111133   
68             ENPHASE ENERGY INC    ENPH  29355A107  B65SQW4  0.092398   
69  SSI US GOV MONEY MARKET CLASS       -  924QSGII3        -  0.082990   
70       