In [9]:
import pandas as pd
import yfinance as yf
from datetime import datetime, timedelta
import os


In [3]:
# URL of the Wikipedia page containing the list of S&P 500 companies
url = "https://en.wikipedia.org/wiki/List_of_S%26P_500_companies"

# Use pandas to read the tables from the webpage
tables = pd.read_html(url)

# The list of S&P 500 companies is typically in the first table on the page
sp500_df = tables[0]

# Extract the column with the company tickers (Symbol) and company names (Security)
sp500_companies = sp500_df[['Symbol', 'Security']]

# Print the first few rows to verify the data
print(sp500_companies.head())


  Symbol     Security
0    MMM           3M
1    AOS  A. O. Smith
2    ABT       Abbott
3   ABBV       AbbVie
4    ACN    Accenture


In [6]:

# Define the date range for the last year
end_date = datetime.now()
start_date = end_date - timedelta(days=365)

In [7]:
# Create an empty DataFrame to store historical data
historical_data = pd.DataFrame()

# Loop through the S&P 500 companies and fetch historical data
for index, row in sp500_companies.iterrows():
    symbol = row['Symbol']
    try:
        # Fetch historical data for the company from Yahoo Finance
        stock_data = yf.download(symbol, start=start_date, end=end_date)

        # Add the data to the historical_data DataFrame, along with the company's name
        stock_data['Company'] = row['Security']
        historical_data = pd.concat([historical_data, stock_data], axis=0)
    except Exception as e:
        print(f"Error fetching data for {symbol}: {e}")

[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%*******

ERROR:yfinance:
1 Failed download:
ERROR:yfinance:['BRK.B']: Exception('%ticker%: No timezone found, symbol may be delisted')



[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%******

ERROR:yfinance:
1 Failed download:
ERROR:yfinance:['BF.B']: Exception('%ticker%: No price data found, symbol may be delisted (1d 2022-09-12 23:52:23.261836 -> 2023-09-12 23:52:23.261836)')



[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%******

In [8]:



# Print the first few rows of the historical data
print(historical_data.head())

# Save the historical data to a CSV file if needed
historical_data.to_csv("sp500_historical_data.csv")


                  Open        High         Low       Close   Adj Close  \
Date                                                                     
2022-09-13  122.110001  124.599998  120.230003  120.470001  114.054375   
2022-09-14  120.589996  120.589996  116.309998  117.529999  111.270950   
2022-09-15  117.820000  118.739998  116.070000  116.419998  110.220062   
2022-09-16  114.699997  117.309998  114.110001  116.610001  110.399948   
2022-09-19  116.010002  116.910004  115.279999  116.639999  110.428360   

               Volume Company  
Date                           
2022-09-13  7765500.0      3M  
2022-09-14  5670500.0      3M  
2022-09-15  4052800.0      3M  
2022-09-16  7261800.0      3M  
2022-09-19  3190300.0      3M  


In [10]:

# Specify the path to the CSV file
csv_file_path = "sp500_historical_data.csv"

# Check if the file exists
if os.path.exists(csv_file_path):
    # Get the size of the file in bytes
    file_size_bytes = os.path.getsize(csv_file_path)

    # Convert bytes to a human-readable format (e.g., kilobytes or megabytes)
    file_size_kb = file_size_bytes / 1024  # Convert to kilobytes
    file_size_mb = file_size_kb / 1024      # Convert to megabytes

    print(f"Size of '{csv_file_path}': {file_size_bytes} bytes ({file_size_kb:.2f} KB or {file_size_mb:.2f} MB)")
else:
    print(f"The file '{csv_file_path}' does not exist.")


Size of 'sp500_historical_data.csv': 15562472 bytes (15197.73 KB or 14.84 MB)
