In [3]:
pip install yfinance

Collecting yfinance
  Downloading yfinance-0.2.48-py2.py3-none-any.whl.metadata (13 kB)
Collecting multitasking>=0.0.7 (from yfinance)
  Downloading multitasking-0.0.11-py3-none-any.whl.metadata (5.5 kB)
Collecting frozendict>=2.3.4 (from yfinance)
  Downloading frozendict-2.4.6-py311-none-any.whl.metadata (23 kB)
Collecting peewee>=3.16.2 (from yfinance)
  Using cached peewee-3.17.7-py3-none-any.whl
Collecting html5lib>=1.1 (from yfinance)
  Downloading html5lib-1.1-py2.py3-none-any.whl.metadata (16 kB)
Downloading yfinance-0.2.48-py2.py3-none-any.whl (101 kB)
Downloading frozendict-2.4.6-py311-none-any.whl (16 kB)
Downloading html5lib-1.1-py2.py3-none-any.whl (112 kB)
Downloading multitasking-0.0.11-py3-none-any.whl (8.5 kB)
Installing collected packages: peewee, multitasking, html5lib, frozendict, yfinance
Successfully installed frozendict-2.4.6 html5lib-1.1 multitasking-0.0.11 peewee-3.17.7 yfinance-0.2.48
Note: you may need to restart the kernel to use updated packages.



[notice] A new release of pip is available: 24.2 -> 24.3.1
[notice] To update, run: C:\Users\Schalk\anaconda3\python.exe -m pip install --upgrade pip


In [4]:
import yfinance as yf
import pandas as pd
from datetime import datetime, timedelta
import requests
from bs4 import BeautifulSoup
import time

def get_sp500_top_50():
    """
    Scrapes the top 50 S&P 500 companies by market cap
    """
    url = "https://www.slickcharts.com/sp500"
    headers = {'User-Agent': 'Mozilla/5.0'}
    
    response = requests.get(url, headers=headers)
    soup = BeautifulSoup(response.text, 'html.parser')
    table = soup.find('table', {'class': 'table'})
    
    symbols = []
    for row in table.find_all('tr')[1:51]:  # Get top 50
        symbol = row.find_all('td')[2].text.strip()
        symbols.append(symbol)
    
    return symbols

def get_historical_data(symbols):
    """
    Gets historical price data for given symbols over the past year
    """
    end_date = datetime.now()
    start_date = end_date - timedelta(days=365)
    
    all_data = {}
    
    for symbol in symbols:
        try:
            # Add delay to avoid rate limiting
            time.sleep(0.5)
            
            # Get stock data
            stock = yf.Ticker(symbol)
            hist = stock.history(start=start_date, end=end_date)
            
            # Store closing prices
            all_data[symbol] = {
                'Current Price': hist['Close'][-1],
                'Historical Prices': hist['Close'].to_dict()
            }
            
            print(f"Successfully retrieved data for {symbol}")
            
        except Exception as e:
            print(f"Error retrieving data for {symbol}: {e}")
            continue
    
    return all_data

def format_results(data):
    """
    Formats the results into a pandas DataFrame
    """
    # Create DataFrame for current prices
    current_prices = pd.DataFrame([
        {'Symbol': symbol, 'Current Price': details['Current Price']}
        for symbol, details in data.items()
    ])
    
    # Create DataFrame for historical prices
    historical_df = pd.DataFrame({
        symbol: details['Historical Prices']
        for symbol, details in data.items()
    })
    
    return current_prices, historical_df

def main():
    # Get top 50 symbols
    print("Fetching top 50 S&P 500 companies...")
    symbols = get_sp500_top_50()
    
    # Get historical data
    print("Retrieving historical price data...")
    data = get_historical_data(symbols)
    
    # Format results
    current_prices, historical_prices = format_results(data)
    
    # Save to CSV files
    current_prices.to_csv('current_prices.csv', index=False)
    historical_prices.to_csv('historical_prices.csv')
    
    print("\nCurrent Prices:")
    print(current_prices.sort_values('Current Price', ascending=False))
    
    return current_prices, historical_prices

if __name__ == "__main__":
    current_prices, historical_prices = main()

Fetching top 50 S&P 500 companies...
Retrieving historical price data...


  'Current Price': hist['Close'][-1],


Successfully retrieved data for AAPL


  'Current Price': hist['Close'][-1],


Successfully retrieved data for NVDA


  'Current Price': hist['Close'][-1],


Successfully retrieved data for MSFT


  'Current Price': hist['Close'][-1],


Successfully retrieved data for AMZN


  'Current Price': hist['Close'][-1],


Successfully retrieved data for META


  'Current Price': hist['Close'][-1],


Successfully retrieved data for GOOGL


  'Current Price': hist['Close'][-1],


Successfully retrieved data for GOOG


$BRK.B: possibly delisted; no timezone found


Error retrieving data for BRK.B: index -1 is out of bounds for axis 0 with size 0


  'Current Price': hist['Close'][-1],


Successfully retrieved data for AVGO


  'Current Price': hist['Close'][-1],


Successfully retrieved data for TSLA


  'Current Price': hist['Close'][-1],


Successfully retrieved data for LLY


  'Current Price': hist['Close'][-1],


Successfully retrieved data for JPM


  'Current Price': hist['Close'][-1],


Successfully retrieved data for UNH


  'Current Price': hist['Close'][-1],


Successfully retrieved data for XOM


  'Current Price': hist['Close'][-1],


Successfully retrieved data for V


  'Current Price': hist['Close'][-1],


Successfully retrieved data for MA


  'Current Price': hist['Close'][-1],


Successfully retrieved data for PG


  'Current Price': hist['Close'][-1],


Successfully retrieved data for COST


  'Current Price': hist['Close'][-1],


Successfully retrieved data for HD


  'Current Price': hist['Close'][-1],


Successfully retrieved data for JNJ


  'Current Price': hist['Close'][-1],


Successfully retrieved data for ABBV


  'Current Price': hist['Close'][-1],


Successfully retrieved data for WMT


  'Current Price': hist['Close'][-1],


Successfully retrieved data for NFLX


  'Current Price': hist['Close'][-1],


Successfully retrieved data for CRM


  'Current Price': hist['Close'][-1],


Successfully retrieved data for BAC


  'Current Price': hist['Close'][-1],


Successfully retrieved data for ORCL


  'Current Price': hist['Close'][-1],


Successfully retrieved data for MRK


  'Current Price': hist['Close'][-1],


Successfully retrieved data for KO


  'Current Price': hist['Close'][-1],


Successfully retrieved data for CVX


  'Current Price': hist['Close'][-1],


Successfully retrieved data for AMD


  'Current Price': hist['Close'][-1],


Successfully retrieved data for PEP


  'Current Price': hist['Close'][-1],


Successfully retrieved data for LIN


  'Current Price': hist['Close'][-1],


Successfully retrieved data for CSCO


  'Current Price': hist['Close'][-1],


Successfully retrieved data for WFC


  'Current Price': hist['Close'][-1],


Successfully retrieved data for ACN


  'Current Price': hist['Close'][-1],


Successfully retrieved data for ADBE


  'Current Price': hist['Close'][-1],


Successfully retrieved data for TMO


  'Current Price': hist['Close'][-1],


Successfully retrieved data for MCD


  'Current Price': hist['Close'][-1],


Successfully retrieved data for PM


  'Current Price': hist['Close'][-1],


Successfully retrieved data for ABT


  'Current Price': hist['Close'][-1],


Successfully retrieved data for NOW


  'Current Price': hist['Close'][-1],


Successfully retrieved data for GE


  'Current Price': hist['Close'][-1],


Successfully retrieved data for IBM


  'Current Price': hist['Close'][-1],


Successfully retrieved data for TXN


  'Current Price': hist['Close'][-1],


Successfully retrieved data for QCOM


  'Current Price': hist['Close'][-1],


Successfully retrieved data for CAT


  'Current Price': hist['Close'][-1],


Successfully retrieved data for ISRG


  'Current Price': hist['Close'][-1],


Successfully retrieved data for VZ


  'Current Price': hist['Close'][-1],


Successfully retrieved data for INTU
Successfully retrieved data for DIS

Current Prices:
   Symbol  Current Price
39    NOW     932.770020
16   COST     874.179993
9     LLY     829.799988
21   NFLX     756.030029
47   INTU     610.299988
4    META     567.580017
11    UNH     564.380005
35    TMO     546.340027
45   ISRG     503.839996
14     MA     499.267487
34   ADBE     478.079987
30    LIN     456.149994
2    MSFT     406.350006
17     HD     393.829987
44    CAT     376.160004
33    ACN     344.825012
36    MCD     292.260010
22    CRM     291.497498
13      V     289.834991
8    TSLA     249.850006
0    AAPL     225.910004
10    JPM     221.839996
41    IBM     206.720001
19   ABBV     203.869995
42    TXN     203.160004
3    AMZN     186.190002
6    GOOG     172.649994
40     GE     171.800003
5   GOOGL     171.110001
7    AVGO     169.770004
24   ORCL     167.830002
29    PEP     166.080002
15     PG     165.169998
43   QCOM     162.770004
18    JNJ     159.862503
27    CVX 

  'Current Price': hist['Close'][-1],
