In [1]:
import os
import pandas as pd
import yfinance as yf
import requests
from tqdm.notebook import tqdm
import glob

# ==================== Obtenir les ticker des actions du S&P 500 de WIKIPEDIA ====================

def get_sp500_tickers():
    try:
        url = "https://en.wikipedia.org/wiki/List_of_S%26P_500_companies"
        tables = pd.read_html(url)
        sp500_df = tables[0]  
        tickers = sp500_df['Symbol'].tolist()
        print(f"✅ Retrieved {len(tickers)} S&P 500 tickers.")
        return tickers
    except Exception as e:
        print(f"❌ Error fetching S&P 500 tickers: {e}")
        return []

sp500_tickers = get_sp500_tickers()

✅ Retrieved 503 S&P 500 tickers.


In [2]:
# ==================== Télécharger les données historiques à partir de YAHOO FINANCE ====================

DATA_DIR = "sp500_stock_data"
os.makedirs(DATA_DIR, exist_ok=True)

START_DATE = "2020-01-01"
END_DATE = "2025-02-01"

def download_stock_data(ticker):
    try:
        stock_data = yf.download(ticker, start=START_DATE, end=END_DATE, progress=False)
        if not stock_data.empty:
            file_path = os.path.join(DATA_DIR, f"{ticker}.csv")
            stock_data.to_csv(file_path)
            return file_path
    except Exception as e:
        print(f"⚠️ Error fetching {ticker}: {e}")
    return None

saved_files = []
for ticker in tqdm(sp500_tickers, desc="Downloading stock data"):
    file = download_stock_data(ticker)
    if file:
        saved_files.append(file)

print(f"✅ Downloaded data for {len(saved_files)} S&P 500 stocks.")

# ==================== Combiner l'information en un seul CSV ====================

def combine_csv_files():
    all_files = glob.glob(os.path.join(DATA_DIR, "*.csv"))
    df_list = []

    for file in all_files:
        try:
            ticker = os.path.basename(file).replace(".csv", "")
            temp_df = pd.read_csv(file)
            temp_df["Ticker"] = ticker  # Add ticker column
            df_list.append(temp_df)
        except Exception as e:
            print(f"⚠️ Error processing {file}: {e}")

    if df_list:
        combined_df = pd.concat(df_list, ignore_index=True)
        combined_df.to_csv("sp500_stocks_data.csv", index=False)
        print(f"✅ Combined data saved as 'sp500_stocks_data.csv'.")
    else:
        print("❌ No data to combine.")

combine_csv_files()

print("🎉 Process Completed Successfully!")

Downloading stock data:   0%|          | 0/503 [00:00<?, ?it/s]


1 Failed download:
['BRK.B']: YFTzMissingError('$%ticker%: possibly delisted; no timezone found')

1 Failed download:
['BF.B']: YFPricesMissingError('$%ticker%: possibly delisted; no price data found  (1d 2020-01-01 -> 2025-02-01)')
Could not get exchangeTimezoneName for ticker 'ETR' reason: 'chart'

1 Failed download:
['ETR']: YFTzMissingError('$%ticker%: possibly delisted; no timezone found')
Could not get exchangeTimezoneName for ticker 'LEN' reason: 'chart'

1 Failed download:
['LEN']: YFTzMissingError('$%ticker%: possibly delisted; no timezone found')


✅ Downloaded data for 499 S&P 500 stocks.
✅ Combined data saved as 'sp500_stocks_data.csv'.
🎉 Process Completed Successfully!
