In [1]:
# download APPL data from yfinance
import yfinance as yf
import pandas as pd
def download_data(ticker: str, start_date: str, end_date: str) -> pd.DataFrame:
    """
    Download stock data from Yahoo Finance for a given ticker and date range.

    Args:
        ticker (str): The ticker symbol of the stock to download data for.
        start_date (str): The start date of the date range to download data for.
        end_date (str): The end date of the date range to download data for.

    Returns:
        pd.DataFrame: A pandas DataFrame containing the stock data.
    """
    try:
        df = yf.download(ticker, start=start_date, end=end_date)
        df.reset_index(inplace=True)
        return df
    except Exception as e:
        print(f"Error downloading data for {ticker}: {e}")
        return pd.DataFrame()

In [2]:
download_data("AAPL", "2020-01-01", "2020-01-31")

  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


Price,Date,Close,High,Low,Open,Volume
Ticker,Unnamed: 1_level_1,AAPL,AAPL,AAPL,AAPL,AAPL
0,2020-01-02,72.538506,72.598884,71.292296,71.545882,135480400
1,2020-01-03,71.83329,72.594055,71.608685,71.765667,146322800
2,2020-01-06,72.405693,72.444336,70.703027,70.954203,118387200
3,2020-01-07,72.065155,72.671348,71.845377,72.415345,108872000
4,2020-01-08,73.224411,73.526303,71.768086,71.768086,132079200
5,2020-01-09,74.77977,74.972978,73.951381,74.20255,170108400
6,2020-01-10,74.948822,75.51397,74.446478,75.014035,140644800
7,2020-01-13,76.550034,76.576604,75.146842,75.265189,121532000
8,2020-01-14,75.51638,76.697384,75.393214,76.487269,161954400
9,2020-01-15,75.192726,76.197422,74.760415,75.3159,121923600


In [10]:
def get_sp500_tickers():
    url = "https://datahub.io/core/s-and-p-500-companies/r/constituents.csv"
    try:
        df = pd.read_csv(url)
        return df['Symbol'].str.replace('.', '-', regex=False).tolist()
    except Exception as e:
        print(f"Failed to load tickers: {e}")
        return []

In [11]:
get_sp500_tickers()

['MMM',
 'AOS',
 'ABT',
 'ABBV',
 'ACN',
 'ADBE',
 'AMD',
 'AES',
 'AFL',
 'A',
 'APD',
 'ABNB',
 'AKAM',
 'ALB',
 'ARE',
 'ALGN',
 'ALLE',
 'LNT',
 'ALL',
 'GOOGL',
 'GOOG',
 'MO',
 'AMZN',
 'AMCR',
 'AEE',
 'AEP',
 'AXP',
 'AIG',
 'AMT',
 'AWK',
 'AMP',
 'AME',
 'AMGN',
 'APH',
 'ADI',
 'AON',
 'APA',
 'APO',
 'AAPL',
 'AMAT',
 'APTV',
 'ACGL',
 'ADM',
 'ANET',
 'AJG',
 'AIZ',
 'T',
 'ATO',
 'ADSK',
 'ADP',
 'AZO',
 'AVB',
 'AVY',
 'AXON',
 'BKR',
 'BALL',
 'BAC',
 'BAX',
 'BDX',
 'BRK-B',
 'BBY',
 'TECH',
 'BIIB',
 'BLK',
 'BX',
 'XYZ',
 'BK',
 'BA',
 'BKNG',
 'BSX',
 'BMY',
 'AVGO',
 'BR',
 'BRO',
 'BF-B',
 'BLDR',
 'BG',
 'BXP',
 'CHRW',
 'CDNS',
 'CZR',
 'CPT',
 'CPB',
 'COF',
 'CAH',
 'KMX',
 'CCL',
 'CARR',
 'CAT',
 'CBOE',
 'CBRE',
 'CDW',
 'COR',
 'CNC',
 'CNP',
 'CF',
 'CRL',
 'SCHW',
 'CHTR',
 'CVX',
 'CMG',
 'CB',
 'CHD',
 'CI',
 'CINF',
 'CTAS',
 'CSCO',
 'C',
 'CFG',
 'CLX',
 'CME',
 'CMS',
 'KO',
 'CTSH',
 'COIN',
 'CL',
 'CMCSA',
 'CAG',
 'COP',
 'ED',
 'STZ',
 'CEG',


In [12]:
import os
import time

def download_sp500_batch(start_date: str, end_date: str, save_dir="data/sp500"):
    tickers = get_sp500_tickers()
    os.makedirs(save_dir, exist_ok=True)

    for i, ticker in enumerate(tickers):
        filepath = os.path.join(save_dir, f"{ticker}.csv")

        if os.path.exists(filepath):
            print(f"[{i+1}/{len(tickers)}] ✅ {ticker} already exists, skipping.")
            continue

        print(f"[{i+1}/{len(tickers)}] ⬇️ Downloading {ticker}...")
        df = download_data(ticker, start_date, end_date)

        if not df.empty:
            df.to_csv(filepath, index=False)
            print(f"   ✅ Saved to {filepath}")
        else:
            print(f"   ⚠️ Skipped {ticker} due to empty or failed download")

        time.sleep(1.5)  # polite pause to avoid rate limit

In [13]:
download_sp500_batch("2015-01-01", "2024-12-31")

[1/503] ⬇️ Downloading MMM...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/MMM.csv
[2/503] ⬇️ Downloading AOS...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/AOS.csv
[3/503] ⬇️ Downloading ABT...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/ABT.csv
[4/503] ⬇️ Downloading ABBV...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/ABBV.csv
[5/503] ⬇️ Downloading ACN...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/ACN.csv
[6/503] ⬇️ Downloading ADBE...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/ADBE.csv
[7/503] ⬇️ Downloading AMD...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/AMD.csv
[8/503] ⬇️ Downloading AES...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/AES.csv
[9/503] ⬇️ Downloading AFL...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/AFL.csv
[10/503] ⬇️ Downloading A...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/A.csv
[11/503] ⬇️ Downloading APD...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/APD.csv
[12/503] ⬇️ Downloading ABNB...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/ABNB.csv
[13/503] ⬇️ Downloading AKAM...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/AKAM.csv
[14/503] ⬇️ Downloading ALB...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/ALB.csv
[15/503] ⬇️ Downloading ARE...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/ARE.csv
[16/503] ⬇️ Downloading ALGN...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/ALGN.csv
[17/503] ⬇️ Downloading ALLE...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/ALLE.csv
[18/503] ⬇️ Downloading LNT...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/LNT.csv
[19/503] ⬇️ Downloading ALL...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/ALL.csv
[20/503] ⬇️ Downloading GOOGL...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/GOOGL.csv
[21/503] ⬇️ Downloading GOOG...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/GOOG.csv
[22/503] ⬇️ Downloading MO...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/MO.csv
[23/503] ⬇️ Downloading AMZN...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/AMZN.csv
[24/503] ⬇️ Downloading AMCR...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/AMCR.csv
[25/503] ⬇️ Downloading AEE...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/AEE.csv
[26/503] ⬇️ Downloading AEP...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/AEP.csv
[27/503] ⬇️ Downloading AXP...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/AXP.csv
[28/503] ⬇️ Downloading AIG...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/AIG.csv
[29/503] ⬇️ Downloading AMT...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/AMT.csv
[30/503] ⬇️ Downloading AWK...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/AWK.csv
[31/503] ⬇️ Downloading AMP...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/AMP.csv
[32/503] ⬇️ Downloading AME...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/AME.csv
[33/503] ⬇️ Downloading AMGN...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/AMGN.csv
[34/503] ⬇️ Downloading APH...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/APH.csv
[35/503] ⬇️ Downloading ADI...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/ADI.csv
[36/503] ⬇️ Downloading AON...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/AON.csv
[37/503] ⬇️ Downloading APA...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/APA.csv
[38/503] ⬇️ Downloading APO...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/APO.csv
[39/503] ⬇️ Downloading AAPL...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/AAPL.csv
[40/503] ⬇️ Downloading AMAT...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/AMAT.csv
[41/503] ⬇️ Downloading APTV...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/APTV.csv
[42/503] ⬇️ Downloading ACGL...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/ACGL.csv
[43/503] ⬇️ Downloading ADM...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/ADM.csv
[44/503] ⬇️ Downloading ANET...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/ANET.csv
[45/503] ⬇️ Downloading AJG...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/AJG.csv
[46/503] ⬇️ Downloading AIZ...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/AIZ.csv
[47/503] ⬇️ Downloading T...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/T.csv
[48/503] ⬇️ Downloading ATO...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/ATO.csv
[49/503] ⬇️ Downloading ADSK...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/ADSK.csv
[50/503] ⬇️ Downloading ADP...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/ADP.csv
[51/503] ⬇️ Downloading AZO...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/AZO.csv
[52/503] ⬇️ Downloading AVB...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/AVB.csv
[53/503] ⬇️ Downloading AVY...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/AVY.csv
[54/503] ⬇️ Downloading AXON...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/AXON.csv
[55/503] ⬇️ Downloading BKR...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/BKR.csv
[56/503] ⬇️ Downloading BALL...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/BALL.csv
[57/503] ⬇️ Downloading BAC...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/BAC.csv
[58/503] ⬇️ Downloading BAX...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/BAX.csv
[59/503] ⬇️ Downloading BDX...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/BDX.csv
[60/503] ⬇️ Downloading BRK-B...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/BRK-B.csv
[61/503] ⬇️ Downloading BBY...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/BBY.csv
[62/503] ⬇️ Downloading TECH...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/TECH.csv
[63/503] ⬇️ Downloading BIIB...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/BIIB.csv
[64/503] ⬇️ Downloading BLK...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/BLK.csv
[65/503] ⬇️ Downloading BX...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/BX.csv
[66/503] ⬇️ Downloading XYZ...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/XYZ.csv
[67/503] ⬇️ Downloading BK...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/BK.csv
[68/503] ⬇️ Downloading BA...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/BA.csv
[69/503] ⬇️ Downloading BKNG...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/BKNG.csv
[70/503] ⬇️ Downloading BSX...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/BSX.csv
[71/503] ⬇️ Downloading BMY...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/BMY.csv
[72/503] ⬇️ Downloading AVGO...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/AVGO.csv
[73/503] ⬇️ Downloading BR...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/BR.csv
[74/503] ⬇️ Downloading BRO...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/BRO.csv
[75/503] ⬇️ Downloading BF-B...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/BF-B.csv
[76/503] ⬇️ Downloading BLDR...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/BLDR.csv
[77/503] ⬇️ Downloading BG...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/BG.csv
[78/503] ⬇️ Downloading BXP...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/BXP.csv
[79/503] ⬇️ Downloading CHRW...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/CHRW.csv
[80/503] ⬇️ Downloading CDNS...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/CDNS.csv
[81/503] ⬇️ Downloading CZR...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/CZR.csv
[82/503] ⬇️ Downloading CPT...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/CPT.csv
[83/503] ⬇️ Downloading CPB...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/CPB.csv
[84/503] ⬇️ Downloading COF...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/COF.csv
[85/503] ⬇️ Downloading CAH...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/CAH.csv
[86/503] ⬇️ Downloading KMX...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/KMX.csv
[87/503] ⬇️ Downloading CCL...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/CCL.csv
[88/503] ⬇️ Downloading CARR...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/CARR.csv
[89/503] ⬇️ Downloading CAT...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/CAT.csv
[90/503] ⬇️ Downloading CBOE...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/CBOE.csv
[91/503] ⬇️ Downloading CBRE...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/CBRE.csv
[92/503] ⬇️ Downloading CDW...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/CDW.csv
[93/503] ⬇️ Downloading COR...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/COR.csv
[94/503] ⬇️ Downloading CNC...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/CNC.csv
[95/503] ⬇️ Downloading CNP...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/CNP.csv
[96/503] ⬇️ Downloading CF...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/CF.csv
[97/503] ⬇️ Downloading CRL...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/CRL.csv
[98/503] ⬇️ Downloading SCHW...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/SCHW.csv
[99/503] ⬇️ Downloading CHTR...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/CHTR.csv
[100/503] ⬇️ Downloading CVX...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/CVX.csv
[101/503] ⬇️ Downloading CMG...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/CMG.csv
[102/503] ⬇️ Downloading CB...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/CB.csv
[103/503] ⬇️ Downloading CHD...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/CHD.csv
[104/503] ⬇️ Downloading CI...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/CI.csv
[105/503] ⬇️ Downloading CINF...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/CINF.csv
[106/503] ⬇️ Downloading CTAS...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/CTAS.csv
[107/503] ⬇️ Downloading CSCO...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/CSCO.csv
[108/503] ⬇️ Downloading C...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/C.csv
[109/503] ⬇️ Downloading CFG...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/CFG.csv
[110/503] ⬇️ Downloading CLX...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/CLX.csv
[111/503] ⬇️ Downloading CME...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/CME.csv
[112/503] ⬇️ Downloading CMS...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/CMS.csv
[113/503] ⬇️ Downloading KO...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/KO.csv
[114/503] ⬇️ Downloading CTSH...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/CTSH.csv
[115/503] ⬇️ Downloading COIN...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/COIN.csv
[116/503] ⬇️ Downloading CL...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/CL.csv
[117/503] ⬇️ Downloading CMCSA...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/CMCSA.csv
[118/503] ⬇️ Downloading CAG...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/CAG.csv
[119/503] ⬇️ Downloading COP...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/COP.csv
[120/503] ⬇️ Downloading ED...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/ED.csv
[121/503] ⬇️ Downloading STZ...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/STZ.csv
[122/503] ⬇️ Downloading CEG...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/CEG.csv
[123/503] ⬇️ Downloading COO...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/COO.csv
[124/503] ⬇️ Downloading CPRT...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/CPRT.csv
[125/503] ⬇️ Downloading GLW...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/GLW.csv
[126/503] ⬇️ Downloading CPAY...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/CPAY.csv
[127/503] ⬇️ Downloading CTVA...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/CTVA.csv
[128/503] ⬇️ Downloading CSGP...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/CSGP.csv
[129/503] ⬇️ Downloading COST...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/COST.csv
[130/503] ⬇️ Downloading CTRA...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/CTRA.csv
[131/503] ⬇️ Downloading CRWD...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/CRWD.csv
[132/503] ⬇️ Downloading CCI...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/CCI.csv
[133/503] ⬇️ Downloading CSX...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/CSX.csv
[134/503] ⬇️ Downloading CMI...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/CMI.csv
[135/503] ⬇️ Downloading CVS...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/CVS.csv
[136/503] ⬇️ Downloading DHR...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/DHR.csv
[137/503] ⬇️ Downloading DRI...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/DRI.csv
[138/503] ⬇️ Downloading DDOG...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/DDOG.csv
[139/503] ⬇️ Downloading DVA...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/DVA.csv
[140/503] ⬇️ Downloading DAY...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/DAY.csv
[141/503] ⬇️ Downloading DECK...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/DECK.csv
[142/503] ⬇️ Downloading DE...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/DE.csv
[143/503] ⬇️ Downloading DELL...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/DELL.csv
[144/503] ⬇️ Downloading DAL...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/DAL.csv
[145/503] ⬇️ Downloading DVN...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/DVN.csv
[146/503] ⬇️ Downloading DXCM...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/DXCM.csv
[147/503] ⬇️ Downloading FANG...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/FANG.csv
[148/503] ⬇️ Downloading DLR...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/DLR.csv
[149/503] ⬇️ Downloading DG...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/DG.csv
[150/503] ⬇️ Downloading DLTR...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/DLTR.csv
[151/503] ⬇️ Downloading D...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/D.csv
[152/503] ⬇️ Downloading DPZ...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/DPZ.csv
[153/503] ⬇️ Downloading DASH...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/DASH.csv
[154/503] ⬇️ Downloading DOV...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/DOV.csv
[155/503] ⬇️ Downloading DOW...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/DOW.csv
[156/503] ⬇️ Downloading DHI...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/DHI.csv
[157/503] ⬇️ Downloading DTE...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/DTE.csv
[158/503] ⬇️ Downloading DUK...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/DUK.csv
[159/503] ⬇️ Downloading DD...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/DD.csv
[160/503] ⬇️ Downloading EMN...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/EMN.csv
[161/503] ⬇️ Downloading ETN...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/ETN.csv
[162/503] ⬇️ Downloading EBAY...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/EBAY.csv
[163/503] ⬇️ Downloading ECL...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/ECL.csv
[164/503] ⬇️ Downloading EIX...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/EIX.csv
[165/503] ⬇️ Downloading EW...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/EW.csv
[166/503] ⬇️ Downloading EA...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/EA.csv
[167/503] ⬇️ Downloading ELV...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/ELV.csv
[168/503] ⬇️ Downloading EMR...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/EMR.csv
[169/503] ⬇️ Downloading ENPH...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/ENPH.csv
[170/503] ⬇️ Downloading ETR...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/ETR.csv
[171/503] ⬇️ Downloading EOG...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/EOG.csv
[172/503] ⬇️ Downloading EPAM...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/EPAM.csv
[173/503] ⬇️ Downloading EQT...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/EQT.csv
[174/503] ⬇️ Downloading EFX...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/EFX.csv
[175/503] ⬇️ Downloading EQIX...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/EQIX.csv
[176/503] ⬇️ Downloading EQR...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/EQR.csv
[177/503] ⬇️ Downloading ERIE...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/ERIE.csv
[178/503] ⬇️ Downloading ESS...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/ESS.csv
[179/503] ⬇️ Downloading EL...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/EL.csv
[180/503] ⬇️ Downloading EG...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/EG.csv
[181/503] ⬇️ Downloading EVRG...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/EVRG.csv
[182/503] ⬇️ Downloading ES...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/ES.csv
[183/503] ⬇️ Downloading EXC...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/EXC.csv
[184/503] ⬇️ Downloading EXE...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/EXE.csv
[185/503] ⬇️ Downloading EXPE...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/EXPE.csv
[186/503] ⬇️ Downloading EXPD...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/EXPD.csv
[187/503] ⬇️ Downloading EXR...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/EXR.csv
[188/503] ⬇️ Downloading XOM...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/XOM.csv
[189/503] ⬇️ Downloading FFIV...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/FFIV.csv
[190/503] ⬇️ Downloading FDS...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/FDS.csv
[191/503] ⬇️ Downloading FICO...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/FICO.csv
[192/503] ⬇️ Downloading FAST...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/FAST.csv
[193/503] ⬇️ Downloading FRT...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/FRT.csv
[194/503] ⬇️ Downloading FDX...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/FDX.csv
[195/503] ⬇️ Downloading FIS...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/FIS.csv
[196/503] ⬇️ Downloading FITB...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/FITB.csv
[197/503] ⬇️ Downloading FSLR...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/FSLR.csv
[198/503] ⬇️ Downloading FE...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/FE.csv
[199/503] ⬇️ Downloading FI...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/FI.csv
[200/503] ⬇️ Downloading F...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/F.csv
[201/503] ⬇️ Downloading FTNT...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/FTNT.csv
[202/503] ⬇️ Downloading FTV...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/FTV.csv
[203/503] ⬇️ Downloading FOXA...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/FOXA.csv
[204/503] ⬇️ Downloading FOX...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/FOX.csv
[205/503] ⬇️ Downloading BEN...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/BEN.csv
[206/503] ⬇️ Downloading FCX...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/FCX.csv
[207/503] ⬇️ Downloading GRMN...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/GRMN.csv
[208/503] ⬇️ Downloading IT...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/IT.csv
[209/503] ⬇️ Downloading GE...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/GE.csv
[210/503] ⬇️ Downloading GEHC...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/GEHC.csv
[211/503] ⬇️ Downloading GEV...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/GEV.csv
[212/503] ⬇️ Downloading GEN...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/GEN.csv
[213/503] ⬇️ Downloading GNRC...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/GNRC.csv
[214/503] ⬇️ Downloading GD...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/GD.csv
[215/503] ⬇️ Downloading GIS...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/GIS.csv
[216/503] ⬇️ Downloading GM...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/GM.csv
[217/503] ⬇️ Downloading GPC...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/GPC.csv
[218/503] ⬇️ Downloading GILD...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/GILD.csv
[219/503] ⬇️ Downloading GPN...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/GPN.csv
[220/503] ⬇️ Downloading GL...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/GL.csv
[221/503] ⬇️ Downloading GDDY...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/GDDY.csv
[222/503] ⬇️ Downloading GS...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/GS.csv
[223/503] ⬇️ Downloading HAL...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/HAL.csv
[224/503] ⬇️ Downloading HIG...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/HIG.csv
[225/503] ⬇️ Downloading HAS...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/HAS.csv
[226/503] ⬇️ Downloading HCA...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/HCA.csv
[227/503] ⬇️ Downloading DOC...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/DOC.csv
[228/503] ⬇️ Downloading HSIC...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/HSIC.csv
[229/503] ⬇️ Downloading HSY...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/HSY.csv
[230/503] ⬇️ Downloading HPE...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/HPE.csv
[231/503] ⬇️ Downloading HLT...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/HLT.csv
[232/503] ⬇️ Downloading HOLX...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/HOLX.csv
[233/503] ⬇️ Downloading HD...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/HD.csv
[234/503] ⬇️ Downloading HON...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/HON.csv
[235/503] ⬇️ Downloading HRL...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/HRL.csv
[236/503] ⬇️ Downloading HST...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/HST.csv
[237/503] ⬇️ Downloading HWM...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/HWM.csv
[238/503] ⬇️ Downloading HPQ...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/HPQ.csv
[239/503] ⬇️ Downloading HUBB...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/HUBB.csv
[240/503] ⬇️ Downloading HUM...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/HUM.csv
[241/503] ⬇️ Downloading HBAN...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/HBAN.csv
[242/503] ⬇️ Downloading HII...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/HII.csv
[243/503] ⬇️ Downloading IBM...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/IBM.csv
[244/503] ⬇️ Downloading IEX...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/IEX.csv
[245/503] ⬇️ Downloading IDXX...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/IDXX.csv
[246/503] ⬇️ Downloading ITW...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/ITW.csv
[247/503] ⬇️ Downloading INCY...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/INCY.csv
[248/503] ⬇️ Downloading IR...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/IR.csv
[249/503] ⬇️ Downloading PODD...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/PODD.csv
[250/503] ⬇️ Downloading INTC...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/INTC.csv
[251/503] ⬇️ Downloading ICE...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/ICE.csv
[252/503] ⬇️ Downloading IFF...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/IFF.csv
[253/503] ⬇️ Downloading IP...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/IP.csv
[254/503] ⬇️ Downloading IPG...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/IPG.csv
[255/503] ⬇️ Downloading INTU...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/INTU.csv
[256/503] ⬇️ Downloading ISRG...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/ISRG.csv
[257/503] ⬇️ Downloading IVZ...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/IVZ.csv
[258/503] ⬇️ Downloading INVH...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/INVH.csv
[259/503] ⬇️ Downloading IQV...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/IQV.csv
[260/503] ⬇️ Downloading IRM...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/IRM.csv
[261/503] ⬇️ Downloading JBHT...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/JBHT.csv
[262/503] ⬇️ Downloading JBL...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/JBL.csv
[263/503] ⬇️ Downloading JKHY...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/JKHY.csv
[264/503] ⬇️ Downloading J...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/J.csv
[265/503] ⬇️ Downloading JNJ...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/JNJ.csv
[266/503] ⬇️ Downloading JCI...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/JCI.csv
[267/503] ⬇️ Downloading JPM...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/JPM.csv
[268/503] ⬇️ Downloading K...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/K.csv
[269/503] ⬇️ Downloading KVUE...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/KVUE.csv
[270/503] ⬇️ Downloading KDP...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/KDP.csv
[271/503] ⬇️ Downloading KEY...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/KEY.csv
[272/503] ⬇️ Downloading KEYS...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/KEYS.csv
[273/503] ⬇️ Downloading KMB...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/KMB.csv
[274/503] ⬇️ Downloading KIM...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/KIM.csv
[275/503] ⬇️ Downloading KMI...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/KMI.csv
[276/503] ⬇️ Downloading KKR...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/KKR.csv
[277/503] ⬇️ Downloading KLAC...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/KLAC.csv
[278/503] ⬇️ Downloading KHC...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/KHC.csv
[279/503] ⬇️ Downloading KR...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/KR.csv
[280/503] ⬇️ Downloading LHX...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/LHX.csv
[281/503] ⬇️ Downloading LH...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/LH.csv
[282/503] ⬇️ Downloading LRCX...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/LRCX.csv
[283/503] ⬇️ Downloading LW...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/LW.csv
[284/503] ⬇️ Downloading LVS...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/LVS.csv
[285/503] ⬇️ Downloading LDOS...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/LDOS.csv
[286/503] ⬇️ Downloading LEN...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/LEN.csv
[287/503] ⬇️ Downloading LII...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/LII.csv
[288/503] ⬇️ Downloading LLY...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/LLY.csv
[289/503] ⬇️ Downloading LIN...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/LIN.csv
[290/503] ⬇️ Downloading LYV...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/LYV.csv
[291/503] ⬇️ Downloading LKQ...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/LKQ.csv
[292/503] ⬇️ Downloading LMT...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/LMT.csv
[293/503] ⬇️ Downloading L...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/L.csv
[294/503] ⬇️ Downloading LOW...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/LOW.csv
[295/503] ⬇️ Downloading LULU...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/LULU.csv
[296/503] ⬇️ Downloading LYB...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/LYB.csv
[297/503] ⬇️ Downloading MTB...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/MTB.csv
[298/503] ⬇️ Downloading MPC...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/MPC.csv
[299/503] ⬇️ Downloading MKTX...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/MKTX.csv
[300/503] ⬇️ Downloading MAR...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/MAR.csv
[301/503] ⬇️ Downloading MMC...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/MMC.csv
[302/503] ⬇️ Downloading MLM...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/MLM.csv
[303/503] ⬇️ Downloading MAS...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/MAS.csv
[304/503] ⬇️ Downloading MA...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/MA.csv
[305/503] ⬇️ Downloading MTCH...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/MTCH.csv
[306/503] ⬇️ Downloading MKC...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/MKC.csv
[307/503] ⬇️ Downloading MCD...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/MCD.csv
[308/503] ⬇️ Downloading MCK...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/MCK.csv
[309/503] ⬇️ Downloading MDT...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/MDT.csv
[310/503] ⬇️ Downloading MRK...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/MRK.csv
[311/503] ⬇️ Downloading META...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/META.csv
[312/503] ⬇️ Downloading MET...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/MET.csv
[313/503] ⬇️ Downloading MTD...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/MTD.csv
[314/503] ⬇️ Downloading MGM...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/MGM.csv
[315/503] ⬇️ Downloading MCHP...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/MCHP.csv
[316/503] ⬇️ Downloading MU...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/MU.csv
[317/503] ⬇️ Downloading MSFT...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/MSFT.csv
[318/503] ⬇️ Downloading MAA...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/MAA.csv
[319/503] ⬇️ Downloading MRNA...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/MRNA.csv
[320/503] ⬇️ Downloading MHK...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/MHK.csv
[321/503] ⬇️ Downloading MOH...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/MOH.csv
[322/503] ⬇️ Downloading TAP...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/TAP.csv
[323/503] ⬇️ Downloading MDLZ...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/MDLZ.csv
[324/503] ⬇️ Downloading MPWR...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/MPWR.csv
[325/503] ⬇️ Downloading MNST...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/MNST.csv
[326/503] ⬇️ Downloading MCO...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/MCO.csv
[327/503] ⬇️ Downloading MS...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/MS.csv
[328/503] ⬇️ Downloading MOS...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/MOS.csv
[329/503] ⬇️ Downloading MSI...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/MSI.csv
[330/503] ⬇️ Downloading MSCI...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/MSCI.csv
[331/503] ⬇️ Downloading NDAQ...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/NDAQ.csv
[332/503] ⬇️ Downloading NTAP...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/NTAP.csv
[333/503] ⬇️ Downloading NFLX...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/NFLX.csv
[334/503] ⬇️ Downloading NEM...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/NEM.csv
[335/503] ⬇️ Downloading NWSA...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/NWSA.csv
[336/503] ⬇️ Downloading NWS...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/NWS.csv
[337/503] ⬇️ Downloading NEE...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/NEE.csv
[338/503] ⬇️ Downloading NKE...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/NKE.csv
[339/503] ⬇️ Downloading NI...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/NI.csv
[340/503] ⬇️ Downloading NDSN...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/NDSN.csv
[341/503] ⬇️ Downloading NSC...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/NSC.csv
[342/503] ⬇️ Downloading NTRS...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/NTRS.csv
[343/503] ⬇️ Downloading NOC...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/NOC.csv
[344/503] ⬇️ Downloading NCLH...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/NCLH.csv
[345/503] ⬇️ Downloading NRG...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/NRG.csv
[346/503] ⬇️ Downloading NUE...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/NUE.csv
[347/503] ⬇️ Downloading NVDA...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/NVDA.csv
[348/503] ⬇️ Downloading NVR...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/NVR.csv
[349/503] ⬇️ Downloading NXPI...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/NXPI.csv
[350/503] ⬇️ Downloading ORLY...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/ORLY.csv
[351/503] ⬇️ Downloading OXY...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/OXY.csv
[352/503] ⬇️ Downloading ODFL...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/ODFL.csv
[353/503] ⬇️ Downloading OMC...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/OMC.csv
[354/503] ⬇️ Downloading ON...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/ON.csv
[355/503] ⬇️ Downloading OKE...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/OKE.csv
[356/503] ⬇️ Downloading ORCL...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/ORCL.csv
[357/503] ⬇️ Downloading OTIS...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/OTIS.csv
[358/503] ⬇️ Downloading PCAR...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/PCAR.csv
[359/503] ⬇️ Downloading PKG...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/PKG.csv
[360/503] ⬇️ Downloading PLTR...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/PLTR.csv
[361/503] ⬇️ Downloading PANW...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/PANW.csv
[362/503] ⬇️ Downloading PSKY...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/PSKY.csv
[363/503] ⬇️ Downloading PH...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/PH.csv
[364/503] ⬇️ Downloading PAYX...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/PAYX.csv
[365/503] ⬇️ Downloading PAYC...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/PAYC.csv
[366/503] ⬇️ Downloading PYPL...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/PYPL.csv
[367/503] ⬇️ Downloading PNR...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/PNR.csv
[368/503] ⬇️ Downloading PEP...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/PEP.csv
[369/503] ⬇️ Downloading PFE...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/PFE.csv
[370/503] ⬇️ Downloading PCG...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/PCG.csv
[371/503] ⬇️ Downloading PM...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/PM.csv
[372/503] ⬇️ Downloading PSX...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/PSX.csv
[373/503] ⬇️ Downloading PNW...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/PNW.csv
[374/503] ⬇️ Downloading PNC...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/PNC.csv
[375/503] ⬇️ Downloading POOL...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/POOL.csv
[376/503] ⬇️ Downloading PPG...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/PPG.csv
[377/503] ⬇️ Downloading PPL...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/PPL.csv
[378/503] ⬇️ Downloading PFG...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/PFG.csv
[379/503] ⬇️ Downloading PG...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/PG.csv
[380/503] ⬇️ Downloading PGR...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/PGR.csv
[381/503] ⬇️ Downloading PLD...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/PLD.csv
[382/503] ⬇️ Downloading PRU...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/PRU.csv
[383/503] ⬇️ Downloading PEG...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/PEG.csv
[384/503] ⬇️ Downloading PTC...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/PTC.csv
[385/503] ⬇️ Downloading PSA...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/PSA.csv
[386/503] ⬇️ Downloading PHM...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/PHM.csv
[387/503] ⬇️ Downloading PWR...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/PWR.csv
[388/503] ⬇️ Downloading QCOM...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/QCOM.csv
[389/503] ⬇️ Downloading DGX...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/DGX.csv
[390/503] ⬇️ Downloading RL...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/RL.csv
[391/503] ⬇️ Downloading RJF...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/RJF.csv
[392/503] ⬇️ Downloading RTX...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/RTX.csv
[393/503] ⬇️ Downloading O...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/O.csv
[394/503] ⬇️ Downloading REG...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/REG.csv
[395/503] ⬇️ Downloading REGN...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/REGN.csv
[396/503] ⬇️ Downloading RF...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/RF.csv
[397/503] ⬇️ Downloading RSG...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/RSG.csv
[398/503] ⬇️ Downloading RMD...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/RMD.csv
[399/503] ⬇️ Downloading RVTY...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/RVTY.csv
[400/503] ⬇️ Downloading ROK...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/ROK.csv
[401/503] ⬇️ Downloading ROL...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/ROL.csv
[402/503] ⬇️ Downloading ROP...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/ROP.csv
[403/503] ⬇️ Downloading ROST...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/ROST.csv
[404/503] ⬇️ Downloading RCL...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/RCL.csv
[405/503] ⬇️ Downloading SPGI...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/SPGI.csv
[406/503] ⬇️ Downloading CRM...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/CRM.csv
[407/503] ⬇️ Downloading SBAC...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/SBAC.csv
[408/503] ⬇️ Downloading SLB...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/SLB.csv
[409/503] ⬇️ Downloading STX...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/STX.csv
[410/503] ⬇️ Downloading SRE...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/SRE.csv
[411/503] ⬇️ Downloading NOW...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/NOW.csv
[412/503] ⬇️ Downloading SHW...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/SHW.csv
[413/503] ⬇️ Downloading SPG...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/SPG.csv
[414/503] ⬇️ Downloading SWKS...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/SWKS.csv
[415/503] ⬇️ Downloading SJM...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/SJM.csv
[416/503] ⬇️ Downloading SW...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/SW.csv
[417/503] ⬇️ Downloading SNA...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/SNA.csv
[418/503] ⬇️ Downloading SOLV...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/SOLV.csv
[419/503] ⬇️ Downloading SO...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/SO.csv
[420/503] ⬇️ Downloading LUV...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/LUV.csv
[421/503] ⬇️ Downloading SWK...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/SWK.csv
[422/503] ⬇️ Downloading SBUX...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/SBUX.csv
[423/503] ⬇️ Downloading STT...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/STT.csv
[424/503] ⬇️ Downloading STLD...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/STLD.csv
[425/503] ⬇️ Downloading STE...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/STE.csv
[426/503] ⬇️ Downloading SYK...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/SYK.csv
[427/503] ⬇️ Downloading SMCI...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/SMCI.csv
[428/503] ⬇️ Downloading SYF...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/SYF.csv
[429/503] ⬇️ Downloading SNPS...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/SNPS.csv
[430/503] ⬇️ Downloading SYY...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/SYY.csv
[431/503] ⬇️ Downloading TMUS...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/TMUS.csv
[432/503] ⬇️ Downloading TROW...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/TROW.csv
[433/503] ⬇️ Downloading TTWO...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/TTWO.csv
[434/503] ⬇️ Downloading TPR...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/TPR.csv
[435/503] ⬇️ Downloading TRGP...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/TRGP.csv
[436/503] ⬇️ Downloading TGT...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/TGT.csv
[437/503] ⬇️ Downloading TEL...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/TEL.csv
[438/503] ⬇️ Downloading TDY...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/TDY.csv
[439/503] ⬇️ Downloading TER...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/TER.csv
[440/503] ⬇️ Downloading TSLA...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/TSLA.csv
[441/503] ⬇️ Downloading TXN...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/TXN.csv
[442/503] ⬇️ Downloading TPL...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/TPL.csv
[443/503] ⬇️ Downloading TXT...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/TXT.csv
[444/503] ⬇️ Downloading TMO...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/TMO.csv
[445/503] ⬇️ Downloading TJX...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/TJX.csv
[446/503] ⬇️ Downloading TKO...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/TKO.csv
[447/503] ⬇️ Downloading TTD...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/TTD.csv
[448/503] ⬇️ Downloading TSCO...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/TSCO.csv
[449/503] ⬇️ Downloading TT...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/TT.csv
[450/503] ⬇️ Downloading TDG...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/TDG.csv
[451/503] ⬇️ Downloading TRV...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/TRV.csv
[452/503] ⬇️ Downloading TRMB...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/TRMB.csv
[453/503] ⬇️ Downloading TFC...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/TFC.csv
[454/503] ⬇️ Downloading TYL...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/TYL.csv
[455/503] ⬇️ Downloading TSN...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/TSN.csv
[456/503] ⬇️ Downloading USB...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/USB.csv
[457/503] ⬇️ Downloading UBER...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/UBER.csv
[458/503] ⬇️ Downloading UDR...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/UDR.csv
[459/503] ⬇️ Downloading ULTA...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/ULTA.csv
[460/503] ⬇️ Downloading UNP...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/UNP.csv
[461/503] ⬇️ Downloading UAL...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/UAL.csv
[462/503] ⬇️ Downloading UPS...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/UPS.csv
[463/503] ⬇️ Downloading URI...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/URI.csv
[464/503] ⬇️ Downloading UNH...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/UNH.csv
[465/503] ⬇️ Downloading UHS...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/UHS.csv
[466/503] ⬇️ Downloading VLO...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/VLO.csv
[467/503] ⬇️ Downloading VTR...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/VTR.csv
[468/503] ⬇️ Downloading VLTO...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/VLTO.csv
[469/503] ⬇️ Downloading VRSN...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/VRSN.csv
[470/503] ⬇️ Downloading VRSK...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/VRSK.csv
[471/503] ⬇️ Downloading VZ...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/VZ.csv
[472/503] ⬇️ Downloading VRTX...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/VRTX.csv
[473/503] ⬇️ Downloading VTRS...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/VTRS.csv
[474/503] ⬇️ Downloading VICI...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/VICI.csv
[475/503] ⬇️ Downloading V...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/V.csv
[476/503] ⬇️ Downloading VST...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/VST.csv
[477/503] ⬇️ Downloading VMC...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/VMC.csv
[478/503] ⬇️ Downloading WRB...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/WRB.csv
[479/503] ⬇️ Downloading GWW...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/GWW.csv
[480/503] ⬇️ Downloading WAB...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/WAB.csv
[481/503] ⬇️ Downloading WBA...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/WBA.csv
[482/503] ⬇️ Downloading WMT...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/WMT.csv
[483/503] ⬇️ Downloading DIS...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/DIS.csv
[484/503] ⬇️ Downloading WBD...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/WBD.csv
[485/503] ⬇️ Downloading WM...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/WM.csv
[486/503] ⬇️ Downloading WAT...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/WAT.csv
[487/503] ⬇️ Downloading WEC...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/WEC.csv
[488/503] ⬇️ Downloading WFC...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/WFC.csv
[489/503] ⬇️ Downloading WELL...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/WELL.csv
[490/503] ⬇️ Downloading WST...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/WST.csv
[491/503] ⬇️ Downloading WDC...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/WDC.csv
[492/503] ⬇️ Downloading WY...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/WY.csv
[493/503] ⬇️ Downloading WSM...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/WSM.csv
[494/503] ⬇️ Downloading WMB...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/WMB.csv
[495/503] ⬇️ Downloading WTW...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/WTW.csv
[496/503] ⬇️ Downloading WDAY...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/WDAY.csv
[497/503] ⬇️ Downloading WYNN...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/WYNN.csv
[498/503] ⬇️ Downloading XEL...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/XEL.csv
[499/503] ⬇️ Downloading XYL...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/XYL.csv
[500/503] ⬇️ Downloading YUM...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/YUM.csv
[501/503] ⬇️ Downloading ZBRA...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/ZBRA.csv
[502/503] ⬇️ Downloading ZBH...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/ZBH.csv
[503/503] ⬇️ Downloading ZTS...


  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


   ✅ Saved to data/sp500/ZTS.csv


In [14]:
import pandas as pd
import numpy as np

def compute_technical_features(df: pd.DataFrame) -> pd.DataFrame:
    """
    Compute daily technical features from historical stock price data.
    Assumes df has columns: Date, Open, High, Low, Close, Volume
    """
    df = df.copy()
    df['Date'] = pd.to_datetime(df['Date'])
    df.set_index('Date', inplace=True)
    
    # Ensure price columns exist
    required_cols = ['Open', 'High', 'Low', 'Close', 'Volume']
    for col in required_cols:
        if col not in df.columns:
            raise ValueError(f"Missing column: {col}")
    
    # Trend
    df['SMA_5'] = df['Close'].rolling(window=5).mean()
    df['SMA_20'] = df['Close'].rolling(window=20).mean()
    df['SMA_ratio'] = df['SMA_5'] / df['SMA_20']
    df['EMA_12'] = df['Close'].ewm(span=12, adjust=False).mean()
    df['EMA_26'] = df['Close'].ewm(span=26, adjust=False).mean()
    
    # Momentum
    delta = df['Close'].diff()
    up = delta.clip(lower=0)
    down = -delta.clip(upper=0)
    avg_gain = up.rolling(14).mean()
    avg_loss = down.rolling(14).mean()
    rs = avg_gain / avg_loss
    df['RSI_14'] = 100 - (100 / (1 + rs))
    
    df['MACD_line'] = df['EMA_12'] - df['EMA_26']
    df['MACD_signal'] = df['MACD_line'].ewm(span=9, adjust=False).mean()
    df['MACD_hist'] = df['MACD_line'] - df['MACD_signal']

    df['Streak'] = np.where(df['Close'] > df['Close'].shift(1), 1, 
                            np.where(df['Close'] < df['Close'].shift(1), -1, 0))
    df['Streak_length'] = df['Streak'] * (df['Streak'] != 0).astype(int)
    df['Streak_length'] = df['Streak_length'].groupby((df['Streak_length'] != df['Streak_length'].shift()).cumsum()).cumsum()

    # Volatility
    df['Log_return'] = np.log(df['Close'] / df['Close'].shift(1))
    df['Realized_vol_20d'] = df['Log_return'].rolling(20).std()
    df['Realized_vol_60d'] = df['Log_return'].rolling(60).std()
    
    rolling_std = df['Close'].rolling(window=20).std()
    df['Bollinger_bandwidth'] = (4 * rolling_std) / df['Close'].rolling(window=20).mean()

    df['Rolling_High_Low_20d'] = df['High'].rolling(20).max() - df['Low'].rolling(20).min()

    # Volume
    df['OBV'] = (np.sign(df['Close'].diff()) * df['Volume']).fillna(0).cumsum()
    df['Volume_zscore_20d'] = (df['Volume'] - df['Volume'].rolling(20).mean()) / df['Volume'].rolling(20).std()

    # Returns / Lags
    for i in range(1, 6):
        df[f'Return_{i}d'] = df['Close'].pct_change(i)
    df['Momentum_20d'] = df['Close'].pct_change(20)
    df['Momentum_60d'] = df['Close'].pct_change(60)
    df['Rolling_mean_return_20d'] = df['Log_return'].rolling(20).mean()
    df['Rolling_var_return_20d'] = df['Log_return'].rolling(20).var()

    # Drop helper columns
    df.drop(columns=['Log_return', 'Streak'], inplace=True)

    return df.reset_index()


In [23]:
import os

def batch_compute_features(input_dir="data/sp500", output_dir="data/features"):
    os.makedirs(output_dir, exist_ok=True)
    files = [f for f in os.listdir(input_dir) if f.endswith(".csv")]

    for i, file in enumerate(files):
        ticker = file.replace(".csv", "")
        input_path = os.path.join(input_dir, file)
        output_path = os.path.join(output_dir, f"{ticker}_features.csv")

        if os.path.exists(output_path):
            print(f"[{i+1}/{len(files)}] ✅ {ticker} already processed.")
            continue

        try:
            print(f"[{i+1}/{len(files)}] ⏳ Processing {ticker}...")
            df = pd.read_csv(input_path, skiprows=2, header=None)
            df.columns = ['Date', 'Close', 'High', 'Low', 'Open', 'Volume']
            # Force numeric types after skipping both header and label
            for col in ['Open', 'High', 'Low', 'Close', 'Volume']:
                if col in df.columns:
                    df[col] = pd.to_numeric(df[col], errors='coerce')

            df.dropna(subset=['Close', 'High', 'Low', 'Open', 'Volume'], how='all', inplace=True)
            features_df = compute_technical_features(df)
            features_df.to_csv(output_path, index=False)
            print(f"   ✅ Saved to {output_path}")
        except Exception as e:
            print(f"   ❌ Failed for {ticker}: {e}")

In [24]:
batch_compute_features()

[1/503] ⏳ Processing MAR...
   ✅ Saved to data/features/MAR_features.csv
[2/503] ⏳ Processing XYZ...
   ✅ Saved to data/features/XYZ_features.csv
[3/503] ⏳ Processing NTRS...
   ✅ Saved to data/features/NTRS_features.csv
[4/503] ⏳ Processing ELV...
   ✅ Saved to data/features/ELV_features.csv
[5/503] ⏳ Processing VZ...
   ✅ Saved to data/features/VZ_features.csv
[6/503] ⏳ Processing MCD...
   ✅ Saved to data/features/MCD_features.csv
[7/503] ⏳ Processing L...
   ✅ Saved to data/features/L_features.csv
[8/503] ⏳ Processing GM...
   ✅ Saved to data/features/GM_features.csv
[9/503] ⏳ Processing OMC...
   ✅ Saved to data/features/OMC_features.csv
[10/503] ⏳ Processing TYL...
   ✅ Saved to data/features/TYL_features.csv
[11/503] ⏳ Processing WMB...
   ✅ Saved to data/features/WMB_features.csv
[12/503] ⏳ Processing BKR...
   ✅ Saved to data/features/BKR_features.csv
[13/503] ⏳ Processing CPB...
   ✅ Saved to data/features/CPB_features.csv
[14/503] ⏳ Processing FE...
   ✅ Saved to data/featur

In [6]:
import os
import pandas as pd
import yfinance as yf
import numpy as np

In [1]:
import os
import time
import pandas as pd
import numpy as np
import yfinance as yf

def extract_and_merge_fundamentals(input_dir="data/features", output_dir="data/features_with_fundamentals"):
    os.makedirs(output_dir, exist_ok=True)
    files = [f for f in os.listdir(input_dir) if f.endswith("_features.csv")]
    fundamentals_stats = []

    for i, file in enumerate(files):
        ticker = file.replace("_features.csv", "")
        input_path = os.path.join(input_dir, file)
        output_path = os.path.join(output_dir, f"{ticker}_full.csv")

        if os.path.exists(output_path):
            print(f"[{i+1}/{len(files)}] ✅ {ticker} already processed.")
            continue

        try:
            print(f"[{i+1}/{len(files)}] ⏳ Getting fundamentals for {ticker}...")

            # Load features
            df = pd.read_csv(input_path, parse_dates=['Date'])
            df.set_index('Date', inplace=True)

            # Prepare Ticker object
            yf_ticker = yf.Ticker(ticker)

            # EPS (Diluted) from income_stmt
            try:
                eps_series = yf_ticker.income_stmt.T[['Diluted EPS']]
                eps_series.index = pd.to_datetime(eps_series.index)
                df = df.merge(eps_series.rename(columns={'Diluted EPS': 'EPS_TTM'}),
                              how='left', left_index=True, right_index=True)
                df['EPS_TTM'] = df['EPS_TTM'].ffill()
            except Exception as e:
                print(f"   ⚠️ EPS extraction failed for {ticker}: {e}")
                df['EPS_TTM'] = np.nan

            # Market Cap
            try:
                shares_out = yf_ticker.info.get('sharesOutstanding')
                if shares_out is not None:
                    df['Market_Cap'] = df['Close'] * shares_out
                else:
                    df['Market_Cap'] = np.nan
            except Exception as e:
                print(f"   ⚠️ Market Cap failed for {ticker}: {e}")
                df['Market_Cap'] = np.nan

            # PE Ratio & Dividend Yield
            try:
                df['PE_Ratio'] = yf_ticker.info.get('trailingPE', np.nan)
                df['Dividend_Yield'] = yf_ticker.info.get('dividendYield', np.nan)
            except Exception as e:
                print(f"   ⚠️ PE/Yield failed for {ticker}: {e}")
                df['PE_Ratio'] = np.nan
                df['Dividend_Yield'] = np.nan

            # Save result
            df.reset_index().to_csv(output_path, index=False)
            print(f"   ✅ Saved to {output_path}")

            # Track completeness
            stats = {
                'Ticker': ticker,
                'EPS_TTM_avail': df['EPS_TTM'].notna().mean(),
                'MarketCap_avail': df['Market_Cap'].notna().mean(),
                'PE_avail': df['PE_Ratio'].notna().mean(),
                'DivYield_avail': df['Dividend_Yield'].notna().mean()
            }
            fundamentals_stats.append(stats)

        except Exception as e:
            print(f"   ❌ Totally failed for {ticker}: {e}")
        time.sleep(1.5)

    return pd.DataFrame(fundamentals_stats)

In [2]:
extract_and_merge_fundamentals()

[1/503] ✅ MSFT already processed.
[2/503] ✅ AOS already processed.
[3/503] ✅ PNR already processed.
[4/503] ✅ WBA already processed.
[5/503] ✅ TJX already processed.
[6/503] ✅ HSY already processed.
[7/503] ✅ CPT already processed.
[8/503] ✅ MRK already processed.
[9/503] ✅ GD already processed.
[10/503] ✅ VZ already processed.
[11/503] ✅ DTE already processed.
[12/503] ✅ NEM already processed.
[13/503] ✅ MOS already processed.
[14/503] ✅ ABBV already processed.
[15/503] ✅ MKTX already processed.
[16/503] ✅ MLM already processed.
[17/503] ✅ DIS already processed.
[18/503] ✅ DG already processed.
[19/503] ✅ TXN already processed.
[20/503] ✅ PRU already processed.
[21/503] ✅ HSIC already processed.
[22/503] ✅ HBAN already processed.
[23/503] ✅ CZR already processed.
[24/503] ✅ VMC already processed.
[25/503] ✅ CSCO already processed.
[26/503] ✅ TDY already processed.
[27/503] ✅ SWK already processed.
[28/503] ✅ EA already processed.
[29/503] ✅ RCL already processed.
[30/503] ✅ DGX alread

Unnamed: 0,Ticker,EPS_TTM_avail,MarketCap_avail,PE_avail,DivYield_avail
0,PFE,0.299404,1.0,1.0,1.0
1,WBD,0.299404,1.0,1.0,0.0
2,SYK,0.299404,1.0,1.0,1.0
3,KEY,0.299404,1.0,0.0,1.0
4,SNPS,0.216302,1.0,1.0,0.0
...,...,...,...,...,...
456,FICO,0.324851,1.0,1.0,0.0
457,AXP,0.299404,1.0,1.0,1.0
458,STE,0.274751,1.0,1.0,1.0
459,BEN,0.324851,1.0,1.0,1.0


In [3]:
# Re-import required packages after state reset
import pandas as pd
import numpy as np
import os

def generate_labels_from_fundamentals(input_dir="data/features_with_fundamentals",
                                      output_dir="data/labeled_features",
                                      horizon=5, vol_window=20, min_vol_threshold=0.005):
    os.makedirs(output_dir, exist_ok=True)
    files = [f for f in os.listdir(input_dir) if f.endswith("_full.csv")]
    summary_stats = []

    for i, file in enumerate(files):
        ticker = file.replace("_full.csv", "")
        input_path = os.path.join(input_dir, file)
        output_path = os.path.join(output_dir, f"{ticker}_labeled.csv")

        try:
            df = pd.read_csv(input_path, parse_dates=['Date'])
            df.set_index('Date', inplace=True)
            df.sort_index(inplace=True)

            # Step 1: Compute log returns
            df['LogReturn'] = np.log(df['Close'] / df['Close'].shift(1))

            # Step 2: Estimate rolling volatility (past 20 days)
            df['RollingVol'] = df['LogReturn'].rolling(vol_window).std()

            # Step 3: Compute forward return over horizon
            df['ForwardReturn'] = df['Close'].shift(-horizon) / df['Close'] - 1

            # Step 4: Apply minimum volatility threshold
            df['VolThreshold'] = df['RollingVol'].clip(lower=min_vol_threshold)

            # Step 5: Assign label
            def label_row(row):
                if pd.isna(row['ForwardReturn']) or pd.isna(row['VolThreshold']):
                    return np.nan
                if row['ForwardReturn'] <= -row['VolThreshold']:
                    return "down"
                elif row['ForwardReturn'] >= row['VolThreshold']:
                    return "up"
                else:
                    return "flat"

            df['Label'] = df.apply(label_row, axis=1)

            # Save labeled file
            df.reset_index().to_csv(output_path, index=False)

            # Collect label distribution stats
            label_counts = df['Label'].value_counts(normalize=True).to_dict()
            summary_stats.append({
                "Ticker": ticker,
                "Up_pct": label_counts.get("up", 0.0),
                "Down_pct": label_counts.get("down", 0.0),
                "Flat_pct": label_counts.get("flat", 0.0),
                "Total": df['Label'].notna().sum()
            })

            print(f"[{i+1}/{len(files)}] ✅ Labeled {ticker}: {df['Label'].notna().sum()} samples")

        except Exception as e:
            print(f"[{i+1}/{len(files)}] ❌ Failed for {ticker}: {e}")

    return pd.DataFrame(summary_stats)

In [4]:
summary_df = generate_labels_from_fundamentals()

[1/503] ✅ Labeled KDP: 2490 samples
[2/503] ✅ Labeled MPC: 2490 samples
[3/503] ✅ Labeled AVY: 2490 samples
[4/503] ✅ Labeled D: 2490 samples
[5/503] ✅ Labeled ADBE: 2490 samples
[6/503] ✅ Labeled HWM: 2028 samples
[7/503] ✅ Labeled ANET: 2490 samples
[8/503] ✅ Labeled AMD: 2490 samples
[9/503] ✅ Labeled MSI: 2490 samples
[10/503] ✅ Labeled GEN: 2490 samples
[11/503] ✅ Labeled EQT: 2490 samples
[12/503] ✅ Labeled VICI: 1735 samples
[13/503] ✅ Labeled CI: 2490 samples
[14/503] ✅ Labeled PHM: 2490 samples
[15/503] ✅ Labeled AEP: 2490 samples
[16/503] ✅ Labeled ODFL: 2490 samples
[17/503] ✅ Labeled DECK: 2490 samples
[18/503] ✅ Labeled EVRG: 2490 samples
[19/503] ✅ Labeled DVN: 2490 samples
[20/503] ✅ Labeled GNRC: 2490 samples
[21/503] ✅ Labeled STX: 2490 samples
[22/503] ✅ Labeled FSLR: 2490 samples
[23/503] ✅ Labeled LHX: 2490 samples
[24/503] ✅ Labeled MKC: 2490 samples
[25/503] ✅ Labeled MO: 2490 samples
[26/503] ✅ Labeled PAYC: 2490 samples
[27/503] ✅ Labeled TGT: 2490 samples
[28/5

In [16]:
# Re-import required packages after reset
import pandas as pd
import numpy as np
import os

def build_final_dataset(input_dir="data/labeled_features", output_file="data/final_dataset_GBDT.csv"):
    all_data = []
    files = [f for f in os.listdir(input_dir) if f.endswith("_labeled.csv")]

    for i, file in enumerate(files):
        ticker = file.replace("_labeled.csv", "")
        input_path = os.path.join(input_dir, file)

        try:
            df = pd.read_csv(input_path, parse_dates=['Date'])

            # Add ticker column for reference (optional)
            df['Ticker'] = ticker

            # Drop the unwanted columns entirely
            cols_to_remove = ['PE_Ratio', 'EPS_TTM', 'Dividend_Yield']
            for c in cols_to_remove:
                if c in df.columns:
                    df.drop(columns=c, inplace=True)

            # Drop rows with any NaNs now (strict)
            df.dropna(axis=0, how='any', inplace=True)

            all_data.append(df)
            print(f"[{i+1}/{len(files)}] ✅ Loaded {ticker}: {len(df)} rows")

        except Exception as e:
            print(f"[{i+1}/{len(files)}] ❌ Failed for {ticker}: {e}")

    combined_df = pd.concat(all_data, ignore_index=True)

    # Move Label column to the end
    target = combined_df.pop("Label")
    combined_df["Label"] = target

    os.makedirs(os.path.dirname(output_file), exist_ok=True)
    combined_df.to_csv(output_file, index=False)
    print(f"\n✅ Final dataset saved to {output_file} with shape {combined_df.shape}")

    return combined_df

In [10]:
# for all csvs under data/features_with_fundamentals, locate labels (column names) has only nan values. genereate a set include all nan column names
nan_cols = set()
for file in os.listdir("data/features_with_fundamentals"):
    df = pd.read_csv(os.path.join("data/features_with_fundamentals", file))
    file_nan_cols = set(df.columns[df.isna().all()])
    print(file_nan_cols)
    nan_cols.update(file_nan_cols)
print(nan_cols)


set()
set()
set()
set()
{'Dividend_Yield'}
set()
{'Dividend_Yield'}
{'Dividend_Yield'}
set()
set()
set()
set()
set()
set()
set()
set()
{'Dividend_Yield'}
set()
set()
{'Dividend_Yield'}
set()
{'Dividend_Yield'}
set()
set()
set()
set()
set()
set()
set()
set()
set()
set()
set()
set()
set()
{'Dividend_Yield'}
set()
set()
set()
set()
set()
{'PE_Ratio'}
set()
set()
set()
{'Dividend_Yield'}
{'EPS_TTM'}
set()
set()
set()
set()
set()
{'Dividend_Yield'}
set()
set()
{'Dividend_Yield'}
set()
set()
set()
set()
{'Dividend_Yield'}
{'Dividend_Yield'}
set()
set()
{'Dividend_Yield', 'PE_Ratio'}
set()
set()
set()
set()
set()
{'Dividend_Yield'}
set()
set()
set()
{'Dividend_Yield'}
set()
set()
set()
set()
set()
set()
set()
set()
set()
set()
set()
{'Dividend_Yield'}
set()
set()
set()
set()
set()
set()
{'Dividend_Yield'}
{'Dividend_Yield'}
set()
set()
set()
set()
set()
set()
set()
set()
set()
set()
{'Dividend_Yield'}
set()
set()
set()
set()
{'Dividend_Yield'}
{'Dividend_Yield'}
set()
{'Dividend_Yield'}
{'PE_

In [17]:
build_final_dataset()

[1/503] ✅ Loaded CMCSA: 2450 rows
[2/503] ✅ Loaded HAS: 2450 rows
[3/503] ✅ Loaded VST: 2007 rows
[4/503] ✅ Loaded HII: 2450 rows
[5/503] ✅ Loaded UDR: 2450 rows
[6/503] ✅ Loaded VRTX: 2450 rows
[7/503] ✅ Loaded CNC: 2450 rows
[8/503] ✅ Loaded EXPE: 2450 rows
[9/503] ✅ Loaded DGX: 2450 rows
[10/503] ✅ Loaded ANET: 2450 rows
[11/503] ✅ Loaded FRT: 2450 rows
[12/503] ✅ Loaded IFF: 2450 rows
[13/503] ✅ Loaded APA: 2450 rows
[14/503] ✅ Loaded HRL: 2450 rows
[15/503] ✅ Loaded DLTR: 2450 rows
[16/503] ✅ Loaded INCY: 2450 rows
[17/503] ✅ Loaded TECH: 2450 rows
[18/503] ✅ Loaded URI: 2450 rows
[19/503] ✅ Loaded MCD: 2450 rows
[20/503] ✅ Loaded LLY: 2450 rows
[21/503] ✅ Loaded AEP: 2450 rows
[22/503] ✅ Loaded AON: 2450 rows
[23/503] ✅ Loaded GEV: 127 rows
[24/503] ✅ Loaded CRM: 2450 rows
[25/503] ✅ Loaded XYZ: 2227 rows
[26/503] ✅ Loaded RF: 2450 rows
[27/503] ✅ Loaded LYV: 2450 rows
[28/503] ✅ Loaded ABBV: 2450 rows
[29/503] ✅ Loaded GRMN: 2450 rows
[30/503] ✅ Loaded MAS: 2450 rows
[31/503] ✅ 

Unnamed: 0,Date,Close,High,Low,Open,Volume,SMA_5,SMA_20,SMA_ratio,EMA_12,...,Momentum_60d,Rolling_mean_return_20d,Rolling_var_return_20d,Market_Cap,LogReturn,RollingVol,ForwardReturn,VolThreshold,Ticker,Label
0,2015-03-31,22.334261,22.512238,22.061360,22.231428,35082600,22.204336,23.064602,0.962702,22.652507,...,-0.007028,-0.002774,0.000178,8.225177e+10,-0.002476,0.013360,0.047990,0.013360,CMCSA,up
1,2015-04-01,22.567612,22.575522,22.168149,22.460825,31569000,22.266245,23.003300,0.967959,22.639446,...,0.024234,-0.002645,0.000182,8.311115e+10,0.010394,0.013480,0.044339,0.013480,CMCSA,up
2,2015-04-02,22.915659,23.010581,22.385680,22.579479,41127600,22.446586,22.961368,0.977581,22.681940,...,0.053777,-0.001797,0.000198,8.439292e+10,0.015305,0.014066,0.032275,0.014066,CMCSA,up
3,2015-04-06,23.022449,23.192518,22.741640,22.797011,21333000,22.645923,22.923989,0.987870,22.734326,...,0.059647,-0.001598,0.000200,8.478621e+10,0.004649,0.014131,0.009792,0.014131,CMCSA,flat
4,2015-04-07,23.065947,23.208329,22.868193,23.006620,24779600,22.781186,22.884848,0.995470,22.785345,...,0.039053,-0.001669,0.000199,8.494639e+10,0.001888,0.014108,0.011317,0.014108,CMCSA,flat
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1194425,2024-12-16,353.705048,360.951782,352.933911,360.951782,348600,357.491541,370.993719,0.963605,363.969970,...,-0.071933,-0.001601,0.000114,1.483555e+10,-0.013989,0.010672,0.004416,0.010672,EG,flat
1194426,2024-12-17,351.025848,352.083698,348.405963,350.590845,379800,356.030334,370.185846,0.961761,361.978567,...,-0.077057,-0.002250,0.000113,1.472318e+10,-0.007604,0.010620,0.020616,0.010620,EG,up
1194427,2024-12-18,344.372314,355.356100,344.164709,351.203808,505100,353.258191,369.187418,0.956853,359.269913,...,-0.086138,-0.002818,0.000126,1.444411e+10,-0.019136,0.011219,0.039618,0.011219,EG,up
1194428,2024-12-19,345.390594,347.980852,343.798902,343.927429,410700,350.636322,367.923697,0.953014,357.134633,...,-0.082741,-0.003531,0.000106,1.448682e+10,0.002953,0.010294,0.030313,0.010294,EG,up


In [20]:
def split_gbdt_dataset(input_file="data/final_dataset_GBDT.csv",
                       train_file="data/final_train_GBDT.csv",
                       test_file="data/final_test_GBDT.csv",
                       cutoff_date="2023-01-01"):
    """
    Split the preprocessed GBDT dataset by date.
    Data on or after cutoff_date will be test; before cutoff_date will be train.
    """
    df = pd.read_csv(input_file, parse_dates=['Date'])

    train_df = df[df['Date'] < cutoff_date].reset_index(drop=True)
    test_df = df[df['Date'] >= cutoff_date].reset_index(drop=True)

    train_df.to_csv(train_file, index=False)
    test_df.to_csv(test_file, index=False)

    print(f"✅ Train set saved to {train_file} with shape {train_df.shape}")
    print(f"✅ Test set saved to {test_file} with shape {test_df.shape}")
    return train_df, test_df
split_gbdt_dataset()

✅ Train set saved to data/final_train_GBDT.csv with shape (946121, 38)
✅ Test set saved to data/final_test_GBDT.csv with shape (248309, 38)


(             Date       Close        High         Low        Open    Volume  \
 0      2015-03-31   22.334261   22.512238   22.061360   22.231428  35082600   
 1      2015-04-01   22.567612   22.575522   22.168149   22.460825  31569000   
 2      2015-04-02   22.915659   23.010581   22.385680   22.579479  41127600   
 3      2015-04-06   23.022449   23.192518   22.741640   22.797011  21333000   
 4      2015-04-07   23.065947   23.208329   22.868193   23.006620  24779600   
 ...           ...         ...         ...         ...         ...       ...   
 946116 2022-12-23  318.694794  319.817622  314.165383  316.458634    158300   
 946117 2022-12-27  319.798492  321.977561  318.285524  319.084826    135200   
 946118 2022-12-28  317.771790  322.272626  317.705174  320.769182    137000   
 946119 2022-12-29  318.637665  320.407572  316.820220  320.407572    121700   
 946120 2022-12-30  315.221619  318.247586  313.023529  316.439608    151900   
 
              SMA_5      SMA_20  SMA_r

In [None]:
WINDOW_SIZE = 60      # how many days per sequence
HORIZON = 5           # how far ahead the label is (already used in your labeling)
FEATURE_COLS = [...]  # ~30–40 features, excluding non-numeric or sparse ones

In [1]:
import os
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler

def generate_lstm_dataset(input_dir="data/labeled_features",
                          output_file=None,
                          window_size=60):
    """
    Generate LSTM-ready dataset from time series per-stock labeled CSVs,
    excluding leakage-prone features.
    """

    # Columns never used as features
    always_ignore = {
        'Date', 'Ticker', 'Label',
        'EPS_TTM', 'PE_Ratio', 'Dividend_Yield',   # fundamentals
        'Market_Cap', 'ForwardReturn', 'VolThreshold'  # leakage features
    }

    X_all, y_all = [], []
    files = [f for f in os.listdir(input_dir) if f.endswith(".csv")]

    print(f"📂 Found {len(files)} labeled CSVs in {input_dir}")
    print(f"🧹 Ignoring columns: {sorted(always_ignore)}\n")

    for i, file in enumerate(files):
        ticker = file.replace(".csv", "").replace("_train", "").replace("_test", "")
        path = os.path.join(input_dir, file)

        try:
            df = pd.read_csv(path, parse_dates=['Date'])

            # feature columns
            feature_cols = [c for c in df.columns if c not in always_ignore]

            # drop rows with NaNs in features or label
            df = df.dropna(subset=feature_cols + ['Label']).reset_index(drop=True)

            if len(df) < window_size + 1:
                print(f"[{i+1}/{len(files)}] ⚠️ Skipped {ticker} (not enough data, rows={len(df)})")
                continue

            features = df[feature_cols].values
            label_map = {'down': 0, 'flat': 1, 'up': 2}
            labels = df['Label'].map(label_map).values

            # scale features per stock
            scaler = StandardScaler()
            features = scaler.fit_transform(features)

            # sliding windows
            num_samples = len(features) - window_size
            for t in range(num_samples):
                X_all.append(features[t:t+window_size])
                y_all.append(labels[t + window_size])

            print(f"[{i+1}/{len(files)}] ✅ Processed {ticker}: {num_samples} samples")

        except Exception as e:
            print(f"[{i+1}/{len(files)}] ❌ Failed {ticker}: {e}")

    # convert to arrays
    X_array = np.array(X_all)
    y_array = np.array(y_all)

    if output_file:
        os.makedirs(os.path.dirname(output_file), exist_ok=True)
        np.savez_compressed(output_file, X=X_array, y=y_array)
        print(f"\n✅ Saved LSTM dataset to {output_file} with shape X: {X_array.shape}, y: {y_array.shape}")
    else:
        print(f"\n✅ Prepared in-memory dataset with shape X: {X_array.shape}, y: {y_array.shape}")

    return X_array, y_array

In [2]:
import os
import pandas as pd

def split_lstm_by_date(input_dir="data/labeled_features",
                       output_dir="data/labeled_split",
                       cutoff_date="2023-01-01"):
    """
    For each ticker labeled CSV, split by date BEFORE window slicing.
    Saves train/test CSVs per ticker.
    """
    os.makedirs(os.path.join(output_dir, "train"), exist_ok=True)
    os.makedirs(os.path.join(output_dir, "test"), exist_ok=True)

    files = [f for f in os.listdir(input_dir) if f.endswith("_labeled.csv")]

    for i, file in enumerate(files):
        ticker = file.replace("_labeled.csv", "")
        path = os.path.join(input_dir, file)
        try:
            df = pd.read_csv(path, parse_dates=['Date'])

            train_df = df[df['Date'] < cutoff_date].reset_index(drop=True)
            test_df = df[df['Date'] >= cutoff_date].reset_index(drop=True)

            train_path = os.path.join(output_dir, "train", f"{ticker}_train.csv")
            test_path = os.path.join(output_dir, "test", f"{ticker}_test.csv")

            train_df.to_csv(train_path, index=False)
            test_df.to_csv(test_path, index=False)

            print(f"[{i+1}/{len(files)}] ✅ {ticker}: train {len(train_df)} rows, test {len(test_df)} rows")

        except Exception as e:
            print(f"[{i+1}/{len(files)}] ❌ Failed for {ticker}: {e}")

In [4]:
# Step 1: Run this first (splits the full labeled_features folder)
split_lstm_by_date(
    input_dir="data/features_with_fundamentals",
    output_dir="data/labeled_split",  # this creates train/ and test/
    cutoff_date="2023-01-01"
)

# Step 2: Use the generated splits to build LSTM datasets
generate_lstm_dataset(
    input_dir="data/labeled_split/train",
    output_file="data/lstm_train.npz",
    window_size=60,
)

generate_lstm_dataset(
    input_dir="data/labeled_split/test",
    output_file="data/lstm_test.npz",
    window_size=60,
)

📂 Found 503 labeled CSVs in data/labeled_split/train
🧹 Ignoring columns: ['Date', 'Dividend_Yield', 'EPS_TTM', 'ForwardReturn', 'Label', 'Market_Cap', 'PE_Ratio', 'Ticker', 'VolThreshold']

[1/503] ✅ Processed BKR: 1894 samples
[2/503] ✅ Processed ON: 1894 samples
[3/503] ✅ Processed FRT: 1894 samples
[4/503] ✅ Processed GOOG: 1894 samples
[5/503] ✅ Processed WAT: 1894 samples
[6/503] ✅ Processed NVR: 1894 samples
[7/503] ✅ Processed UAL: 1894 samples
[8/503] ✅ Processed VICI: 1139 samples
[9/503] ✅ Processed SJM: 1894 samples
[10/503] ✅ Processed UBER: 799 samples
[11/503] ✅ Processed ABBV: 1894 samples
[12/503] ✅ Processed ECL: 1894 samples
[13/503] ✅ Processed CTAS: 1894 samples
[14/503] ✅ Processed HD: 1894 samples
[15/503] ✅ Processed POOL: 1894 samples
[16/503] ✅ Processed EG: 1894 samples
[17/503] ✅ Processed EIX: 1894 samples
[18/503] ✅ Processed RCL: 1894 samples
[19/503] ✅ Processed TTD: 1461 samples
[20/503] ✅ Processed LII: 1894 samples
[21/503] ✅ Processed WMB: 1894 sample

(array([[[-0.30556495, -0.40393635, -0.38980209, ...,  0.05933824,
           0.84323122,  0.19011563],
         [ 0.2946394 ,  0.16286324, -0.1428917 , ...,  0.61588376,
           2.96742643,  0.75005505],
         [ 0.16026394,  0.14947483,  0.04229022, ...,  0.54604235,
          -0.646406  ,  0.6835341 ],
         ...,
         [ 0.6036992 ,  0.61362422,  0.72357962, ..., -0.23665216,
           0.44584793, -0.14141344],
         [ 0.42901315,  0.45742034,  0.58183438, ..., -0.21377544,
          -0.82869712, -0.11475733],
         [ 0.51187608,  0.40832721,  0.49267258, ..., -0.23207582,
           0.40501547, -0.136066  ]],
 
        [[ 0.2946394 ,  0.16286324, -0.1428917 , ...,  0.61588376,
           2.96742643,  0.75005505],
         [ 0.16026394,  0.14947483,  0.04229022, ...,  0.54604235,
          -0.646406  ,  0.6835341 ],
         [ 0.20505633,  0.28559437,  0.23661717, ...,  0.50032307,
           0.22588291,  0.63946178],
         ...,
         [ 0.42901315,  0.4574203

In [5]:
import pandas as pd

df = pd.read_csv("data/labeled_split/train/YUM_train.csv", parse_dates=["Date"])
print(len(df))  # total rows after split

# How many rows have any NaNs in important cols?
required_cols = [col for col in df.columns if col not in ['Date', 'Label', 'Ticker', 'EPS_TTM', 'PE_Ratio', 'Dividend_Yield']]
valid_rows = df[required_cols + ['Label']].dropna()
print(len(valid_rows))

# How many LSTM windows can we generate from that?
window_size = 60
print(len(valid_rows) - window_size)

2014
1954
1894


In [6]:
import numpy as np
import matplotlib.pyplot as plt

# Load train and test data
train_data = np.load("data/lstm_train.npz")
test_data = np.load("data/lstm_test.npz")

y_train = train_data["y"]
y_test = test_data["y"]

# Count distribution
def print_label_distribution(y, name=""):
    total = len(y)
    ones = np.sum(y == 1)
    zeros = np.sum(y == 0)
    print(f"{name} set:")
    print(f" - Total: {total}")
    print(f" - 1s: {ones} ({ones/total:.2%})")
    print(f" - 0s: {zeros} ({zeros/total:.2%})\n")

print_label_distribution(y_train, "Train")
print_label_distribution(y_test, "Test")

Train set:
 - Total: 916241
 - 1s: 342474 (37.38%)
 - 0s: 248897 (27.17%)

Test set:
 - Total: 218129
 - 1s: 79866 (36.61%)
 - 0s: 61590 (28.24%)



In [7]:
import pandas as pd
df = pd.read_csv("data/labeled_split/train/YUM_train.csv")
print(df.columns)
print(df[['Date','Label']].head())
print(df['Label'].value_counts(dropna=False))

Index(['Date', 'Close', 'High', 'Low', 'Open', 'Volume', 'SMA_5', 'SMA_20',
       'SMA_ratio', 'EMA_12', 'EMA_26', 'RSI_14', 'MACD_line', 'MACD_signal',
       'MACD_hist', 'Streak_length', 'Realized_vol_20d', 'Realized_vol_60d',
       'Bollinger_bandwidth', 'Rolling_High_Low_20d', 'OBV',
       'Volume_zscore_20d', 'Return_1d', 'Return_2d', 'Return_3d', 'Return_4d',
       'Return_5d', 'Momentum_20d', 'Momentum_60d', 'Rolling_mean_return_20d',
       'Rolling_var_return_20d', 'EPS_TTM', 'Market_Cap', 'PE_Ratio',
       'Dividend_Yield', 'LogReturn', 'RollingVol', 'ForwardReturn',
       'VolThreshold', 'Label'],
      dtype='object')
         Date Label
0  2015-01-02   NaN
1  2015-01-05   NaN
2  2015-01-06   NaN
3  2015-01-07   NaN
4  2015-01-08   NaN
flat    761
up      729
down    504
NaN      20
Name: Label, dtype: int64


In [4]:
print(f"    Label counts: {df['Label'].value_counts().to_dict()}")

    Label counts: {'flat': 761, 'up': 729, 'down': 504}


In [8]:
import numpy as np

data = np.load("data/lstm_train.npz")
X, y = data["X"], data["y"]

print(f"X shape: {X.shape}")  # Expected: (num_samples, window_size, num_features)
print(f"y shape: {y.shape}")  # Expected: (num_samples,)

X shape: (916241, 60, 32)
y shape: (916241,)


In [9]:
import collections

print("Label distribution:", collections.Counter(y))

Label distribution: Counter({1: 342474, 2: 324870, 0: 248897})


In [10]:
print("Any NaNs in X?", np.isnan(X).any())
print("Any Infs in X?", np.isinf(X).any())

Any NaNs in X? False
Any Infs in X? False


In [1]:
import torch
torch.cuda.is_available()

True

In [12]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset, random_split
import numpy as np
from sklearn.model_selection import train_test_split

@torch.no_grad()
def evaluate(model, dataloader, device):
    model.eval()
    correct, total = 0, 0
    for xb, yb in dataloader:
        xb, yb = xb.to(device), yb.to(device)
        outputs = model(xb)
        preds = torch.argmax(outputs, dim=1)
        correct += (preds == yb).sum().item()
        total += xb.size(0)
    return correct / total

# Load data
data = np.load("data/lstm_train.npz")
X, y = data["X"], data["y"]

# Convert to torch tensors
X_tensor = torch.tensor(X, dtype=torch.float32)
y_tensor = torch.tensor(y, dtype=torch.long)

# Create TensorDataset
dataset = TensorDataset(X_tensor, y_tensor)

# Train/val split
train_size = int(0.9 * len(dataset))
val_size = len(dataset) - train_size
train_ds, val_ds = random_split(dataset, [train_size, val_size])

train_loader = DataLoader(train_ds, batch_size=512, shuffle=True)
val_loader = DataLoader(val_ds, batch_size=512)

class LSTMClassifier(nn.Module):
    def __init__(self, input_dim, hidden_dim, output_dim, num_layers=1, dropout=0.25):
        super(LSTMClassifier, self).__init__()
        self.lstm = nn.LSTM(input_dim, hidden_dim, num_layers, batch_first=True)
        self.dropout = nn.Dropout(dropout)
        self.fc1 = nn.Linear(hidden_dim, 32)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(32, output_dim)

    def forward(self, x):
        _, (h_n, _) = self.lstm(x)  # take last hidden state
        x = h_n[-1]
        x = self.dropout(x)
        x = self.fc1(x)
        x = self.relu(x)
        x = self.dropout(x)
        x = self.fc2(x)
        return x

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

lstm_model = LSTMClassifier(input_dim=32, hidden_dim=128, output_dim=3, num_layers = 2).to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(lstm_model.parameters(), lr=1e-3)

num_epochs = 200
patience = 5  # stop if no improvement for 5 epochs
best_val_acc = 0.0
patience_counter = 0
best_model_state = None

for epoch in range(num_epochs):
    lstm_model.train()
    total_loss, correct, total = 0, 0, 0

    for xb, yb in train_loader:
        xb, yb = xb.to(device), yb.to(device)

        optimizer.zero_grad()
        outputs = lstm_model(xb)
        loss = criterion(outputs, yb)
        loss.backward()
        optimizer.step()

        total_loss += loss.item() * xb.size(0)
        preds = torch.argmax(outputs, dim=1)
        correct += (preds == yb).sum().item()
        total += xb.size(0)

    train_acc = correct / total
    val_acc = evaluate(lstm_model, val_loader, device)

    print(f"Epoch {epoch+1}/{num_epochs} | "
          f"Train Loss: {total_loss/total:.4f} | Train Acc: {train_acc:.4f} | Val Acc: {val_acc:.4f}")

    # ---- early stopping logic ----
    if val_acc > best_val_acc:
        best_val_acc = val_acc
        best_model_state = lstm_model.state_dict()
        patience_counter = 0  # reset
    else:
        patience_counter += 1
        if patience_counter >= patience:
            print(f"⏹️ Early stopping at epoch {epoch+1}. Best Val Acc: {best_val_acc:.4f}")
            break

# Restore best model weights
if best_model_state is not None:
    lstm_model.load_state_dict(best_model_state)

# Save best model
torch.save(lstm_model.state_dict(), 'lstm_model_long.pth')

# load the model
#lstm_model = LSTMClassifier(input_dim=35, hidden_dim=128, output_dim=3, num_layers = 2).to(device)
#lstm_model.load_state_dict(torch.load('lstm_model.pth'))


Epoch 1/200 | Train Loss: 1.0573 | Train Acc: 0.4304 | Val Acc: 0.4517
Epoch 2/200 | Train Loss: 1.0236 | Train Acc: 0.4620 | Val Acc: 0.4767
Epoch 3/200 | Train Loss: 0.9981 | Train Acc: 0.4826 | Val Acc: 0.4916
Epoch 4/200 | Train Loss: 0.9783 | Train Acc: 0.4990 | Val Acc: 0.5038
Epoch 5/200 | Train Loss: 0.9608 | Train Acc: 0.5118 | Val Acc: 0.5116
Epoch 6/200 | Train Loss: 0.9440 | Train Acc: 0.5242 | Val Acc: 0.5220
Epoch 7/200 | Train Loss: 0.9276 | Train Acc: 0.5363 | Val Acc: 0.5285
Epoch 8/200 | Train Loss: 0.9112 | Train Acc: 0.5478 | Val Acc: 0.5372
Epoch 9/200 | Train Loss: 0.8960 | Train Acc: 0.5577 | Val Acc: 0.5450
Epoch 10/200 | Train Loss: 0.8812 | Train Acc: 0.5679 | Val Acc: 0.5498
Epoch 11/200 | Train Loss: 0.8679 | Train Acc: 0.5764 | Val Acc: 0.5537
Epoch 12/200 | Train Loss: 0.8543 | Train Acc: 0.5851 | Val Acc: 0.5617
Epoch 13/200 | Train Loss: 0.8424 | Train Acc: 0.5931 | Val Acc: 0.5655
Epoch 14/200 | Train Loss: 0.8306 | Train Acc: 0.6005 | Val Acc: 0.5679
E

In [3]:
import numpy as np
# Load data
data = np.load("data/lstm_train.npz")
X, y = data["X"], data["y"]

# X shape: (num_samples, window_size, num_features)
X_2d = X.reshape(X.shape[0], -1)  # shape: (num_samples, window_size * num_features)
from sklearn.preprocessing import LabelEncoder

import lightgbm as lgb
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report

# Split (can use original train/test split instead)
X_train, X_val, y_train, y_val = train_test_split(X_2d, y, test_size=0.2, random_state=42)

from lightgbm import LGBMClassifier
from lightgbm import early_stopping, log_evaluation
from sklearn.metrics import classification_report

lgbm_model = LGBMClassifier(
    min_child_samples=5,       # default 20
    min_split_gain=0.0,        # allow weaker splits
    learning_rate=0.05,
    num_leaves=64,
    max_depth=-1
)

lgbm_model.fit(
    X_train, y_train,
    eval_set=[(X_val, y_val)],
    eval_metric="multi_logloss", 
    callbacks=[early_stopping(stopping_rounds=50), log_evaluation(25)]
)

y_pred = lgbm_model.predict(X_val)
print(classification_report(y_val, y_pred))

# save the model
lgbm_model.booster_.save_model("lgbm_model.txt")

# load the model
# lgbm_model = lgb.Booster(model_file='lgbm_model.txt')



[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 2.486899 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 489600
[LightGBM] [Info] Number of data points in the train set: 732992, number of used features: 1920
[LightGBM] [Info] Start training from score -1.303348
[LightGBM] [Info] Start training from score -0.984562
[LightGBM] [Info] Start training from score -1.036269
Training until validation scores don't improve for 50 rounds
[25]	valid_0's multi_logloss: 1.05494
[50]	valid_0's multi_logloss: 1.04236
[75]	valid_0's multi_logloss: 1.03365
[100]	valid_0's multi_logloss: 1.02652
Did not meet early stopping. Best iteration is:
[100]	valid_0's multi_logloss: 1.02652
              precision    recall  f1-score   support

           0       0.56      0.18      0.28     49801
           1       0.47      0.56      0.51     68626
           2       0.45      0.59      0.51     64822

    accuracy             

<lightgbm.basic.Booster at 0x73d21be340a0>

In [11]:
lgbm_model.booster_.save_model("lgbm_model.txt")

<lightgbm.basic.Booster at 0x77c886ed39a0>

In [1]:
import torch
import torch.nn as nn

class TimeSeriesTransformer(nn.Module):
    def __init__(self, input_dim, model_dim=64, num_heads=4, num_layers=2, num_classes=3, dropout=0.1):
        super().__init__()
        self.model_dim = model_dim
        self.input_proj = nn.Linear(input_dim, model_dim)
        self.pos_encoder = PositionalEncoding(model_dim, dropout)

        encoder_layers = nn.TransformerEncoderLayer(d_model=model_dim, nhead=num_heads, dropout=dropout, batch_first=True)
        self.transformer_encoder = nn.TransformerEncoder(encoder_layers, num_layers=num_layers)

        self.classifier = nn.Sequential(
            nn.Linear(model_dim, model_dim),
            nn.ReLU(),
            nn.Dropout(dropout),
            nn.Linear(model_dim, num_classes)
        )

    def forward(self, x):
        # x: (batch_size, seq_len, input_dim)
        x = self.input_proj(x)  # -> (batch_size, seq_len, model_dim)
        x = self.pos_encoder(x)
        x = self.transformer_encoder(x)  # -> (batch_size, seq_len, model_dim)
        x = x.mean(dim=1)  # global average pooling over time
        out = self.classifier(x)
        return out

class PositionalEncoding(nn.Module):
    def __init__(self, d_model, dropout=0.1, max_len=500):
        super().__init__()
        self.dropout = nn.Dropout(p=dropout)
        position = torch.arange(max_len).unsqueeze(1)
        div_term = torch.exp(torch.arange(0, d_model, 2) * (-torch.log(torch.tensor(10000.0)) / d_model))
        pe = torch.zeros(1, max_len, d_model)
        pe[0, :, 0::2] = torch.sin(position * div_term)
        pe[0, :, 1::2] = torch.cos(position * div_term)
        self.register_buffer('pe', pe)

    def forward(self, x):
        x = x + self.pe[:, :x.size(1)]
        return self.dropout(x)

In [2]:
import numpy as np
from torch.utils.data import TensorDataset, DataLoader

# Load .npz
data = np.load("data/lstm_train.npz")
X, y = data['X'], data['y']

# Train/Val split
from sklearn.model_selection import train_test_split
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

# Convert to torch
X_train_t = torch.tensor(X_train, dtype=torch.float32)
y_train_t = torch.tensor(y_train, dtype=torch.long)
X_val_t = torch.tensor(X_val, dtype=torch.float32)
y_val_t = torch.tensor(y_val, dtype=torch.long)

train_loader = DataLoader(TensorDataset(X_train_t, y_train_t), batch_size=128, shuffle=True)
val_loader = DataLoader(TensorDataset(X_val_t, y_val_t), batch_size=128)

In [3]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = TimeSeriesTransformer(input_dim=X.shape[2], num_classes=3).to(device)

criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)

EPOCHS = 200
early_stop_patience = 10
best_val_acc = 0.0
epochs_no_improve = 0
best_model_state = None

for epoch in range(EPOCHS):
    model.train()
    total_loss = 0
    for xb, yb in train_loader:
        xb, yb = xb.to(device), yb.to(device)
        optimizer.zero_grad()
        pred = model(xb)
        loss = criterion(pred, yb)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
    # Calculate validation accuracy at the end of each epoch
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for xb, yb in val_loader:
            xb, yb = xb.to(device), yb.to(device)
            outputs = model(xb)
            _, predicted = torch.max(outputs, 1)
            correct += (predicted == yb).sum().item()
            total += yb.size(0)
    val_acc = correct / total
    print(f"Epoch {epoch+1} | Valid Acc: {val_acc:.4f}")

    print(f"Epoch {epoch+1} | Train Loss: {total_loss / len(train_loader):.4f}")

    # Early stopping logic
    if val_acc > best_val_acc:
        best_val_acc = val_acc
        epochs_no_improve = 0
        best_model_state = model.state_dict()
    else:
        epochs_no_improve += 1
        if epochs_no_improve >= early_stop_patience:
            print(f"Early stopping at epoch {epoch+1}. Best val acc: {best_val_acc:.4f}")
            if best_model_state is not None:
                model.load_state_dict(best_model_state)
            break

Epoch 1 | Valid Acc: 0.4274
Epoch 1 | Train Loss: 1.0715
Epoch 2 | Valid Acc: 0.4347
Epoch 2 | Train Loss: 1.0580
Epoch 3 | Valid Acc: 0.4444
Epoch 3 | Train Loss: 1.0480
Epoch 4 | Valid Acc: 0.4518
Epoch 4 | Train Loss: 1.0397
Epoch 5 | Valid Acc: 0.4589
Epoch 5 | Train Loss: 1.0322
Epoch 6 | Valid Acc: 0.4688
Epoch 6 | Train Loss: 1.0255
Epoch 7 | Valid Acc: 0.4716
Epoch 7 | Train Loss: 1.0189
Epoch 8 | Valid Acc: 0.4783
Epoch 8 | Train Loss: 1.0123
Epoch 9 | Valid Acc: 0.4830
Epoch 9 | Train Loss: 1.0061
Epoch 10 | Valid Acc: 0.4848
Epoch 10 | Train Loss: 0.9994
Epoch 11 | Valid Acc: 0.4927
Epoch 11 | Train Loss: 0.9930
Epoch 12 | Valid Acc: 0.4974
Epoch 12 | Train Loss: 0.9868
Epoch 13 | Valid Acc: 0.5006
Epoch 13 | Train Loss: 0.9812
Epoch 14 | Valid Acc: 0.5057
Epoch 14 | Train Loss: 0.9745
Epoch 15 | Valid Acc: 0.5088
Epoch 15 | Train Loss: 0.9682
Epoch 16 | Valid Acc: 0.5151
Epoch 16 | Train Loss: 0.9622
Epoch 17 | Valid Acc: 0.5104
Epoch 17 | Train Loss: 0.9561
Epoch 18 | Vali

In [4]:
from sklearn.metrics import classification_report

model.eval()
all_preds, all_labels = [], []
with torch.no_grad():
    for xb, yb in val_loader:
        xb = xb.to(device)
        preds = model(xb).argmax(dim=1).cpu().numpy()
        all_preds.extend(preds)
        all_labels.extend(yb.numpy())

print(classification_report(all_labels, all_preds))

              precision    recall  f1-score   support

           0       0.60      0.73      0.66     49801
           1       0.57      0.42      0.49     68626
           2       0.65      0.73      0.69     64822

    accuracy                           0.61    183249
   macro avg       0.61      0.63      0.61    183249
weighted avg       0.61      0.61      0.60    183249



In [5]:
torch.save(model.state_dict(), "transformer_model_long.pth")

In [None]:
# model = TimeSeriesTransformer(input_dim=X.shape[2], num_classes=3)
# model.load_state_dict(torch.load("transformer_model.pth"))
# model.to(device)
# model.eval()