In [7]:
import yfinance as yf
import pandas as pd

In [3]:
df = yf.download("SPY", period='max', interval='1mo')


  df = yf.download("SPY", period='max', interval='1mo')
[*********************100%***********************]  1 of 1 completed


In [9]:
import os
import pandas as pd
import yfinance as yf

TICKERS = ["SPY", "IVV", "VOO", "VTI", "QQQ", "VEA", "VWO"]
START = "2010-01-01"
INTERVAL = "1mo"  # monthly

def download_adjusted_close(tickers, start, interval="1mo"):
    """
    Downloads adjusted prices by using auto_adjust=True and reading 'Close'.
    Handles:
      - MultiIndex columns (field->ticker)
      - Single ticker returning a Series/DataFrame
      - Fallback to per-ticker downloads if the bulk call is missing data
    """
    # Bulk download with auto-adjust (Close == Adj Close)
    df = yf.download(
        tickers,
        start=start,
        interval=interval,
        auto_adjust=True,   # <-- key change
        group_by=None,
        progress=False,
        threads=True,
    )

    # Normalize to a 2D DataFrame with tickers as columns
    prices = pd.DataFrame()

    if isinstance(df.columns, pd.MultiIndex):
        # Expect a top-level field like 'Close'
        if "Close" in df.columns.get_level_values(0):
            prices = df["Close"].copy()
        else:
            # Rare case: try to find any price-like field
            for field in ("Close", "Adj Close"):
                if field in df.columns.get_level_values(0):
                    prices = df[field].copy()
                    break
    else:
        # Single-level columns:
        # - Multi-ticker bulk call sometimes returns columns per ticker
        # - Single ticker returns a Series or a DataFrame with 'Close'
        if set(tickers).issubset(set(df.columns)):
            # Already columns per ticker (probably 'Close' only)
            prices = df.copy()
        elif "Close" in df.columns:
            # Single-ticker DataFrame with 'Close'
            prices = df[["Close"]].copy()
            # Rename column to the ticker if it’s a single ticker
            if len(tickers) == 1:
                prices.columns = tickers
        elif isinstance(df, pd.Series):
            # Single-ticker Series (likely 'Close')
            prices = df.to_frame(name=tickers[0])
        else:
            prices = pd.DataFrame()

    # If bulk path failed or is empty, fallback per ticker
    if prices.empty or prices.shape[1] < len(tickers):
        cols = []
        for t in tickers:
            try:
                h = yf.Ticker(t).history(
                    start=start, interval=interval, auto_adjust=True
                )
                if not h.empty:
                    s = h["Close"].rename(t)
                    cols.append(s)
            except Exception:
                pass
        if cols:
            prices = pd.concat(cols, axis=1)

    # Basic cleaning
    prices = prices.sort_index()
    prices = prices.dropna(how="all")
    # Some ETFs were launched after 2010; keep partial columns
    # but you can also drop cols that are entirely NaN:
    prices = prices.dropna(axis=1, how="all")

    return prices

if __name__ == "__main__":
    adj_close = download_adjusted_close(TICKERS, START, INTERVAL)

    if adj_close.empty:
        raise RuntimeError("No data returned. Try a different interval or check network.")

    # Ensure folder exists and save
    os.makedirs("data", exist_ok=True)
    adj_close.to_csv("data/index_funds_data.csv")

    print("✅ Downloaded monthly *adjusted* prices (via Close) from 2010 to today.")
    print(adj_close.head())
    print(f"\nColumns: {list(adj_close.columns)}")

✅ Downloaded monthly *adjusted* prices (via Close) from 2010 to today.
                                 SPY        IVV  VOO        VTI        QQQ  \
Date                                                                         
2010-01-01 00:00:00-05:00  80.809418  80.910347  NaN  41.111614  37.234818   
2010-02-01 00:00:00-05:00  83.330292  83.458023  NaN  42.525875  38.949081   
2010-03-01 00:00:00-05:00  88.040802  88.185181  NaN  45.066975  41.907669   
2010-04-01 00:00:00-04:00  89.770966  89.993034  NaN  46.232800  42.893200   
2010-05-01 00:00:00-04:00  82.638260  82.792976  NaN  42.588226  39.722370   

                                 VEA        VWO  
Date                                             
2010-01-01 00:00:00-05:00  20.203415  25.061535  
2010-02-01 00:00:00-05:00  20.353348  25.533403  
2010-03-01 00:00:00-05:00  21.627773  27.624052  
2010-04-01 00:00:00-04:00  21.034784  27.565065  
2010-05-01 00:00:00-04:00  18.610102  25.035315  

Columns: ['SPY', 'IVV', 'VOO', 