In [6]:
# ---- setup ----
import sys, platform, pandas as pd
import yfinance as yf

print("Python:", sys.version)
print("Pandas:", pd.__version__)
print("yfinance:", yf.__version__)


Python: 3.9.6 (default, Mar 29 2024, 10:51:09) 
[Clang 15.0.0 (clang-1500.3.9.4)]
Pandas: 2.3.2
yfinance: 0.2.66


In [7]:
# ---- download prices ----
tickers = ["NVDA", "AAPL"]
hist = yf.download(
    tickers,
    start="2018-01-01",
    auto_adjust=True,      # adjusted close/ohlc
    group_by="column",     # yfinance default; we’ll normalize below
    progress=False
)

# show the raw shape/columns so we know what we’re normalizing
print(type(hist.columns), getattr(hist.columns, "nlevels", 1), "levels")
print(hist.columns)
hist.tail(3)


<class 'pandas.core.indexes.multi.MultiIndex'> 2 levels
MultiIndex([( 'Close', 'AAPL'),
            ( 'Close', 'NVDA'),
            (  'High', 'AAPL'),
            (  'High', 'NVDA'),
            (   'Low', 'AAPL'),
            (   'Low', 'NVDA'),
            (  'Open', 'AAPL'),
            (  'Open', 'NVDA'),
            ('Volume', 'AAPL'),
            ('Volume', 'NVDA')],
           names=['Price', 'Ticker'])


Price,Close,Close,High,High,Low,Low,Open,Open,Volume,Volume
Ticker,AAPL,NVDA,AAPL,NVDA,AAPL,NVDA,AAPL,NVDA,AAPL,NVDA
Date,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2
2025-10-22,258.450012,180.279999,262.850006,183.440002,255.429993,176.759995,262.649994,181.139999,45015300,162249600
2025-10-23,259.579987,182.160004,260.619995,183.029999,258.01001,179.789993,259.940002,180.419998,32754900,111363700
2025-10-24,262.820007,186.259995,264.130005,187.470001,259.179993,183.5,261.190002,183.839996,38221700,130942300


In [8]:
# ---- normalize to a flat table ----
# yfinance can return either:
#  A) a MultiIndex with two levels (field, ticker) or (ticker, field)
#  B) a single-level Index when a single ticker is passed
# We'll detect and normalize safely.

def normalize_yf(df: pd.DataFrame, tickers: list[str]) -> pd.DataFrame:
    if not isinstance(df.columns, pd.MultiIndex):
        # single ticker case -> just add the ticker name
        tkr = tickers[0] if len(tickers) == 1 else "UNK"
        out = df.reset_index().copy()
        out.insert(1, "Ticker", tkr)
        # Ensure consistent column order
        cols = ["Date","Ticker","Open","High","Low","Close","Adj Close","Volume"]
        for c in cols:
            if c not in out.columns: out[c] = pd.NA
        return out[cols]

    # MultiIndex (two levels)
    lvl0 = list(df.columns.get_level_values(0))
    lvl1 = list(df.columns.get_level_values(1))
    s_tickers = set(tickers)

    # Decide which level is the ticker level by checking presence
    if s_tickers.issubset(set(lvl0)) and not s_tickers.issubset(set(lvl1)):
        ticker_level = 0
        other_level  = 1
        ticker_col_name = df.columns.names[ticker_level] or "Ticker"
    else:
        ticker_level = 1
        other_level  = 0
        ticker_col_name = df.columns.names[ticker_level] or "Ticker"

    out = (
        df
        .stack(level=ticker_level)                 # push ticker to rows
        .reset_index()
        .rename(columns={df.columns.names[ticker_level]: "Ticker",
                         "index":"Date"})
        .rename(columns={df.columns.names[other_level] or "level": "Field"})
        .pivot(index=["Date","Ticker"], columns="Field", values=0)  # 0 = stacked values
        .reset_index()
        .rename_axis(None, axis=1)
    )

    # Ensure canonical columns exist and order them
    wanted = ["Date","Ticker","Open","High","Low","Close","Adj Close","Volume"]
    for c in wanted:
        if c not in out.columns: out[c] = pd.NA
    out = out[wanted]

    # Types
    out["Date"] = pd.to_datetime(out["Date"])
    return out.sort_values(["Ticker","Date"]).reset_index(drop=True)

flat = normalize_yf(hist, tickers)
flat.head(10)


  df


KeyError: 'Field'