In [17]:
import numpy as np
import pandas as pd
import yfinance as yf
from pathlib import Path

# Notebook is in bayes_bandit/notebooks
CWD = Path.cwd()
ROOT = CWD.parent

DATA_RAW = Path("../data_raw")
DATA_PROCESSED = Path("../data_processed")
DATA_RAW.mkdir(exist_ok=True)
DATA_PROCESSED.mkdir(exist_ok=True)

for p in [DATA_RAW, DATA_PROCESSED]:
    p.mkdir(parents=True, exist_ok=True)

print("CWD:", CWD)
print("ROOT:", ROOT)
print("data_raw:", DATA_RAW)
print("data_processed:", DATA_PROCESSED)


CWD: c:\Users\nicka\dev\bayes_bandit\notebooks
ROOT: c:\Users\nicka\dev\bayes_bandit
data_raw: ..\data_raw
data_processed: ..\data_processed


In [18]:
START_DATE = "2010-01-01"
END_DATE   = None  

# TICKERS = ["SPY", "AAPL", "MSFT", "NVDA", "TLT", "GLD"]
# TICKERS

UNIVERSE_TICKERS = [
    "SPY",  # broad market
    "QQQ",
    "TLT", "IEF", "HYG", "LQD",  # bonds / credit
    "GLD", "SLV",                # metals
    # sectors / large caps, add as many as you want:
    "AAPL", "MSFT", "GOOGL", "AMZN", "META", "NVDA",
    "JPM", "BAC", "GS", "WFC",
    "XOM", "CVX",
    "UNH", "JNJ",
    "HD", "PG", "KO",
    "PFE", "MRK",
    # ... extend this list to ~50–100 names
]
UNIVERSE_TICKERS = sorted(set(UNIVERSE_TICKERS))
len(UNIVERSE_TICKERS), UNIVERSE_TICKERS[:10]




(27, ['AAPL', 'AMZN', 'BAC', 'CVX', 'GLD', 'GOOGL', 'GS', 'HD', 'HYG', 'IEF'])

In [19]:
prices_path = DATA_RAW / "prices_adj_universe.parquet"

if prices_path.exists():
    adj = pd.read_parquet(prices_path)
else:
    data = yf.download(
        UNIVERSE_TICKERS,
        start=START_DATE,
        end=END_DATE,
        auto_adjust=True,   # <-- keep this
        progress=False,
    )
    # With auto_adjust=True, adjusted prices live in 'Close'
    adj = data["Close"].copy()
    adj.columns.name = "ticker"
    adj.index.name = "date"

    # Drop columns that are completely NaN (delisted / failed tickers)
    adj = adj.dropna(axis=1, how="all")

    adj.to_parquet(prices_path)

adj.head()


ticker,AAPL,AMZN,BAC,CVX,GLD,GOOGL,GS,HD,HYG,IEF,...,NVDA,PFE,PG,QQQ,SLV,SPY,TLT,UNH,WFC,XOM
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2010-01-04,6.418382,6.695,12.232429,41.669128,109.800003,15.576997,130.641495,19.571789,35.435688,62.542007,...,0.423807,9.354956,38.417023,40.393551,17.23,85.27919,56.984058,24.762829,18.026003,37.881802
2010-01-05,6.429481,6.7345,12.630036,41.964283,109.699997,15.508403,132.951248,19.715157,35.603783,62.816559,...,0.429995,9.221525,38.429596,40.393551,17.51,85.504967,57.352074,24.723557,18.520864,38.029713
2010-01-06,6.327213,6.6125,12.778169,41.969559,111.510002,15.117455,131.532211,19.646885,35.69585,62.563118,...,0.432746,9.191875,38.247326,40.14991,17.860001,85.56514,56.584358,24.967024,18.547256,38.358406
2010-01-07,6.315513,6.5,13.199171,41.811432,110.82,14.765528,134.106049,19.878994,35.839951,62.563118,...,0.424265,9.157284,38.039879,40.176025,17.889999,85.926346,56.679508,25.925171,19.220259,38.237885
2010-01-08,6.357502,6.676,13.082226,41.885201,111.370003,14.96237,131.569962,19.783417,35.896015,62.640572,...,0.425182,9.231411,37.989613,40.506691,18.15,86.21228,56.654118,25.681702,19.04211,38.084488


In [20]:
# Wide log returns
wide_ret = np.log(adj / adj.shift(1))
wide_ret.index.name = "date"

# Optionally drop first row (all NaN)
wide_ret = wide_ret.iloc[1:]

wide_ret_path = DATA_PROCESSED / "returns_wide_universe.parquet"
wide_ret.to_parquet(wide_ret_path)

# Long format
returns_long = (
    wide_ret.stack()
    .reset_index()
    .rename(columns={0: "ret"})
)

# remove NaNs
returns_long = returns_long.dropna(subset=["ret"])

returns_long_path = DATA_PROCESSED / "returns_long_universe.parquet"
returns_long.to_parquet(returns_long_path)

returns_long.head()


Unnamed: 0,date,ticker,ret
0,2010-01-05,AAPL,0.001728
1,2010-01-05,AMZN,0.005883
2,2010-01-05,BAC,0.031987
3,2010-01-05,CVX,0.007058
4,2010-01-05,GLD,-0.000911


In [68]:
# wide_ret = pd.read_parquet(DATA_PROCESSED / "returns_wide_universe.parquet")

# LOOKBACK = 60      # trading days
# TOP_K = 20         # number of active tickers per day

# # Rolling std, then shift by 1 day so that vol at date t uses data up to t-1
# daily_vol = wide_ret.rolling(window=LOOKBACK, min_periods=LOOKBACK).std().shift(1)

# rows = []

# for date, row in daily_vol.iterrows():
#     vols = row.dropna()
#     if vols.empty:
#         continue
#     top_tickers = vols.sort_values(ascending=False).head(TOP_K).index
#     for ticker in top_tickers:
#         rows.append((date, ticker))

# active_universe = pd.DataFrame(rows, columns=["date", "ticker"])
# active_universe["active"] = True

# active_path = DATA_PROCESSED / f"active_universe_top{TOP_K}_vol.parquet"
# active_universe.to_parquet(active_path)

# active_universe.head()



In [21]:
wide_ret = pd.read_parquet(DATA_PROCESSED / "returns_wide_universe.parquet")

LOOKBACK = 60      # trading days
TOP_K = 20         # number of active tickers per day (vol-based *excluding* SPY)
ANCHOR_TICKER = "SPY"

# Rolling std, then shift by 1 day so that vol at date t uses data up to t-1
daily_vol = wide_ret.rolling(window=LOOKBACK, min_periods=LOOKBACK).std().shift(1)

MOM_LOOKBACK = 60  # past 60 trading days as momentum window (can tune)
daily_mom = wide_ret.rolling(window=MOM_LOOKBACK, min_periods=MOM_LOOKBACK).sum().shift(1)


rows = []

for date, row in daily_vol.iterrows():
    vols = row.dropna()
    if vols.empty:
        continue

    # Sort tickers by volatility (descending)
    vols_sorted = vols.sort_values(ascending=False)

    # Take top-K by vol
    top_tickers = list(vols_sorted.head(TOP_K).index)

    # Always include SPY if it exists on that date
    if ANCHOR_TICKER in vols.index and ANCHOR_TICKER not in top_tickers:
        top_tickers.append(ANCHOR_TICKER)

    for ticker in top_tickers:
        rows.append((date, ticker))

active_universe = pd.DataFrame(rows, columns=["date", "ticker"])
active_universe["active"] = True

active_path = DATA_PROCESSED / f"active_universe_top{TOP_K}_vol_plus_{ANCHOR_TICKER}.parquet"
active_universe.to_parquet(active_path)

print("Saved active universe to:", active_path)
active_universe.head()



Saved active universe to: ..\data_processed\active_universe_top20_vol_plus_SPY.parquet


Unnamed: 0,date,ticker,active
0,2010-04-01,NVDA,True
1,2010-04-01,BAC,True
2,2010-04-01,UNH,True
3,2010-04-01,SLV,True
4,2010-04-01,AMZN,True


In [22]:
summary = returns_long.groupby("ticker")["ret"].agg(
    mean="mean",
    std="std",
    skew=lambda x: x.skew(),
    kurtosis=lambda x: x.kurt(),
)

summary


Unnamed: 0_level_0,mean,std,skew,kurtosis
ticker,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
AAPL,0.000941,0.017772,-0.151399,6.121489
AMZN,0.000882,0.020664,0.031863,5.953125
BAC,0.00037,0.020993,-0.317604,10.154065
CVX,0.00032,0.016883,-0.927561,26.046087
GLD,0.000314,0.009966,-0.501302,4.746034
GOOGL,0.000755,0.017396,0.125003,7.085535
GS,0.000469,0.018162,-0.254088,8.303651
HD,0.000723,0.014711,-0.989131,18.27395
HYG,0.000206,0.005257,-0.25215,19.150714
IEF,0.000108,0.004184,0.023361,2.285766


In [23]:
out_path = DATA_PROCESSED / "returns_panel.parquet"
returns_long.to_parquet(out_path, index=False)

print("Saved returns panel to:", out_path)
print("Shape:", returns_long.shape)


Saved returns panel to: ..\data_processed\returns_panel.parquet
Shape: (107563, 3)


In [24]:
test = pd.read_parquet(DATA_PROCESSED / "returns_panel.parquet")
print(test.head())
print(test.dtypes)


        date ticker       ret
0 2010-01-05   AAPL  0.001728
1 2010-01-05   AMZN  0.005883
2 2010-01-05    BAC  0.031987
3 2010-01-05    CVX  0.007058
4 2010-01-05    GLD -0.000911
date      datetime64[ns]
ticker            object
ret              float64
dtype: object


In [25]:
out_path = DATA_PROCESSED / "returns_panel.parquet"
returns_long.to_parquet(out_path, index=False)

print("Saved returns panel to:", out_path)
print("Shape:", returns_long.shape)


Saved returns panel to: ..\data_processed\returns_panel.parquet
Shape: (107563, 3)


In [26]:
test = pd.read_parquet(DATA_PROCESSED / "returns_panel.parquet")
print(test.head())
print(test.dtypes)


        date ticker       ret
0 2010-01-05   AAPL  0.001728
1 2010-01-05   AMZN  0.005883
2 2010-01-05    BAC  0.031987
3 2010-01-05    CVX  0.007058
4 2010-01-05    GLD -0.000911
date      datetime64[ns]
ticker            object
ret              float64
dtype: object


In [27]:
wide_ret = pd.read_parquet(DATA_PROCESSED / "returns_wide_universe.parquet")
active_universe = pd.read_parquet(DATA_PROCESSED / f"active_universe_top{TOP_K}_vol.parquet")
daily_vol = wide_ret.rolling(window=LOOKBACK, min_periods=LOOKBACK).std().shift(1)

# Turn active_universe into a dict: date -> list of tickers
active_map = (
    active_universe
    .groupby("date")["ticker"]
    .apply(list)
    .to_dict()
)


In [28]:
# -------------------------------------------------------------------
# Portfolio templates: EW_active, EW_top5vol, MOM_top5, LOWVOL_top5,
# and a static Markowitz portfolio (Markowitz_static)
# -------------------------------------------------------------------

LOOKBACK = 60       # already used for vol; reuse for other features
TOP_K    = 20       # active universe size (already defined above)
MOM_K    = 5        # number of momentum names
LOWVOL_K = 5        # number of low-vol names

wide_ret = pd.read_parquet(DATA_PROCESSED / "returns_wide_universe.parquet")

# 1) Rolling volatility (already computed above if you like, but ensure we have it)
daily_vol = wide_ret.rolling(window=LOOKBACK, min_periods=LOOKBACK).std().shift(1)

# 2) Rolling momentum: past LOOKBACK-day cumulative return, shifted by 1 day
#    to avoid lookahead.
mom_raw = wide_ret.rolling(window=LOOKBACK, min_periods=LOOKBACK).sum().shift(1)
# Alternative: cumulative return (exp of sum of log returns) - 1
# mom_raw = (wide_ret.rolling(LOOKBACK, min_periods=LOOKBACK).sum().shift(1)).apply(np.exp) - 1

# 3) Equal-weight portfolios (existing + new feature-based ones)
portfolio_rows = []

# active_universe: you may have your own "plus SPY" version; just reuse it:
active_universe = pd.read_parquet(
    DATA_PROCESSED / f"active_universe_top{TOP_K}_vol_plus_SPY.parquet"
)

# Turn active_universe into a dict: date -> list of tickers
active_map = (
    active_universe
    .groupby("date")["ticker"]
    .apply(list)
    .to_dict()
)

for date in wide_ret.index:
    if date not in active_map:
        continue

    active_tickers = active_map[date]
    active_tickers = [t for t in active_tickers if t in wide_ret.columns]
    if len(active_tickers) == 0:
        continue

    # returns of active tickers at this date
    ret_t = wide_ret.loc[date, active_tickers]

    # --- EW_active: equal-weight all active tickers ---
    w_e = np.ones(len(active_tickers)) / len(active_tickers)
    ret_e = float(np.dot(w_e, ret_t.values))
    portfolio_rows.append((date, "EW_active", ret_e))

    # --- EW_top5vol: equal-weight of top 5 vol among active tickers ---
    vol_row = daily_vol.loc[date, active_tickers].dropna()
    if len(vol_row) >= 5:
        top5_vol = vol_row.sort_values(ascending=False).head(5).index
        ret_top5vol = wide_ret.loc[date, top5_vol]
        w5 = np.ones(len(top5_vol)) / len(top5_vol)
        ret_5_vol = float(np.dot(w5, ret_top5vol.values))
        portfolio_rows.append((date, "EW_top5vol", ret_5_vol))

    # --- MOM_top5: top 5 momentum names among active tickers ---
    mom_row = mom_raw.loc[date, active_tickers].dropna()
    if len(mom_row) >= MOM_K:
        top_mom = mom_row.sort_values(ascending=False).head(MOM_K).index
        ret_mom = wide_ret.loc[date, top_mom]
        w_mom = np.ones(len(top_mom)) / len(top_mom)
        ret_mom_port = float(np.dot(w_mom, ret_mom.values))
        portfolio_rows.append((date, "MOM_top5", ret_mom_port))

    # --- LOWVOL_top5: 5 lowest-vol names among active tickers ---
    lowvol_row = vol_row  # already vol_row = daily_vol.loc[date, active_tickers].dropna()
    if len(lowvol_row) >= LOWVOL_K:
        low5 = lowvol_row.sort_values(ascending=True).head(LOWVOL_K).index
        ret_low5 = wide_ret.loc[date, low5]
        w_low5 = np.ones(len(low5)) / len(low5)
        ret_lowvol_port = float(np.dot(w_low5, ret_low5.values))
        portfolio_rows.append((date, "LOWVOL_top5", ret_lowvol_port))

# -------------------------------------------------------------------
# 4) Static Markowitz portfolio (Markowitz_static)
#    - Estimate mean & covariance on an initial training window
#    - Compute tangency-style weights (long-only, normalised)
#    - Apply those weights out-of-sample to get a single return series
# -------------------------------------------------------------------

TRAIN_DAYS = 252 * 5   # ~5 years of daily data
if wide_ret.shape[0] > TRAIN_DAYS:
    train = wide_ret.iloc[:TRAIN_DAYS].dropna(axis=1, how="any")

    # Restrict to tickers with full data in training window
    train_cols = train.columns.tolist()

    mu_hat = train.mean()            # (n_assets,)
    Sigma_hat = train.cov()          # (n_assets x n_assets)

    # Use pseudo-inverse for stability
    Sigma_inv = np.linalg.pinv(Sigma_hat.values)
    mu_vec = mu_hat.values.reshape(-1, 1)

    # Tangency-like portfolio: w ∝ Σ^{-1} μ
    w_unnorm = Sigma_inv @ mu_vec    # shape (n_assets, 1)
    w_unnorm = w_unnorm.flatten()

    # Enforce long-only by clipping negatives and renormalising
    w_unnorm = np.clip(w_unnorm, 0.0, None)
    if w_unnorm.sum() == 0.0:
        # Fallback: equal-weight if everything was clipped
        w_unnorm = np.ones_like(w_unnorm)

    w_star = w_unnorm / w_unnorm.sum()

    markowitz_tickers = train_cols
    w_star_series = pd.Series(w_star, index=markowitz_tickers)

    # Compute Markowitz returns over *full* period where data exists for these tickers
    wide_mark = wide_ret[markowitz_tickers].dropna(how="any", axis=0)
    mark_ret_series = (wide_mark @ w_star_series).dropna()

    for d, r in mark_ret_series.items():
        portfolio_rows.append((d, "Markowitz_static", float(r)))
else:
    print("Not enough data for Markowitz_static; skipping that template.")

# -------------------------------------------------------------------
# Save all portfolio templates to disk
# -------------------------------------------------------------------
port_returns = pd.DataFrame(portfolio_rows, columns=["date", "portfolio", "ret"])
port_returns = port_returns.sort_values("date")

port_ret_path = DATA_PROCESSED / "portfolio_returns_dynamic.parquet"
port_returns.to_parquet(port_ret_path)

print("Saved dynamic portfolio returns to:", port_ret_path)
print(port_returns["portfolio"].value_counts())


Saved dynamic portfolio returns to: ..\data_processed\portfolio_returns_dynamic.parquet
portfolio
Markowitz_static    4006
EW_active           3946
EW_top5vol          3946
MOM_top5            3946
LOWVOL_top5         3946
Name: count, dtype: int64


In [29]:
port_returns.head()


Unnamed: 0,date,portfolio,ret
15784,2010-01-05,Markowitz_static,0.003374
15785,2010-01-06,Markowitz_static,-0.002281
15786,2010-01-07,Markowitz_static,0.004022
15787,2010-01-08,Markowitz_static,0.000735
15788,2010-01-11,Markowitz_static,-0.002843


In [30]:
class AssetBanditEnv:
    def __init__(self, returns_long: pd.DataFrame, active_universe: pd.DataFrame):
        """
        returns_long: DataFrame with columns ['date', 'ticker', 'ret']
        active_universe: DataFrame with ['date', 'ticker'] for active tickers
        """
        # Merge to keep only active rows
        df = returns_long.merge(
            active_universe[["date", "ticker"]],
            on=["date", "ticker"],
            how="inner",
        )
        df = df.sort_values("date")

        self.data = df
        self.dates = df["date"].unique()
        self.current_idx = 0

    def reset(self):
        self.current_idx = 0

    def current_date(self):
        return self.dates[self.current_idx]

    def available_arms(self):
        d = self.current_date()
        return (
            self.data.loc[self.data["date"] == d, "ticker"]
            .unique()
            .tolist()
        )

    def step(self, chosen_ticker: str):
        """
        chosen_ticker must be one of available_arms().
        Returns: date, ret, done
        """
        d = self.current_date()
        mask = (self.data["date"] == d) & (self.data["ticker"] == chosen_ticker)
        r = float(self.data.loc[mask, "ret"].iloc[0])

        self.current_idx += 1
        done = self.current_idx >= len(self.dates)

        return d, r, done


In [31]:
class PortfolioBanditEnv:
    """
    Bandit environment over portfolio templates.

    Data assumptions:
      - port_returns: ['date', 'portfolio', 'ret']
    """

    def __init__(self, port_returns: pd.DataFrame):
        df = port_returns.sort_values(["date", "portfolio"])
        self.data = df
        self.dates = df["date"].unique()
        self.current_idx = 0

    def reset(self):
        self.current_idx = 0

    def current_date(self):
        return self.dates[self.current_idx]

    def available_arms(self):
        """
        Portfolios actually available on the current date.
        """
        d = self.current_date()
        return (
            self.data.loc[self.data["date"] == d, "portfolio"]
            .unique()
            .tolist()
        )

    def step(self, portfolio_name: str):
        """
        portfolio_name must be in available_arms() for the current date.
        Returns: date, ret, done
        """
        d = self.current_date()
        mask = (self.data["date"] == d) & (self.data["portfolio"] == portfolio_name)

        if not mask.any():
            raise ValueError(
                f"No row for date={d} and portfolio={portfolio_name}. "
                f"Check available_arms() and your portfolio construction."
            )

        r = float(self.data.loc[mask, "ret"].iloc[0])

        self.current_idx += 1
        done = self.current_idx >= len(self.dates)

        return d, r, done


In [32]:
port_returns = pd.read_parquet(DATA_PROCESSED / "portfolio_returns_dynamic.parquet")
penv = PortfolioBanditEnv(port_returns)
penv.reset()

wealth = 1.0
while True:
    arms = penv.available_arms()
    # your portfolio-level TS policy chooses a portfolio_name here
    chosen_port = np.random.choice(arms)  # placeholder
    date, ret, done = penv.step(chosen_port)
    wealth *= (1.0 + ret)
    if done:
        break
