In [1]:
import sys, os, pickle
from joblib import Parallel, delayed
import pandas as pd
from jumpmodels.utils import filter_date_range
from jumpmodels.preprocess import StandardScalerPD, DataClipperStd
from jumpmodels.sparse_jump import SparseJumpModel
from sklearn.preprocessing import StandardScaler
from pypfopt.black_litterman import BlackLittermanModel, market_implied_prior_returns
from pypfopt.efficient_frontier import EfficientFrontier
import numpy as np
import matplotlib.pyplot as plt
from pypfopt import exceptions 

#sys.path.append('/Users/victor/Documents/thesis_vri_vp/vic_new')         # for mac
sys.path.append('C:\\Users\\victo\\git_new\\thesis_vri_vp\\vic_new')      # for windows
from feature_set_v2 import MergedDataLoader 


In [2]:
# 0) Global parameters ------------------------------------------------------------------
REFIT_FREQ        = "ME"        
MIN_TRAINING_YEARS= 8
MAX_TRAINING_YEARS= 12
INITIAL_TRAIN_START = "2002-05-31"
test_start        = "2017-01-01"

# Pick method to drive backtest: "grid", "bayes" or "history" ---------------------------
cv_choice = "bayes"

# Paths & tickers -----------------------------------------------------------------------
script_dir = os.getcwd()
base_dir   = os.path.abspath(os.path.join(script_dir, "..", ".."))
data_dir   = os.path.join(base_dir, "data_new")

factor_file = os.path.join(data_dir, "1estimation_index_returns.csv")
market_file = os.path.join(data_dir, "1macro_data.csv")
etf_file    = os.path.join(data_dir, "2trading_etf_returns_aligned.csv")

factors = ["iwf", "mtum", "qual", "size", "usmv", "vlue"]   # used everywhere

grid_df    = pd.read_parquet("cv_params_grid.parquet")
bayes_df   = pd.read_parquet("cv_params_bayes_v2.parquet") # v2 is the one searching between 20-2000
history_df = pd.read_parquet("cv_params_history.parquet")

In [3]:
# ──────────────────────────────────────────────────────────────
# HYPERPARAMETERS
# ──────────────────────────────────────────────────────────────
df_map = {
    "grid":    grid_df,
    "bayes":   bayes_df,
    "history": history_df
}
cv_df = df_map[cv_choice]

# ─────────────────────────────────────────────────────
# HYPERPARAMETER SMOOTHING SETUP
# ─────────────────────────────────────────────────────
# pick one
SMOOTH_METHOD = "none"   # options: "none", "rolling_median", "ewma"
SMOOTH_WINDOW = 3        # # of folds to include in the window
# ─────────────────────────────────────────────────────

# … right after cv_df = df_map[cv_choice] …
cv_df = cv_df.sort_values(["factor","date"])

if SMOOTH_METHOD == "none":
    # simply copy original λ & κ forward
    cv_df["sm_lambda"] = cv_df["best_lambda"]
    cv_df["sm_kappa"]  = cv_df["best_kappa"]

elif SMOOTH_METHOD == "rolling_median":
    # Centered rolling median
    cv_df["sm_lambda"] = (
        cv_df
        .groupby("factor")["best_lambda"]
        .transform(lambda x: x.rolling(SMOOTH_WINDOW, min_periods=1, center=True).median())
    )
    cv_df["sm_kappa"] = (
        cv_df
        .groupby("factor")["best_kappa"]
        .transform(lambda x: x.rolling(SMOOTH_WINDOW, min_periods=1, center=True).median())
    )

elif SMOOTH_METHOD == "ewma":
    # Exponential‐weight moving average
    cv_df["sm_lambda"] = (
        cv_df
        .groupby("factor")["best_lambda"]
        .transform(lambda x: x.ewm(span=SMOOTH_WINDOW, min_periods=1).mean())
    )
    cv_df["sm_kappa"] = (
        cv_df
        .groupby("factor")["best_kappa"]
        .transform(lambda x: x.ewm(span=SMOOTH_WINDOW, min_periods=1).mean())
    )

else:
    raise ValueError(f"Unknown SMOOTH_METHOD {SMOOTH_METHOD!r}")

# round κ back to integer
cv_df["sm_kappa"] = cv_df["sm_kappa"].round().astype(int)

# overwrite with the chosen values
cv_df["best_lambda"] = cv_df["sm_lambda"]
cv_df["best_kappa"]  = cv_df["sm_kappa"]

# (optional) drop helpers
cv_df.drop(columns=["sm_lambda","sm_kappa"], inplace=True)

# ─────────────────────────────────────────────────────
# Now build saved_hyperparams exactly as before
# ─────────────────────────────────────────────────────
saved_hyperparams = {}
for fac in factors:
    sub = cv_df[cv_df["factor"] == fac].sort_values("date")
    saved_hyperparams[fac] = [
        {
            "date":      row["date"],
            "new_lambda": row["best_lambda"],
            "new_kappa":  row["best_kappa"]
        }
        for _, row in sub.iterrows()
    ]


In [4]:
# ──────────────────────────────────────────────────────────────
# DATA‑LOADING BLOCK  (pulled from old notebook)
# ──────────────────────────────────────────────────────────────

# 1) Load full data for every factor + market ------------------------------------------------
factor_data_dict  = {}
factor_returns_ls = []

for fac in factors:
    print(f"Loading data for {fac}")
    data = MergedDataLoader(
        factor_file=factor_file,
        market_file=market_file,
        ver="v2",
        factor_col=fac
    ).load()

    common_idx = (data.X.index
                  .intersection(data.ret_ser.index)
                  .intersection(data.market_ser.index))

    X_full        = data.X.loc[common_idx]
    fac_ret_full  = data.ret_ser.loc[common_idx]
    mkt_ret_full  = data.market_ser.loc[common_idx]
    active_ret    = fac_ret_full - mkt_ret_full

    factor_data_dict[fac] = {
        "X"        : X_full,
        "fac_ret"  : fac_ret_full,
        "mkt_ret"  : mkt_ret_full,
        "active_ret": active_ret,
    }
    factor_returns_ls.append(fac_ret_full)

# save last loop’s mkt_ret_full as market series
all_market_ret = mkt_ret_full

# 2) Assemble master return dataframe (factors + Market + rf) -------------------------------
full_factors_df = pd.concat(factor_returns_ls, axis=1).dropna()
full_df = pd.concat([full_factors_df, all_market_ret], axis=1).dropna()
full_df.columns = factors + ["Market"]

# risk‑free
etf_df   = pd.read_csv(etf_file, index_col=0, parse_dates=True).dropna().sort_index()
rf_ser   = etf_df["rf"]
full_df  = pd.concat([full_df, rf_ser], axis=1).dropna()
full_df.columns = factors + ["Market", "rf"]

# 3) Define test index (everything from 2017‑01‑01 on) --------------------------------------
test_slice = full_df.loc[test_start:]
test_index = test_slice.index.sort_values()
# ──────────────────────────────────────────────────────────────


Loading data for iwf
Loading data for mtum
Loading data for qual
Loading data for size
Loading data for usmv
Loading data for vlue


In [5]:
# ------------------------------------------------------------
# 1  BUILD & CACHE FACTOR‑VIEWS  (run once, takes minutes)
# ------------------------------------------------------------
VIEWS_FILE = "bayes_factor_views_v2.pkl" # "SAVEfactor_views.pkl" is the views for the outperforming sharpe run
FORCE_REBUILD = False 

def _fit_one_factor(fac, refit_date, test_dates_chunk,
                    factor_data_dict, hyperparams,
                    min_years, max_years, init_start):

    # ---------- helpers ----------
    def get_train_window(current_date, full_data):
        train_end  = current_date
        train_start= max(train_end - pd.DateOffset(years=max_years),
                         pd.to_datetime(init_start))
        if (train_end - train_start) < pd.Timedelta(days=365.25*min_years):
            train_start = train_end - pd.DateOffset(years=min_years)
        idx = full_data.index
        subset = idx[(idx >= train_start) & (idx <= train_end)]
        start_date, end_date = subset.min(), subset.max()
        return start_date, end_date 

    # ---------- data ----------
    fac_data = factor_data_dict[fac]
    X   = fac_data["X"]
    ret = fac_data["fac_ret"]
    act = fac_data["active_ret"]

    lam = hyperparams["new_lambda"]
    kp  = hyperparams["new_kappa"]
    train_start, train_end = get_train_window(refit_date, X)

    # ---------- preprocess ----------
    clipper = DataClipperStd(mul=3.0)
    scaler  = StandardScaler()
    X_train = scaler.fit_transform(clipper.fit_transform(
                 filter_date_range(X, train_start, train_end)))
    active_train = filter_date_range(act, train_start, train_end)

    # ---------- fit SJM ----------
    sjm = SparseJumpModel(n_components=2,
                          max_feats=int(kp**2),
                          jump_penalty=lam)
    
    train_idx = filter_date_range(X, train_start, train_end).index
    X_train_df = pd.DataFrame(X_train, index=train_idx, columns=X.columns)
    sjm.fit(X_train_df, ret_ser=active_train, sort_by="cumret")

    ret_train = filter_date_range(ret, train_start, train_end)

    # regime‑level abs returns
    train_states = sjm.predict(X_train_df)
    abs_ret = {}
    for st in range(2):
        st_idx = (train_states==st)
        abs_ret[st] = ret_train.loc[st_idx].mean() * 252

    # ---------- online prediction for test dates ----------
    states = {}
    for day in test_dates_chunk:
        X_hist = X.loc[:day]                          # all history up to 'day'
        temp_clipper = DataClipperStd(mul=3.0)
        X_hist_clip  = temp_clipper.fit_transform(X_hist)

        temp_scaler  = StandardScaler()
        _ = temp_scaler.fit_transform(X_hist_clip)    # fit on *all* history

        if day in X.index:
            X_day_clip   = temp_clipper.transform(X.loc[[day]])
            X_day_scaled = temp_scaler.transform(X_day_clip)
            states[day]  = sjm.predict_online(
                pd.DataFrame(X_day_scaled,
                            index=[day],
                            columns=X.columns)).iloc[0]

    # assemble mini‑df for this factor & period
    out = pd.DataFrame({"state": pd.Series(states)},
                       index=list(states.keys()))
    out["ann_abs_ret"] = out["state"].map(abs_ret)
    return fac, out

def build_factor_views(factor_data_dict, saved_hyperparams, factors,
                       test_index,
                       refit_freq="ME", min_years=8, max_years=12,
                       init_start="2002-05-31"):

    views = {f:[] for f in factors}
    refit_dates = (test_index.to_series()
                   .resample(refit_freq)
                   .last()
                   .dropna())

    for j, refit_date in enumerate(refit_dates):
        if j < len(refit_dates)-1:
            next_refit = refit_dates.iloc[j+1]
        else:
            next_refit = test_index[-1]
        test_mask = (test_index>refit_date)&(test_index<=next_refit)
        test_chunk = test_index[test_mask]

        # ---- parallel over factors ----
        jobs = []
        for fac in factors:
            # latest hyperparams before refit_date
            hp_hist = [h for h in saved_hyperparams[fac]
                       if pd.to_datetime(h["date"])<=refit_date]
            if not hp_hist: continue
            hp = hp_hist[-1]
            jobs.append(delayed(_fit_one_factor)(
                fac, refit_date, test_chunk,
                factor_data_dict, hp,
                min_years, max_years, init_start))
        for fac, df in Parallel(n_jobs=-1)(jobs):
            views[fac].append(df)

    # concat & tidy
    for fac in factors:
        views[fac] = (pd.concat(views[fac])
                      .sort_index()
                      .loc[:,["state","ann_abs_ret"]])
    return views


# --------- build or load ----------         
if FORCE_REBUILD or not os.path.exists(VIEWS_FILE):
    factor_views = build_factor_views(factor_data_dict, saved_hyperparams, factors, 
                                      test_index,
                                      refit_freq=REFIT_FREQ, 
                                      min_years=8, max_years=12, init_start="2002-05-31")
    with open(VIEWS_FILE, "wb") as f:
        pickle.dump(factor_views, f)
else:
    with open(VIEWS_FILE, "rb") as f:
        factor_views = pickle.load(f)



  views[fac] = (pd.concat(views[fac])
  views[fac] = (pd.concat(views[fac])
  views[fac] = (pd.concat(views[fac])
  views[fac] = (pd.concat(views[fac])
  views[fac] = (pd.concat(views[fac])
  views[fac] = (pd.concat(views[fac])


In [6]:
# ------------------------------------------------------------
# 2  FAST BLACK‑LITTERMAN FUNCTION  (run as often as you like)
# ------------------------------------------------------------

def ewm_covariance(returns, halflife=126, min_periods=60):
    ewm_cov = returns.ewm(halflife=halflife,
                          adjust=False,
                          min_periods=min_periods).cov()
    if returns.empty: return pd.DataFrame()
    return ewm_cov.loc[returns.index[-1]]

def detect_state_shifts(views, factors):
    # 1 col per factor with the model‑state
    state_df = pd.concat({f: views[f]["state"] for f in factors}, axis=1)
    # True when *any* factor changes state vs. the day before
    return state_df.ne(state_df.shift()).any(axis=1)

def run_bl_once(views, returns_df,
                shift_series=None,
                tau=0.05, delta=2.5,
                trade_market=True,
                use_bl_cov=False,
                allow_market_short=False,
                allow_factor_short=False,
                use_bl_prior=False,     
                fallback_strategy="HOLD_RFR", # "HOLD_RFR", "SHORT_MARKET"
                tcost=0.0005):

    assets  = returns_df.columns.tolist()
    factors = list(views.keys())
    if trade_market:
        trade_assets = [a for a in returns_df.columns if a != "rf"]
    else:
        trade_assets = [a for a in returns_df.columns
                        if a not in {"rf", "Market"}]
    cash_asset = "rf"

    market_caps = {etf: 1.0 for etf in trade_assets}

    # ---------- per‑asset bounds ----------
    bounds = []
    for a in trade_assets:
        if a == "Market":
            bounds.append((-1, 1) if allow_market_short else (0, 1))
        else:
            bounds.append((-1, 1) if allow_factor_short else (0, 1))

    w = pd.DataFrame(index=returns_df.index,
                     columns=trade_assets + [cash_asset], # was + [cash_asset]
                     dtype=float)

    for t in returns_df.index:
        # ------ carry weights forward if no trade today ------
        if shift_series is not None and not shift_series.loc[t]:
                w.loc[t] = w.shift(1).loc[t] 
                continue
        
        # ------ optimiser block ------
        hist  = returns_df[trade_assets].loc[:t].iloc[:-1]
        cov   = ewm_covariance(hist) * 252
        if cov.empty or cov.isna().any().any():
            continue

        if use_bl_prior:
            prior_for_bl = market_implied_prior_returns(market_caps, delta, cov)
        else:
            prior_for_bl = "equal"          # or None


        q = {fac: views[fac].loc[t, "ann_abs_ret"] for fac in factors}

        bl = BlackLittermanModel(
                cov,
                pi=prior_for_bl,
                tau=tau,
                delta=delta,
                absolute_views=q)

        cov_for_ef = bl.bl_cov() if use_bl_cov else cov

        ef = EfficientFrontier(bl.bl_returns(), cov_for_ef,
                               weight_bounds=bounds,
                               solver="SCS"                         # "OSQP" is standard and probably fastest 
                               )

        rf_annual = returns_df.loc[t, cash_asset] * 252

        if (bl.bl_returns() <= rf_annual).all():
            w.loc[t] = 0.0                                             # start clean

            if fallback_strategy == "HOLD_RFR":
                w.loc[t, cash_asset] = 1.0                             # 100 % cash

            elif fallback_strategy == "SHORT_MARKET" and "Market" in trade_assets:
                w.loc[t, "Market"] = -1.0
                w.loc[t, cash_asset] = 1.0      # offset the short


            # add more elif blocks here for any new fallbacks you invent
            continue                                                   # next date

        # —— Otherwise run the optimiser (now guaranteed feasible) ——
        ef.max_sharpe(risk_free_rate=rf_annual)

        w_t = ef.clean_weights()
        w.loc[t, trade_assets] = pd.Series(w_t)

    # ---------- P&L ----------
    pnl = (w.shift(1).fillna(0) * returns_df).sum(axis=1)
    if tcost > 0:
        pnl -= w.diff().abs().sum(axis=1).fillna(0) * tcost

    return w, pnl


In [7]:
# ------------------------------------------------------------
# 3  QUICK EXPERIMENTS
# ------------------------------------------------------------
def annualized_sharpe(r):          # helper
    return (r.mean() / r.std()) * np.sqrt(252)

def ann_turnover(w):
    daily_turn = w.diff().abs().sum(axis=1).mean()
    return daily_turn * 252

cfgs = [
    dict(label="Long only base",  tau=0.05, delta=2.5,
         use_bl_cov=False, use_bl_prior=False, allow_market_short=False, allow_factor_short=False),
    dict(label="+ BL prior",  tau=0.05, delta=2.5,
         use_bl_cov=False, use_bl_prior=True, allow_market_short=False, allow_factor_short=False),
#     dict(label="Long‑Only - allow mkt short",  tau=0.05, delta=2.5,
#          use_bl_cov=False, allow_market_short=True, allow_factor_short=False),
    dict(label="+ BL cov",  tau=0.05, delta=2.5,
         use_bl_cov=True, use_bl_prior=False, allow_market_short=False, allow_factor_short=False),
    dict(label="+ BL cov + BL prior",  tau=0.05, delta=2.5,
         use_bl_cov=True, use_bl_prior=True, allow_market_short=False, allow_factor_short=False)
#     dict(label="BL cov - allow mkt short",  tau=0.05, delta=2.5,
#          use_bl_cov=True, allow_market_short=True, allow_factor_short=False),
]

#     dict(label="L/S Market", tau=0.05, delta=2.5,
#         use_bl_cov=False, allow_market_short=True,  allow_factor_short=False),

test_df = full_df.loc[test_index]

shift_days = detect_state_shifts(factor_views, factors).reindex(test_df.index, fill_value=False)

shift_days.iloc[0] = True

run_results = {}                   # label → dict(rets, wts, cfg)
for c in cfgs:
    label = c.pop("label")         # remove label before **c
    wts, rets = run_bl_once(factor_views, test_df, shift_series=shift_days, **c)  # set shift_series to None to trade daily. set to shift_days to only trade on regime shifts
    run_results[label] = dict(returns=rets, weights=wts, cfg=c)
    #print(f"{label:12s}  Sharpe {annualized_sharpe(rets):6.3f}")

print()

rows = []
for label, res in run_results.items():
    rows.append({
        "Strategy": label,
        "Sharpe": annualized_sharpe(res["returns"]),
        "Turnover": ann_turnover(res["weights"])
    })

# make and print the table
df_table   = pd.DataFrame(rows)
print(df_table.to_string(index=False,float_format=lambda x: f"{x:.3f}"))



           Strategy  Sharpe  Turnover
     Long only base   0.254    42.881
         + BL prior   0.349    51.211
           + BL cov   0.254    42.793
+ BL cov + BL prior   0.353    51.116


In [31]:
# build a list of dicts from your run_results
rows = []
for label, res in run_results.items():
    rows.append({
        "Strategy": label,
        "Sharpe": annualized_sharpe(res["returns"]),
        "Turnover": ann_turnover(res["weights"])
    })

# make and print the table
df_table   = pd.DataFrame(rows)
print(df_table.to_string(index=False,float_format=lambda x: f"{x:.3f}"))

           Strategy  Sharpe  Turnover
     Long only base   0.603    16.801
         + BL prior   0.507    20.849
           + BL cov   0.605    16.777
+ BL cov + BL prior   0.509    20.738


In [8]:
# ────────────────────────────────────────────────────────────────
# Interactive comparison of configs  +  weights‑over‑time viewer
# ────────────────────────────────────────────────────────────────
import numpy as np
import matplotlib.pyplot as plt
from ipywidgets import SelectMultiple, Dropdown, ToggleButtons, VBox, HBox, interact

# 1) -------------  compute the fixed quarterly‑EW benchmark (once) ----
try:
    quarterly_ew_rets
    quarterly_ew_cum
except NameError:
    TCOST = 0.0005                                   # keep in‑sync with runs
    overlap_idx     = test_index.intersection(etf_df.index)
    test_returns_df = etf_df.loc[overlap_idx]        # factors + Market + rf
    n_assets        = test_returns_df.shape[1]

    first_test_date = test_returns_df.index[0]
    q_dates = (test_returns_df.index.to_series()
               .resample("QE").last().dropna().index)
    if first_test_date not in q_dates:
        q_dates = q_dates.insert(0, first_test_date)

    prev_w            = np.zeros(n_assets)
    quarterly_ew_rets = pd.Series(index=test_returns_df.index, dtype=float)

    for i in range(len(q_dates) - 1):
        start_q, end_q = q_dates[i], q_dates[i + 1]
        mask    = (test_returns_df.index > start_q) & (test_returns_df.index <= end_q)
        dates   = test_returns_df.index[mask]

        w              = np.ones(n_assets) / n_assets             # new EW weights
        turnover       = np.sum(np.abs(w - prev_w))
        rebalance_cost = turnover * TCOST

        for day in dates:
            r_i = test_returns_df.loc[day].values
            r_p = np.dot(w, r_i) - rebalance_cost
            quarterly_ew_rets.loc[day] = r_p

            w = w * (1 + r_i)
            if (1 + r_p) != 0:
                w /= (1 + r_p)
            rebalance_cost = 0.0                                   # cost once only
        prev_w = w.copy()

    quarterly_ew_rets = quarterly_ew_rets.dropna()
    quarterly_ew_cum  = quarterly_ew_rets.cumsum()

# 2) -------------  widgets for return / Sharpe / weights --------------
labels   = list(run_results)
cmp_sel  = SelectMultiple(
    options     = labels,
    value       = tuple(labels[:2]),         # default: first two configs
    description = "Compare:",
    rows        = min(8, len(labels)),
    style       = {"description_width":"70px"},
)
wgt_sel  = Dropdown(
    options     = labels,
    value       = labels[0],                 # default: first config
    description = "Weights:",
    style       = {"description_width":"70px"},
)
sign_sel = ToggleButtons(
    options     = [("Both", "both"), ("Positive", "pos"), ("Negative", "neg")],
    value       = "both",
    description = "Show:",
    style       = {"description_width":"70px"},
)

def annualized_sharpe(x):
    return (x.mean() / x.std()) * np.sqrt(252)

# build list of all dates where any config actually has weights
active_dates = pd.Index([])
for cfg in run_results.values():
    w = cfg["weights"].drop(columns="rf").dropna(how="all")
    active_dates = active_dates.union(w.index)
active_dates = active_dates.sort_values()

# filter EW returns to exactly those dates, then re‑cumulate
ew_rets = quarterly_ew_rets.loc[quarterly_ew_rets.index.isin(active_dates)]
ew_cum  = ew_rets.cumsum()

def _update(compare, weights_cfg, sign_filter):
    if not compare:
        print("Pick ≥1 config to display."); return

    # ---- cumulative returns ----
    plt.figure(figsize=(12,6))
    ew_cum.plot(label="Quarterly EW (5 bps)", lw=2)
    for lab in compare: 
        run_results[lab]["returns"].cumsum().plot(label=lab, lw=1.5)
    plt.title("Cumulative Returns"); plt.grid(True); plt.legend(); plt.show()

    # ---- Sharpe table ----
    print("Annualised Sharpe ratios")
    print(f"  EW benchmark : {annualized_sharpe(ew_rets):6.3f}")
    for lab in compare:
        print(f"  {lab:<12s}: {annualized_sharpe(run_results[lab]['returns']):6.3f}")

    # ---- weights over time for the chosen config ----
    wdf = run_results[weights_cfg]["weights"].drop(columns='rf').dropna(how='all')
    if sign_filter == "pos":        # show only positive weights
        wdf = wdf.where(wdf > 0, 0)
    elif sign_filter == "neg":      # show only negative weights
        wdf = wdf.where(wdf < 0, 0)

    plt.figure(figsize=(12,6))
    plt.stackplot(wdf.index, wdf.T.values, labels=wdf.columns)
    filt = {"both":"(all)", "pos":"(positive)", "neg":"(negative)"}[sign_filter]
    plt.title(f"Weights over time – {weights_cfg} {filt}")
    plt.xlabel("Trading days"); plt.ylabel("Weight")
    plt.legend(loc="center left", bbox_to_anchor=(1,.5), fontsize="small")
    plt.tight_layout(); plt.show()

ui = VBox([HBox([cmp_sel, wgt_sel, sign_sel])])
interact(_update, compare=cmp_sel, weights_cfg=wgt_sel, sign_filter=sign_sel)
ui


interactive(children=(SelectMultiple(description='Compare:', index=(0, 1), options=('Long only base', '+ BL pr…

VBox(children=(HBox(children=(SelectMultiple(description='Compare:', index=(0, 1), options=('Long only base', …

In [11]:
# --- helper to reproduce the BL expected‑return vector for ONE date ---
from pypfopt.black_litterman import BlackLittermanModel, market_implied_prior_returns

def bl_expected_returns_for_day(day, returns_df, views, tau=0.05, delta=2.5,
                                use_bl_prior=False):
    trade_assets = [c for c in returns_df.columns if c != "rf"]
    cov = ewm_covariance(returns_df[trade_assets].loc[:day].iloc[:-1]) * 252
    market_caps = {a: 1.0 for a in trade_assets}

    if use_bl_prior:
        pi = market_implied_prior_returns(market_caps, delta, cov)
    else:
        pi = "equal"          # same as in your main function

    q = {fac: views[fac].loc[day, "ann_abs_ret"] for fac in views.keys()}

    bl = BlackLittermanModel(cov, pi=pi, tau=tau, delta=delta,
                             absolute_views=q)
    return bl.bl_returns()    # pandas Series of ERs


In [23]:
import pandas as pd

# figure out your first‐month window
first_date = min(cfg["weights"].index.min() for cfg in run_results.values())
end_1m    = first_date + pd.DateOffset(months=1)

for name, cfg in run_results.items():
    # drop the rf‐column and slice
    w = cfg["weights"].drop(columns="rf")
    w1 = w.loc[(w.index >= first_date) & (w.index <  end_1m)]
    
    # check if any weight ever deviates from zero
    has_pos = (w1.abs() > 0).any().any()
    print(f"{name:20s}: {'positions' if has_pos else 'no positions'}")


Long only base      : no positions
+ BL prior          : no positions
+ BL cov            : no positions
+ BL cov + BL prior : no positions
