In [None]:
import os, glob
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from pathlib import Path
import statsmodels.api as sm

# Resolve the cleaned-data directory relative to the notebook's working directory.
# parents[1] is the directory two levels above the cwd, so this only works when the
# notebook is started from a folder nested two levels below the project root.
project_root = Path.cwd().parents[1]  # original note: /home/ug/orlovsd2/csc1171
# Kept as a plain string because the cells below build glob patterns with os.path.join.
data_dir = str((project_root / "data/clean/SP500_ETF_FX_Crypto_Daily").resolve())

In [None]:
# Sanity check: confirm the data directory resolves and count the train/test CSVs.
# This cell is deliberately self-contained: it relies only on the imports of the
# first cell, so it also runs on a fresh kernel (the helper functions and
# `tickers_to_plot` are defined further down and must not be used here).

project_root = Path.cwd().parents[1]  # original note: /home/ug/orlovsd2/csc1171
data_dir = str((project_root / "data/clean/SP500_ETF_FX_Crypto_Daily").resolve())

# Sorted so the "first few" listings are reproducible between runs.
train_files = sorted(glob.glob(os.path.join(data_dir, "*_cleaned_2008_2020.csv")))
test_files = sorted(glob.glob(os.path.join(data_dir, "*_cleaned_2020_onward.csv")))

print("cwd:", os.getcwd())
print("data_dir:", data_dir)
print("exists? ->", os.path.exists(data_dir))
print("train:", len(train_files))
print("test :", len(test_files))
print("first few (train):", train_files[:5])
print("first few (test) :", test_files[:5])

# Pair files by ticker: the ticker is the file name up to the first "_cleaned".
train_map = {os.path.basename(p).split("_cleaned", 1)[0]: p for p in train_files}
test_map = {os.path.basename(p).split("_cleaned", 1)[0]: p for p in test_files}
pairs = {
    tkr: (train_map[tkr], test_map[tkr])
    for tkr in sorted(train_map.keys() & test_map.keys())
}
print(f"paired tickers: {len(pairs)}")


In [None]:
# Outlier-plot configuration (based on Frank's outlier test).
sigma = 3.0  # |z| threshold handed to the outlier detectors via plot_ticker
method = "overall"  # "overall" or "seasonal"; plot_ticker raises ValueError for anything else
tickers_to_plot = None  # None (or any falsy value) keeps every paired ticker; otherwise a collection of ticker names

def load_clean_csv(path):
    """Read one cleaned CSV into a frame indexed by sorted, tz-naive timestamps.

    The date index is taken from the ``Date`` column when present. Otherwise the
    first column is used, unless that column is unnamed and a ``date`` column
    exists, in which case ``date`` becomes the index.

    Args:
        path: Location of the CSV file.

    Returns:
        pandas.DataFrame sorted ascending by its (tz-naive) datetime index.
    """
    try:
        df = pd.read_csv(path, parse_dates=["Date"]).set_index("Date")
    except ValueError:
        # read_csv raises ValueError when the "Date" column is missing.
        df = pd.read_csv(path, index_col=0, parse_dates=[0])
        if "date" in df.columns and df.index.name is None:
            # The unnamed first column is not the date (e.g. a saved row number).
            # Switch to "date" BEFORE normalising so the final index is the one
            # that gets tz-stripped and sorted.
            df = df.set_index("date")
    df.index = pd.to_datetime(df.index).tz_localize(None)
    return df.sort_index()

def returns_from_real_close(df, rule):
    """Compute period-over-period returns of ``real_close`` at a resample frequency.

    Args:
        df: Frame with a datetime index; the ``real_close`` column is used.
        rule: pandas resample rule (for example "W-FRI").

    Returns:
        Series of percentage changes between consecutive period-end closes with
        NaN entries dropped, or an empty float Series when ``real_close`` is absent.
    """
    if "real_close" in df.columns:
        # Last observed close in each period, then the relative change between periods.
        period_close = df["real_close"].resample(rule).last()
        return period_close.pct_change().dropna()
    return pd.Series(dtype=float)

def detect_outliers_overall(series, s):
    """Flag values whose z-score against the whole series exceeds ``s``.

    Args:
        series: Numeric pandas Series.
        s: Threshold on the absolute z-score.

    Returns:
        Boolean Series on the same index; all False when the standard deviation
        is zero or NaN.
    """
    center, spread = series.mean(), series.std()
    if np.isnan(spread) or spread == 0:
        # No usable spread, so nothing can be called an outlier.
        return pd.Series(False, index=series.index)
    zscores = (series - center) / spread
    return zscores.abs() > s

def detect_outliers_seasonal(series, s):
    """Flag values whose z-score within their calendar month exceeds ``s``.

    Mean and standard deviation are computed per month number of the index,
    pooling that month across all years present.

    Args:
        series: Numeric Series with a datetime index.
        s: Threshold on the absolute z-score.

    Returns:
        Boolean Series on the index of ``series``; months with zero or undefined
        standard deviation are never flagged.
    """
    frame = series.to_frame("ret")
    frame["month"] = frame.index.month
    per_month = frame.groupby("month")["ret"].agg(["mean", "std"])
    center = frame["month"].map(per_month["mean"])
    # A zero spread becomes NaN so the division yields NaN instead of +/-inf.
    spread = frame["month"].map(per_month["std"]).replace(0, np.nan)
    flagged = ((frame["ret"] - center) / spread).abs() > s
    return flagged.fillna(False).reindex(series.index)

def ticker_from_path(path):
    """Return the ticker encoded in a cleaned-file path.

    The ticker is the file name up to (not including) the first "_cleaned";
    the whole file name is returned when that marker is absent.
    """
    return os.path.basename(path).partition("_cleaned")[0]

def collect_train_test_pairs(data_dir, tickers=None):
    """Match each ticker's train CSV with its test CSV.

    Args:
        data_dir: Directory searched for "*_cleaned_2008_2020.csv" (train) and
            "*_cleaned_2020_onward.csv" (test) files.
        tickers: Optional collection of tickers to keep; a falsy value keeps all.

    Returns:
        Dict mapping ticker -> (train_path, test_path), built in sorted ticker
        order and limited to tickers that have both files.
    """
    def _index_by_ticker(pattern):
        # Map ticker -> path for every file in data_dir matching the pattern.
        return {ticker_from_path(p): p for p in glob.glob(os.path.join(data_dir, pattern))}

    train_map = _index_by_ticker("*_cleaned_2008_2020.csv")
    test_map = _index_by_ticker("*_cleaned_2020_onward.csv")
    common = sorted(set(train_map) & set(test_map))
    return {
        tkr: (train_map[tkr], test_map[tkr])
        for tkr in common
        if not tickers or tkr in tickers
    }

def plot_ticker(tkr, ret_train, ret_test, s, meth, freq_tag):
    """Plot train and test returns for one ticker and highlight z-score outliers.

    Outliers are detected on the concatenation of both series, so the statistics
    span the train and test periods together.

    Args:
        tkr: Ticker used in the title.
        ret_train: Return Series for the training period (may be empty).
        ret_test: Return Series for the test period (may be empty).
        s: Threshold on the absolute z-score.
        meth: "overall" or "seasonal" outlier detection.
        freq_tag: Frequency label used in the title and y-axis label.

    Raises:
        ValueError: If ``meth`` is neither "overall" nor "seasonal".
    """
    combined = pd.concat([ret_train, ret_test]).sort_index()
    if meth == "overall":
        mask = detect_outliers_overall(combined, s)
    elif meth == "seasonal":
        mask = detect_outliers_seasonal(combined, s)
    else:
        raise ValueError("method must be 'overall' or 'seasonal'")
    outlier_label = f"outlier (|z|>{s:g}) {meth}"

    fig, ax = plt.subplots(figsize=(12, 6))
    if not ret_train.empty:
        ax.plot(ret_train.index, ret_train.values, marker='.', linewidth=1.0, label="train 2008–2020")
    if not ret_test.empty:
        ax.plot(ret_test.index, ret_test.values, marker='.', linewidth=1.0, label="test 2020–onward")
    if mask.any():
        ax.scatter(combined.index[mask], combined[mask].values, s=80, zorder=5, label=outlier_label)
    ax.axhline(0.0, linewidth=0.8)
    ax.set_title(f"{tkr}: {freq_tag} returns with outliers ({meth}, σ={s:g})")
    ax.set_xlabel("date")
    ax.set_ylabel(f"{freq_tag} return")
    ax.grid(True)
    ax.legend()
    plt.show()
    # Called once per ticker and frequency: release the figure explicitly so
    # open figures do not pile up on backends that keep them alive after show().
    plt.close(fig)

# (label, pandas resample rule) for every horizon that gets plotted.
# Note: the "ME" / "YE-DEC" aliases need a recent pandas (2.2 or newer).
freqs = [
    ("weekly", "W-FRI"),
    ("monthly", "ME"),
    ("yearly", "YE-DEC"),
]

pairs = collect_train_test_pairs(data_dir, tickers=tickers_to_plot)
print("paired tickers:", len(pairs))

# One figure per ticker and frequency, in alphabetical ticker order.
for tkr in sorted(pairs):
    train_path, test_path = pairs[tkr]
    df_tr = load_clean_csv(train_path)
    df_te = load_clean_csv(test_path)
    for freq_tag, rule in freqs:
        ret_tr = returns_from_real_close(df_tr, rule)
        ret_te = returns_from_real_close(df_te, rule)
        # Plot as soon as at least one of the two periods has data.
        if not (ret_tr.empty and ret_te.empty):
            plot_ticker(tkr, ret_tr, ret_te, sigma, method, freq_tag)


In [None]:
# Learn the 2008–2020 mean return per calendar slot (month / ISO week / whole sample for yearly),
# predict 2020+ by looking up each test date's slot, and plot actual vs expected.

sigma = 3.0  # |z| threshold; only used when show_outliers is True
method = "overall"  # "overall" selects the whole-series detector; any other value selects the seasonal one
tickers_to_plot = None  # None (or any falsy value) keeps every paired ticker
show_outliers = False  # when True, outliers of the test series are highlighted on each plot

def load_clean_csv(path):
    """Read one cleaned CSV into a frame indexed by sorted, tz-naive timestamps.

    The date index is taken from the ``Date`` column when present. Otherwise the
    first column is used, unless that column is unnamed and a ``date`` column
    exists, in which case ``date`` becomes the index.

    Args:
        path: Location of the CSV file.

    Returns:
        pandas.DataFrame sorted ascending by its (tz-naive) datetime index.
    """
    try:
        df = pd.read_csv(path, parse_dates=["Date"]).set_index("Date")
    except ValueError:
        # read_csv raises ValueError when the "Date" column is missing.
        df = pd.read_csv(path, index_col=0, parse_dates=[0])
        if "date" in df.columns and df.index.name is None:
            # The unnamed first column is not the date (e.g. a saved row number).
            # Switch to "date" BEFORE normalising so the final index is the one
            # that gets tz-stripped and sorted.
            df = df.set_index("date")
    df.index = pd.to_datetime(df.index).tz_localize(None)
    return df.sort_index()

def returns_from_real_close(df, rule):
    """Return percentage changes of the period-end ``real_close`` values.

    Args:
        df: Frame with a datetime index; the ``real_close`` column is used.
        rule: pandas resample rule (for example "W-FRI").

    Returns:
        Series of returns between consecutive resampled closes with NaN entries
        removed; an empty float Series if ``real_close`` is not a column.
    """
    has_close = "real_close" in df.columns
    if not has_close:
        return pd.Series(dtype=float)
    # Chain: last close per period -> relative change -> drop undefined entries.
    return (
        df["real_close"]
        .resample(rule)
        .last()
        .pct_change()
        .dropna()
    )

def detect_outliers_overall(series, s):
    """Mark values lying more than ``s`` standard deviations from the series mean.

    Args:
        series: Numeric pandas Series.
        s: Threshold on the absolute z-score.

    Returns:
        Boolean Series aligned with ``series``; entirely False if the standard
        deviation is zero or NaN.
    """
    mean_value = series.mean()
    std_value = series.std()
    if std_value != 0 and not np.isnan(std_value):
        return ((series - mean_value) / std_value).abs() > s
    # Degenerate spread: report no outliers rather than dividing by zero/NaN.
    return pd.Series(False, index=series.index)

def detect_outliers_seasonal(series, s):
    """Mark values more than ``s`` standard deviations from their month's mean.

    Statistics are grouped by the month number of the index, so the same
    calendar month is pooled across all years in ``series``.

    Args:
        series: Numeric Series with a datetime index.
        s: Threshold on the absolute z-score.

    Returns:
        Boolean Series on the index of ``series``; a month whose standard
        deviation is zero or undefined produces no flags.
    """
    table = pd.DataFrame({"ret": series}).assign(month=lambda f: f.index.month)
    month_stats = table.groupby("month")["ret"].agg(["mean", "std"])
    month_mean = table["month"].map(month_stats["mean"])
    month_std = table["month"].map(month_stats["std"])
    # Zero spread is turned into NaN so the ratio is NaN (never flagged) instead of inf.
    ratio = (table["ret"] - month_mean) / month_std.replace(0, np.nan)
    exceeds = ratio.abs() > s
    return exceeds.fillna(False).reindex(series.index)

def ticker_from_path(path):
    """Extract the ticker, i.e. the file-name part before the first "_cleaned".

    When the file name contains no "_cleaned" marker the full name is returned.
    """
    _, filename = os.path.split(path)
    return filename.split("_cleaned", maxsplit=1)[0]

def collect_train_test_pairs(data_dir, tickers=None):
    """Pair the train and test CSV of every ticker found in ``data_dir``.

    Args:
        data_dir: Directory holding "*_cleaned_2008_2020.csv" (train) and
            "*_cleaned_2020_onward.csv" (test) files.
        tickers: Optional collection restricting the result; falsy means no filter.

    Returns:
        Dict ticker -> (train_path, test_path) for tickers present in both
        sets, inserted in sorted ticker order.
    """
    train_map = {}
    for train_file in glob.glob(os.path.join(data_dir, "*_cleaned_2008_2020.csv")):
        train_map[ticker_from_path(train_file)] = train_file
    test_map = {}
    for test_file in glob.glob(os.path.join(data_dir, "*_cleaned_2020_onward.csv")):
        test_map[ticker_from_path(test_file)] = test_file

    pairs = {}
    for tkr in sorted(train_map):
        if tkr not in test_map:
            continue  # no matching test file
        if not tickers or tkr in tickers:
            pairs[tkr] = (train_map[tkr], test_map[tkr])
    return pairs

def seasonal_profile_on_train(ret_train, freq_tag):
    """Average the training returns per calendar slot.

    Args:
        ret_train: Training return Series with a datetime index (may be empty).
        freq_tag: "monthly" (slot = month number), "weekly" (slot = ISO week
            number) or "yearly" (single slot 0 holding the overall mean).

    Returns:
        Series mapping slot -> mean training return. Empty when ``ret_train``
        is empty or ``freq_tag`` is not one of the values above.
    """
    if ret_train.empty:
        # An empty Series built without an index has no DatetimeIndex, so the
        # .month / .isocalendar() lookups below would raise AttributeError.
        return pd.Series(dtype=float)

    idx = ret_train.index
    if freq_tag == "monthly":
        slots = idx.month
    elif freq_tag == "weekly":
        # ISO week number; some week numbers may be missing from the profile.
        slots = idx.isocalendar().week.to_numpy()
    elif freq_tag == "yearly":
        # No within-year seasonality at this frequency -> constant mean.
        return pd.Series({0: ret_train.mean()})
    else:
        return pd.Series(dtype=float)
    return pd.Series(ret_train.values, index=slots).groupby(level=0).mean()

def predict_seasonal_on_test(ret_train, ret_test, freq_tag):
    """Predict each test-period return as the training mean of its calendar slot.

    Args:
        ret_train: Training return Series used to build the seasonal profile.
        ret_test: Test return Series; only its index is used.
        freq_tag: "monthly", "weekly" or "yearly".

    Returns:
        Float Series on the index of ``ret_test``. Entries stay NaN when the
        profile is empty, the frequency is unknown, or a slot was never seen in
        training.
    """
    prof = seasonal_profile_on_train(ret_train, freq_tag)
    target_index = ret_test.index
    if prof.empty:
        return pd.Series(index=target_index, dtype=float)

    if freq_tag == "yearly":
        # Single-slot profile: the same value for every test date.
        return pd.Series(prof.iloc[0], index=target_index)

    if freq_tag == "monthly":
        slots = target_index.month
    elif freq_tag == "weekly":
        slots = target_index.isocalendar().week.to_numpy()
    else:
        return pd.Series(index=target_index, dtype=float)

    yhat = pd.Series(index=target_index, dtype=float)
    for slot in np.unique(slots):
        if slot in prof.index:
            # Fill all test dates that fall in this slot with its training mean.
            yhat[slots == slot] = prof.loc[slot]
    return yhat

def plot_test_vs_pred(tkr, ret_train, ret_test, freq_tag, s, meth, show_outliers=False):
    """Plot actual test returns against the seasonal prediction learned on train.

    Args:
        tkr: Ticker used in the title.
        ret_train: Training return Series (source of the seasonal profile).
        ret_test: Test return Series that is plotted and predicted.
        freq_tag: "weekly", "monthly" or "yearly".
        s: Threshold on the absolute z-score for outlier highlighting.
        meth: "overall" uses the whole-series detector; any other value uses
            the seasonal detector.
        show_outliers: When True, outliers of ``ret_test`` are highlighted.
    """
    yhat = predict_seasonal_on_test(ret_train, ret_test, freq_tag)

    fig, ax = plt.subplots(figsize=(12, 6))
    if not ret_test.empty:
        ax.plot(ret_test.index, ret_test.values, marker='.', linewidth=1.0, label="actual 2020–onward")
    if not yhat.empty:
        ax.plot(yhat.index, yhat.values, linewidth=2.0, label="predicted seasonal (fit on 2008–2020)")

    if show_outliers and not ret_test.empty:
        if meth == "overall":
            mask = detect_outliers_overall(ret_test, s)
        else:
            mask = detect_outliers_seasonal(ret_test, s)
        if mask.any():
            ax.scatter(ret_test.index[mask], ret_test[mask].values, s=80, zorder=5, label=f"outlier (|z|>{s:g})")

    ax.axhline(0.0, linewidth=0.8)
    ax.set_title(f"{tkr}: {freq_tag} returns — seasonal prediction (train) vs actual (test)")
    ax.set_xlabel("date")
    ax.set_ylabel(f"{freq_tag} return")
    ax.grid(True)
    ax.legend()
    if not ret_test.empty:
        first, last = ret_test.index.min(), ret_test.index.max()
        # With a single test point both limits coincide; leave autoscaling in
        # place instead of asking matplotlib for a zero-width axis.
        if first < last:
            ax.set_xlim(first, last)
    plt.show()
    # Called once per ticker and frequency: release the figure explicitly.
    plt.close(fig)

# (label, pandas resample rule) for every horizon that gets plotted.
# Note: the "ME" / "YE-DEC" aliases need a recent pandas (2.2 or newer).
freqs = [
    ("weekly", "W-FRI"),
    ("monthly", "ME"),
    ("yearly", "YE-DEC"),
]

pairs = collect_train_test_pairs(data_dir, tickers=tickers_to_plot)
print("paired tickers:", len(pairs))

# One figure per ticker and frequency, in alphabetical ticker order.
for tkr in sorted(pairs):
    train_path, test_path = pairs[tkr]
    df_tr = load_clean_csv(train_path)
    df_te = load_clean_csv(test_path)
    for freq_tag, rule in freqs:
        ret_tr = returns_from_real_close(df_tr, rule)
        ret_te = returns_from_real_close(df_te, rule)
        # Nothing to compare against when the test period has no returns.
        if not ret_te.empty:
            plot_test_vs_pred(tkr, ret_tr, ret_te, freq_tag, sigma, method, show_outliers=show_outliers)


In [None]:
# Fit 2008–2020 returns with OLS (time trend + seasonal dummies [+ z-scored volume]),
# predict on the 2020+ timestamps, and plot actual vs predicted.

sigma = 3.0  # NOTE(review): not referenced by the code visible in this cell
method = "overall"  # NOTE(review): not referenced by the code visible in this cell
tickers_to_plot = None  # None (or any falsy value) keeps every paired ticker

def load_clean_csv(path):
    """Read one cleaned CSV into a frame indexed by sorted, tz-naive timestamps.

    The date index is taken from the ``Date`` column when present. Otherwise the
    first column is used, unless that column is unnamed and a ``date`` column
    exists, in which case ``date`` becomes the index.

    Args:
        path: Location of the CSV file.

    Returns:
        pandas.DataFrame sorted ascending by its (tz-naive) datetime index.
    """
    try:
        df = pd.read_csv(path, parse_dates=["Date"]).set_index("Date")
    except ValueError:
        # read_csv raises ValueError when the "Date" column is missing.
        df = pd.read_csv(path, index_col=0, parse_dates=[0])
        if "date" in df.columns and df.index.name is None:
            # The unnamed first column is not the date (e.g. a saved row number).
            # Switch to "date" BEFORE normalising so the final index is the one
            # that gets tz-stripped and sorted.
            df = df.set_index("date")
    df.index = pd.to_datetime(df.index).tz_localize(None)
    return df.sort_index()

def resample_returns_and_volume(df, rule):
    """Resample to period returns (and summed volume) ready for regression.

    Args:
        df: Frame with a datetime index and a ``real_close`` column; a
            ``volume`` column is used when present.
        rule: pandas resample rule (for example "W-FRI").

    Returns:
        DataFrame with column ``ret`` (change between consecutive period-end
        closes) and, if available, ``volume`` (sum per period). Rows containing
        NaN are dropped, including returns that were infinite.
    """
    close_p = df["real_close"].resample(rule).last()
    # A zero previous close makes pct_change return +/-inf, which dropna() would
    # keep and OLS cannot handle; treat those returns as missing instead.
    ret_p = close_p.pct_change().replace([np.inf, -np.inf], np.nan)
    out = pd.DataFrame({"ret": ret_p})
    if "volume" in df.columns:
        out["volume"] = df["volume"].resample(rule).sum()
    return out.dropna()

def ticker_from_path(path):
    """Return the file-name prefix that precedes the first "_cleaned".

    A file name without that marker is returned unchanged.
    """
    head, *_ = os.path.basename(path).split("_cleaned", 1)
    return head

def collect_train_test_pairs(data_dir, tickers=None):
    """Build the ticker -> (train CSV, test CSV) mapping for ``data_dir``.

    Args:
        data_dir: Directory holding "*_cleaned_2008_2020.csv" (train) and
            "*_cleaned_2020_onward.csv" (test) files.
        tickers: Optional collection of tickers to keep; falsy keeps all.

    Returns:
        Dict in sorted ticker order containing only tickers that have both a
        train and a test file.
    """
    train_glob = os.path.join(data_dir, "*_cleaned_2008_2020.csv")
    test_glob = os.path.join(data_dir, "*_cleaned_2020_onward.csv")
    train_map = {ticker_from_path(p): p for p in glob.glob(train_glob)}
    test_map = {ticker_from_path(p): p for p in glob.glob(test_glob)}

    shared = sorted(train_map.keys() & test_map.keys())
    return {
        tkr: (train_map[tkr], test_map[tkr])
        for tkr in shared
        if not (tickers and tkr not in tickers)
    }

def make_design(df_train, df_test, freq_tag):
    """Build OLS design matrices for train and test with identical columns.

    Regressors: a constant, a time index ``t`` that continues from train into
    test, ``vol_z`` (volume z-scored with the *training* mean and standard
    deviation, only if both frames have ``volume``) and seasonal dummies.

    The dummy encoding is derived from the slots present in the training data
    and applied unchanged to the test data, so both matrices share the same
    reference category and column order. A test slot never seen in training
    gets all-zero dummies, i.e. it is predicted at the reference level.

    Args:
        df_train: Frame with a datetime index and a ``ret`` column.
        df_test: Frame with a datetime index (same columns as ``df_train``).
        freq_tag: "monthly" (month dummies ``m_*``), "weekly" (ISO-week dummies
            ``w_*``); any other value adds no seasonal dummies.

    Returns:
        Tuple ``(X_tr, y_tr, X_te)``.
    """
    df_tr = df_train.copy()
    df_te = df_test.copy()
    n_tr = len(df_tr)
    df_tr["t"] = np.arange(n_tr, dtype=float)
    df_te["t"] = np.arange(n_tr, n_tr + len(df_te), dtype=float)

    base_cols = ["t"]
    if "volume" in df_tr.columns and "volume" in df_te.columns:
        mu = df_tr["volume"].mean()
        sd = df_tr["volume"].std()
        if sd == 0 or np.isnan(sd):
            # No usable spread in training volume: neutral regressor.
            df_tr["vol_z"] = 0.0
            df_te["vol_z"] = 0.0
        else:
            df_tr["vol_z"] = (df_tr["volume"] - mu) / sd
            df_te["vol_z"] = (df_te["volume"] - mu) / sd
        base_cols.append("vol_z")

    if freq_tag == "monthly":
        prefix = "m"
        slot_tr = np.asarray(df_tr.index.month, dtype=int)
        slot_te = np.asarray(df_te.index.month, dtype=int)
    elif freq_tag == "weekly":
        prefix = "w"
        slot_tr = df_tr.index.isocalendar().week.astype(int).to_numpy()
        slot_te = df_te.index.isocalendar().week.astype(int).to_numpy()
    else:
        prefix = None

    if prefix is None:
        d_tr = pd.DataFrame(index=df_tr.index)
        d_te = pd.DataFrame(index=df_te.index)
    else:
        # One shared category list (from train) so drop_first removes the SAME
        # reference slot in both frames; encoding each frame on its own could
        # drop different slots and silently mis-encode the test rows.
        cats = np.unique(slot_tr)
        d_tr = pd.get_dummies(
            pd.Series(pd.Categorical(slot_tr, categories=cats), index=df_tr.index),
            prefix=prefix, drop_first=True,
        )
        d_te = pd.get_dummies(
            pd.Series(pd.Categorical(slot_te, categories=cats), index=df_te.index),
            prefix=prefix, drop_first=True,
        )

    X_tr = sm.add_constant(pd.concat([df_tr[base_cols], d_tr], axis=1).astype(float), has_constant="add")
    X_te = sm.add_constant(pd.concat([df_te[base_cols], d_te], axis=1).astype(float), has_constant="add")
    y_tr = df_tr["ret"].astype(float)

    return X_tr, y_tr, X_te

def fit_predict_train_ols(X_tr, y_tr, X_te):
    """Fit OLS on the training rows with a known target and predict the test rows.

    Args:
        X_tr: Training design matrix.
        y_tr: Training target; rows where it is NaN are excluded from the fit.
        X_te: Test design matrix with the same columns as ``X_tr``.

    Returns:
        Tuple ``(yhat_te, res)``: predictions on the index of ``X_te`` and the
        fitted results object. When there are not more usable observations than
        columns, the predictions are all NaN and ``res`` is None.
    """
    usable = y_tr.notna()
    X_fit, y_fit = X_tr.loc[usable], y_tr.loc[usable]
    n_columns = X_fit.shape[1]
    if len(y_fit) <= n_columns:
        # Too few observations for the number of regressors: skip the fit.
        return pd.Series(index=X_te.index, dtype=float), None
    res = sm.OLS(y_fit, X_fit).fit()
    return pd.Series(res.predict(X_te), index=X_te.index), res

def plot_test_actual_vs_pred(tkr, freq_tag, ret_test, yhat_te, res=None):
    """Overlay the OLS prediction on the actual test-period returns.

    Args:
        tkr: Ticker used in the title.
        freq_tag: Frequency label used in the title and y-axis label.
        ret_test: Actual test-period returns.
        yhat_te: Predicted returns on the test timestamps.
        res: Fitted results object; accepted for the caller's convenience and
            not used by the plot.
    """
    fig, ax = plt.subplots(figsize=(12,6))
    ax.plot(ret_test.index, ret_test.values, marker='.', linewidth=1.0, label="actual 2020–onward")
    ax.plot(yhat_te.index, yhat_te.values, linewidth=2.0, label="predicted (train OLS)")
    ax.axhline(0.0, linewidth=0.8)
    ax.set_title(f"{tkr}: {freq_tag} returns — OLS train(2008–2020) → predict(2020+)")
    ax.set_xlabel("date")
    ax.set_ylabel(f"{freq_tag} return")
    ax.grid(True)
    ax.legend()
    first, last = ret_test.index.min(), ret_test.index.max()
    # With a single test point both limits coincide; keep autoscaling rather
    # than requesting a zero-width axis.
    if first < last:
        ax.set_xlim(first, last)
    plt.show()
    # Called once per ticker and frequency: release the figure explicitly.
    plt.close(fig)

# (label, pandas resample rule) for every horizon that gets modelled.
# Note: the "ME" / "YE-DEC" aliases need a recent pandas (2.2 or newer).
freqs = [("weekly","W-FRI"), ("monthly","ME"), ("yearly","YE-DEC")]

pairs = collect_train_test_pairs(data_dir, tickers=tickers_to_plot)
print("paired tickers:", len(pairs))

for tkr, (train_path, test_path) in pairs.items():
    df_tr_raw = load_clean_csv(train_path)
    df_te_raw = load_clean_csv(test_path)
    # Does not depend on the frequency, so check once per ticker.
    if "real_close" not in df_tr_raw.columns or "real_close" not in df_te_raw.columns:
        continue
    for freq_tag, rule in freqs:
        tr = resample_returns_and_volume(df_tr_raw, rule)
        te = resample_returns_and_volume(df_te_raw, rule)
        if te.empty or tr.empty:
            continue
        X_tr, y_tr, X_te = make_design(tr, te, freq_tag)
        yhat_te, res = fit_predict_train_ols(X_tr, y_tr, X_te)
        # When the fit is skipped the helper returns res=None together with an
        # all-NaN (not empty) prediction, so test `res` to skip those plots.
        if res is None or yhat_te.empty:
            continue
        plot_test_actual_vs_pred(tkr, freq_tag, te["ret"], yhat_te, res)


In [None]:
# Fit 2008–2020 returns with OLS (centered time trend + seasonal dummies [+ z-scored volume]),
# drop ±3σ train outliers, predict 2020+ and plot actual vs predicted.

sigma = 3.0  # NOTE(review): not referenced by the code visible in this cell; flag_outliers_3sigma uses its own threshold of 3
tickers_to_plot = None  # None (or any falsy value) keeps every paired ticker

def load_clean_csv(path):
    """Read one cleaned CSV into a frame indexed by sorted, tz-naive timestamps.

    The date index is taken from the ``Date`` column when present. Otherwise the
    first column is used, unless that column is unnamed and a ``date`` column
    exists, in which case ``date`` becomes the index.

    Args:
        path: Location of the CSV file.

    Returns:
        pandas.DataFrame sorted ascending by its (tz-naive) datetime index.
    """
    try:
        df = pd.read_csv(path, parse_dates=["Date"]).set_index("Date")
    except ValueError:
        # read_csv raises ValueError when the "Date" column is missing.
        df = pd.read_csv(path, index_col=0, parse_dates=[0])
        if "date" in df.columns and df.index.name is None:
            # The unnamed first column is not the date (e.g. a saved row number).
            # Switch to "date" BEFORE normalising so the final index is the one
            # that gets tz-stripped and sorted.
            df = df.set_index("date")
    df.index = pd.to_datetime(df.index).tz_localize(None)
    return df.sort_index()

def safe_pct_change(s):
    """Percentage change of ``s`` with infinite results replaced by NaN.

    Infinite values arise when the previous observation is zero.
    """
    infinities = [np.inf, -np.inf]
    return s.pct_change().replace(infinities, np.nan)

def flag_outliers_3sigma(series, n_sigma=3.0):
    """Flag values further than ``n_sigma`` sample standard deviations from the mean.

    Args:
        series: Values to test; non-numeric entries are coerced to NaN.
        n_sigma: Threshold on the absolute z-score. Defaults to 3, the rule the
            function is named after, so existing callers are unaffected.

    Returns:
        Tuple ``(mu, sd, z, mask)``: mean, sample standard deviation (ddof=1),
        z-score Series and boolean outlier mask. When ``sd`` is zero or NaN the
        z-scores are all NaN and the mask is all False.
    """
    s = pd.to_numeric(series, errors="coerce")
    mu = s.mean()
    sd = s.std(ddof=1)
    if sd == 0 or np.isnan(sd):
        z = pd.Series(np.nan, index=s.index)
        mask = pd.Series(False, index=s.index)
        return mu, sd, z, mask
    z = (s - mu) / sd
    # NaN z-scores compare as False, so missing values are never flagged.
    mask = z.abs() > n_sigma
    return mu, sd, z, mask

def ticker_from_path(path):
    """Return the part of the file name that comes before the first "_cleaned".

    If the marker does not occur, the complete file name is returned.
    """
    name = os.path.basename(path)
    cut = name.find("_cleaned")
    return name if cut < 0 else name[:cut]

def collect_train_test_pairs(data_dir, tickers=None):
    """Find, per ticker, the matching train and test CSV in ``data_dir``.

    Args:
        data_dir: Directory holding "*_cleaned_2008_2020.csv" (train) and
            "*_cleaned_2020_onward.csv" (test) files.
        tickers: Optional collection of tickers to keep; falsy keeps all.

    Returns:
        Dict ticker -> (train_path, test_path), inserted in sorted ticker
        order, for tickers that appear in both file sets.
    """
    def _scan(suffix):
        # ticker -> path for the files whose name ends with the given suffix.
        found = glob.glob(os.path.join(data_dir, "*" + suffix))
        return {ticker_from_path(p): p for p in found}

    train_map = _scan("_cleaned_2008_2020.csv")
    test_map = _scan("_cleaned_2020_onward.csv")

    pairs = {}
    for tkr in sorted(set(train_map).intersection(test_map)):
        wanted = (not tickers) or (tkr in tickers)
        if wanted:
            pairs[tkr] = (train_map[tkr], test_map[tkr])
    return pairs

def resample_returns_and_volume(df, rule):
    """Resample to period returns (and summed volume) ready for regression.

    Args:
        df: Frame with a datetime index and a ``real_close`` column; a
            ``volume`` column is used when present.
        rule: pandas resample rule (for example "W-FRI").

    Returns:
        DataFrame with column ``ret`` (inf-free change between consecutive
        period-end closes) and, if available, ``volume`` (sum per period); rows
        containing NaN are dropped.
    """
    period_close = df["real_close"].resample(rule).last()
    columns = {"ret": safe_pct_change(period_close)}
    if "volume" in df.columns:
        # Same rule as the closes, so both columns share one period index.
        columns["volume"] = df["volume"].resample(rule).sum()
    return pd.DataFrame(columns).dropna()

def make_design(df_train, df_test, freq_tag):
    """Build OLS design matrices for train and test with identical columns.

    Regressors: a constant, a time index ``t`` centered on the training mean
    (so the constant is the level at the middle of the training period),
    ``vol_z`` (volume z-scored with the *training* mean and sample standard
    deviation, only if both frames have ``volume``) and seasonal dummies.

    The dummy encoding is derived from the slots present in the training data
    and applied unchanged to the test data, so both matrices share the same
    reference category and column order. A test slot never seen in training
    gets all-zero dummies, i.e. it is predicted at the reference level.

    Args:
        df_train: Frame with a datetime index and a ``ret`` column.
        df_test: Frame with a datetime index (same columns as ``df_train``).
        freq_tag: "monthly" (month dummies ``m_*``), "weekly" (ISO-week dummies
            ``w_*``); any other value adds no seasonal dummies.

    Returns:
        Tuple ``(X_tr, y_tr, X_te)``.
    """
    df_tr = df_train.copy()
    df_te = df_test.copy()

    n_tr = len(df_tr)
    df_tr["t"] = np.arange(n_tr, dtype=float)
    df_te["t"] = np.arange(n_tr, n_tr + len(df_te), dtype=float)
    # Center t on train so the constant is the mean-period level.
    t_mu = df_tr["t"].mean()
    df_tr["t"] -= t_mu
    df_te["t"] -= t_mu

    base_cols = ["t"]
    if "volume" in df_tr.columns and "volume" in df_te.columns:
        mu = df_tr["volume"].mean()
        sd = df_tr["volume"].std(ddof=1)
        if sd == 0 or np.isnan(sd):
            # No usable spread in training volume: neutral regressor.
            df_tr["vol_z"] = 0.0
            df_te["vol_z"] = 0.0
        else:
            df_tr["vol_z"] = (df_tr["volume"] - mu) / sd
            df_te["vol_z"] = (df_te["volume"] - mu) / sd
        base_cols.append("vol_z")

    if freq_tag == "monthly":
        prefix = "m"
        slot_tr = np.asarray(df_tr.index.month, dtype=int)
        slot_te = np.asarray(df_te.index.month, dtype=int)
    elif freq_tag == "weekly":
        prefix = "w"
        slot_tr = df_tr.index.isocalendar().week.astype(int).to_numpy()
        slot_te = df_te.index.isocalendar().week.astype(int).to_numpy()
    else:
        prefix = None

    if prefix is None:
        d_tr = pd.DataFrame(index=df_tr.index)
        d_te = pd.DataFrame(index=df_te.index)
    else:
        # One shared category list (from train) so drop_first removes the SAME
        # reference slot in both frames; encoding each frame on its own could
        # drop different slots and silently mis-encode the test rows.
        cats = np.unique(slot_tr)
        d_tr = pd.get_dummies(
            pd.Series(pd.Categorical(slot_tr, categories=cats), index=df_tr.index),
            prefix=prefix, drop_first=True,
        )
        d_te = pd.get_dummies(
            pd.Series(pd.Categorical(slot_te, categories=cats), index=df_te.index),
            prefix=prefix, drop_first=True,
        )

    X_tr = pd.concat([df_tr[base_cols], d_tr], axis=1).astype(float)
    X_te = pd.concat([df_te[base_cols], d_te], axis=1).astype(float)
    X_tr = sm.add_constant(X_tr, has_constant="add")
    X_te = sm.add_constant(X_te, has_constant="add")
    y_tr = df_tr["ret"].astype(float)

    return X_tr, y_tr, X_te

def fit_predict_ols_train_only(X_tr, y_tr, X_te):
    """Fit OLS on the training data without its outliers and predict the test rows.

    Training rows are excluded when the target is NaN or flagged by
    ``flag_outliers_3sigma``.

    Args:
        X_tr: Training design matrix.
        y_tr: Training target.
        X_te: Test design matrix with the same columns as ``X_tr``.

    Returns:
        Tuple ``(yhat, res)``: predictions on the index of ``X_te`` and the
        fitted results object. If at most ``columns + 1`` rows remain, the
        predictions are all NaN and ``res`` is None.
    """
    # Only the mask of the 4-tuple is needed here.
    _, _, _, is_outlier = flag_outliers_3sigma(y_tr)
    keep = y_tr.notna() & ~is_outlier
    X_fit, y_fit = X_tr.loc[keep], y_tr.loc[keep]
    min_rows = X_fit.shape[1] + 1
    if len(y_fit) <= min_rows:
        return pd.Series(index=X_te.index, dtype=float), None
    res = sm.OLS(y_fit, X_fit).fit()
    return pd.Series(res.predict(X_te), index=X_te.index), res

def plot_test_overlay(tkr, freq_tag, ret_test, yhat_te):
    """Overlay the train-fitted OLS prediction on the actual test-period returns.

    Args:
        tkr: Ticker used in the title.
        freq_tag: Frequency label used in the title and y-axis label.
        ret_test: Actual test-period returns.
        yhat_te: Predicted returns on the test timestamps.
    """
    fig, ax = plt.subplots(figsize=(12,6))
    ax.plot(ret_test.index, ret_test.values, marker='.', linewidth=1.0, label="actual 2020–onward")
    ax.plot(yhat_te.index, yhat_te.values, linewidth=2.0, label="predicted OLS (train 2008–2020)")
    ax.axhline(0.0, linewidth=0.8)
    ax.set_title(f"{tkr}: {freq_tag} returns — train OLS vs test actual")
    ax.set_xlabel("date")
    ax.set_ylabel(f"{freq_tag} return")
    ax.grid(True)
    ax.legend()
    first, last = ret_test.index.min(), ret_test.index.max()
    # With a single test point both limits coincide; keep autoscaling rather
    # than requesting a zero-width axis.
    if first < last:
        ax.set_xlim(first, last)
    plt.show()
    # Called once per ticker and frequency: release the figure explicitly.
    plt.close(fig)

# (label, pandas resample rule) for every horizon that gets modelled.
# Note: the "ME" / "YE-DEC" aliases need a recent pandas (2.2 or newer).
freqs = [("weekly","W-FRI"), ("monthly","ME"), ("yearly","YE-DEC")]

pairs = collect_train_test_pairs(data_dir, tickers=tickers_to_plot)
print("paired tickers:", len(pairs))

for tkr, (train_path, test_path) in pairs.items():
    df_tr = load_clean_csv(train_path)
    df_te = load_clean_csv(test_path)
    if "real_close" not in df_tr.columns or "real_close" not in df_te.columns:
        continue
    for freq_tag, rule in freqs:
        tr = resample_returns_and_volume(df_tr, rule)
        te = resample_returns_and_volume(df_te, rule)
        if tr.empty or te.empty:
            continue
        X_tr, y_tr, X_te = make_design(tr, te, freq_tag)
        yhat_te, res = fit_predict_ols_train_only(X_tr, y_tr, X_te)
        # When the fit is skipped the helper returns res=None together with an
        # all-NaN (not empty) prediction, so test `res` to skip those plots.
        if res is None or yhat_te.empty:
            continue
        plot_test_overlay(tkr, freq_tag, te["ret"], yhat_te)
