In [None]:
import pandas as pd

# Read the feature table from disk
df = pd.read_csv("features_all_models.csv")

# Report the table dimensions
print(f"Loaded {df.shape[0]:,} rows and {df.shape[1]} columns.\n")

# Columns 'inst', 'time' and 'true_regime' are skipped (if present);
# every other column is treated as a feature, in its original order.
exclude_cols = {"inst", "time", "true_regime"}
feature_cols = df.columns[~df.columns.isin(exclude_cols)].tolist()

# For each feature, list how often every distinct value occurs (NaN included)
for col in feature_cols:
    print(f"\n📊 Value frequencies for: `{col}`")
    frequencies = df[col].value_counts(dropna=False)
    print(frequencies)


In [None]:
import numpy as np
import pandas as pd
from sklearn.metrics import accuracy_score
from precision_labeller import plot_all_regimes_long

def compute_zscore_regimes(prices: pd.Series,
                           window: int,
                           threshold: float,
                           drop_last: int = 10):
    """Classify each observation into a regime from its rolling z-score.

    The z-score is ``(price - rolling mean) / rolling std`` over the trailing
    ``window`` observations. Each point is labelled:

    * ``2`` when ``z > threshold``
    * ``0`` when ``z < -threshold``
    * ``1`` otherwise

    Parameters
    ----------
    prices : pd.Series
        Price series for a single instrument.
    window : int
        Rolling window length; it is also used as ``min_periods``, so the
        first ``window - 1`` points have no z-score.
    threshold : float
        Z-score magnitude separating regime 1 from regimes 0 and 2.
    drop_last : int, default 10
        Number of trailing observations removed before returning. If it is
        at least ``len(prices)``, nothing is kept.

    Returns
    -------
    regimes : np.ndarray
        Labels for the kept observations whose z-score is not NaN.
    valid : pd.Series
        Boolean mask of length ``max(len(prices) - drop_last, 0)``; True where
        the z-score is not NaN. ``regimes`` has one entry per True value.

    Notes
    -----
    A zero rolling std produces an infinite or NaN z-score. NaN rows are
    dropped by ``valid``; infinite ones are kept and labelled by their sign.
    """
    rolling = prices.rolling(window=window, min_periods=window)
    z = (prices - rolling.mean()) / rolling.std()

    # NaN z-scores fail both comparisons and land in regime 1 here; they are
    # removed by the `valid` mask below.
    regs_full = np.where(z > threshold, 2,
                 np.where(z < -threshold, 0, 1))

    # Clamp at zero: a negative stop index would otherwise slice from the end
    # and silently keep a prefix when drop_last exceeds the series length.
    n_keep = max(len(prices) - drop_last, 0)
    regs_trim = regs_full[:n_keep]
    # .iloc keeps the trim positional regardless of the index type of `prices`.
    z_trim = z.iloc[:n_keep]

    valid = z_trim.notna()
    return regs_trim[valid.to_numpy()], valid

# ─── Parameters ───────────────────────────────
price_file = "prices.txt"
window = 90        # rolling window length passed to compute_zscore_regimes
threshold = 0.5    # z-score cut-off passed to compute_zscore_regimes
drop_last = 10     # trailing observations excluded from scoring

# Load full price matrix (whitespace-separated, no header row).
# The loop below treats each column as one instrument.
df = pd.read_csv(price_file, sep=r"\s+", header=None)
T = df.shape[0]
n_inst = df.shape[1]

# Running totals pooled over all instruments
total_counts = {0: 0, 1: 0, 2: 0}
total_correct = 0
total_total = 0

for inst in range(n_inst):
    prices = df.iloc[:, inst]

    # Reference labels from the project labeller, trimmed like the predictions.
    # NOTE(review): the return type of plot_all_regimes_long is not visible
    # here; np.asarray lets the boolean mask below work for a list, array or
    # Series — confirm it returns one label per time step.
    true_regs = np.asarray(
        plot_all_regimes_long(end_point=T, plot_graph=False, inst=inst)
    )[:T - drop_last]

    pred, valid = compute_zscore_regimes(prices, window, threshold, drop_last)
    # Keep only the time steps for which a prediction exists.
    true_trim = true_regs[np.asarray(valid)]

    # Count predicted regime frequencies
    for r in total_counts:
        total_counts[r] += int((pred == r).sum())

    total_correct += int((pred == true_trim).sum())
    total_total += len(true_trim)

# Avoid ZeroDivisionError when nothing could be scored (no instruments, or
# every series shorter than the rolling window).
overall_accuracy = total_correct / total_total if total_total else float("nan")

# ─── Final Output ─────────────────────────────
# The instrument count is taken from the data rather than hard-coded.
print(f"✅ Z-score Regime Classifier Summary (All {n_inst} Instruments)")
print(f"Params → window={window}, threshold={threshold}")
print(f"Overall Accuracy: {overall_accuracy:.4f}")
print("\n📊 Total Regime Frequencies Across All Instruments:")
for r in [0, 1, 2]:
    print(f"  Regime {r}: {total_counts[r]}")
