In [1]:
# Imports and Setup
import os
import sys
from pathlib import Path
import numpy as np

sys.path.insert(0, str(Path.cwd().parent / "src"))

from utils import config
from utils.io import load_cleaned, load_method_ready, results_dir, save_json, append_csv_row
from utils.evaluation import compute_binary_metrics
from utils.plotting import plot_signal, plot_scores

In [2]:
# Baseline Methods

def z_score_predict(train_z, test_z, q):
    """Flag test points whose absolute value exceeds a train-derived cutoff.

    Both inputs are flattened to 1-D. A point's anomaly score is its absolute
    value; the cutoff is the ``q``-quantile of the train scores.

    Args:
        train_z: Array-like of train values (presumably already z-normalized,
            judging by the name -- this function does not normalize).
        test_z: Array-like of test values, scored against the train cutoff.
        q: Quantile passed to ``np.quantile``.

    Returns:
        tuple: ``(y_pred, test_scores, thr)`` -- 0/1 integer flags for the
        test points, their scores, and the cutoff as a Python float.
    """
    reference_scores = np.abs(np.ravel(np.asarray(train_z)))
    candidate_scores = np.abs(np.ravel(np.asarray(test_z)))

    # The cutoff is derived from the train split only.
    cutoff = float(np.quantile(reference_scores, q))

    # Strictly greater than the cutoff counts as anomalous.
    flags = np.greater(candidate_scores, cutoff).astype(int)
    return flags, candidate_scores, cutoff


def _moving_average(x, window):
    # Simple moving average (same length)
    x = np.asarray(x, dtype=float).reshape(-1)
    if window <= 1:
        return x.copy()
    kernel = np.ones(window, dtype=float) / window
    return np.convolve(x, kernel, mode="same")


def moving_average_predict(train_raw, test_raw, window, q):
    """Flag test points that deviate strongly from their moving average.

    The anomaly score is ``|x - MA(x)|`` where ``MA`` is computed by
    ``_moving_average``. The cutoff is the ``q``-quantile of the train scores.

    Args:
        train_raw: Array-like train signal; flattened to 1-D.
        test_raw: Array-like test signal; flattened to 1-D.
        window: Moving-average window length, forwarded to ``_moving_average``.
        q: Quantile passed to ``np.quantile``.

    Returns:
        tuple: ``(y_pred, test_scores, thr)`` -- 0/1 integer flags for the
        test points, their 1-D scores, and the cutoff as a Python float.
    """
    # Flatten first, as the other baselines do. `_moving_average` always
    # returns a 1-D array, so subtracting it from an (n, 1) column vector
    # would broadcast to an (n, n) matrix instead of n residuals.
    x_train = np.asarray(train_raw, dtype=float).reshape(-1)
    x_test = np.asarray(test_raw, dtype=float).reshape(-1)

    train_scores = np.abs(x_train - _moving_average(x_train, window))
    test_scores = np.abs(x_test - _moving_average(x_test, window))

    # The cutoff is derived from the train split only.
    thr = float(np.quantile(train_scores, q))
    y_pred = (test_scores > thr).astype(int)
    return y_pred, test_scores, thr


def ewma_predict(train_raw, test_raw, span, q):
    """Flag test points that deviate strongly from an exponentially weighted mean.

    The anomaly score is ``|x - EWMA(x)|`` with smoothing factor
    ``alpha = 2 / (span + 1)``. The cutoff is the ``q``-quantile of the train
    scores.

    Args:
        train_raw: Array-like train signal; flattened to 1-D float.
        test_raw: Array-like test signal; flattened to 1-D float.
        span: Span used to derive the smoothing factor.
        q: Quantile passed to ``np.quantile``.

    Returns:
        tuple: ``(y_pred, test_scores, thr)`` -- 0/1 integer flags for the
        test points, their scores, and the cutoff as a Python float.
    """
    train_series = np.asarray(train_raw, dtype=float).reshape(-1)
    test_series = np.asarray(test_raw, dtype=float).reshape(-1)

    alpha = 2.0 / (span + 1.0)

    def _smooth(series, seed):
        # Recursive EWMA: state <- alpha * value + (1 - alpha) * state,
        # starting from `seed` and emitting the updated state at every step.
        smoothed = np.empty_like(series, dtype=float)
        state = float(seed)
        for idx in range(series.shape[0]):
            state = alpha * float(series[idx]) + (1.0 - alpha) * state
            smoothed[idx] = state
        return smoothed

    # The train filter is seeded with its own first sample; the test filter
    # is seeded with the last raw train sample.
    train_smoothed = _smooth(train_series, seed=train_series[0])
    test_smoothed = _smooth(test_series, seed=train_series[-1])

    train_scores = np.abs(train_series - train_smoothed)
    test_scores = np.abs(test_series - test_smoothed)

    cutoff = float(np.quantile(train_scores, q))
    flags = np.greater(test_scores, cutoff).astype(int)
    return flags, test_scores, cutoff

In [3]:
# Main Execution Loop
#
# For every dataset in config.DATASETS, run the three statistical baselines
# (z-score, moving average, EWMA), then for each one:
#   * append a metrics row to a shared CSV,
#   * save metrics (JSON), predictions and scores (.npy) per dataset,
#   * save zoomed signal/score plots around the labelled anomaly.

# Output CSV (reset each run so rows from earlier runs do not accumulate)
base_out = results_dir("statistical_baselines")
csv_path = base_out / "statistical_results.csv"
if csv_path.exists():
    os.remove(csv_path)

for dataset_name in config.DATASETS:
    # Load labels + split index; labels after train_end form the test split
    _, labels, meta = load_cleaned(dataset_name)
    train_end = int(meta["train_end"])
    y_test = labels[train_end:]

    # Thresholding + zoom window.
    # Anomaly bounds are shifted by train_end to express them in test coordinates.
    q = config.BASELINE_THR_QUANTILE
    margin = config.PLOT_ZOOM_MARGIN
    a0 = int(meta["anomaly_start"]) - train_end
    a1 = int(meta["anomaly_end"]) - train_end

    # Load method-ready arrays
    mr = load_method_ready(dataset_name)
    train_raw = mr["train_raw"]
    test_raw = mr["test_raw"]
    train_z = mr["train_z"]
    test_z = mr["test_z"]

    # Zoom slice (test coordinates), clamped to the test array bounds
    z0 = max(0, a0 - margin)
    z1 = min(len(test_raw), a1 + margin)

    # Per-dataset output folder
    out_dir = results_dir("statistical_baselines", dataset_name)

    # One overview plot per dataset
    plot_signal(
        test_raw,
        true_labels=y_test,
        title=f"{dataset_name} - Test (overview)",
        save_path=out_dir / "overview_signal.png",
        max_points=5000,
    )

    # Baselines to evaluate, in CSV row order:
    #   (key used in file names and the "method" column, plot label, deferred call).
    # The calls are wrapped in lambdas so each predictor (and its config lookup)
    # only runs when its turn comes, exactly as in a sequential script.
    baselines = [
        ("z_score", "Z-score",
         lambda: z_score_predict(train_z, test_z, q)),
        ("moving_average", "Moving Average",
         lambda: moving_average_predict(train_raw, test_raw, config.MOVING_AVG_WINDOW, q)),
        ("ewma", "EWMA",
         lambda: ewma_predict(train_raw, test_raw, config.EWMA_SPAN, q)),
    ]

    for method_key, method_label, run_method in baselines:
        y_pred, scores, thr = run_method()
        metrics = compute_binary_metrics(y_test, y_pred)

        # Persist metrics (shared CSV + per-dataset JSON) and raw outputs
        row = {"dataset": dataset_name, "method": method_key, "threshold": thr, "q": q, **metrics}
        append_csv_row(csv_path, row)
        save_json(out_dir / f"{method_key}_metrics.json", row)
        np.save(out_dir / f"{method_key}_pred.npy", y_pred)
        np.save(out_dir / f"{method_key}_scores.npy", scores)

        # Zoomed plots; x_offset=z0 is passed so the plot helpers know where
        # the slice starts within the test split
        plot_signal(
            test_raw[z0:z1], y_test[z0:z1], y_pred[z0:z1],
            title=f"{dataset_name} - {method_label} (zoom)",
            save_path=out_dir / f"{method_key}_signal_zoom.png",
            x_offset=z0,
        )
        plot_scores(
            scores[z0:z1],
            threshold=thr,
            true_labels=y_test[z0:z1],
            title=f"{dataset_name} - {method_label} scores (zoom)",
            save_path=out_dir / f"{method_key}_scores_zoom.png",
            x_offset=z0,
        )