# Run anomaly detection

In [1]:
# 1. Setup
# Install the parent directory as an editable package into the kernel's
# environment, with pip's output kept quiet.
%pip install --quiet --editable ..
# Load the autoreload extension and reload modules before each cell executes,
# so edits to the installed package are picked up without a kernel restart.
%load_ext autoreload
%autoreload 2

Note: you may need to restart the kernel to use updated packages.


In [4]:
# 2. Load default config
from pathlib import Path

import pandas as pd

from metro_disruptions_intelligence.detect.streaming_iforest import StreamingIForestDetector

# The project root is taken to be the parent of the current working directory.
project_root = Path.cwd().parent

# Default detector configuration, located under <project_root>/configs.
config_path = project_root.joinpath("configs", "iforest_default.yaml")

In [None]:
# 3. Choose processed features root
# The path is relative, so it resolves against the kernel's working directory.
processed_root = Path("data/stations_features_time_series/2023/05/01")
# Create the directory (and any missing parents); a no-op when it already exists.
processed_root.mkdir(exist_ok=True, parents=True)

In [None]:
# 4. Stream 2 hours of synthetic data
import json
from datetime import datetime, timedelta, timezone

import matplotlib.pyplot as plt
import numpy as np

# Anchor the stream in UTC. A naive datetime makes .timestamp() depend on the
# machine's local timezone, which would shift every epoch value relative to
# the UTC window (2023-05-01T00:00:00Z onward) passed to the tuning CLI.
start = datetime(2023, 5, 1, 0, 0, tzinfo=timezone.utc)

# Seeded generator so the synthetic feature values are identical on every run.
rng = np.random.default_rng(42)

rows = []
det = StreamingIForestDetector(config_path)
for i in range(120):  # one single-row snapshot per minute for two hours
    ts = int((start + timedelta(minutes=i)).timestamp())
    df = pd.DataFrame({
        "snapshot_timestamp": [ts],
        "stop_id": ["100"],
        "direction_id": [0],
        "central_flag": [1],
        "congestion_level": [rng.random()],
        "occupancy": [rng.random()],
        "node_degree": [2],
        "hub_flag": [0],
    })
    # Score the snapshot and feed it to the detector in the same call;
    # explain=True is requested so later cells can read `shap_top3_json`.
    out = det.score_and_update(df, explain=True)
    rows.append(out)
# Combine the per-snapshot outputs into one frame with a fresh 0..n-1 index.
scores = pd.concat(rows, ignore_index=True)

In [None]:
# 5. Histogram of anomaly_score
# Distribution of the scores collected from the streamed snapshots, 20 bins.
fig, ax = plt.subplots()
ax.hist(scores["anomaly_score"], bins=20)
ax.set_xlabel("score")
ax.set_ylabel("count");

In [None]:
# 6. Top-10 anomalies with SHAP explanations
# Keep the ten highest-scoring rows and decode each `shap_top3_json` string
# into a Python object stored in a new `shap_top3` column.
top10 = scores.nlargest(10, "anomaly_score").assign(
    shap_top3=lambda frame: frame["shap_top3_json"].apply(json.loads)
)
top10.loc[:, ["ts", "stop_id", "anomaly_score", "shap_top3"]]

In [None]:
# 7. Mean anomaly_score over time
# `ts` is converted from epoch seconds (unit="s") to pandas timestamps.
scores["dt"] = pd.to_datetime(scores["ts"], unit="s")
# Average the scores of all rows that share the same timestamp.
mean_series = scores.groupby("dt")["anomaly_score"].mean()
# Label both axes so the figure can be read on its own, like the histogram.
ax = mean_series.plot()
ax.set_xlabel("time")
ax.set_ylabel("mean anomaly_score");

In [None]:
# 8. Tune hyper-parameters via CLI
# Pass `processed_root` (defined in step 3) through IPython's {expr}
# interpolation so the CLI reads the directory chosen there instead of a
# second hard-coded copy of the same path.
!poetry run mdi tune-iforest --processed-root {processed_root} --start 2023-05-01T00:00:00Z --end 2023-05-01T02:00:00Z
# Show the contents of iforest_best.yaml from the working directory.
# NOTE(review): presumably written by the tune command above; confirm.
print(Path("iforest_best.yaml").read_text())