In [46]:
"""
LPA-LSTM: Adaptive Window Estimation for LSTM

This notebook implements the Local Parametric Approach (LPA) for LSTM-based 
change point detection, following Spokoiny (1998) and Cizek et al. (2009).

Pipeline:
1. Compute raw critical values via Monte Carlo simulation (residual bootstrap)
2. Apply Spokoiny-style sqrt(log) adjustment to control false positives
3. Run LPA detection using adjusted critical values
4. Save all results (raw CVs, adjusted CVs, detection results)

The adjustment ensures smaller windows have higher critical values to control
false positive rates across scales (Cizek 2009 eq. 3.8: z_k/n_k = C + D*log(n_k) with D < 0).

Output structure:
    LPA/Geometric/Jump_{jump}_N0_{n_0}/lambda{penalty}/
        - critical_values_raw.csv
        - critical_values_adjusted.csv
        - detection_results.csv
"""

import os
import math
import time
from typing import Dict, Optional

import numpy as np
import pandas as pd

import torch
import torch.nn as nn
from torch.utils.data import TensorDataset, DataLoader

In [47]:
# =============================================================================
# Configuration
# =============================================================================

DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# LSTM hyperparameters
LSTM_SEQ_LEN = 3
LSTM_HIDDEN = 16
LSTM_LAYERS = 1
LSTM_EPOCHS = 15
LSTM_BATCH = 64
LSTM_LR = 1e-2
LSTM_DROPOUT = 0.0
MIN_SEG = 20  # Minimum segment size for splits


# =============================================================================
# LSTM Model
# =============================================================================

class LSTMRegressor(nn.Module):
    """Simple LSTM for time series regression."""
    
    def __init__(self, input_size: int = 1, hidden: int = 64,
                 layers: int = 1, dropout: float = 0.0):
        super().__init__()
        self.lstm = nn.LSTM(input_size, hidden, num_layers=layers,
                            batch_first=True, dropout=dropout if layers > 1 else 0.0)
        self.fc = nn.Linear(hidden, 1)

    def forward(self, x):
        out, _ = self.lstm(x)
        return self.fc(out[:, -1, :]).squeeze(-1)


# =============================================================================
# Sequence Building and LSTM Training
# =============================================================================

def build_sequences(y: np.ndarray, seq_len: int, start_abs_idx: int = 0):
    """
    Build sequences for LSTM from a time series window.
    
    Args:
        y: Time series array (1D float32)
        seq_len: LSTM sequence length
        start_abs_idx: Absolute index of first element in y
        
    Returns:
        X: Input sequences [N, seq_len, 1]
        y_target: Target values [N]
        t_abs: Absolute target indices [N]
    """
    n = len(y)
    if n <= seq_len:
        return None, None, None
    
    X = np.lib.stride_tricks.sliding_window_view(y, seq_len + 1)
    X_seq, y_target = X[:, :-1], X[:, -1]
    t_abs = np.arange(start_abs_idx + seq_len, start_abs_idx + n, dtype=np.int64)
    
    return X_seq[..., None].astype(np.float32), y_target.astype(np.float32), t_abs


def fit_lstm(X: np.ndarray, y: np.ndarray, epochs: int = LSTM_EPOCHS,
             batch_size: int = LSTM_BATCH, lr: float = LSTM_LR,
             hidden: int = LSTM_HIDDEN, layers: int = LSTM_LAYERS,
             dropout: float = LSTM_DROPOUT) -> nn.Module:
    """Fit LSTM and return trained model."""
    if X is None or len(y) == 0:
        return None

    if DEVICE.type == "cuda":
        torch.backends.cudnn.deterministic = False
        torch.backends.cudnn.benchmark = True

    ds = TensorDataset(torch.from_numpy(X), torch.from_numpy(y))
    dl = DataLoader(ds, batch_size=batch_size, shuffle=True,
                    pin_memory=(DEVICE.type == "cuda"), num_workers=0)

    model = LSTMRegressor(hidden=hidden, layers=layers, dropout=dropout).to(DEVICE)
    opt = torch.optim.AdamW(model.parameters(), lr=lr)
    loss_fn = nn.MSELoss()

    for _ in range(epochs):
        model.train()
        for xb, yb in dl:
            xb, yb = xb.to(DEVICE), yb.to(DEVICE)
            opt.zero_grad()
            loss_fn(model(xb), yb).backward()
            opt.step()

    return model


def predict_lstm(model: nn.Module, X: np.ndarray) -> np.ndarray:
    """Get predictions from trained model."""
    if model is None or X is None:
        return None

    model.eval()
    ds = TensorDataset(torch.from_numpy(X))
    dl = DataLoader(ds, batch_size=512, shuffle=False)

    preds = []
    with torch.no_grad():
        for (xb,) in dl:
            preds.append(model(xb.to(DEVICE)).cpu().numpy())

    return np.concatenate(preds).astype(np.float32)


def get_residuals(model: nn.Module, X: np.ndarray, y: np.ndarray) -> np.ndarray:
    """Get residuals from fitted model."""
    yhat = predict_lstm(model, X)
    if yhat is None:
        return None
    return (y - yhat).astype(np.float32)


def fit_lstm_sse(X: np.ndarray, y: np.ndarray, **kwargs) -> tuple:
    """
    Fit LSTM and return (SSE, m, yhat, resid).
    
    Returns:
        SSE: Sum of squared errors
        m: Number of samples
        yhat: Predictions
        resid: Residuals
    """
    if X is None or len(y) == 0:
        return math.inf, 0, None, None

    model = fit_lstm(X, y, **kwargs)
    yhat = predict_lstm(model, X)
    
    if yhat is None:
        return math.inf, 0, None, None
    
    resid = y - yhat
    SSE = float(np.sum(resid**2))
    m = len(y)
    
    return SSE, m, yhat.astype(np.float32), resid.astype(np.float32)


# =============================================================================
# Likelihood Computation
# =============================================================================

def log_likelihood(SSE: float, m: int) -> float:
    """Gaussian log-likelihood (used for LR statistic)."""
    if SSE <= 0 or m <= 0:
        return -math.inf
    return -(m / 2) * np.log(SSE)


# =============================================================================
# Wild Bootstrap for Monte Carlo Simulation
# =============================================================================

def draw_mammen(m: int, rng: np.random.Generator) -> np.ndarray:
    """
    Draw Mammen two-point distribution weights for wild bootstrap.
    E[w] = 0, E[w^2] = 1, E[w^3] = 1
    """
    p = (np.sqrt(5) + 1) / (2 * np.sqrt(5))
    a = (1 - np.sqrt(5)) / 2   # approx -0.618
    b = (1 + np.sqrt(5)) / 2   # approx  1.618
    u = rng.random(m)
    return np.where(u < p, a, b).astype(np.float32)


def fit_i0_model(data: np.ndarray, n_0: int, seq_len: int = LSTM_SEQ_LEN,
                 epochs: int = LSTM_EPOCHS) -> tuple:
    """
    Fit LSTM on I_0 (last n_0 points) and return model, residuals, seed values.
    
    Args:
        data: Full time series
        n_0: Initial window size (I_0 size)
        seq_len: LSTM sequence length
        epochs: Training epochs
        
    Returns:
        (model, residuals, seed_values)
    """
    i0_data = data[-n_0:].astype(np.float32)
    X, y, _ = build_sequences(i0_data, seq_len, 0)
    
    if X is None:
        raise ValueError(f"I_0 too short: need > {seq_len} points, got {n_0}")

    model = fit_lstm(X, y, epochs=epochs)
    residuals = get_residuals(model, X, y)
    seed_values = i0_data[-seq_len:]

    return model, residuals, seed_values


def simulate_series_from_model(model: nn.Module, seed_values: np.ndarray,
                                residual_pool: np.ndarray, n_total: int,
                                seq_len: int, rng: np.random.Generator) -> np.ndarray:
    """
    Simulate series via recursive 1-step forecasts + wild bootstrap residuals.
    
    Args:
        model: Fitted LSTM model
        seed_values: Initial values (at least seq_len points)
        residual_pool: Pool of residuals to resample from
        n_total: Total length to generate
        seq_len: LSTM sequence length
        rng: Random generator
        
    Returns:
        Simulated series of length n_total
    """
    if n_total <= 0:
        return np.array([], dtype=np.float32)
    if model is None or residual_pool is None or len(residual_pool) == 0:
        raise ValueError("Model or residual pool missing")
    if len(seed_values) < seq_len:
        raise ValueError("Not enough seed values")

    y_star = np.empty(n_total, dtype=np.float32)

    # Initialize with seed
    init = np.asarray(seed_values[-seq_len:], dtype=np.float32)
    n_init = min(seq_len, n_total)
    y_star[:n_init] = init[:n_init]

    # Recursive simulation
    model.eval()
    with torch.no_grad():
        for t in range(seq_len, n_total):
            x = y_star[t - seq_len:t].reshape(1, seq_len, 1)
            xb = torch.from_numpy(x).to(DEVICE)
            pred = float(model(xb).item())

            # Wild bootstrap residual
            e = float(residual_pool[rng.integers(0, len(residual_pool))])
            w = float(draw_mammen(1, rng)[0])
            y_star[t] = np.float32(pred + w * e)

    return y_star

In [48]:
# =============================================================================
# Critical Value Computation via Monte Carlo
# =============================================================================

def compute_suplr_for_series(y: np.ndarray, seq_len: int, search_step: int,
                              j_start_pos: int, j_end_pos: int,
                              epochs: int = LSTM_EPOCHS) -> float:
    """
    Compute SupLR for a single series over specified J_k range.
    
    Args:
        y: Time series array
        seq_len: LSTM sequence length
        search_step: Split search granularity
        j_start_pos: Start of J_k range (series coordinates)
        j_end_pos: End of J_k range (series coordinates)
        epochs: LSTM training epochs
        
    Returns:
        Maximum LR statistic over splits in J_k
    """
    X, y_target, t_idx = build_sequences(y, seq_len, 0)
    if X is None:
        return 0.0

    # Fit on full window
    SSE_full, m_full, _, _ = fit_lstm_sse(X, y_target, epochs=epochs)
    LL_full = log_likelihood(SSE_full, m_full)

    n_targets = len(y_target)

    # Convert J_k range to target indices
    split_start = max(MIN_SEG, j_start_pos - seq_len)
    split_end = min(n_targets - MIN_SEG, j_end_pos - seq_len)

    if split_end <= split_start:
        return 0.0

    T_vals = []
    for split_idx in range(split_start, split_end, search_step):
        X_left, y_left = X[:split_idx], y_target[:split_idx]
        X_right, y_right = X[split_idx:], y_target[split_idx:]

        if len(y_left) < MIN_SEG or len(y_right) < MIN_SEG:
            continue

        SSE_left, m_left, _, _ = fit_lstm_sse(X_left, y_left, epochs=epochs)
        SSE_right, m_right, _, _ = fit_lstm_sse(X_right, y_right, epochs=epochs)

        T_i = (log_likelihood(SSE_left, m_left) +
               log_likelihood(SSE_right, m_right) - LL_full)
        T_vals.append(max(0.0, T_i))

    return max(T_vals) if T_vals else 0.0


def compute_critical_values(
    data: np.ndarray,
    n_0: int,
    c: float,
    mc_reps: int = 300,
    alpha: float = 0.95,
    search_step: int = 1,
    max_len: int = 1500,
    seq_len: int = LSTM_SEQ_LEN,
    epochs: int = LSTM_EPOCHS,
    seed: int = 42,
    verbose: bool = True
) -> pd.DataFrame:
    """
    Compute raw critical values via Monte Carlo simulation.
    
    For each scale k:
    1. Simulate homogeneous series of length n_{k+1} from I_0 model
    2. Compute SupLR over J_k split range
    3. Critical value = alpha-quantile of SupLR distribution
    
    Args:
        data: Time series data
        n_0: Initial window size
        c: Geometric ratio for window sizes
        mc_reps: Number of Monte Carlo replications
        alpha: Significance level (e.g., 0.95 for 95th percentile)
        search_step: Split search granularity
        max_len: Maximum series length to simulate
        seq_len: LSTM sequence length
        epochs: LSTM training epochs
        seed: Random seed
        verbose: Print progress
        
    Returns:
        DataFrame with columns: k, n_k, n_k_plus1, j_start, j_end, critical_value_95, mean, std
    """
    try:
        from tqdm.auto import tqdm
        has_tqdm = True
    except ImportError:
        has_tqdm = False
        
    rng = np.random.default_rng(seed)
    data = np.asarray(data, dtype=np.float32)

    # Step 1: Fit I_0 model
    if verbose:
        print("Step 1: Fitting LSTM on I_0...")
    i0_model, i0_residuals, seed_values = fit_i0_model(data, n_0, seq_len, epochs)
    if verbose:
        print(f"  I_0 size: {n_0}")
        print(f"  Residuals: {len(i0_residuals)} points, std={np.std(i0_residuals):.4f}")

    # Step 2: Determine scales
    K_max = 0
    for k in range(1, 100):
        n_k_plus1 = int(n_0 * c**(k+1))
        if n_k_plus1 > max_len:
            break
        K_max = k

    if verbose:
        print(f"\nStep 2: Computing critical values for k=1..{K_max}")
        print(f"  MC replications per scale: {mc_reps}")
        print(f"  Device: {DEVICE}")

    # Step 3: MC simulation for each scale
    results = []
    
    for k in range(1, K_max + 1):
        n_k_minus1 = n_0 if k == 1 else int(n_0 * c**(k-1))
        n_k = int(n_0 * c**k)
        n_k_plus1 = int(n_0 * c**(k+1))

        # J_k range
        j_start = n_k_plus1 - n_k
        j_end = n_k_plus1 - n_k_minus1

        suplr_values = []
        
        rep_iter = range(mc_reps)
        if has_tqdm and verbose:
            rep_iter = tqdm(rep_iter, desc=f"k={k} (n={n_k_plus1})", unit="rep", leave=False)

        for _ in rep_iter:
            # Simulate homogeneous series
            y_sim = simulate_series_from_model(
                model=i0_model,
                seed_values=seed_values,
                residual_pool=i0_residuals,
                n_total=n_k_plus1,
                seq_len=seq_len,
                rng=rng
            )

            # Compute SupLR over J_k
            suplr = compute_suplr_for_series(
                y_sim, seq_len, search_step, j_start, j_end, epochs
            )
            suplr_values.append(suplr)

        suplr_arr = np.array(suplr_values)
        alpha_pct = int(alpha * 100)
        
        results.append({
            'k': k,
            'n_k': n_k,
            'n_k_plus1': n_k_plus1,
            'j_start': j_start,
            'j_end': j_end,
            f'critical_value_{alpha_pct}': float(np.quantile(suplr_arr, alpha)),
            'critical_value_99': float(np.quantile(suplr_arr, 0.99)),
            'mean': float(np.mean(suplr_arr)),
            'std': float(np.std(suplr_arr)),
        })

        if verbose and not has_tqdm:
            cv = results[-1][f'critical_value_{alpha_pct}']
            print(f"  k={k}: n_k={n_k}, CV({alpha_pct}%)={cv:.3f}")

    return pd.DataFrame(results)


# =============================================================================
# Spokoiny-Style Critical Value Adjustment
# =============================================================================

def adjust_critical_values(
    df_raw: pd.DataFrame,
    alpha: float = 0.95,
    penalty_factor: float = 0.25
) -> pd.DataFrame:
    """
    Apply Spokoiny-style adjustment to raw critical values.
    
    Following Cizek 2009, the critical value should satisfy:
        z_k / n_k = C + D * log(n_k)  with D < 0
    
    Raw MC critical values typically have D ~ 0 (linear scaling).
    This adjustment inflates small-window critical values:
        adjusted_cv = raw_cv * (1 + lambda * sqrt(log(n_K_max / n_k)))
    
    Args:
        df_raw: DataFrame with raw critical values (from compute_critical_values)
        alpha: Significance level
        penalty_factor: Lambda parameter (higher = more conservative for small windows)
        
    Returns:
        DataFrame with adjusted critical values
    """
    df = df_raw.copy()
    alpha_pct = int(alpha * 100)
    cv_col = f'critical_value_{alpha_pct}'
    
    n_k = df['n_k'].values
    n_K_max = n_k.max()
    raw_cv = df[cv_col].values

    # Spokoiny-style adjustment: inflate small windows more
    ratio = n_K_max / n_k  # >= 1, largest for k=1
    adjustment = 1 + penalty_factor * np.sqrt(np.log(ratio))

    df['adjustment_factor'] = adjustment
    df[cv_col] = raw_cv * adjustment
    
    # Also adjust 99% if present
    if 'critical_value_99' in df.columns:
        df['critical_value_99'] = df['critical_value_99'] * adjustment

    df['penalty_factor'] = penalty_factor
    
    return df


def critical_values_to_dict(df: pd.DataFrame, alpha: float = 0.95) -> Dict[int, float]:
    """Convert critical values DataFrame to dict mapping k -> CV."""
    alpha_pct = int(alpha * 100)
    cv_col = f'critical_value_{alpha_pct}'
    return {int(row['k']): float(row[cv_col]) for _, row in df.iterrows()}

In [49]:
# =============================================================================
# LPA Change Point Detection
# =============================================================================

def detect_changes_with_lstm(
    Data_N: np.ndarray,
    critical_values: Dict[int, float],
    seq_len: int = LSTM_SEQ_LEN,
    n_0: int = 100,
    jump: int = 10,
    search_step: int = 1,
    c: float = 1.4142,  # sqrt(2), geometric ratio
    epochs: int = LSTM_EPOCHS,
    verbose: bool = True
) -> pd.DataFrame:
    """
    Detect change points using LPA with LSTM.
    
    Implements the Local Parametric Approach (Spokoiny 1998, Cizek 2009):
    - At each time point, tests progressively larger windows I_k
    - Computes SupLR statistic over split candidates in J_k range
    - Stops when SupLR > critical_value[k] (change detected)
    - Uses geometric window schedule: n_k = n_0 * c^k
    
    Args:
        Data_N: Time series array (1D)
        critical_values: Dict mapping scale k -> critical value
        seq_len: LSTM sequence length
        n_0: Initial window size
        jump: Step size for moving through time series
        search_step: Step size for split search within J_k
        c: Geometric ratio for window sizes
        epochs: LSTM training epochs
        verbose: Print progress
        
    Returns:
        DataFrame with columns: Date, N, windows_1, scaled_windows_1
    """
    Data_N = np.asarray(Data_N, dtype=np.float32)
    T = len(Data_N)
    
    results = {
        "Date": np.arange(T),
        "N": Data_N,
        "windows_1": [np.nan] * T,
        "scaled_windows_1": [np.nan] * T,
    }
    
    # Process from end to beginning
    step_indices = list(range(0, T, jump))
    
    for l in step_indices:
        t0 = time.time()
        io = T - l  # Current time index (working backwards)
        
        if io <= n_0:
            continue
        
        # Maximum scale K for this time point
        K_max = max(0, math.ceil((math.log(io) - math.log(n_0)) / math.log(c)))
        
        # Initialize with I_0
        I_k_minus1 = Data_N[max(0, io - n_0):io]
        n_k_minus1 = n_0
        selected_window_size = n_0
        
        for k in range(1, K_max + 1):
            # Geometric window sizes
            n_k = int(n_0 * c**k)
            n_k_plus1 = int(n_0 * c**(k + 1))
            
            if n_k_plus1 > io:
                break
            
            # Extract window I_{k+1}
            start_abs = max(0, io - n_k_plus1)
            y_win = Data_N[start_abs:io].astype(np.float32)
            
            # Build sequences
            X_all, y_all, t_abs = build_sequences(y_win, seq_len, start_abs)
            if X_all is None:
                continue
            
            # Fit on full window I_{k+1}
            SSE_full, m_full, _, _ = fit_lstm_sse(X_all, y_all, epochs=epochs)
            LL_full = log_likelihood(SSE_full, m_full)
            
            # J_k split range
            J_start = max(seq_len, io - n_k)
            J_end = io - n_k_minus1
            
            if J_end <= J_start:
                continue
            
            J_abs = np.arange(J_start, J_end, search_step, dtype=np.int64)
            
            # Compute SupLR over J_k
            T_vals = []
            for i_abs in J_abs:
                Lmask = t_abs <= i_abs
                Rmask = t_abs > i_abs
                
                mL = int(np.sum(Lmask))
                mR = int(np.sum(Rmask))
                
                if mL < MIN_SEG or mR < MIN_SEG:
                    continue
                
                SSE_L, m_L, _, _ = fit_lstm_sse(X_all[Lmask], y_all[Lmask], epochs=epochs)
                SSE_R, m_R, _, _ = fit_lstm_sse(X_all[Rmask], y_all[Rmask], epochs=epochs)
                
                T_i = log_likelihood(SSE_L, m_L) + log_likelihood(SSE_R, m_R) - LL_full
                T_vals.append(max(0.0, T_i))
            
            sup_lr = max(T_vals) if T_vals else 0.0
            
            # Get critical value for this scale
            crit_val = critical_values.get(k, math.inf)
            
            if verbose:
                print(f"step={l:4d} | k={k} | I_k+1=[{start_abs},{io}] | "
                      f"J_k=[{J_start},{J_end}] | SupLR={sup_lr:.3f} | crit={crit_val:.3f}")
            
            # Test against critical value
            if sup_lr > crit_val:
                if verbose:
                    print(f"  -> BREAK detected at step {l}, scale k={k}")
                selected_window_size = n_k
                break
            else:
                # Accept I_k, continue to larger window
                I_k_minus1 = Data_N[max(0, io - n_k):io]
                n_k_minus1 = n_k
                selected_window_size = n_k
        
        # Record selected window
        results["windows_1"][io - 1] = selected_window_size
        results["scaled_windows_1"][io - 1] = selected_window_size / io
        
        if verbose:
            t1 = time.time()
            print(f"  -> Selected window: {selected_window_size} ({t1-t0:.2f}s)")
    
    return pd.DataFrame(results)

In [50]:
# Data. Generated with: https://github.com/QuantLet/AR_sim_p/tree/main
df = pd.read_csv("LPA/Simulation/data.csv")

In [51]:
# =============================================================================
# Run LPA Detection (Complete Pipeline)
# =============================================================================

if __name__ == "__main__":
    # =========================================================================
    # Configuration
    # =========================================================================
    
    # LPA parameters
    n_0 = 100               # Initial window size
    c = np.sqrt(2)          # Geometric ratio
    alpha = 0.95            # Significance level
    
    # Critical value computation
    mc_reps = 100           # Monte Carlo replications
    penalty_factor = 0.25   # Lambda for Spokoiny adjustment (0 = no adjustment)
    
    # Detection parameters
    jump = 1               # Step size through time series
    search_step = 1         # Split search granularity
    
    # Output directory (includes n_0 and lambda)
    lambda_str = str(penalty_factor).replace('.', '')  # e.g., "025" for 0.25
    output_dir = f"LPA/Geometric/Jump_{jump}_N0_{n_0}/lambda{lambda_str}"
    os.makedirs(output_dir, exist_ok=True)
    
    # =========================================================================
    # Step 1: Compute Critical Values
    # =========================================================================
    
    print("="*70)
    print("STEP 1: Computing Critical Values via Monte Carlo")
    print("="*70)
    print(f"  n_0={n_0}, c={c:.4f}, alpha={alpha}")
    print(f"  MC reps={mc_reps}, penalty_factor={penalty_factor}")
    print(f"  Device: {DEVICE}")
    print()
    
    t0_cv = time.time()
    
    # Compute raw critical values
    df_raw = compute_critical_values(
        data=df["N"].to_numpy(dtype=np.float32),
        n_0=n_0,
        c=c,
        mc_reps=mc_reps,
        alpha=alpha,
        search_step=search_step,
        max_len=len(df),
        epochs=LSTM_EPOCHS,
        seed=42,
        verbose=True
    )
    
    # Apply Spokoiny-style adjustment
    print(f"\nApplying adjustment with penalty_factor={penalty_factor}...")
    df_adjusted = adjust_critical_values(df_raw, alpha=alpha, penalty_factor=penalty_factor)
    
    t1_cv = time.time()
    print(f"\nCritical value computation completed in {t1_cv - t0_cv:.1f}s")
    
    # Save critical values
    raw_cv_path = f"{output_dir}/critical_values_raw.csv"
    adj_cv_path = f"{output_dir}/critical_values_adjusted.csv"
    df_raw.to_csv(raw_cv_path, index=False)
    df_adjusted.to_csv(adj_cv_path, index=False)
    print(f"Saved: {raw_cv_path}")
    print(f"Saved: {adj_cv_path}")
    
    # Print summary
    alpha_pct = int(alpha * 100)
    print(f"\nCritical Values Summary (alpha={alpha}):")
    print("-"*50)
    for _, row in df_adjusted.iterrows():
        print(f"  k={int(row['k'])}: n_k={int(row['n_k'])}, "
              f"CV={row[f'critical_value_{alpha_pct}']:.2f} "
              f"(adj={row['adjustment_factor']:.3f})")
    
    # Convert to dict for detection
    critical_values = critical_values_to_dict(df_adjusted, alpha=alpha)
    
    # =========================================================================
    # Step 2: Run LPA Detection
    # =========================================================================
    
    print("\n" + "="*70)
    print("STEP 2: Running LPA-LSTM Detection")
    print("="*70)
    print(f"  jump={jump}, search_step={search_step}")
    print()
    
    t0_det = time.time()
    
    DT_out = detect_changes_with_lstm(
        Data_N=df["N"].to_numpy(dtype=np.float32),
        critical_values=critical_values,
        seq_len=LSTM_SEQ_LEN,
        n_0=n_0,
        jump=jump,
        search_step=search_step,
        c=c,
        epochs=LSTM_EPOCHS,
        verbose=True
    )
    
    t1_det = time.time()
    print(f"\nDetection completed in {t1_det - t0_det:.1f}s")
    
    # =========================================================================
    # Step 3: Save Results
    # =========================================================================
    
    out_path = f"{output_dir}/detection_results.csv"
    DT_out.to_csv(out_path, index=False)
    print(f"\nSaved: {out_path}")
    
    # =========================================================================
    # Summary
    # =========================================================================
    
    print("\n" + "="*70)
    print("COMPLETE")
    print("="*70)
    print(f"Total time: {t1_det - t0_cv:.1f}s")
    print(f"Output directory: {output_dir}")
    print(f"Files:")
    print(f"  - {raw_cv_path}")
    print(f"  - {adj_cv_path}")
    print(f"  - {out_path}")

STEP 1: Computing Critical Values via Monte Carlo
  n_0=100, c=1.4142, alpha=0.95
  MC reps=100, penalty_factor=0.25
  Device: cpu

Step 1: Fitting LSTM on I_0...
  I_0 size: 100
  Residuals: 97 points, std=1.2027

Step 2: Computing critical values for k=1..6
  MC replications per scale: 100
  Device: cpu


                                                                


Applying adjustment with penalty_factor=0.25...

Critical value computation completed in 6326.1s
Saved: LPA/Geometric/Jump_1_N0_100/lambda025/critical_values_raw.csv
Saved: LPA/Geometric/Jump_1_N0_100/lambda025/critical_values_adjusted.csv

Critical Values Summary (alpha=0.95):
--------------------------------------------------
  k=1: n_k=141, CV=87.84 (adj=1.329)
  k=2: n_k=200, CV=116.77 (adj=1.294)
  k=3: n_k=282, CV=170.05 (adj=1.255)
  k=4: n_k=400, CV=244.05 (adj=1.208)
  k=5: n_k=565, CV=338.47 (adj=1.147)
  k=6: n_k=800, CV=417.52 (adj=1.000)

STEP 2: Running LPA-LSTM Detection
  jump=1, search_step=1

step=   0 | k=1 | I_k+1=[1300,1500] | J_k=[1359,1400] | SupLR=49.669 | crit=87.835
step=   0 | k=2 | I_k+1=[1218,1500] | J_k=[1300,1359] | SupLR=71.067 | crit=116.773
step=   0 | k=3 | I_k+1=[1100,1500] | J_k=[1218,1300] | SupLR=117.647 | crit=170.053
step=   0 | k=4 | I_k+1=[935,1500] | J_k=[1100,1218] | SupLR=262.682 | crit=244.048
  -> BREAK detected at step 0, scale k=4
  ->