## **3.1 Load Features & Select Training Cycles (1..N, N=20)**

In [1]:
import pandas as pd
import numpy as np
import pickle
import json
import os
from sklearn.svm import OneClassSVM
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics.pairwise import pairwise_distances
from qiskit import QuantumCircuit
from qiskit.circuit import ParameterVector
from qiskit.quantum_info import Statevector
import warnings
warnings.filterwarnings('ignore')

# Create Phase 3 output directories
os.makedirs('result/phase_3/data', exist_ok=True)

# Load features
features_df = pd.read_csv('result/phase_2/data/features.csv')
print(f"✓ Loaded features from: result/phase_2/data/features.csv")
print(f"  Shape: {features_df.shape}")
print(f"  Total cycles: {len(features_df)}")

# Set training parameter
N = 20  # Number of nominal training cycles
print(f"\n=== Training Configuration ===")
print(f"Training cycles: 1 to {N} (inclusive)")
print(f"Training samples: {N}")
print(f"Test samples: {len(features_df) - N}")

# Select training subset (cycles 1 to N)
train_mask = (features_df['cycle_idx'] >= 1) & (features_df['cycle_idx'] <= N)
train_df = features_df[train_mask].copy()
print(f"\n✓ Training subset selected")
print(f"  Cycle range: [{train_df['cycle_idx'].min()}, {train_df['cycle_idx'].max()}]")
print(f"  Number of samples: {len(train_df)}")

# Extract feature columns (8 features)
feature_cols = ['capacity_Ah', 'energy_Wh', 'duration_s', 
                'v_min', 'v_max', 'v_mean', 'i_rms', 'dVdt_abs_mean']
print(f"\n✓ Feature columns (n={len(feature_cols)}): {feature_cols}")

# Display training data summary
print(f"\n=== Training Data Summary ===")
print(train_df[['cycle_idx'] + feature_cols].describe())

✓ Loaded features from: result/phase_2/data/features.csv
  Shape: (1241, 9)
  Total cycles: 1241

=== Training Configuration ===
Training cycles: 1 to 20 (inclusive)
Training samples: 20
Test samples: 1221

✓ Training subset selected
  Cycle range: [1, 20]
  Number of samples: 20

✓ Feature columns (n=8): ['capacity_Ah', 'energy_Wh', 'duration_s', 'v_min', 'v_max', 'v_mean', 'i_rms', 'dVdt_abs_mean']

=== Training Data Summary ===
       cycle_idx  capacity_Ah  energy_Wh  duration_s      v_min      v_max  \
count   20.00000    20.000000  20.000000    20.00000  20.000000  20.000000   
mean    10.50000     3.248586  11.613322  3655.70000   2.999985   4.083179   
std      5.91608     0.011178   0.043639    12.57441   0.000046   0.000646   
min      1.00000     3.234622  11.556919  3640.00000   2.999850   4.081560   
25%      5.75000     3.239967  11.579369  3646.00000   3.000000   4.083050   
50%     10.50000     3.246181  11.604447  3653.00000   3.000000   4.083240   
75%     15.25000   

## **3.2 Scale Features to [0, π] Using Training Min–Max (Persist Scaler)**

In [2]:
# Extract feature matrices
X_train = train_df[feature_cols].values
X_full = features_df[feature_cols].values

print(f"=== Feature Scaling to [0, π] ===")
print(f"Training feature matrix shape: {X_train.shape}")
print(f"Full feature matrix shape: {X_full.shape}")

# Compute min-max bounds from training data only
train_min = X_train.min(axis=0)
train_max = X_train.max(axis=0)

print(f"\n--- Training Set Min/Max (per feature) ---")
for i, col in enumerate(feature_cols):
    print(f"{col:20s}: [{train_min[i]:.6f}, {train_max[i]:.6f}]")

# Create sklearn MinMaxScaler fitted to training data
# Scale from training range to [0, π]
scaler = MinMaxScaler(feature_range=(0, np.pi))
scaler.fit(X_train)

# Transform both training and full datasets
X_train_scaled = scaler.transform(X_train)
X_full_scaled = scaler.transform(X_full)

print(f"\n✓ Features scaled to [0, π]")
print(f"  Training scaled shape: {X_train_scaled.shape}")
print(f"  Full scaled shape: {X_full_scaled.shape}")

# Verify scaling range
print(f"\n--- Scaled Training Data Range Check ---")
print(f"  Min values: {X_train_scaled.min(axis=0)}")
print(f"  Max values: {X_train_scaled.max(axis=0)}")
print(f"  Expected: all values in [0, {np.pi:.6f}]")

# Save scaler
scaler_path = 'result/phase_3/data/scaler.pkl'
with open(scaler_path, 'wb') as f:
    pickle.dump(scaler, f)
print(f"\n✓ Scaler saved to: {scaler_path}")

# Save scaling parameters for documentation
scaling_params = {
    'feature_cols': feature_cols,
    'train_min': train_min.tolist(),
    'train_max': train_max.tolist(),
    'target_range': [0, np.pi],
    'n_features': len(feature_cols)
}
scaling_params_path = 'result/phase_3/data/scaling_params.json'
with open(scaling_params_path, 'w') as f:
    json.dump(scaling_params, f, indent=2)
print(f"✓ Scaling parameters saved to: {scaling_params_path}")

=== Feature Scaling to [0, π] ===
Training feature matrix shape: (20, 8)
Full feature matrix shape: (1241, 8)

--- Training Set Min/Max (per feature) ---
capacity_Ah         : [3.234622, 3.270184]
energy_Wh           : [11.556919, 11.696697]
duration_s          : [3640.000000, 3680.000000]
v_min               : [2.999850, 3.000000]
v_max               : [4.081560, 4.083850]
v_mean              : [3.572743, 3.576643]
i_rms               : [3.199514, 3.199535]
dVdt_abs_mean       : [0.000359, 0.000366]

✓ Features scaled to [0, π]
  Training scaled shape: (20, 8)
  Full scaled shape: (1241, 8)

--- Scaled Training Data Range Check ---
  Min values: [0. 0. 0. 0. 0. 0. 0. 0.]
  Max values: [3.14159265 3.14159265 3.14159265 3.14159265 3.14159265 3.14159265
 3.14159265 3.14159265]
  Expected: all values in [0, 3.141593]

✓ Scaler saved to: result/phase_3/data/scaler.pkl
✓ Scaling parameters saved to: result/phase_3/data/scaling_params.json


## **3.3 Build Quantum Feature Map (ZZ/Pauli, Depth 1–2, 8 Qubits) & Kernel (Precomputed)**

In [3]:
def create_quantum_feature_map(n_qubits=8, depth=2):
    """
    Create a ZZ/Pauli entangling feature map circuit.
    
    Args:
        n_qubits: Number of qubits (must match feature dimension)
        depth: Number of entangling layers (1 or 2)
    
    Returns:
        QuantumCircuit with parameter vector
    """
    qc = QuantumCircuit(n_qubits)
    params = ParameterVector('x', n_qubits)
    
    for d in range(depth):
        # Pauli rotation layer (RZ, RY)
        for i in range(n_qubits):
            qc.rz(params[i], i)
            qc.ry(params[i], i)
        
        # ZZ entangling layer (nearest-neighbor + wrap-around)
        for i in range(n_qubits):
            j = (i + 1) % n_qubits
            # ZZ gate decomposition: CNOT - RZ - CNOT
            qc.cx(i, j)
            qc.rz(2 * (params[i] - params[j]), j)
            qc.cx(i, j)
    
    return qc, params

def quantum_kernel_element(x1, x2, feature_map, params):
    """
    Compute quantum kernel element K(x1, x2) = |⟨φ(x1)|φ(x2)⟩|²
    
    Args:
        x1, x2: Feature vectors (scaled to [0, π])
        feature_map: Quantum circuit
        params: Parameter vector
    
    Returns:
        Kernel value (float)
    """
    # Bind parameters for x1
    qc1 = feature_map.assign_parameters({params[i]: x1[i] for i in range(len(x1))})
    state1 = Statevector.from_instruction(qc1)
    
    # Bind parameters for x2
    qc2 = feature_map.assign_parameters({params[i]: x2[i] for i in range(len(x2))})
    state2 = Statevector.from_instruction(qc2)
    
    # Compute inner product |⟨ψ1|ψ2⟩|²
    overlap = np.abs(state1.inner(state2)) ** 2
    return overlap

def compute_quantum_kernel_matrix(X1, X2, feature_map, params):
    """
    Compute full quantum kernel matrix K[i,j] = K(X1[i], X2[j])
    
    Args:
        X1: First set of samples (n_samples_1, n_features)
        X2: Second set of samples (n_samples_2, n_features)
        feature_map: Quantum circuit
        params: Parameter vector
    
    Returns:
        Kernel matrix (n_samples_1, n_samples_2)
    """
    n1, n2 = len(X1), len(X2)
    K = np.zeros((n1, n2))
    
    for i in range(n1):
        for j in range(n2):
            K[i, j] = quantum_kernel_element(X1[i], X2[j], feature_map, params)
        
        if (i + 1) % 50 == 0 or i == n1 - 1:
            print(f"  Progress: {i+1}/{n1} rows computed")
    
    return K

# Create quantum feature map
n_qubits = 8  # Match feature dimension
depth = 2
print(f"=== Quantum Feature Map Construction ===")
print(f"Number of qubits: {n_qubits}")
print(f"Entangling depth: {depth}")
print(f"Feature encoding: ZZ/Pauli (RZ + RY + ZZ gates)")

feature_map, params = create_quantum_feature_map(n_qubits=n_qubits, depth=depth)
print(f"\n✓ Quantum circuit created")
print(f"  Circuit depth: {feature_map.depth()}")
print(f"  Number of gates: {len(feature_map.data)}")
print(f"  Parameters: {len(params)}")

# Compute quantum kernel matrices
print(f"\n=== Computing Quantum Kernel Matrices ===")
print(f"This may take several minutes...")

print(f"\n--- Training kernel K_train (20 × 20) ---")
K_quantum_train = compute_quantum_kernel_matrix(X_train_scaled, X_train_scaled, 
                                                 feature_map, params)

print(f"\n--- Full kernel K_full (1241 × 20) for scoring ---")
K_quantum_full = compute_quantum_kernel_matrix(X_full_scaled, X_train_scaled, 
                                                feature_map, params)

print(f"\n✓ Quantum kernel matrices computed")
print(f"  K_train shape: {K_quantum_train.shape}")
print(f"  K_full shape: {K_quantum_full.shape}")
print(f"  K_train diagonal (should be ~1.0): {np.diag(K_quantum_train)[:5]}")

# Save quantum kernel matrices
np.save('result/phase_3/data/K_quantum_train.npy', K_quantum_train)
np.save('result/phase_3/data/K_quantum_full.npy', K_quantum_full)
print(f"\n✓ Kernel matrices saved")
print(f"  result/phase_3/data/K_quantum_train.npy")
print(f"  result/phase_3/data/K_quantum_full.npy")

# Save quantum kernel parameters
quantum_params = {
    'type': 'ZZ_Pauli_entangling',
    'n_qubits': n_qubits,
    'depth': depth,
    'circuit_depth': feature_map.depth(),
    'n_gates': len(feature_map.data),
    'encoding': 'RZ + RY rotations with ZZ entanglement'
}
quantum_params_path = 'result/phase_3/data/quantum_kernel_params.json'
with open(quantum_params_path, 'w') as f:
    json.dump(quantum_params, f, indent=2)
print(f"✓ Quantum parameters saved to: {quantum_params_path}")

=== Quantum Feature Map Construction ===
Number of qubits: 8
Entangling depth: 2
Feature encoding: ZZ/Pauli (RZ + RY + ZZ gates)

✓ Quantum circuit created
  Circuit depth: 52
  Number of gates: 80
  Parameters: 8

=== Computing Quantum Kernel Matrices ===
This may take several minutes...

--- Training kernel K_train (20 × 20) ---
  Progress: 20/20 rows computed

--- Full kernel K_full (1241 × 20) for scoring ---
  Progress: 50/1241 rows computed
  Progress: 100/1241 rows computed
  Progress: 150/1241 rows computed
  Progress: 200/1241 rows computed
  Progress: 250/1241 rows computed
  Progress: 300/1241 rows computed
  Progress: 350/1241 rows computed
  Progress: 400/1241 rows computed
  Progress: 450/1241 rows computed
  Progress: 500/1241 rows computed
  Progress: 550/1241 rows computed
  Progress: 600/1241 rows computed
  Progress: 650/1241 rows computed
  Progress: 700/1241 rows computed
  Progress: 750/1241 rows computed
  Progress: 800/1241 rows computed
  Progress: 850/1241 row

## **3.4 Prepare Baseline Kernels (RBF γ=median heuristic; Laplacian; Poly deg 2–3)**

In [4]:
# Compute pairwise distances on training set for median heuristic
train_distances = pairwise_distances(X_train_scaled, metric='euclidean')
# Extract upper triangle (exclude diagonal)
train_dist_flat = train_distances[np.triu_indices_from(train_distances, k=1)]

# Median heuristic: γ = 1 / (2 * median²)
median_dist = np.median(train_dist_flat)
gamma_rbf = 1.0 / (2 * median_dist ** 2)

print(f"=== RBF Kernel (Median Heuristic) ===")
print(f"Training pairwise distances:")
print(f"  Median distance: {median_dist:.6f}")
print(f"  Mean distance: {train_dist_flat.mean():.6f}")
print(f"  Std distance: {train_dist_flat.std():.6f}")
print(f"  Range: [{train_dist_flat.min():.6f}, {train_dist_flat.max():.6f}]")
print(f"\n✓ RBF gamma (median heuristic): {gamma_rbf:.6f}")

# Compute RBF kernel matrices
from sklearn.metrics.pairwise import rbf_kernel, laplacian_kernel, polynomial_kernel

K_rbf_train = rbf_kernel(X_train_scaled, X_train_scaled, gamma=gamma_rbf)
K_rbf_full = rbf_kernel(X_full_scaled, X_train_scaled, gamma=gamma_rbf)

print(f"\n✓ RBF kernel matrices computed")
print(f"  K_train shape: {K_rbf_train.shape}")
print(f"  K_full shape: {K_rbf_full.shape}")
print(f"  K_train diagonal: {np.diag(K_rbf_train)[:5]}")

# Laplacian kernel (same gamma)
print(f"\n=== Laplacian Kernel ===")
print(f"Using same gamma: {gamma_rbf:.6f}")

K_laplacian_train = laplacian_kernel(X_train_scaled, X_train_scaled, gamma=gamma_rbf)
K_laplacian_full = laplacian_kernel(X_full_scaled, X_train_scaled, gamma=gamma_rbf)

print(f"✓ Laplacian kernel matrices computed")
print(f"  K_train shape: {K_laplacian_train.shape}")
print(f"  K_full shape: {K_laplacian_full.shape}")

# Polynomial kernels (degree 2 and 3)
print(f"\n=== Polynomial Kernels ===")

# Degree 2
K_poly2_train = polynomial_kernel(X_train_scaled, X_train_scaled, degree=2, coef0=1)
K_poly2_full = polynomial_kernel(X_full_scaled, X_train_scaled, degree=2, coef0=1)

print(f"Degree 2:")
print(f"  K_train shape: {K_poly2_train.shape}")
print(f"  K_full shape: {K_poly2_full.shape}")

# Degree 3
K_poly3_train = polynomial_kernel(X_train_scaled, X_train_scaled, degree=3, coef0=1)
K_poly3_full = polynomial_kernel(X_full_scaled, X_train_scaled, degree=3, coef0=1)

print(f"Degree 3:")
print(f"  K_train shape: {K_poly3_train.shape}")
print(f"  K_full shape: {K_poly3_full.shape}")

# Save all baseline kernel matrices
print(f"\n=== Saving Baseline Kernel Matrices ===")
np.save('result/phase_3/data/K_rbf_train.npy', K_rbf_train)
np.save('result/phase_3/data/K_rbf_full.npy', K_rbf_full)
np.save('result/phase_3/data/K_laplacian_train.npy', K_laplacian_train)
np.save('result/phase_3/data/K_laplacian_full.npy', K_laplacian_full)
np.save('result/phase_3/data/K_poly2_train.npy', K_poly2_train)
np.save('result/phase_3/data/K_poly2_full.npy', K_poly2_full)
np.save('result/phase_3/data/K_poly3_train.npy', K_poly3_train)
np.save('result/phase_3/data/K_poly3_full.npy', K_poly3_full)

print(f"✓ All baseline kernel matrices saved")

# Save baseline kernel parameters
baseline_params = {
    'rbf': {
        'type': 'radial_basis_function',
        'gamma': gamma_rbf,
        'gamma_method': 'median_heuristic',
        'median_distance': median_dist
    },
    'laplacian': {
        'type': 'laplacian',
        'gamma': gamma_rbf,
        'gamma_method': 'same_as_rbf'
    },
    'polynomial_deg2': {
        'type': 'polynomial',
        'degree': 2,
        'coef0': 1
    },
    'polynomial_deg3': {
        'type': 'polynomial',
        'degree': 3,
        'coef0': 1
    }
}

baseline_params_path = 'result/phase_3/data/baseline_kernel_params.json'
with open(baseline_params_path, 'w') as f:
    json.dump(baseline_params, f, indent=2)
print(f"✓ Baseline parameters saved to: {baseline_params_path}")

=== RBF Kernel (Median Heuristic) ===
Training pairwise distances:
  Median distance: 3.472745
  Mean distance: 3.537774
  Std distance: 1.446672
  Range: [0.766265, 6.955260]

✓ RBF gamma (median heuristic): 0.041460

✓ RBF kernel matrices computed
  K_train shape: (20, 20)
  K_full shape: (1241, 20)
  K_train diagonal: [1. 1. 1. 1. 1.]

=== Laplacian Kernel ===
Using same gamma: 0.041460
✓ Laplacian kernel matrices computed
  K_train shape: (20, 20)
  K_full shape: (1241, 20)

=== Polynomial Kernels ===
Degree 2:
  K_train shape: (20, 20)
  K_full shape: (1241, 20)
Degree 3:
  K_train shape: (20, 20)
  K_full shape: (1241, 20)

=== Saving Baseline Kernel Matrices ===
✓ All baseline kernel matrices saved
✓ Baseline parameters saved to: result/phase_3/data/baseline_kernel_params.json


## **3.5 Train ν-OCSVM for Quantum & Baselines**

In [5]:
# Set nu parameter (target FPR on training data)
nu = 0.05

print(f"=== Training ν-OCSVM Models ===")
print(f"ν parameter: {nu} (target ~5% training outliers)")
print(f"Training samples: {K_quantum_train.shape[0]}")

# Dictionary to store all models
models = {}

# Train Quantum OCSVM
print(f"\n--- Quantum Kernel ---")
ocsvm_quantum = OneClassSVM(kernel='precomputed', nu=nu)
ocsvm_quantum.fit(K_quantum_train)
models['quantum'] = ocsvm_quantum
print(f"✓ Quantum OCSVM trained")
print(f"  Support vectors: {ocsvm_quantum.n_support_}")
print(f"  Support vector indices: {ocsvm_quantum.support_[:5]}... (showing first 5)")

# Train RBF OCSVM
print(f"\n--- RBF Kernel ---")
ocsvm_rbf = OneClassSVM(kernel='precomputed', nu=nu)
ocsvm_rbf.fit(K_rbf_train)
models['rbf'] = ocsvm_rbf
print(f"✓ RBF OCSVM trained")
print(f"  Support vectors: {ocsvm_rbf.n_support_}")

# Train Laplacian OCSVM
print(f"\n--- Laplacian Kernel ---")
ocsvm_laplacian = OneClassSVM(kernel='precomputed', nu=nu)
ocsvm_laplacian.fit(K_laplacian_train)
models['laplacian'] = ocsvm_laplacian
print(f"✓ Laplacian OCSVM trained")
print(f"  Support vectors: {ocsvm_laplacian.n_support_}")

# Train Polynomial degree 2 OCSVM
print(f"\n--- Polynomial (degree 2) ---")
ocsvm_poly2 = OneClassSVM(kernel='precomputed', nu=nu)
ocsvm_poly2.fit(K_poly2_train)
models['poly2'] = ocsvm_poly2
print(f"✓ Poly2 OCSVM trained")
print(f"  Support vectors: {ocsvm_poly2.n_support_}")

# Train Polynomial degree 3 OCSVM
print(f"\n--- Polynomial (degree 3) ---")
ocsvm_poly3 = OneClassSVM(kernel='precomputed', nu=nu)
ocsvm_poly3.fit(K_poly3_train)
models['poly3'] = ocsvm_poly3
print(f"✓ Poly3 OCSVM trained")
print(f"  Support vectors: {ocsvm_poly3.n_support_}")

# Compute decision scores on training data for all models
print(f"\n=== Computing Training Scores ===")
train_scores = {}

# Map kernel matrices to models
kernel_matrices_train = {
    'quantum': K_quantum_train,
    'rbf': K_rbf_train,
    'laplacian': K_laplacian_train,
    'poly2': K_poly2_train,
    'poly3': K_poly3_train
}

for model_name, model in models.items():
    K_train = kernel_matrices_train[model_name]
    scores = model.decision_function(K_train)
    train_scores[model_name] = scores
    
    print(f"\n{model_name.upper()}:")
    print(f"  Score range: [{scores.min():.6f}, {scores.max():.6f}]")
    print(f"  Score mean: {scores.mean():.6f}")
    print(f"  Score std: {scores.std():.6f}")

# Save all models
print(f"\n=== Saving Models ===")
for model_name, model in models.items():
    model_path = f'result/phase_3/data/ocsvm_{model_name}.pkl'
    with open(model_path, 'wb') as f:
        pickle.dump(model, f)
    print(f"✓ {model_name}: {model_path}")

# Save training scores
train_scores_path = 'result/phase_3/data/train_scores.pkl'
with open(train_scores_path, 'wb') as f:
    pickle.dump(train_scores, f)
print(f"✓ Training scores saved to: {train_scores_path}")

print(f"\n✓ All models trained and saved successfully")

=== Training ν-OCSVM Models ===
ν parameter: 0.05 (target ~5% training outliers)
Training samples: 20

--- Quantum Kernel ---
✓ Quantum OCSVM trained
  Support vectors: [19]
  Support vector indices: [0 1 2 3 4]... (showing first 5)

--- RBF Kernel ---
✓ RBF OCSVM trained
  Support vectors: [7]

--- Laplacian Kernel ---
✓ Laplacian OCSVM trained
  Support vectors: [9]

--- Polynomial (degree 2) ---
✓ Poly2 OCSVM trained
  Support vectors: [2]

--- Polynomial (degree 3) ---
✓ Poly3 OCSVM trained
  Support vectors: [2]

=== Computing Training Scores ===

QUANTUM:
  Score range: [-0.000408, 0.004158]
  Score mean: 0.000208
  Score std: 0.000929

RBF:
  Score range: [-0.000320, 0.080326]
  Score mean: 0.024584
  Score std: 0.025870

LAPLACIAN:
  Score range: [-0.000428, 0.037609]
  Score mean: 0.007758
  Score std: 0.010630

POLY2:
  Score range: [0.000000, 6.688983]
  Score mean: 2.490045
  Score std: 1.748143

POLY3:
  Score range: [-0.000000, 34.703951]
  Score mean: 12.005305
  Score s

## **3.6 Calibrate Threshold (FPR=5% on Training) & Persist**

In [6]:
# Calibrate thresholds to achieve 5% FPR on training data
target_fpr = 0.05

print(f"=== Threshold Calibration (Target FPR={target_fpr}) ===")
print(f"Method: 95th percentile of training scores")
print(f"  (scores below threshold = nominal)")
print(f"  (scores above threshold = anomaly)")

thresholds = {}

for model_name, scores in train_scores.items():
    # For OCSVM, negative scores typically indicate outliers
    # But we use decision_function which can be positive or negative
    # We want to set threshold such that 5% of training samples are flagged
    # This means threshold at the 95th percentile
    threshold = np.percentile(scores, 100 * (1 - target_fpr))
    thresholds[model_name] = threshold
    
    # Count samples above threshold
    n_flagged = np.sum(scores > threshold)
    actual_fpr = n_flagged / len(scores)
    
    print(f"\n{model_name.upper()}:")
    print(f"  Threshold: {threshold:.6f}")
    print(f"  Training samples above threshold: {n_flagged}/{len(scores)}")
    print(f"  Actual FPR: {actual_fpr:.4f} (target: {target_fpr:.4f})")
    print(f"  Score percentiles:")
    print(f"    5%:  {np.percentile(scores, 5):.6f}")
    print(f"    50%: {np.percentile(scores, 50):.6f}")
    print(f"    95%: {np.percentile(scores, 95):.6f}")
    print(f"    100%: {np.percentile(scores, 100):.6f}")

# Save thresholds
thresholds_path = 'result/phase_3/data/thresholds.json'
with open(thresholds_path, 'w') as f:
    json.dump(thresholds, f, indent=2)
print(f"\n✓ Thresholds saved to: {thresholds_path}")

# Create threshold summary
threshold_summary = {
    'target_fpr': target_fpr,
    'calibration_method': '95th_percentile',
    'n_training_samples': len(train_scores['quantum']),
    'thresholds': thresholds
}

threshold_summary_path = 'result/phase_3/data/threshold_summary.json'
with open(threshold_summary_path, 'w') as f:
    json.dump(threshold_summary, f, indent=2)
print(f"✓ Threshold summary saved to: {threshold_summary_path}")

print(f"\n✓ Threshold calibration complete")

=== Threshold Calibration (Target FPR=0.05) ===
Method: 95th percentile of training scores
  (scores below threshold = nominal)
  (scores above threshold = anomaly)

QUANTUM:
  Threshold: 0.000611
  Training samples above threshold: 1/20
  Actual FPR: 0.0500 (target: 0.0500)
  Score percentiles:
    5%:  -0.000361
    50%: 0.000055
    95%: 0.000611
    100%: 0.004158

RBF:
  Threshold: 0.065176
  Training samples above threshold: 1/20
  Actual FPR: 0.0500 (target: 0.0500)
  Score percentiles:
    5%:  -0.000320
    50%: 0.021346
    95%: 0.065176
    100%: 0.080326

LAPLACIAN:
  Threshold: 0.026493
  Training samples above threshold: 1/20
  Actual FPR: 0.0500 (target: 0.0500)
  Score percentiles:
    5%:  -0.000228
    50%: 0.001985
    95%: 0.026493
    100%: 0.037609

POLY2:
  Threshold: 5.133115
  Training samples above threshold: 1/20
  Actual FPR: 0.0500 (target: 0.0500)
  Score percentiles:
    5%:  0.000000
    50%: 2.273578
    95%: 5.133115
    100%: 6.688983

POLY3:
  Thresh

## **3.7 Save Models, Kernels (optional), and Params**

In [8]:
# Verify all required files are saved
print(f"=== Phase 3 Output Verification ===")
print(f"\nChecking saved artifacts in result/phase_3/data/...")

required_files = [
    'scaler.pkl',
    'scaling_params.json',
    'quantum_kernel_params.json',
    'baseline_kernel_params.json',
    'K_quantum_train.npy',
    'K_quantum_full.npy',
    'K_rbf_train.npy',
    'K_rbf_full.npy',
    'K_laplacian_train.npy',
    'K_laplacian_full.npy',
    'K_poly2_train.npy',
    'K_poly2_full.npy',
    'K_poly3_train.npy',
    'K_poly3_full.npy',
    'ocsvm_quantum.pkl',
    'ocsvm_rbf.pkl',
    'ocsvm_laplacian.pkl',
    'ocsvm_poly2.pkl',
    'ocsvm_poly3.pkl',
    'train_scores.pkl',
    'thresholds.json',
    'threshold_summary.json'
]

all_exist = True
for filename in required_files:
    filepath = f'result/phase_3/data/{filename}'
    exists = os.path.exists(filepath)
    status = '✓' if exists else '✗'
    print(f"{status} {filename}")
    if not exists:
        all_exist = False

if all_exist:
    print(f"\n✓ All required files present")
else:
    print(f"\n✗ WARNING: Some files are missing")

# Summary of saved artifacts
print(f"\n=== Saved Artifacts Summary ===")
print(f"\n1. Feature Scaling:")
print(f"   - scaler.pkl (MinMaxScaler fitted on training cycles 1-20)")
print(f"   - scaling_params.json (feature ranges, target [0, π])")

print(f"\n2. Quantum Kernel:")
print(f"   - quantum_kernel_params.json (8 qubits, depth 2, ZZ/Pauli)")
print(f"   - K_quantum_train.npy (20×20)")
print(f"   - K_quantum_full.npy (1241×20)")

print(f"\n3. Baseline Kernels:")
print(f"   - baseline_kernel_params.json (RBF γ={gamma_rbf:.6f}, etc.)")
print(f"   - K_rbf_train.npy, K_rbf_full.npy")
print(f"   - K_laplacian_train.npy, K_laplacian_full.npy")
print(f"   - K_poly2_train.npy, K_poly2_full.npy")
print(f"   - K_poly3_train.npy, K_poly3_full.npy")

print(f"\n4. Trained Models (ν={nu}):")
print(f"   - ocsvm_quantum.pkl ({models['quantum'].n_support_[0]} support vectors)")
print(f"   - ocsvm_rbf.pkl ({models['rbf'].n_support_[0]} support vectors)")
print(f"   - ocsvm_laplacian.pkl ({models['laplacian'].n_support_[0]} support vectors)")
print(f"   - ocsvm_poly2.pkl ({models['poly2'].n_support_[0]} support vectors)")
print(f"   - ocsvm_poly3.pkl ({models['poly3'].n_support_[0]} support vectors)")

print(f"\n5. Calibration:")
print(f"   - thresholds.json (95th percentile, FPR=5% on training)")
print(f"   - threshold_summary.json")
print(f"   - train_scores.pkl")

print(f"\n=== Anomaly Detection Convention ===")
print(f"IMPORTANT: Score interpretation for anomaly detection:")
print(f"  • HIGHER scores = MORE NOMINAL (closer to training distribution)")
print(f"  • LOWER scores = MORE ANOMALOUS (farther from training distribution)")
print(f"  • Anomaly flagged when: score > threshold (95th percentile)")
print(f"  • This is consistent with OCSVM decision_function behavior")

# Update threshold summary with clarification
threshold_summary_updated = {
    'target_fpr': target_fpr,
    'calibration_method': '95th_percentile',
    'n_training_samples': len(train_scores['quantum']),
    'anomaly_convention': 'score > threshold indicates anomaly',
    'score_interpretation': 'higher score = more nominal, lower score = more anomalous',
    'thresholds': thresholds,
    'actual_fpr_per_model': {
        model_name: float(np.sum(train_scores[model_name] > thresholds[model_name]) / len(train_scores[model_name]))
        for model_name in thresholds.keys()
    }
}

threshold_summary_path = 'result/phase_3/data/threshold_summary.json'
with open(threshold_summary_path, 'w') as f:
    json.dump(threshold_summary_updated, f, indent=2)
print(f"\n✓ Updated threshold_summary.json with convention clarification")

print(f"\n✓ Phase 3 artifacts saved successfully")
print(f"\nTotal files saved: {len(required_files)}")

=== Phase 3 Output Verification ===

Checking saved artifacts in result/phase_3/data/...
✓ scaler.pkl
✓ scaling_params.json
✓ quantum_kernel_params.json
✓ baseline_kernel_params.json
✓ K_quantum_train.npy
✓ K_quantum_full.npy
✓ K_rbf_train.npy
✓ K_rbf_full.npy
✓ K_laplacian_train.npy
✓ K_laplacian_full.npy
✓ K_poly2_train.npy
✓ K_poly2_full.npy
✓ K_poly3_train.npy
✓ K_poly3_full.npy
✓ ocsvm_quantum.pkl
✓ ocsvm_rbf.pkl
✓ ocsvm_laplacian.pkl
✓ ocsvm_poly2.pkl
✓ ocsvm_poly3.pkl
✓ train_scores.pkl
✓ thresholds.json
✓ threshold_summary.json

✓ All required files present

=== Saved Artifacts Summary ===

1. Feature Scaling:
   - scaler.pkl (MinMaxScaler fitted on training cycles 1-20)
   - scaling_params.json (feature ranges, target [0, π])

2. Quantum Kernel:
   - quantum_kernel_params.json (8 qubits, depth 2, ZZ/Pauli)
   - K_quantum_train.npy (20×20)
   - K_quantum_full.npy (1241×20)

3. Baseline Kernels:
   - baseline_kernel_params.json (RBF γ=0.041460, etc.)
   - K_rbf_train.npy, K_rbf_