In [4]:
import logging
import os
import sys
import time
import warnings

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import polars as pl
import seaborn as sns
from tqdm import tqdm

sys.path.append(os.path.abspath("../.."))
from spectrum.config import WINDOW_SIZE
from spectrum.utils import set_random_state
from spectrum.models import LSTM

# Emit INFO-level (and above) log records from the root logger.
logging.basicConfig(level=logging.INFO)

# Silence every Python warning for the rest of the session.
# NOTE(review): this is a blanket filter; it will also hide deprecation and
# numerical warnings raised during training.
warnings.filterwarnings(action="ignore")

# Plot styling: apply the seaborn theme first, then layer the matplotlib
# overrides on top so the theme does not overwrite them.
sns.set_theme(style="whitegrid")
plt.rcParams.update(
    {
        # figure
        "figure.dpi": 120,
        # fonts
        "font.size": 12,
        # axes
        "axes.edgecolor": "0.3",
        "axes.linewidth": 0.8,
        "axes.titlesize": 14,
        "axes.titleweight": "bold",
        "axes.labelsize": 12,
        # legend
        "legend.fontsize": 10,
        "legend.frameon": False,
    }
)

# Project helper from spectrum.utils, called with its defaults
# (presumably seeds the RNGs used by the experiments -- confirm in spectrum.utils).
set_random_state()

In [5]:
# Dataset IDs to evaluate; process_single_id reads train/<id>.csv and test/<id>.csv for each.
selected_ids = [1, 2, 3]

# Output folder for the per-dataset result CSVs and the summary CSV; created if missing.
results_dir = "../../results/models/lstm"
os.makedirs(results_dir, exist_ok=True)


def find_best_threshold(scores, true_labels, thresholds=None):
    """Pick the score threshold that maximises F1 against ``true_labels``.

    A sample is predicted anomalous when ``score > threshold`` (strict).
    Candidates are evaluated from highest to lowest and only a strictly
    better F1 replaces the current best, so ties resolve to the highest
    threshold.

    Args:
        scores: Array-like of anomaly scores. Non-finite entries are replaced
            before evaluation: NaN -> 0.0, +inf -> largest finite score,
            -inf -> smallest finite score (1.0 / 0.0 when no score is finite).
        true_labels: Array-like with the same shape as ``scores``; cast to
            int, with 1 counted as positive and 0 as negative.
        thresholds: Optional iterable of candidate thresholds. When ``None``,
            candidates are score percentiles: every 5% below 90, every 1%
            from 90 to 99, plus a few values above 99. If all scores are
            equal, that single value is the only candidate.

    Returns:
        ``(best_threshold, best_metrics)`` where ``best_metrics`` holds
        ``threshold``, ``accuracy``, ``precision``, ``recall``, ``f1``,
        ``fnr``, ``fpr`` and the confusion counts ``tp``/``fp``/``tn``/``fn``.
        When there are no candidates (e.g. empty ``scores`` with no explicit
        thresholds) the threshold is ``0.0`` and every metric is zero.

    Raises:
        ValueError: If ``scores`` and ``true_labels`` differ in shape.
    """
    # Accept lists / other array-likes, not just ndarrays.
    scores = np.asarray(scores)
    true_labels = np.asarray(true_labels).astype(int)
    if scores.shape != true_labels.shape:
        raise ValueError(
            "scores and true_labels must have the same shape, "
            f"got {scores.shape} and {true_labels.shape}"
        )

    # Safety check for NaN/Inf in scores
    finite_mask = np.isfinite(scores)
    if not finite_mask.all():
        print("Warning: Scores contain NaN or Inf. Handling them.")
        if finite_mask.any():
            finite_scores = scores[finite_mask]
            upper, lower = finite_scores.max(), finite_scores.min()
        else:
            upper, lower = 1.0, 0.0
        # Clamp infinities to the finite range so they do not distort the
        # percentile-based candidate thresholds.
        scores = np.nan_to_num(scores, nan=0.0, posinf=upper, neginf=lower)

    if thresholds is None:
        if scores.size == 0:
            # Nothing to derive candidates from; fall through to the defaults.
            thresholds = []
        elif scores.min() == scores.max():
            # Handle case where scores are all same (e.g. 0)
            thresholds = [scores.flat[0]]
        else:
            # Use percentiles as candidate thresholds, denser near the top
            # where anomaly cut-offs usually live.
            percentiles = [
                *range(0, 90, 5),
                *range(90, 100, 1),
                99.1, 99.3, 99.5, 99.7, 99.9, 99.95, 99.99,
            ]
            thresholds = [np.percentile(scores, p) for p in percentiles]

    # Remove duplicates and sort descending
    thresholds = sorted(set(thresholds), reverse=True)

    best_f1 = -1
    best_threshold = thresholds[0] if len(thresholds) > 0 else 0.0

    best_metrics = {
        'threshold': best_threshold,
        'accuracy': 0.0,
        'precision': 0.0,
        'recall': 0.0,
        'f1': 0.0,
        'fnr': 0.0,
        'fpr': 0.0,
        'tp': 0,
        'fp': 0,
        'tn': 0,
        'fn': 0
    }

    # Label masks do not depend on the threshold; compute them once.
    is_positive = true_labels == 1
    is_negative = true_labels == 0

    for threshold in thresholds:
        flagged = scores > threshold

        # calculate confusion matrix
        TP = (is_positive & flagged).sum()
        FP = (is_negative & flagged).sum()
        TN = (is_negative & ~flagged).sum()
        FN = (is_positive & ~flagged).sum()

        # calculate metrics (each ratio falls back to 0 on an empty denominator)
        total = TP + TN + FP + FN
        accuracy = (TP + TN) / total if total > 0 else 0
        precision = TP / (TP + FP) if (TP + FP) > 0 else 0
        recall = TP / (TP + FN) if (TP + FN) > 0 else 0
        f1 = 2 * (precision * recall) / (precision + recall) if (precision + recall) > 0 else 0
        fnr = FN / (FN + TP) if (FN + TP) > 0 else 0
        fpr = FP / (FP + TN) if (FP + TN) > 0 else 0

        if f1 > best_f1:
            best_f1 = f1
            best_threshold = threshold
            best_metrics = {
                'threshold': threshold,
                'accuracy': accuracy,
                'precision': precision,
                'recall': recall,
                'f1': f1,
                'fnr': fnr,
                'fpr': fpr,
                'tp': TP,
                'fp': FP,
                'tn': TN,
                'fn': FN
            }

    return best_threshold, best_metrics


def process_single_id(data_id):
    """Train and evaluate the LSTM detector on one Tencent dataset.

    Reads ``train/<data_id>.csv`` and ``test/<data_id>.csv``, fits the model
    on the training frame, scores the test frame, selects the F1-maximising
    threshold with ``find_best_threshold`` and writes a per-row result CSV to
    ``results_dir``.

    NOTE(review): the threshold is chosen using the test labels themselves,
    so the reported metrics are a best case rather than a held-out estimate.

    Args:
        data_id: Identifier used to build the input and output file names.

    Returns:
        dict with the dataset id, timings, sample counts, the chosen
        threshold, and every entry of the metrics dict returned by
        ``find_best_threshold``.
    """
    print(f"processing: {data_id}")

    train_frame = pl.read_csv(f"../../datasets/Tencent/train/{data_id}.csv")
    test_frame = pl.read_csv(f"../../datasets/Tencent/test/{data_id}.csv")
    n_train, n_test = len(train_frame), len(test_frame)

    print("  training model...")
    # Columns 1..5 are the model's output dims (col 0 is the timestamp,
    # per the original author's note).
    detector = LSTM(input_size=5, output_dims=[1, 2, 3, 4, 5], epochs=100)
    fit_started = time.time()
    detector.fit(train_frame.to_numpy())
    training_time = time.time() - fit_started

    print("  anomaly detection...")
    predict_started = time.time()
    scores_array = detector.predict(test_frame.to_numpy())
    scoring_time = time.time() - predict_started

    # Debug stats
    print(f"  Scores stats: min={scores_array.min():.6f}, max={scores_array.max():.6f}, mean={scores_array.mean():.6f}")

    # The model may return fewer scores than test rows; keep only the
    # trailing labels so both arrays line up.
    offset = n_test - len(scores_array)
    aligned_labels = test_frame["label"].to_numpy()[offset:]

    print("  finding best threshold...")
    best_threshold, best_metrics = find_best_threshold(scores_array, aligned_labels)

    # --- Stitch train + test together for the "complete" output series ---
    full_df = pl.concat([train_frame, test_frame])
    available = full_df.columns

    # Prefer an explicit "value" column; otherwise use column 1 (first feature).
    complete_values = (
        full_df["value"].to_numpy() if "value" in available else full_df.to_numpy()[:, 1]
    )
    total_rows = len(complete_values)
    complete_labels = full_df["label"].to_numpy()
    complete_timestamps = (
        full_df["timestamp"].to_numpy() if "timestamp" in available else range(total_rows)
    )

    # Rows without a score (training rows and any unscored test prefix) stay at 0.
    first_scored_row = n_train + offset
    complete_predictions = np.zeros(total_rows)
    complete_predictions[first_scored_row:] = (scores_array > best_threshold).astype(int)
    complete_anomaly_scores = np.zeros(total_rows)
    complete_anomaly_scores[first_scored_row:] = scores_array

    result_df = pd.DataFrame({
        'timestamp': complete_timestamps,
        'value': complete_values,
        'label': complete_labels,
        'predicted': complete_predictions,
        'anomaly_score': complete_anomaly_scores
    })

    output_file = os.path.join(results_dir, f"{data_id}.csv")
    result_df.to_csv(output_file, index=False)
    print(f"  results saved to: {output_file}")

    summary = {
        'id': data_id,
        'training_time': training_time,
        'testing_time': scoring_time,
        'total_time': training_time + scoring_time,
        'train_samples': n_train,
        'test_samples': n_test,
        'best_threshold': best_threshold,
    }
    summary.update(best_metrics)
    return summary


# Run every selected dataset, collecting one summary dict per success.
all_results = []
print(f"processing {len(selected_ids)} datasets...")

for data_id in tqdm(selected_ids, desc="processing"):
    try:
        result = process_single_id(data_id)
        all_results.append(result)

        print(f"  ID {data_id} completed:")
        print(
            "\n".join(
                f"    {key}: {result[key]:.4f}"
                for key in ("best_threshold", "f1", "precision", "recall", "accuracy")
            )
        )
    except Exception as exc:
        # Best effort: report the failure with its traceback and move on to
        # the next dataset instead of aborting the whole run.
        print(f"  processing {data_id} failed: {exc!s}")
        import traceback

        traceback.print_exc()

if not all_results:
    print("no results")
else:
    # Persist the per-dataset summary next to the individual result files.
    summary_df = pd.DataFrame(all_results)
    summary_file = os.path.join(results_dir, "lstm.csv")
    summary_df.to_csv(summary_file, index=False)
    print(f"summary results saved to: {summary_file}")

    print(
        "\n" + "=" * 80,
        "LSTM anomaly detection results",
        "=" * 80,
        f"processed {len(all_results)} datasets",
        sep="\n",
    )
    # Mean ± sample standard deviation across datasets for each quality metric.
    print(
        "\n".join(
            f"average {label}: {summary_df[column].mean():.4f} ± {summary_df[column].std():.4f}"
            for label, column in (
                ("F1", "f1"),
                ("precision", "precision"),
                ("recall", "recall"),
                ("accuracy", "accuracy"),
            )
        )
    )
    print(f"average training time: {summary_df['training_time'].mean():.2f}s")
    print(f"average scoring time: {summary_df['testing_time'].mean():.2f}s")
    print("=" * 80)

    print("details:")
    display_cols = ['id', 'f1', 'precision', 'recall', 'accuracy', 'best_threshold', 'tp', 'fp', 'tn', 'fn']
    print(summary_df[display_cols].round(4))

processing 3 datasets...


processing:   0%|          | 0/3 [00:00<?, ?it/s]

processing: 1
  training model...
  anomaly detection...


processing:  33%|███▎      | 1/3 [00:55<01:51, 56.00s/it]

  Scores stats: min=0.000000, max=9.895761, mean=0.002547
  finding best threshold...
  results saved to: ../../results/models/lstm/1.csv
  ID 1 completed:
    best_threshold: 0.0000
    f1: 0.8571
    precision: 0.7500
    recall: 1.0000
    accuracy: 0.9993
processing: 2
  training model...
  anomaly detection...


processing:  67%|██████▋   | 2/3 [01:52<00:56, 56.30s/it]

  Scores stats: min=0.000000, max=9.916924, mean=0.002551
  finding best threshold...
  results saved to: ../../results/models/lstm/2.csv
  ID 2 completed:
    best_threshold: 0.0000
    f1: 0.8571
    precision: 0.7500
    recall: 1.0000
    accuracy: 0.9993
processing: 3
  training model...
  anomaly detection...


processing: 100%|██████████| 3/3 [03:52<00:00, 77.57s/it]

  Scores stats: min=0.000000, max=9.916239, mean=0.002540
  finding best threshold...
  results saved to: ../../results/models/lstm/3.csv
  ID 3 completed:
    best_threshold: 5.7986
    f1: 0.0000
    precision: 0.0000
    recall: 0.0000
    accuracy: 0.9998
summary results saved to: ../../results/models/lstm/lstm.csv

LSTM anomaly detection results
processed 3 datasets
average F1: 0.5714 ± 0.4949
average precision: 0.5000 ± 0.4330
average recall: 0.6667 ± 0.5774
average accuracy: 0.9995 ± 0.0003
average training time: 77.24s
average scoring time: 0.30s
details:
   id      f1  precision  recall  accuracy  best_threshold  tp  fp    tn  fn
0   1  0.8571       0.75     1.0    0.9993          0.0000   9   3  4292   0
1   2  0.8571       0.75     1.0    0.9993          0.0000   9   3  4292   0
2   3  0.0000       0.00     0.0    0.9998          5.7986   0   1  4303   0





In [6]:
def process_synthetic_data():
    """Train and evaluate the LSTM detector on the synthetic dataset.

    Loads the synthetic train/test CSVs, fits the model on every column
    except ``timestamp`` and ``label``, scores the test split, selects the
    F1-maximising threshold with ``find_best_threshold``, writes a per-row
    result CSV to ``../../results/models/lstm_synthetic/synthetic.csv`` and
    prints the resulting metrics.

    NOTE(review): the threshold is chosen using the test labels themselves,
    so the printed metrics are a best case rather than a held-out estimate.

    Returns:
        None. If either input file is missing, an error hint is printed and
        the function returns early without training.
    """
    print("\n" + "="*40)
    print("Processing Synthetic Dataset...")
    print("="*40)

    # Paths
    train_path = "../../datasets/synthetic/train/train.csv"
    test_path = "../../datasets/synthetic/test/test.csv"

    # Both splits are required; bail out with a hint if either one is missing
    # rather than failing later inside the CSV reader.
    for required_path in (train_path, test_path):
        if not os.path.exists(required_path):
            print(f"Error: {required_path} not found. Please run preprocess/synthetic.ipynb first.")
            return

    train_data = pl.read_csv(train_path)
    test_data = pl.read_csv(test_path)

    print(f"  Train shape: {train_data.shape}")
    print(f"  Test shape: {test_data.shape}")

    print("  training model...")

    # The model only sees feature columns: everything except timestamp/label.
    cols_to_drop = ["timestamp", "label"]
    feature_cols = [c for c in train_data.columns if c not in cols_to_drop]

    train_features = train_data.select(feature_cols)
    test_features = test_data.select(feature_cols)

    print(f"  Feature columns: {len(feature_cols)}")

    input_size = len(feature_cols)
    output_dims = list(range(input_size))  # All features 0..N-1

    model = LSTM(input_size=input_size, output_dims=output_dims, epochs=100)
    start_time = time.time()
    model.fit(train_features.to_numpy())
    training_time = time.time() - start_time

    print("  anomaly detection...")
    start_time = time.time()
    scores = model.predict(test_features.to_numpy())
    scoring_time = time.time() - start_time

    print(f"  Scores stats: min={scores.min():.6f}, max={scores.max():.6f}, mean={scores.mean():.6f}")

    scores_array = scores
    test_true_labels = test_data["label"].to_numpy()

    # The model may return fewer scores than test rows; keep only the
    # trailing labels so both arrays line up.
    labels_start_idx = len(test_true_labels) - len(scores_array)
    aligned_labels = test_true_labels[labels_start_idx:]

    print("  finding best threshold...")
    best_threshold, best_metrics = find_best_threshold(scores_array, aligned_labels)

    # Save Results
    results_dir_syn = "../../results/models/lstm_synthetic"
    os.makedirs(results_dir_syn, exist_ok=True)

    full_df = pl.concat([train_data, test_data])

    # Use the first feature column as the representative value series.
    first_feat_col = feature_cols[0]
    complete_values = full_df[first_feat_col].to_numpy()

    complete_labels = full_df["label"].to_numpy()
    if "timestamp" in full_df.columns:
        complete_timestamps = full_df["timestamp"].to_numpy()
    else:
        complete_timestamps = range(len(complete_values))

    # Rows without a score (training rows and any unscored test prefix) stay at 0.
    complete_predictions = np.zeros(len(complete_values))
    complete_anomaly_scores = np.zeros(len(complete_values))

    pred_start_idx = len(train_data) + (len(test_data) - len(scores_array))

    # Ensure bounds
    end_idx = pred_start_idx + len(scores_array)
    complete_predictions[pred_start_idx:end_idx] = (scores_array > best_threshold).astype(int)
    complete_anomaly_scores[pred_start_idx:end_idx] = scores_array

    result_df = pd.DataFrame({
        'timestamp': complete_timestamps,
        'value': complete_values,
        'label': complete_labels,
        'predicted': complete_predictions,
        'anomaly_score': complete_anomaly_scores
    })

    output_file = os.path.join(results_dir_syn, "synthetic.csv")
    result_df.to_csv(output_file, index=False)
    print(f"  results saved to: {output_file}")

    print("\nResults:")
    print(f"  best_threshold: {best_threshold:.4f}")
    for k, v in best_metrics.items():
        # NumPy float scalars such as float32 are not subclasses of Python's
        # float, so check np.floating too to format every metric uniformly.
        if isinstance(v, (float, np.floating)):
            print(f"  {k}: {v:.4f}")
        else:
            print(f"  {k}: {v}")

# Run the synthetic-data experiment; it prints its metrics and writes the result CSV as a side effect.
process_synthetic_data()


Processing Synthetic Dataset...
  Train shape: (2500, 12)
  Test shape: (2500, 12)
  training model...
  Feature columns: 10
  anomaly detection...
  Scores stats: min=0.000000, max=3.955362, mean=0.214955
  finding best threshold...
  results saved to: ../../results/models/lstm_synthetic/synthetic.csv

Results:
  best_threshold: 0.9562
  threshold: 0.9561876654624939
  accuracy: 0.9722
  precision: 0.6933
  recall: 0.5306
  f1: 0.6012
  fnr: 0.4694
  fpr: 0.0096
  tp: 52
  fp: 23
  tn: 2363
  fn: 46
