In [39]:
import logging
import os
import sys
import time
import traceback
import warnings

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import polars as pl
import seaborn as sns
from tqdm import tqdm

sys.path.append(os.path.abspath("../.."))
from spectrum.utils.random import set_random_state
from spectrum.models import SRCNN
from spectrum.dataset import SRYaHooDataset

# Configure the root logger so INFO-level (and higher) records are emitted.
logging.basicConfig(level=logging.INFO)

# NOTE(review): this suppresses every warning category for the whole kernel
# session, not just noisy third-party ones; consider narrowing the filter.
warnings.filterwarnings("ignore")

# The seaborn theme is applied first; the rcParams update below runs afterwards,
# so its values are the ones left in effect for the keys it lists.
sns.set_theme(style="whitegrid")
plt.rcParams.update(
    {
        "axes.edgecolor": "0.3",
        "axes.linewidth": 0.8,
        "font.size": 12,
        "axes.titlesize": 14,
        "axes.labelsize": 12,
        "axes.titleweight": "bold",
        "legend.fontsize": 10,
        "figure.dpi": 120,
        "legend.frameon": False,
    }
)

# Fix the seed through the project helper.
# presumably seeds every RNG the model relies on — TODO confirm in spectrum.utils.random
set_random_state(42)

In [40]:
# Ids of the series evaluated in this run (used to build the Yahoo A1 CSV paths).
selected_ids = [4, 17, 33]

# Directory that receives the per-series CSVs and the summary CSV; created if missing.
results_dir = "../../results/models/sr_cnn"
os.makedirs(results_dir, exist_ok=True)


def find_best_threshold(scores, true_labels, thresholds=None):
    """Pick the score threshold that maximizes F1 against the true labels.

    A point is predicted anomalous when its score is strictly greater than the
    candidate threshold. Candidates are evaluated in order and the first one
    that reaches the highest F1 wins.

    Args:
        scores: array-like of anomaly scores.
        true_labels: array-like of 0/1 labels with the same shape as ``scores``.
        thresholds: optional sequence of candidate thresholds. When omitted,
            the 50th..99th percentiles of ``scores`` plus the 99.5th and
            99.9th percentiles are used.

    Returns:
        A ``(best_threshold, best_metrics)`` tuple. ``best_metrics`` holds the
        keys ``threshold``, ``accuracy``, ``precision``, ``recall``, ``f1``,
        ``fnr``, ``fpr``, ``tp``, ``fp``, ``tn`` and ``fn``. It is always
        populated: if no candidate achieves a positive F1, the metrics of the
        first candidate are returned (with ``f1 == 0``).

    Raises:
        ValueError: if ``scores`` and ``true_labels`` differ in shape.
    """
    scores = np.asarray(scores)
    true_labels = np.asarray(true_labels)
    if scores.shape != true_labels.shape:
        raise ValueError(
            f"scores and true_labels must have the same shape, "
            f"got {scores.shape} and {true_labels.shape}"
        )

    if thresholds is None:
        # use percentiles as candidate thresholds, plus two extra high-percentile points
        candidate_percentiles = list(range(50, 100)) + [99.5, 99.9]
        thresholds = [np.percentile(scores, p) for p in candidate_percentiles]

    # The label masks do not depend on the threshold, so compute them once.
    is_positive = true_labels == 1
    is_negative = true_labels == 0

    # Sentinel below any attainable F1: guarantees the first candidate is
    # recorded, so best_metrics is never left empty. The strict ">" keeps the
    # "first candidate with the highest F1 wins" tie-break.
    best_f1 = -1.0
    best_threshold = thresholds[0]
    best_metrics = {}

    for threshold in thresholds:
        flagged = scores > threshold

        # confusion matrix
        TP = (is_positive & flagged).sum()
        FP = (is_negative & flagged).sum()
        TN = (is_negative & ~flagged).sum()
        FN = (is_positive & ~flagged).sum()

        # metrics (each ratio falls back to 0 when its denominator is 0)
        total = TP + TN + FP + FN
        accuracy = (TP + TN) / total if total > 0 else 0
        precision = TP / (TP + FP) if (TP + FP) > 0 else 0
        recall = TP / (TP + FN) if (TP + FN) > 0 else 0
        f1 = 2 * (precision * recall) / (precision + recall) if (precision + recall) > 0 else 0
        fnr = FN / (FN + TP) if (FN + TP) > 0 else 0
        fpr = FP / (FP + TN) if (FP + TN) > 0 else 0

        if f1 > best_f1:
            best_f1 = f1
            best_threshold = threshold
            best_metrics = {
                'threshold': threshold,
                'accuracy': accuracy,
                'precision': precision,
                'recall': recall,
                'f1': f1,
                'fnr': fnr,
                'fpr': fpr,
                'tp': TP,
                'fp': FP,
                'tn': TN,
                'fn': FN
            }

    return best_threshold, best_metrics


def process_single_id(dataset_id):
    """Train SRCNN on one Yahoo A1 series, score its test split and save the results.

    Steps: build the training dataset, fit ``SRCNN(epochs=60)``, score the
    ``value`` column of the test CSV, pick the F1-maximizing threshold with
    ``find_best_threshold`` and write a per-point CSV to the module-level
    ``results_dir``.

    Note that the threshold is selected using the test labels themselves, so
    the reported metrics are a best-case figure for this scorer rather than an
    out-of-sample estimate.

    Args:
        dataset_id: id of the series; used to build the CSV paths and passed
            to ``SRYaHooDataset`` as ``_id``.

    Returns:
        A dict with ``id``, ``training_time``, ``testing_time``,
        ``total_time``, ``train_samples``, ``test_samples``,
        ``best_threshold`` and every key of the metrics dict returned by
        ``find_best_threshold``.

    Raises:
        ValueError: if the number of scores does not match the number of
            rows that follow the training split in the complete series.
    """
    print(f"processing: {dataset_id}")

    train_data = SRYaHooDataset(
        _id=dataset_id
    )
    test_data = pl.read_csv(f"../../datasets/Yahoo/test/A1/{dataset_id}.csv")
    complete_data = pl.read_csv(f"../../datasets/Yahoo/data/A1Benchmark/{dataset_id}.csv")

    print("  training model...")
    model = SRCNN(epochs=60)
    # NOTE(review): the recorded output of this notebook shows "0it" for every
    # epoch and ~0.14 s of training, which suggests the training dataset
    # yielded no batches — verify SRYaHooDataset before trusting the metrics.
    # perf_counter is a monotonic, high-resolution clock meant for measuring
    # intervals; time.time() can jump if the system clock is adjusted.
    start_time = time.perf_counter()
    model.fit(train_data)
    training_time = time.perf_counter() - start_time

    print("  anomaly detection...")
    start_time = time.perf_counter()
    scores = model.predict(test_data['value'])
    scoring_time = time.perf_counter() - start_time

    # The test split is taken to be everything after the first `train_len`
    # rows of the complete series.
    train_len = len(pl.read_csv(f"../../datasets/Yahoo/train/A1/{dataset_id}.csv")['value'])
    test_len = len(complete_data) - train_len

    complete_values = complete_data["value"].to_numpy()
    complete_labels = complete_data["label"].to_numpy()
    complete_timestamps = complete_data["timestamp"] if "timestamp" in complete_data.columns else range(
        len(complete_values))

    test_true_labels = complete_labels[train_len:]
    scores_array = scores.to_numpy()

    # Fail early with a clear message instead of a broadcasting error further down.
    if len(scores_array) != test_len:
        raise ValueError(
            f"dataset {dataset_id}: got {len(scores_array)} scores for {test_len} test rows"
        )

    print("  finding best threshold...")
    best_threshold, best_metrics = find_best_threshold(scores_array, test_true_labels)

    # Training rows carry prediction 0 and score 0; only the test rows are filled in.
    complete_predictions = np.zeros(len(complete_values))
    complete_predictions[train_len:] = (scores_array > best_threshold).astype(int)

    result_df = pd.DataFrame({
        'timestamp': complete_timestamps,
        'value': complete_values,
        'label': complete_labels,
        'predicted': complete_predictions,
        'anomaly_score': np.concatenate([np.zeros(train_len), scores_array])
    })

    output_file = os.path.join(results_dir, f"{dataset_id}.csv")
    result_df.to_csv(output_file, index=False)
    print(f"  results saved to: {output_file}")

    return {
        'id': dataset_id,
        'training_time': training_time,
        'testing_time': scoring_time,
        'total_time': training_time + scoring_time,
        'train_samples': len(train_data),
        'test_samples': test_len,
        'best_threshold': best_threshold,
        **best_metrics
    }


# Run the pipeline for every selected series. A failure on one series is
# reported together with its traceback and does not stop the remaining ones.
all_results = []
print(f"processing {len(selected_ids)} datasets...")

for selected_id in tqdm(selected_ids, desc="processing"):
    try:
        result = process_single_id(selected_id)
        all_results.append(result)

        print(f"  ID {selected_id} completed:")
        print(f"    best_threshold: {result['best_threshold']:.4f}")
        print(f"    f1: {result['f1']:.4f}")
        print(f"    precision: {result['precision']:.4f}")
        print(f"    recall: {result['recall']:.4f}")
        print(f"    accuracy: {result['accuracy']:.4f}")

    except Exception as e:
        # Deliberately broad: keep going with the next series, but never hide
        # the error — print the message and the full traceback.
        print(f"  processing {selected_id} failed: {e}")
        traceback.print_exc()

# Persist and print the aggregate statistics over the series that succeeded.
if all_results:
    summary_df = pd.DataFrame(all_results)
    summary_file = os.path.join(results_dir, "sr_cnn.csv")
    summary_df.to_csv(summary_file, index=False)
    print(f"summary results saved to: {summary_file}")

    print("\n" + "=" * 80)
    # The model evaluated here is SRCNN; the previous banner said "LSTM".
    print("SR-CNN anomaly detection results")
    print("=" * 80)
    print(f"processed {len(all_results)} datasets")
    print(f"average F1: {summary_df['f1'].mean():.4f} ± {summary_df['f1'].std():.4f}")
    print(f"average precision: {summary_df['precision'].mean():.4f} ± {summary_df['precision'].std():.4f}")
    print(f"average recall: {summary_df['recall'].mean():.4f} ± {summary_df['recall'].std():.4f}")
    print(f"average accuracy: {summary_df['accuracy'].mean():.4f} ± {summary_df['accuracy'].std():.4f}")
    print(f"average training time: {summary_df['training_time'].mean():.2f}s")
    print(f"average scoring time: {summary_df['testing_time'].mean():.2f}s")
    print("=" * 80)

    print("details:")
    display_cols = ['id', 'f1', 'precision', 'recall', 'accuracy', 'best_threshold', 'tp', 'fp', 'tn', 'fn']
    print(summary_df[display_cols].round(4))
else:
    print("no results")

processing 3 datasets...


processing:   0%|          | 0/3 [00:00<?, ?it/s]

processing: 4
  training model...


Epoch 1/60: 0it [00:00, ?it/s]
Epoch 2/60: 0it [00:00, ?it/s]
Epoch 3/60: 0it [00:00, ?it/s]
Epoch 4/60: 0it [00:00, ?it/s]
Epoch 5/60: 0it [00:00, ?it/s]
Epoch 6/60: 0it [00:00, ?it/s]
Epoch 7/60: 0it [00:00, ?it/s]
Epoch 8/60: 0it [00:00, ?it/s]
Epoch 9/60: 0it [00:00, ?it/s]
Epoch 10/60: 0it [00:00, ?it/s]
Epoch 11/60: 0it [00:00, ?it/s]
Epoch 12/60: 0it [00:00, ?it/s]
Epoch 13/60: 0it [00:00, ?it/s]
Epoch 14/60: 0it [00:00, ?it/s]
Epoch 15/60: 0it [00:00, ?it/s]
Epoch 16/60: 0it [00:00, ?it/s]
Epoch 17/60: 0it [00:00, ?it/s]
Epoch 18/60: 0it [00:00, ?it/s]
Epoch 19/60: 0it [00:00, ?it/s]
Epoch 20/60: 0it [00:00, ?it/s]
Epoch 21/60: 0it [00:00, ?it/s]
Epoch 22/60: 0it [00:00, ?it/s]
Epoch 23/60: 0it [00:00, ?it/s]
Epoch 24/60: 0it [00:00, ?it/s]
Epoch 25/60: 0it [00:00, ?it/s]
Epoch 26/60: 0it [00:00, ?it/s]
Epoch 27/60: 0it [00:00, ?it/s]
Epoch 28/60: 0it [00:00, ?it/s]
Epoch 29/60: 0it [00:00, ?it/s]
Epoch 30/60: 0it [00:00, ?it/s]
Epoch 31/60: 0it [00:00, ?it/s]
Epoch 32/60: 0it 

Training time: 0.14 seconds
  anomaly detection...



processing:  33%|███▎      | 1/3 [00:00<00:01,  1.43it/s]

  finding best threshold...
  results saved to: ../../results/models/sr_cnn/4.csv
  ID 4 completed:
    best_threshold: 0.6565
    f1: 0.0833
    precision: 0.0667
    recall: 0.1111
    accuracy: 0.9691
processing: 17
  training model...


Epoch 1/60: 0it [00:00, ?it/s]
Epoch 2/60: 0it [00:00, ?it/s]
Epoch 3/60: 0it [00:00, ?it/s]
Epoch 4/60: 0it [00:00, ?it/s]
Epoch 5/60: 0it [00:00, ?it/s]
Epoch 6/60: 0it [00:00, ?it/s]
Epoch 7/60: 0it [00:00, ?it/s]
Epoch 8/60: 0it [00:00, ?it/s]
Epoch 9/60: 0it [00:00, ?it/s]
Epoch 10/60: 0it [00:00, ?it/s]
Epoch 11/60: 0it [00:00, ?it/s]
Epoch 12/60: 0it [00:00, ?it/s]
Epoch 13/60: 0it [00:00, ?it/s]
Epoch 14/60: 0it [00:00, ?it/s]
Epoch 15/60: 0it [00:00, ?it/s]
Epoch 16/60: 0it [00:00, ?it/s]
Epoch 17/60: 0it [00:00, ?it/s]
Epoch 18/60: 0it [00:00, ?it/s]
Epoch 19/60: 0it [00:00, ?it/s]
Epoch 20/60: 0it [00:00, ?it/s]
Epoch 21/60: 0it [00:00, ?it/s]
Epoch 22/60: 0it [00:00, ?it/s]
Epoch 23/60: 0it [00:00, ?it/s]
Epoch 24/60: 0it [00:00, ?it/s]
Epoch 25/60: 0it [00:00, ?it/s]
Epoch 26/60: 0it [00:00, ?it/s]
Epoch 27/60: 0it [00:00, ?it/s]
Epoch 28/60: 0it [00:00, ?it/s]
Epoch 29/60: 0it [00:00, ?it/s]
Epoch 30/60: 0it [00:00, ?it/s]
Epoch 31/60: 0it [00:00, ?it/s]
Epoch 32/60: 0it 

Training time: 0.14 seconds
  anomaly detection...



processing:  67%|██████▋   | 2/3 [00:01<00:00,  1.40it/s]

  finding best threshold...
  results saved to: ../../results/models/sr_cnn/17.csv
  ID 17 completed:
    best_threshold: 0.2872
    f1: 0.3347
    precision: 0.2360
    recall: 0.5753
    accuracy: 0.5309
processing: 33
  training model...


Epoch 1/60: 0it [00:00, ?it/s]
Epoch 2/60: 0it [00:00, ?it/s]
Epoch 3/60: 0it [00:00, ?it/s]
Epoch 4/60: 0it [00:00, ?it/s]
Epoch 5/60: 0it [00:00, ?it/s]
Epoch 6/60: 0it [00:00, ?it/s]
Epoch 7/60: 0it [00:00, ?it/s]
Epoch 8/60: 0it [00:00, ?it/s]
Epoch 9/60: 0it [00:00, ?it/s]
Epoch 10/60: 0it [00:00, ?it/s]
Epoch 11/60: 0it [00:00, ?it/s]
Epoch 12/60: 0it [00:00, ?it/s]
Epoch 13/60: 0it [00:00, ?it/s]
Epoch 14/60: 0it [00:00, ?it/s]
Epoch 15/60: 0it [00:00, ?it/s]
Epoch 16/60: 0it [00:00, ?it/s]
Epoch 17/60: 0it [00:00, ?it/s]
Epoch 18/60: 0it [00:00, ?it/s]
Epoch 19/60: 0it [00:00, ?it/s]
Epoch 20/60: 0it [00:00, ?it/s]
Epoch 21/60: 0it [00:00, ?it/s]
Epoch 22/60: 0it [00:00, ?it/s]
Epoch 23/60: 0it [00:00, ?it/s]
Epoch 24/60: 0it [00:00, ?it/s]
Epoch 25/60: 0it [00:00, ?it/s]
Epoch 26/60: 0it [00:00, ?it/s]
Epoch 27/60: 0it [00:00, ?it/s]
Epoch 28/60: 0it [00:00, ?it/s]
Epoch 29/60: 0it [00:00, ?it/s]
Epoch 30/60: 0it [00:00, ?it/s]
Epoch 31/60: 0it [00:00, ?it/s]
Epoch 32/60: 0it 

Training time: 0.14 seconds
  anomaly detection...



processing: 100%|██████████| 3/3 [00:02<00:00,  1.43it/s]

  finding best threshold...
  results saved to: ../../results/models/sr_cnn/33.csv
  ID 33 completed:
    best_threshold: 0.5788
    f1: 0.0870
    precision: 0.0455
    recall: 1.0000
    accuracy: 0.9708
summary results saved to: ../../results/models/sr_cnn/sr_cnn.csv

LSTM anomaly detection results
processed 3 datasets
average F1: 0.1683 ± 0.1441
average precision: 0.1160 ± 0.1044
average recall: 0.5622 ± 0.4446
average accuracy: 0.8236 ± 0.2535
average training time: 0.14s
average scoring time: 0.53s
details:
   id      f1  precision  recall  accuracy  best_threshold  tp   fp   tn  fn
0   4  0.0833     0.0667  0.1111    0.9691          0.6565   1   14  689   8
1  17  0.3347     0.2360  0.5753    0.5309          0.2872  84  272  294  62
2  33  0.0870     0.0455  1.0000    0.9708          0.5788   1   21  698   0



