In [None]:
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
import yaml
import pandas as pd
import numpy as np
import tensorflow as tf
from pathlib import Path
from time import strftime
from shutil import rmtree
import matplotlib.pyplot as plt

In [None]:
from rtapipe.lib.utils.misc import dotdict
from rtapipe.lib.dataset.data_manager import DataManager
from rtapipe.lib.datasource.Photometry3 import OnlinePhotometry, SimulationParams
from rtapipe.lib.plotting.plotting import plot_sequences
from rtapipe.lib.models.anomaly_detector_builder import AnomalyDetectorBuilder
from rtapipe.lib.evaluation.pval import get_pval_table, get_threshold_for_sigma

# Loading the test dataset

In [3]:
# Output directory for this notebook: handed to DataManager and used as the
# destination for the prediction plots further down.
output_dir = "./logs/test_models_dataset_itime_5_b_out"

In [4]:
# DataManager instance used below to turn the FITS files into time series and
# to build the test set; it is constructed with the notebook's output directory.
data_manager = DataManager(output_dir)

In [5]:
# Identifier of the test dataset.
# NOTE(review): not referenced anywhere else in the visible notebook, and its
# `tsl_5` suffix does not match the `tsl = 100` used for the transformation
# below - confirm whether this is still needed / still correct.
dataset_id="test_itime_5_b_tsl_5_nbins_3"

In [6]:
# Number of time series in the test set; passed to `detection_delay` in the
# evaluation cells below. It should equal the number of FITS files loaded for
# the test set.
test_set_size = 10

In [7]:
# Folder containing the FITS files of the test dataset.
# NOTE(review): absolute, machine-specific path - adjust when running elsewhere.
dataset_folder = "/data01/homes/baroncelli/phd/rtapipe/scripts/ml/dataset_generation/test/itime_5_b/fits_data"

# Load exactly `test_set_size` files so the number of loaded series cannot
# drift from the value later passed to `detection_delay`.
fits_files = DataManager.load_fits_data(dataset_folder, limit=test_set_size)

# Simulation parameters associated with the template of the loaded files.
sim_params = SimulationParams(
    runid="run0406_ID000126",
    onset=0,
    emin=0.04,
    emax=1,
    tmin=0,
    tobs=500,
    offset=0.5,
    irf="North_z40_5h_LST",
    roi=2.5,
    caldb="prod5-v0.1",
    simtype="grb",
)

Loaded 10 files


In [8]:
# NOTE(review): repeats the `test_set_size = 10` assignment already made in an
# earlier cell; the value is unchanged, so this cell is redundant.
test_set_size = 10

In [9]:
# Settings for the FITS -> time-series transformation.
multiple_templates = False
add_target_region = True
integration_time = 5
number_of_energy_bins = 3
tsl = 100  # presumably the time-series length (the recorded output shows (100, 3) per file) - TODO confirm
threads = 30
normalize = True

# Convert the loaded FITS files into time series (the recorded run took about a
# minute for 10 files).
data_manager.transform_to_timeseries(
    fits_files,
    sim_params,
    add_target_region,
    integration_time=integration_time,
    number_of_energy_bins=number_of_energy_bins,
    tsl=tsl,
    normalize=normalize,
    threads=threads,
    multiple_templates=multiple_templates,
)
# Alternative kept for reference: presumably reloads previously saved data
# instead of re-running the transformation - TODO confirm before enabling.
#data_manager.load_saved_data("run0406_ID000126", 5, 5)

# Load a previously fitted scaler (judging by the path, the one produced by the
# training notebook - verify).
# NOTE(review): this is a pickle file on an absolute, machine-specific path;
# only load it from a trusted location.
data_manager.load_scaler("/data01/homes/baroncelli/phd/rtapipe/notebooks/logs/train_models_new_data_manager_out/fitted_scaler.pickle")

# Build the windowed test set. `integration_time` is reused from the settings
# above so it cannot diverge from the value used for the transformation.
test_x, test_y = data_manager.get_test_set(
    template="run0406_ID000126",
    onset=250,
    integration_time=integration_time,
    sub_window_size=5,
    stride=1,
    verbose=True,
)

[2022-11-17 15:09:52.709557] Preconfiguring regions. Normalization: True - Template: run0406_ID000126
[2022-11-17 15:09:58.796567] Found 85 regions and 1 target regions


100%|███████████████████████████████████████████| 10/10 [01:05<00:00,  6.59s/it]

[2022-11-17 15:11:04.706172] Saved data to logs/test_models_dataset_itime_5_b_out/data_cache
Pivot index:  50
windows_before_pivot:  (46, 5, 3)
windows_after_pivot:  (50, 5, 3)
windows_before_pivot:  (46, 5, 3)
windows_after_pivot:  (50, 5, 3)
windows_before_pivot:  (46, 5, 3)
windows_after_pivot:  (50, 5, 3)
windows_before_pivot:  (46, 5, 3)
windows_after_pivot:  (50, 5, 3)
windows_before_pivot:  (46, 5, 3)
windows_after_pivot:  (50, 5, 3)
windows_before_pivot:  (46, 5, 3)
windows_after_pivot:  (50, 5, 3)
windows_before_pivot:  (46, 5, 3)
windows_after_pivot:  (50, 5, 3)
windows_before_pivot:  (46, 5, 3)
windows_after_pivot:  (50, 5, 3)
windows_before_pivot:  (46, 5, 3)
windows_after_pivot:  (50, 5, 3)
windows_before_pivot:  (46, 5, 3)
windows_after_pivot:  (50, 5, 3)
[2022-11-17 15:11:04.716533] Loaded 10 timeseries from template run0406_ID000126.
Single file shape before sub-windowing: (100, 3). Single file shape after sub-windowing: (96, 5, 3)
[2022-11-17 15:11:04.716575] test_x sh




In [None]:
# Show the shape of the data stored by the DataManager for the template.
data_manager.data["run0406_ID000126"].shape

## Plot some timeseries

In [None]:
# Plot labels for the three energy bins, written as "EB_<lower>-<upper>".
# The edges run from 0.04 to 1 (the emin/emax of the simulation parameters) and
# each bin starts where the previous one ends. The middle label previously
# carried a stray "2-" prefix ("EB_2-0.117-0.342").
features_names = ["EB_0.04-0.117", "EB_0.117-0.342", "EB_0.342-1"]

In [None]:
# Plot time series of the template, labelled with the energy-bin names.
# The shared `output_dir` is passed instead of repeating its literal value, so
# the two cannot drift apart.
data_manager.plot_timeseries(
    "run0406_ID000126",
    data_manager.data["run0406_ID000126"],
    10,  # presumably the number of series to plot - TODO confirm
    sim_params,
    output_dir,
    labels=features_names,
)

# Loading the models

In [None]:
# Read the trained-model descriptions from the YAML file and wrap each entry of
# its "models" list in a dotdict so fields can be accessed as attributes.
with open("./trained_models.yaml", "r") as f:
    # A malformed file now raises yaml.YAMLError right here. The previous
    # version printed the error and carried on, which then failed on the next
    # line with a misleading NameError because `configs` was never assigned.
    trained_models = yaml.safe_load(f)
configs = [dotdict(c) for c in trained_models["models"]]

# Evaluation metrics

In [None]:
# For every configured model: load it, print its evaluation metrics on the
# test set, then print the detection delay computed from its predictions.
for config in configs:
    print(f"\n\n************** Evaluating {config.name} patience={config.patience} **************")
    ad = AnomalyDetectorBuilder.getAnomalyDetector(
        name=config.name,
        timesteps=config.timesteps,
        nfeatures=config.nfeatures,
        load_model="True",
        training_epoch_dir=config.path,
        training=False,
    )

    metrics = ad.evaluate(test_x, test_y)
    for metric_name, metric_value in metrics.items():
        print(metric_name, metric_value)

    predictions = ad.predict(test_x)
    delay = ad.detection_delay(test_y, predictions, test_set_size, config.timesteps)
    print("detection_delay:", delay)

# Predictions

In [None]:
# Plot predictions only for the models whose configuration has patience == 5.
patience_5_configs = [c for c in configs if c.patience == 5]
for config in patience_5_configs:
    print(f"\n\n************** Predictions with {config.name} patience={config.patience} **************")
    ad = AnomalyDetectorBuilder.getAnomalyDetector(
        name=config.name,
        timesteps=config.timesteps,
        nfeatures=config.nfeatures,
        load_model="True",
        training_epoch_dir=config.path,
        training=False,
    )
    ad.plot_predictions(
        test_x,
        test_y,
        max_plots=999,
        features_names=features_names,
        epoch=config.epoch,
        outputDir=output_dir,
        figName=f"{config.name}_patience_{config.patience}",
    )

# P-value threshold

In [None]:
# Re-evaluate each model after replacing its threshold with the 5-sigma value
# obtained from the model's p-value table.
for config in configs:
    print(f"\n\n************** Predictions with {config.name} patience={config.patience} **************")

    # Check for the p-value table before loading the model: configurations
    # without one are skipped, so loading their model first was wasted work.
    if not config.pval_path:
        print("Pvalues not found. Skipping..")
        continue

    ad = AnomalyDetectorBuilder.getAnomalyDetector(
        name=config.name,
        timesteps=config.timesteps,
        nfeatures=config.nfeatures,
        load_model="True",
        training_epoch_dir=config.path,
        training=False,
    )

    pval_table = get_pval_table(config.pval_path, 4.5, show=True)
    threshold_5s = get_threshold_for_sigma(pval_table, 5)
    print("5sigma threshold=",threshold_5s)

    # Override the detector's threshold before evaluating and predicting.
    ad.threshold = threshold_5s

    metrics = ad.evaluate(test_x, test_y)
    for k,v in metrics.items():
        print(k,v)
    print("detection_delay:",ad.detection_delay(test_y, ad.predict(test_x), test_set_size, config.timesteps))

# Distribution of reco errors on test set

In [None]:
# NOTE(review): this only displays the `ad` object left over from the loops in
# the previous cells; nothing is computed for this section yet. It raises
# NameError when no detector was built (e.g. on a fresh kernel).
ad