In [1]:
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
import yaml
import pandas as pd
import numpy as np
import tensorflow as tf
from pathlib import Path
from time import strftime
from shutil import rmtree
import matplotlib.pyplot as plt

In [2]:
from rtapipe.lib.utils.misc import dotdict
from rtapipe.lib.dataset.data_manager import DataManager
from rtapipe.lib.datasource.Photometry3 import OnlinePhotometry, SimulationParams
from rtapipe.lib.plotting.plotting import plot_sequences
from rtapipe.lib.models.anomaly_detector_builder import AnomalyDetectorBuilder
from rtapipe.lib.evaluation.pval import get_pval_table, get_threshold_for_sigma

# Loading the test dataset

In [None]:
# Directory handed to DataManager and reused as `outputDir` when saving the prediction plots.
output_dir = "./logs/test_models_dataset_itime_5_b_out"

In [3]:
# DataManager instance used below to build the timeseries, load the scaler and extract the test set.
data_manager = DataManager(output_dir)

In [4]:
# Identifier of the test dataset.
# NOTE(review): not referenced by any other cell visible in this notebook — confirm it is still needed.
dataset_id = "test_itime_5_b_tsl_5_nbins_3"

In [11]:
# Passed to ad.detection_delay() in the evaluation cells.
# NOTE(review): presumably the number of test timeseries (same value as `limit=10` used when
# loading the FITS files) — confirm.
test_set_size = 10

In [5]:
# Folder containing the FITS files of the test dataset.
# NOTE(review): absolute, user-specific path — adjust it when running on another machine.
dataset_folder = "/data01/homes/baroncelli/phd/rtapipe/scripts/ml/dataset_generation/test/itime_5_b/fits_data"
fits_files = DataManager.load_fits_data(dataset_folder, limit=10)

# Simulation parameters forwarded to the timeseries transformation and to the plotting call.
sim_params = SimulationParams(
    runid="run0406_ID000126",
    onset=0,
    emin=0.04,
    emax=1,
    tmin=0,
    tobs=500,
    offset=0.5,
    irf="North_z40_5h_LST",
    roi=2.5,
    caldb="prod5-v0.1",
    simtype="grb",
)

Loaded 10 files


In [6]:
# NOTE(review): repeats the identical `test_set_size = 10` assignment made in an earlier cell;
# one of the two definitions can be dropped.
test_set_size = 10

In [7]:
# Settings for converting the FITS files into timeseries.
multiple_templates = False
add_target_region = True
integration_time = 5
number_of_energy_bins = 3
tsl = 100  # presumably the timeseries length in bins (the log reports a (100, 3) shape per file) — confirm
threads = 30
normalize = True

data_manager.transform_to_timeseries(
    fits_files,
    sim_params,
    add_target_region,
    integration_time=integration_time,
    number_of_energy_bins=number_of_energy_bins,
    tsl=tsl,
    normalize=normalize,
    threads=threads,
    multiple_templates=multiple_templates,
)
# Disabled alternative kept from the original cell:
#data_manager.load_saved_data("run0406_ID000126", 5, 5)

# Scaler taken from the training notebook's output folder.
# NOTE(review): the file is a pickle — only load it from a trusted location.
data_manager.load_scaler("/data01/homes/baroncelli/phd/rtapipe/notebooks/logs/train_models_new_data_manager_out/fitted_scaler.pickle")

# Reuse `integration_time` instead of repeating the literal 5, so the transformation and the
# test-set extraction cannot silently drift apart when the setting above is changed.
test_x, test_y = data_manager.get_test_set(
    template="run0406_ID000126",
    onset=250,
    integration_time=integration_time,
    sub_window_size=5,
    stride=1,
)

[2022-11-16 12:09:02.707432] Preconfiguring regions. Normalization: True - Template: run0406_ID000126
[2022-11-16 12:09:08.111753] Found 85 regions and 1 target regions


100%|███████████████████████████████████████████| 10/10 [01:13<00:00,  7.36s/it]

[2022-11-16 12:10:21.755672] Saved data to logs/test_models_dataset_itime_5_b_out
Pivot index:  50
[2022-11-16 12:10:21.764600] Loaded 10 timeseries from template run0406_ID000126.
Single file shape before sub-windowing: (100, 3). Single file shape after sub-windowing: (96, 5, 3)
[2022-11-16 12:10:21.764682] test_x shape: (960, 5, 3)
[2022-11-16 12:10:21.764711] test_y shape: (960,)





In [None]:
# Display the shape of the data stored by the DataManager for this template.
data_manager.data["run0406_ID000126"].shape

## Plot some timeseries

In [8]:
# Labels of the three energy-bin features, used for the timeseries and prediction plots.
# The bins are contiguous (0.04-0.117, 0.117-0.342, 0.342-1); the middle label previously
# carried a stray "2-" prefix ("EB_2-0.117-0.342").
features_names = ["EB_0.04-0.117", "EB_0.117-0.342", "EB_0.342-1"]

In [None]:
# Plot timeseries of the template. The fifth argument was the same literal path as `output_dir`;
# passing the variable keeps every artefact of this notebook in one configurable folder.
data_manager.plot_timeseries(
    "run0406_ID000126",
    data_manager.data["run0406_ID000126"],
    10,  # NOTE(review): presumably the number of timeseries to plot — confirm
    sim_params,
    output_dir,
    labels=features_names,
)

# Loading the models

In [23]:
# Load the descriptions of the trained models to evaluate from the YAML file.
with open("./trained_models.yaml", "r") as f:
    try:
        trained_models = yaml.safe_load(f)
    except yaml.YAMLError as exc:
        # Report the parsing problem, then stop. Swallowing it would only defer the failure to
        # the next statement (NameError) or, worse, silently reuse a stale `configs` left in
        # the kernel by a previous run.
        print(exc)
        raise

# One dotdict per entry of the top-level "models" list. Later cells read the attributes
# name, patience, timesteps, nfeatures, path, epoch and pval_path from each entry.
configs = [dotdict(model_cfg) for model_cfg in trained_models["models"]]

# Evaluation metrics

In [12]:
# Evaluate every configured model on the test set and report its metrics and detection delay.
for config in configs:
    print(f"\n\n************** Evaluating {config.name} patience={config.patience} **************")
    ad = AnomalyDetectorBuilder.getAnomalyDetector(
        name=config.name,
        timesteps=config.timesteps,
        nfeatures=config.nfeatures,
        load_model="True",
        training_epoch_dir=config.path,
        training=False,
    )

    metrics = ad.evaluate(test_x, test_y)
    for metric_name, metric_value in metrics.items():
        print(metric_name, metric_value)

    predictions = ad.predict(test_x)
    delay = ad.detection_delay(test_y, predictions, test_set_size, config.timesteps)
    print("detection_delay:", delay)



************** Evaluating AnomalyDetector_cnn_l2_u32 patience=10 **************
AnomalyDetector_cnn_l2_u32 - input shape: (5,3)
accuracy 0.859375
precision 1.0
recall 0.73
f1 0.8439306358381503
roc_auc 0.865
confusion_matrix [[460, 0], [135, 365]]
false_positive_rate 0.0
detection_delay: 5.5


************** Evaluating AnomalyDetector_rnn_l2_u32 patience=10 **************
AnomalyDetector_rnn_l2_u32 - input shape: (5,3)
accuracy 0.9020833333333333
precision 1.0
recall 0.812
f1 0.8962472406181016
roc_auc 0.906
confusion_matrix [[460, 0], [94, 406]]
false_positive_rate 0.0
detection_delay: 4.2


************** Evaluating AnomalyDetector_cnn_l2_u32 patience=5 **************
AnomalyDetector_cnn_l2_u32 - input shape: (5,3)
accuracy 0.8354166666666667
precision 1.0
recall 0.684
f1 0.8123515439429928
roc_auc 0.8420000000000001
confusion_matrix [[460, 0], [158, 342]]
false_positive_rate 0.0
detection_delay: 5.5


************** Evaluating AnomalyDetector_rnn_l2_u32 patience=5 **************
A

# Predictions

In [36]:
# Plot the predictions only for the models trained with patience=5.
selected_configs = [cfg for cfg in configs if cfg.patience == 5]
for config in selected_configs:
    print(f"\n\n************** Predictions with {config.name} patience={config.patience} **************")
    ad = AnomalyDetectorBuilder.getAnomalyDetector(
        name=config.name,
        timesteps=config.timesteps,
        nfeatures=config.nfeatures,
        load_model="True",
        training_epoch_dir=config.path,
        training=False,
    )
    ad.plot_predictions(
        test_x,
        test_y,
        max_plots=999,
        features_names=features_names,
        epoch=config.epoch,
        outputDir=output_dir,
        figName=f"{config.name}_patience_{config.patience}",
    )



************** Predictions with AnomalyDetector_cnn_l2_u32 patience=5 **************
AnomalyDetector_cnn_l2_u32 - input shape: (5,3)


  6%|██▍                                       | 11/192 [00:12<03:18,  1.10s/it]


KeyboardInterrupt: 

# P-value threshold

In [14]:
# Re-evaluate each model using the classification threshold that corresponds to 5 sigma
# in its p-value table.
for config in configs:
    print(f"\n\n************** Predictions with {config.name} patience={config.patience} **************")

    # Check for the p-value table first: loading a model that is then skipped is wasted work.
    if config.pval_path is None:
        print("Pvalues not found. Skipping..")
        continue

    ad = AnomalyDetectorBuilder.getAnomalyDetector(
        name=config.name,
        timesteps=config.timesteps,
        nfeatures=config.nfeatures,
        load_model="True",
        training_epoch_dir=config.path,
        training=False,
    )

    # NOTE(review): 4.5 looks like the minimum sigma of the rows shown (the printed table
    # starts at sigma ~4.5) — confirm against get_pval_table.
    pval_table = get_pval_table(config.pval_path, 4.5, show=True)
    threshold_5s = get_threshold_for_sigma(pval_table, 5)
    print("5sigma threshold=",threshold_5s)

    # Override the detector's threshold before computing the metrics.
    ad.threshold = threshold_5s
    metrics = ad.evaluate(test_x, test_y)
    for k, v in metrics.items():
        print(k, v)
    print("detection_delay:", ad.detection_delay(test_y, ad.predict(test_x), test_set_size, config.timesteps))



************** Predictions with AnomalyDetector_cnn_l2_u32 patience=10 **************
AnomalyDetector_cnn_l2_u32 - input shape: (5,3)
pval is np.inf, returning 0
    threshold  threshold_err        pvalue    pvalue_err  sigma
55   0.103781       0.000934  3.121724e-06  3.813793e-07  4.518
56   0.105649       0.000934  2.935352e-06  3.698196e-07  4.531
57   0.107518       0.000934  2.702388e-06  3.548409e-07  4.548
58   0.109387       0.000934  2.283052e-06  3.261503e-07  4.584
59   0.111256       0.000934  1.817123e-06  2.909725e-07  4.631
60   0.113125       0.000934  1.630751e-06  2.756473e-07  4.654
61   0.114993       0.000934  1.304601e-06  2.465464e-07  4.699
62   0.116862       0.000934  1.211415e-06  2.375781e-07  4.715
63   0.118731       0.000934  1.118229e-06  2.282576e-07  4.731
64   0.120600       0.000934  9.784508e-07  2.135155e-07  4.758
65   0.122468       0.000934  7.454863e-07  1.863716e-07  4.812
66   0.124337       0.000934  6.988934e-07  1.804535e-07  4.825
67  

  _warn_prf(average, modifier, msg_start, len(result))
  "false_positive_rate" : cm[0][1] / (cm[0][1] + cm[1][1]),


accuracy 0.4791666666666667
precision 0.0
recall 0.0
f1 0.0
roc_auc 0.5
confusion_matrix [[460, 0], [500, 0]]
false_positive_rate nan
detection_delay: 48.0


************** Predictions with AnomalyDetector_rnn_l2_u32 patience=10 **************
AnomalyDetector_rnn_l2_u32 - input shape: (5,3)
pval is np.inf, returning 0
    threshold  threshold_err        pvalue    pvalue_err  sigma
53   0.089710       0.000838  2.916949e-06  1.681302e-07  4.532
54   0.091387       0.000838  2.490551e-06  1.553563e-07  4.566
55   0.093064       0.000838  2.190135e-06  1.456856e-07  4.592
56   0.094741       0.000838  1.918791e-06  1.363625e-07  4.620
57   0.096418       0.000838  1.686210e-06  1.278312e-07  4.647
58   0.098094       0.000838  1.521465e-06  1.214261e-07  4.668
59   0.099771       0.000838  1.308266e-06  1.125976e-07  4.699
60   0.101448       0.000838  1.172594e-06  1.065995e-07  4.721
61   0.103125       0.000838  1.065995e-06  1.016386e-07  4.740
62   0.104802       0.000838  9.206318e-

  _warn_prf(average, modifier, msg_start, len(result))
  "false_positive_rate" : cm[0][1] / (cm[0][1] + cm[1][1]),


accuracy 0.4791666666666667
precision 0.0
recall 0.0
f1 0.0
roc_auc 0.5
confusion_matrix [[460, 0], [500, 0]]
false_positive_rate nan
detection_delay: 48.0


************** Predictions with AnomalyDetector_cnn_l2_u32 patience=5 **************
AnomalyDetector_cnn_l2_u32 - input shape: (5,3)
Pvalues not found. Skipping..


************** Predictions with AnomalyDetector_rnn_l2_u32 patience=5 **************
AnomalyDetector_rnn_l2_u32 - input shape: (5,3)
Pvalues not found. Skipping..


# Distribution of reconstruction errors on the test set

In [None]:
from rtapipe.lib.plotting.PlotConfig import PlotConfig
# Plot configuration object; the histogram cell below reads its figure size, title font sizes
# and histogram colors.
pc = PlotConfig()

In [None]:
"""
Refactored in 
plotting.struction_errors_distribution()


"""
# Histogram (log-scaled counts) of the reconstruction errors on the test set.
# NOTE(review): `recostructions` is not assigned in any cell visible in this notebook; it must
# already exist in the kernel for this cell to run — define it before this cell (TODO).
fig, ax = plt.subplots(1, 1, figsize=pc.fig_size)
ax.hist(recostructions, bins=50, **pc.get_histogram_colors())
fig.suptitle("Distribution of the reconstruction errors", fontsize=pc.fig_suptitle_size)
ax.set_title("Test set", fontsize=pc.fig_title_size)
ax.set_xlabel("Reconstruction errors")
ax.set_ylabel("Counts (log)")
ax.set_yscale('log')
# Build the path from `output_dir` instead of repeating the literal folder, so the figure is
# saved alongside the other outputs of this notebook.
fig.savefig(os.path.join(output_dir, "reco_distributions.png"))

In [None]:
# Display the shape of the reconstruction-error array.
# NOTE(review): `recostructions` is not assigned in any visible cell — confirm where it comes from.
recostructions.shape