In [None]:
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
import yaml
import pandas as pd
import numpy as np
import tensorflow as tf
from pathlib import Path
from time import strftime
from shutil import rmtree
from tqdm import tqdm
import matplotlib.pyplot as plt
import matplotlib
# Force the TkAgg backend first; the %matplotlib magic on the next line then
# selects the inline backend for figures rendered in the notebook.
# NOTE(review): TkAgg presumably requires a working Tk/display — confirm this
# call is still needed, since the inline backend is selected right after it.
matplotlib.use("TKAgg", force=True)
%matplotlib inline
# Remove the pandas display limits so displayed DataFrames are not truncated.
pd.set_option('display.max_rows', None)
pd.set_option('display.max_columns', None)

In [None]:
from rtapipe.lib.utils.misc import dotdict
from rtapipe.lib.dataset.data_manager import DataManager
from rtapipe.lib.datasource.Photometry3 import OnlinePhotometry, SimulationParams
from rtapipe.lib.plotting.plotting import plot_sequences
from rtapipe.lib.standardanalysis.li_ma import LiMa
from rtapipe.lib.models.anomaly_detector_builder import AnomalyDetectorBuilder
from rtapipe.lib.evaluation.pval import get_pval_table, get_threshold_for_sigma, get_sigma_from_pvalue, get_sigma_for_ts_array

## Loading the test dataset

In [None]:
# Directory that receives the figures written by the plotting cells of this notebook.
output_dir = "./logs/test_models_dataset_itime_5_a_out"

In [None]:
# Pass the path held by output_dir instead of repeating the same literal, so the
# location only has to be changed in one place.
data_manager = DataManager(output_dir)

In [None]:
# Identifier of the test dataset.
# NOTE(review): not referenced by any other cell visible here — confirm it is still needed.
dataset_id="test_itime_5_a_tsl_5_nbins_3"

In [None]:
# Labels of the three features, handed to the plotting helpers in later cells.
# NOTE(review): the middle label "EB_2-0.117-0.342" does not follow the
# "EB_<low>-<high>" pattern of its neighbours — possibly a typo; confirm before changing.
features_names = ["EB_0.04-0.117","EB_2-0.117-0.342","EB_0.342-1"]

In [None]:
# Passed to detection_delay() in the evaluation cells.
# NOTE(review): presumably the number of test samples/templates — confirm.
test_set_size = 1

In [None]:
# Folder holding the FITS files, the files loaded from it (at most 5) and the
# simulation parameters that are passed on to the plotting helper later.
dataset_folder = "/data01/homes/baroncelli/phd/rtapipe/scripts/ml/dataset_generation/test/itime_5_a/fits_data"
fits_files = DataManager.load_fits_data(dataset_folder, limit=5)
simulation_settings = dict(
    runid="run0406_ID000126",
    onset=0,
    emin=0.04,
    emax=1,
    tmin=0,
    tobs=500,
    offset=0.5,
    irf="North_z40_5h_LST",
    roi=2.5,
    caldb="prod5-v0.1",
    simtype="grb",
)
sim_params = SimulationParams(**simulation_settings)

In [None]:
# Settings describing how the time series were generated; integration_time and
# tsl also select which saved dataset is loaded below.
multiple_templates = False
add_target_region = True
integration_time = 5
number_of_energy_bins = 3
tsl = 100
threads = 30
normalize = True
# Left disabled: the call that would rebuild the time series from the FITS files.
# Data saved by an earlier run is loaded instead.
#data_manager.transform_to_timeseries(fits_files, sim_params, add_target_region, integration_time=integration_time, number_of_energy_bins=number_of_energy_bins, tsl=tsl, normalize=normalize, threads=threads, multiple_templates=multiple_templates)
data_manager.load_saved_data(integration_time, tsl)
data_manager.load_scaler("/data01/homes/baroncelli/phd/rtapipe/notebooks/logs/train_models_new_data_manager_out/fitted_scaler.pickle")
# Use the integration_time defined above rather than a second hard-coded 5, so
# the loaded data and the extracted test set cannot drift apart.
test_x, test_y = data_manager.get_test_set(verbose=True, template="run0406_ID000126", onset=250, integration_time=integration_time, sub_window_size=5, stride=1)

In [None]:
# Inspect the shape of the array stored for this template.
data_manager.data["run0406_ID000126"].shape

## Plot the timeseries

In [None]:
# Plot the data stored for template run0406_ID000126, labelling the features with
# features_names; output_dir is passed as the destination argument.
# NOTE(review): the meaning of the positional argument 1 is not visible here — confirm.
data_manager.plot_timeseries("run0406_ID000126", data_manager.data["run0406_ID000126"], 1, sim_params, output_dir, labels=features_names)

## Plot some sub-sequences

In [None]:
# Overlay the three features of the first sequence of the template in one figure.
fig, ax = plt.subplots()
for i in range(3):
    ax.plot(data_manager.data["run0406_ID000126"][0,:,i])
# Save once, after every feature has been drawn, instead of re-writing the same
# file on each loop iteration.
fig.savefig("./logs/test_models_dataset_itime_5_a_out/save_file_name.png")
# Close this figure explicitly so it is released and not rendered inline.
plt.close(fig)

In [None]:
# Save one figure for each of the test samples 40..59, drawing the three
# features of the sample on a shared y-range.
for j in range(40, 60):
    fig, ax = plt.subplots(1, 1)
    for channel in range(3):
        ax.plot(test_x[j, :, channel])
    ax.set_ylim(0, 0.5)
    fig.savefig(f"./logs/test_models_dataset_itime_5_a_out/save_file_name_{j}.png")
    # Close each figure so twenty open figures do not pile up.
    plt.close(fig)


# Loading the models

In [None]:
# Read the list of trained models. A malformed file now raises yaml.YAMLError
# right here; printing and continuing would only fail on the next line with an
# unrelated error (or silently reuse a stale `configs` left in the kernel).
with open("./trained_models.yaml", "r") as f:
    trained_models = yaml.safe_load(f)
# Wrap every entry in a dotdict; later cells read its fields as attributes
# (config.name, config.path, ...).
configs = [dotdict(c) for c in trained_models["models"]]

# Evaluation metrics

In [None]:
# For every configured model: load the trained detector, keep it on the config
# as config.ad, then print its evaluation metrics and its detection delay.
for config in configs:
    print(f"\n\n************** Evaluating {config.name} patience={config.patience} **************")
    config.ad = AnomalyDetectorBuilder.getAnomalyDetector(
        name=config.name,
        timesteps=config.timesteps,
        nfeatures=config.nfeatures,
        load_model="True",
        training_epoch_dir=config.path,
        training=False,
    )
    metrics = config.ad.evaluate(test_x, test_y)
    for metric_name, metric_value in metrics.items():
        print(metric_name, metric_value)
    predictions = config.ad.predict(test_x)
    delay = config.ad.detection_delay(test_y, predictions, test_set_size, config.timesteps)
    print("detection_delay:", delay)

In [None]:
# Plot the predictions of the models whose config has patience == 5, using a
# freshly loaded detector for each of them.
patience_5_configs = [c for c in configs if c.patience == 5]
for config in patience_5_configs:
    print(f"\n\n************** Predictions with {config.name} patience={config.patience} **************")
    ad = AnomalyDetectorBuilder.getAnomalyDetector(
        name=config.name,
        timesteps=config.timesteps,
        nfeatures=config.nfeatures,
        load_model="True",
        training_epoch_dir=config.path,
        training=False,
    )
    ad.plot_predictions(
        test_x,
        test_y,
        max_plots=999,
        features_names=features_names,
        epoch=config.epoch,
        outputDir=output_dir,
        figName=f"{config.name}_patience_{config.patience}",
    )

# Evaluation metrics with sigma=5 threshold

In [None]:
# Significance level (in sigma) passed to get_pval_table / get_threshold_for_sigma
# to derive each detector's threshold.
SIGMA_THRESHOLD=5

In [None]:
# Reload every detector and evaluate it with the threshold read from its p-value
# table at SIGMA_THRESHOLD. Models without a p-value table get threshold None
# and are not evaluated.
for config in configs:
    print(f"\n\n************** Predictions with {config.name} patience={config.patience} **************")
    config.ad = AnomalyDetectorBuilder.getAnomalyDetector(
        name=config.name,
        timesteps=config.timesteps,
        nfeatures=config.nfeatures,
        load_model="True",
        training_epoch_dir=config.path,
        training=False,
    )
    if config.pval_path:
        pval_table = get_pval_table(config.pval_path, SIGMA_THRESHOLD, show=True)
        sigma_threshold = get_threshold_for_sigma(pval_table, SIGMA_THRESHOLD)
        config.ad.threshold = sigma_threshold
        metrics = config.ad.evaluate(test_x, test_y)
        for metric_name, metric_value in metrics.items():
            print(metric_name, metric_value)
        predictions = config.ad.predict(test_x)
        delay = config.ad.detection_delay(test_y, predictions, test_set_size, config.timesteps)
        print("detection_delay:", delay)
    else:
        print("Pvalues not found. Skipping..")
        config.ad.threshold = None

In [None]:
# Plot predictions with the sigma-based threshold. The detectors stored on
# config.ad by the sigma-threshold evaluation loop are reused, because a freshly
# built detector would not carry that threshold. The figure name gets a sigma
# suffix so these plots do not overwrite the ones made without the threshold.
# NOTE(review): assumes plot_predictions honours ad.threshold — confirm.
for config in [c for c in configs if c.patience == 5]:
    print(f"\n\n************** Predictions with {config.name} patience={config.patience} **************")
    ad = getattr(config, "ad", None)
    if ad is None or ad.threshold is None:
        # No detector, or no p-value table for this model: nothing to plot.
        print("No sigma threshold available. Skipping..")
        continue
    ad.plot_predictions(test_x, test_y, max_plots=999, features_names=features_names, epoch=config.epoch, outputDir=output_dir, figName=f"{config.name}_patience_{config.patience}_sigma_{SIGMA_THRESHOLD}")

# Significance table

In [None]:
# Select the model used for the significance table. Fail loudly when it is not
# listed; a loop that merely falls through would silently pick the last config.
model_config = next(
    (c for c in configs if c.name == "AnomalyDetector_cnn_l2_u32" and c.patience == 5),
    None,
)
if model_config is None:
    raise ValueError("No config found for AnomalyDetector_cnn_l2_u32 with patience=5")

In [None]:
# Load the p-value table of the selected model.
pvalues_table = get_pval_table(model_config.pval_path)

In [None]:
# Display the full table (the pandas row/column limits were lifted in the first cell).
pvalues_table

In [None]:
from scipy import interpolate

# Sort whole rows by p-value. Sorting only the p-value column would pair the
# sorted x values with sigma values that are still in their original row order.
pvalues_sorted = pvalues_table.sort_values("pvalue", ascending=True)

# Linear interpolation threshold -> p-value, built from the table itself.
f = interpolate.interp1d(pvalues_table["threshold"], pvalues_table["pvalue"])

fig, ax = plt.subplots(1, 1)
# Line: sigma as stored in the table.
_ = ax.plot(pvalues_sorted["pvalue"], pvalues_sorted["sigma"])
# Points: sigma recomputed from the interpolated p-value of each threshold.
_ = ax.scatter(pvalues_sorted["pvalue"], get_sigma_from_pvalue(f(pvalues_sorted["threshold"])), color="y")
ax.set_xscale("log")
ax.set_xlabel("p-value")
ax.set_ylabel("sigma")
plt.show()

In [None]:
data = {}
templates = ["run0406_ID000126"]

# The p-value table depends only on the model, so it is loaded once rather than
# once per template.
pvalues_table = get_pval_table(model_config.pval_path)

for template in templates:
    test_x, test_y = data_manager.get_test_set(template=template, onset=250, integration_time=5, sub_window_size=5, stride=1)
    # predict() is called for its effect on the detector; the reconstruction
    # errors are fetched from the detector right afterwards.
    model_config.ad.predict(test_x)
    reconstruction_errors = model_config.ad.get_reconstruction_errors()
    significances = get_sigma_for_ts_array(pvalues_table, reconstruction_errors, verbose=False)
    data[template] = significances

# One column of significances per template, indexed by the time bins of the sequences.
# NOTE(review): 96 is presumably the number of sequences (tsl=100, window 5,
# stride 1) — confirm against get_tbin_of_sequences' signature.
sigma_table = pd.DataFrame(data, index=DataManager.get_tbin_of_sequences(96, 5, 5, 1))
sigma_table

In [None]:
# Exploratory check of the container type returned by the index.
# NOTE(review): looks like leftover debugging — consider removing.
type(sigma_table.index.values)

In [None]:
# Show the time-bin labels that are passed to LiMa.detect as temporal_bins in the next cell.
sigma_table.index.values

In [None]:
# Run the Li&Ma analysis for each template on the same temporal bins that index
# sigma_table, keeping only the resulting "sigma" column.
li_ma_data = {}
temporal_bins = sigma_table.index.values
for template in tqdm(templates):
    template_fits = DataManager.get_fits_from_template(fits_files, template)
    lima_df = LiMa.detect(
        "binned",
        template_fits,
        temporal_bins=temporal_bins,
        tobs=500,
        sigma_gt=0,
    )
    li_ma_data[template] = lima_df["sigma"].values

In [None]:
# Show the Li&Ma sigma values computed for this template.
li_ma_data["run0406_ID000126"]

In [None]:
matplotlib.use("TKAgg", force=True)
%matplotlib inline
from rtapipe.lib.plotting.PlotConfig import PlotConfig
pc = PlotConfig()
fig, ax = plt.subplots(1,1, figsize=pc.fig_size)
ax.plot(sigma_table.index.values, sigma_table["run0406_ID000126"], marker='o', linestyle='--', label="Anomaly Detection")
ax.scatter(sigma_table.index.values, li_ma_data["run0406_ID000126"], marker='x', label="Li&Ma")
ax.tick_params(axis='y', labelsize=15, width=2, grid_alpha=0.5)
ax.tick_params(axis='x', which='major', labelsize=10)
fig.suptitle("Detection plot")
ax.set_title("Template run0406_ID000126")
ax.set_ylabel("Sigma")
ax.set_xlabel("Timeseries")
plt.axvline(x = 45, color = 'grey', linestyle="-.")
plt.axhline(y = 5, color = 'red', linestyle="-.")
mask = [True if i%2==0 else False for i in range(len(sigma_table.index.values))]
ax.set_xticks(range(0, len(sigma_table.index.values), 2), sigma_table.index.values[mask], rotation=45)
ax.legend()
fig.savefig(Path(output_dir).joinpath("detection_plot.png"), dpi=pc.dpi)
plt.show()