In [1]:
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
import json
import tensorflow as tf
import argparse
import numpy as np
from pathlib import Path
from time import strftime
from shutil import rmtree
import matplotlib.pyplot as plt
from tensorflow.keras.callbacks import EarlyStopping

In [2]:
from rtapipe.lib.dataset.data_manager import DataManager
from rtapipe.lib.datasource.Photometry3 import OnlinePhotometry, SimulationParams
from rtapipe.lib.models.anomaly_detector_builder import AnomalyDetectorBuilder
from rtapipe.scripts.ml.offline.callbacks import CustomLogCallback
from rtapipe.lib.evaluation.custom_mse import CustomMSE
from rtapipe.lib.plotting.plotting import plot_sequences, loss_plot

## Configuration

In [3]:
# Directory handed to DataManager below. Per the recorded output, the cached
# time series are read back from a `data_cache` folder underneath it.
output_dir = "./logs/train_models_new_data_manager_out/"

In [4]:
# Folder scanned by DataManager.load_fits_data and recorded as "path" in
# dataset_params.json during training.
# NOTE(review): absolute, machine-specific path - the notebook will not run
# elsewhere without editing this value.
dataset_folder = "/data01/homes/baroncelli/phd/rtapipe/scripts/ml/dataset_generation/train/North_z40_5h_LST/train_set_c/fits_data"

In [5]:
# Percentage of the train and validation samples that is discarded before
# training: with 80, only the leading 20% of each set is kept.
TRAINING_DATA_DROP=80

In [6]:
# NOTE: this value has no effect - the next cell reassigns SCALER_TYPE before
# it is ever read.
SCALER_TYPE="robust"

In [7]:
# Scaler selection actually used downstream (passed to get_train_set and
# store_scaler); it replaces the value assigned in the previous cell.
SCALER_TYPE="minmax"

In [8]:
# Labels for the three feature channels.
# NOTE(review): the middle label carries an extra "2-" compared with its
# neighbours ("EB_<lo>-<hi>") - looks like a typo for "EB_0.117-0.342"; confirm
# before relying on these strings as keys.
features_names = ["EB_0.04-0.117","EB_2-0.117-0.342","EB_0.342-1"]

In [9]:
# Load the FITS inputs found in dataset_folder; with limit=1 the recorded
# output reports a single file. In the cells visible here the result is only
# referenced by the commented-out transform_to_timeseries call.
fits_files = DataManager.load_fits_data(dataset_folder, limit=1)

Loaded 1 files


In [10]:
# Parameters of the simulation run the dataset comes from, spelled out one
# per line so individual values are easy to diff.
sim_params = SimulationParams(
    runid="run0406_ID000126",
    onset=0,
    emin=0.04,
    emax=1,
    tmin=0,
    tobs=18000,
    offset=0.5,
    irf="North_z40_5h_LST",
    roi=2.5,
    caldb="prod5-v0.1",
    simtype="bkg",
)

In [11]:
# Identifier of the dataset; it is embedded in the per-model output directory
# name and written to dataset_params.json and to the callback metadata.
dataset_id="train_set_c_tsl_5_nbins_3"

In [12]:
# Settings describing how the FITS data are converted into time series.
multiple_templates = False
add_target_region = False
integration_time = 5
number_of_energy_bins = 3
tsl = 3600
threads = 30
normalize = True

data_manager = DataManager(output_dir)

# The FITS -> time-series conversion is left disabled: the result is read back
# from the on-disk cache instead (see the load below).
#data_manager.transform_to_timeseries(fits_files, sim_params, add_target_region, integration_time=integration_time, number_of_energy_bins=number_of_energy_bins, tsl=tsl, normalize=normalize, threads=threads, multiple_templates=multiple_templates)

# Load the cached data using the configured values rather than repeating them
# as literals (previously `load_saved_data(5, 3600)`), so the cache lookup
# cannot silently drift from the settings above.
# NOTE(review): assumes the positional arguments are (integration_time, tsl),
# the same order store_scaler is called with at training time - confirm.
data_manager.load_saved_data(integration_time, tsl) # <--- LOAD ME!

[2023-01-26 16:28:25.097603] Loaded data from logs/train_models_new_data_manager_out/data_cache. Loaded 1 templates.


In [13]:
# Sanity check on the cached "notemplate" data: 85 series, each `tsl` points
# long, with one channel per energy bin.
# NOTE(review): assumes the last two axes correspond to `tsl` and
# `number_of_energy_bins` (3600 and 3 with the current settings) - confirm.
expected_shape = (85, tsl, number_of_energy_bins)
actual_shape = data_manager.data["notemplate"].shape
assert actual_shape == expected_shape, (
    f"unexpected shape for cached 'notemplate' data: got {actual_shape}, expected {expected_shape}"
)

In [14]:
# Cut the "notemplate" series into fixed-size sub-windows and split them into
# train and validation sets. Per the recorded output, validation_split=80
# yields 48960 training and 12240 validation windows out of 61200.
train_x, train_y, val_x, val_y = data_manager.get_train_set(
    "notemplate",
    sub_window_size=5,
    stride=5,
    validation_split=80,
    scaler_type=SCALER_TYPE,
)

[2023-01-26 16:28:25.105223] Extracting subsequences of 5 points with stride 5 from 85 time series
[2023-01-26 16:28:25.281770] Extracted 61200 subsequences
[2023-01-26 16:28:25.284774] Train set shape: (48960, 5, 3) - Validation set shape: (12240, 5, 3)
[2023-01-26 16:28:25.284809] Data will be scaled to 0-1


In [15]:
# Shrink both sets to their leading (100 - TRAINING_DATA_DROP)% of samples.
# Despite their names, drop_train / drop_val hold the number of samples that
# are KEPT, not the number removed.
# NOTE: the arrays are reassigned in place, so re-running this cell without
# re-running the previous one shrinks the sets again.
n_train_total = train_x.shape[0]
n_val_total = val_x.shape[0]
drop_train = int(n_train_total - n_train_total*TRAINING_DATA_DROP / 100)
drop_val = int(n_val_total - n_val_total*TRAINING_DATA_DROP / 100)

train_x, train_y = train_x[:drop_train, :, :], train_y[:drop_train]
val_x, val_y = val_x[:drop_val, :, :], val_y[:drop_val]

# Report the resulting shapes: first the training pair, then the validation pair.
for features, labels in ((train_x, train_y), (val_x, val_y)):
    print(features.shape, labels.shape)

(9792, 5, 3) (9792,)
(2448, 5, 3) (2448,)


In [16]:
#import matplotlib
#matplotlib.use("TKAgg", force=True)
#%matplotlib inline
#plot_sequences(train_x[0:5], scaled=True, labels=["first sample of validation set"], features_names=features_names, showFig=True, saveFig=True)

In [17]:
# Names of every model AnomalyDetectorBuilder can build (displayed in the
# next cell); a later cell replaces this list with the subset to train.
model_names = AnomalyDetectorBuilder.getModelsName()

In [18]:
model_names

['AnomalyDetector_lstm_l2_u8',
 'AnomalyDetector_lstm_l2_u32',
 'AnomalyDetector_lstm_l2_u128',
 'AnomalyDetector_lstm_l4_u8',
 'AnomalyDetector_lstm_l4_u32',
 'AnomalyDetector_lstm_l4_u128',
 'AnomalyDetector_rnn_l2_u8',
 'AnomalyDetector_rnn_l2_u32',
 'AnomalyDetector_rnn_l2_u128',
 'AnomalyDetector_rnn_l4_u8',
 'AnomalyDetector_rnn_l4_u32',
 'AnomalyDetector_rnn_l4_u128',
 'AnomalyDetector_cnn_l2_u8',
 'AnomalyDetector_cnn_l2_u32',
 'AnomalyDetector_cnn_l2_u128']

In [19]:
# Explicit selection of the models trained in this run; it replaces the full
# list returned by AnomalyDetectorBuilder.getModelsName().
# (The former `"lstm" in model_name` filter was dropped: its result was
# overwritten by this assignment on the very next line and never used.)
model_names = ["AnomalyDetector_cnn_l2_u32", "AnomalyDetector_rnn_l2_u32"]#, "AnomalyDetector_lstm_l2_u32"]

In [20]:
# `patience` given to the EarlyStopping callback, which monitors val_loss.
EARLY_STOPPING_PATIENCE=5
# Maximum number of epochs passed to model.fit for every model.
EPOCHS=10

In [21]:
# Geometry of one training sample: train_x[0] is indexed as (timesteps, nfeatures).
timesteps = train_x[0].shape[0]
nfeatures = train_x[0].shape[1]
batch_size = 32

# One timestamped root folder per execution of this cell, located under the
# current working directory.
outDirRoot = Path(".").resolve().joinpath(f"run_{strftime('%Y%m%d-%H%M%S')}")

for model_name in model_names:

    print(f"\n\n\n********************* {model_name} training *********************\n\n")

    # Per-model output folder; it receives the fitted scaler, the dataset
    # description, the model parameters and the loss plot.
    outDirBase = outDirRoot.joinpath(f"model_{model_name}_dataset_{dataset_id}_tsl_{tsl}")
    outDirBase.mkdir(parents=True, exist_ok=True)
    data_manager.store_scaler(integration_time, tsl, SCALER_TYPE, outDirBase)

    # NOTE(review): "tsl" is hard-coded to 5 here, whereas the `tsl` variable
    # used for the directory name and the scaler holds a different value -
    # confirm which one consumers of dataset_params.json expect.
    dataset_params = {
        "id": dataset_id,
        "path": dataset_folder,
        "runid": "notemplate",
        "itime": integration_time,
        "tsl": 5,
        "normalized": True,
        "delay": 0,
        "offset": 0,
    }
    with open(outDirBase.joinpath('dataset_params.json'), 'w') as handle:
        json.dump(dataset_params, handle)

    # Callbacks
    # The first argument is presumably the list of epochs at which the custom
    # callback logs - TODO confirm against CustomLogCallback.
    clc = CustomLogCallback(
            [1, 5, 10, 50, 100 ,200],
            validation_data=(val_x, val_y),
            out_dir_root=outDirBase,
            wandb_run=None,
            metadata={"dataset_id": dataset_id, "model": model_name, "training": "heavy"}
    )
    # Stop once val_loss has not improved for EARLY_STOPPING_PATIENCE epochs.
    ea = EarlyStopping(monitor="val_loss", patience=EARLY_STOPPING_PATIENCE, mode="min")
    callbacks = [
        ea, clc
    ]

    anomalyDetector = AnomalyDetectorBuilder.getAnomalyDetector(model_name, timesteps, nfeatures)
    anomalyDetector.model.compile(optimizer='adam', loss=CustomMSE(nfeatures, output_dir=outDirBase))
    anomalyDetector.model.summary()
    anomalyDetector.store_parameters(outDirBase)

    # The model is fitted to reproduce its own input: train_x and val_x are
    # used both as inputs and as targets.
    history = anomalyDetector.model.fit(train_x, train_x, verbose=0, epochs=EPOCHS, batch_size=batch_size, validation_data=(val_x, val_x), callbacks=callbacks)

    # Trigger the custom callback one more time after training, with force=True.
    clc.on_epoch_end(None, force=True)

    # Written once per model; the original issued this identical call twice,
    # regenerating the same train_val_loss.svg file.
    loss_plot(history.history["loss"], history.history["val_loss"], model_name=model_name, title="Training loss", outputDir=outDirBase, figName="train_val_loss.svg", showFig=False)




********************* AnomalyDetector_cnn_l2_u32 training *********************


Storing scaler to /data01/homes/baroncelli/phd/rtapipe/notebooks/run_20230126-162825/model_AnomalyDetector_cnn_l2_u32_dataset_train_set_c_tsl_5_nbins_3_tsl_3600/fitted_scaler_minmax_itime_5_tsl_3600.pickle
AnomalyDetector_cnn_l2_u32 - input shape: (5,3)
Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv1d (Conv1D)              (None, 3, 32)             320       
_________________________________________________________________
dropout (Dropout)            (None, 3, 32)             0         
_________________________________________________________________
max_pooling1d (MaxPooling1D) (None, 2, 32)             0         
_________________________________________________________________
conv1d_transpose (Conv1DTran (None, 5, 3)              291       
_______________________________________________