In [1]:
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
import json
import tensorflow as tf
import argparse
import numpy as np
from pathlib import Path
from time import strftime
from shutil import rmtree
import matplotlib.pyplot as plt
from tensorflow.keras.callbacks import EarlyStopping
import wandb
from wandb.keras import WandbCallback

In [2]:
from rtapipe.lib.dataset.data_manager import DataManager
from rtapipe.lib.datasource.Photometry3 import OnlinePhotometry, SimulationParams
from rtapipe.lib.models.anomaly_detector_builder import AnomalyDetectorBuilder
from rtapipe.scripts.ml.offline.callbacks import CustomLogCallback
from rtapipe.lib.evaluation.custom_mse import CustomMSE
from rtapipe.lib.plotting.plotting import plot_sequences, loss_plot

## Configuration

In [3]:
output_dir = "./logs/train_models_new_data_manager_itime_1"

In [4]:
dataset_folder = "/data01/homes/baroncelli/phd/rtapipe/scripts/ml/dataset_generation/train/North_z40_5h_LST/itime_5_b/fits_data"

In [5]:
SCALER_TYPE="minmax"

In [6]:
features_names = ["EB_0.04-0.117","EB_2-0.117-0.342","EB_0.342-1"]

In [7]:
fits_files = DataManager.load_fits_data(dataset_folder, limit=10)

Loaded 10 files


In [8]:
sim_params = SimulationParams(runid="run0406_ID000126", onset=0, emin=0.04, emax=1, tmin=0, tobs=500, offset=0.5, irf="North_z40_5h_LST", roi=2.5, caldb="prod5-v0.1", simtype="bkg")

In [9]:
dataset_id="train_itime_1_a_tsl_5_nbins_3"

In [10]:
multiple_templates = False
add_target_region = False
integration_time = 1
number_of_energy_bins = 3
tsl = 500
threads = 30
normalize = True
data_manager = DataManager(output_dir)
#data_manager.transform_to_timeseries(fits_files, sim_params, add_target_region, integration_time=integration_time, number_of_energy_bins=number_of_energy_bins, tsl=tsl, normalize=normalize, threads=threads, multiple_templates=multiple_templates)
data_manager.load_saved_data(1, 500) # <--- LOAD ME

[2023-01-26 16:45:18.805667] Loaded data from logs/train_models_new_data_manager_itime_1/data_cache. Loaded 1 templates.


In [11]:
assert data_manager.data["notemplate"].shape == (850, 500, 3)

In [12]:
train_x, train_y , val_x, val_y = data_manager.get_train_set("notemplate", sub_window_size=5, stride=5, validation_split=80)

[2023-01-26 16:45:18.820605] Extracting subsequences of 5 points with stride 5 from 850 time series
[2023-01-26 16:45:21.564163] Extracted 85000 subsequences
[2023-01-26 16:45:21.568192] Train set shape: (68000, 5, 3) - Validation set shape: (17000, 5, 3)
[2023-01-26 16:45:21.568218] Data will be scaled to 0-1


In [13]:
TRAINING_DATA_DROP=80

In [14]:
drop_train = int(train_x.shape[0] - train_x.shape[0]*TRAINING_DATA_DROP / 100)
drop_val = int(val_x.shape[0] - val_x.shape[0]*TRAINING_DATA_DROP / 100)

train_x = train_x[:drop_train, :, :]
train_y = train_y[:drop_train]
val_x = val_x[:drop_val, :, :]
val_y = val_y[:drop_val]

print(train_x.shape, train_y.shape)
print(val_x.shape, val_y.shape)

(13600, 5, 3) (13600,)
(3400, 5, 3) (3400,)


In [15]:
train_x.shape

(13600, 5, 3)

In [16]:
val_x.shape

(3400, 5, 3)

In [17]:
#import matplotlib
#matplotlib.use("TKAgg", force=True)
#%matplotlib inline
#plot_sequences(train_x[0:5], scaled=True, labels=["first sample of validation set"], features_names=features_names, showFig=True, saveFig=True)

In [18]:
model_names = AnomalyDetectorBuilder.getModelsName()

In [19]:
model_names = [model_name for model_name in model_names if "lstm" in model_name]
model_names = ["AnomalyDetector_cnn_l2_u32", "AnomalyDetector_rnn_l2_u32"]#, "AnomalyDetector_lstm_l2_u32"]

In [20]:
EARLY_STOPPING_PATIENCE=5
EPOCHS=10

In [21]:
callbacks = []

In [22]:
timesteps = train_x[0].shape[0]
nfeatures = train_x[0].shape[1]
batch_size = 32

outDirRoot = Path("./").parent.resolve().joinpath(f"run_{strftime('%Y%m%d-%H%M%S')}")

for model_name in model_names:
    
    print(f"\n\n\n********************* {model_name} training *********************\n\n")
    outDirBase = outDirRoot.joinpath(f"model_{model_name}_dataset_{dataset_id}_tsl_{tsl}")

    outDirBase.mkdir(parents=True, exist_ok=True)
    data_manager.store_scaler(integration_time, tsl, SCALER_TYPE, outDirBase)
    with open(outDirBase.joinpath('dataset_params.json'), 'w') as handle:
        json.dump({"id":dataset_id, "path":dataset_folder, "runid": "notemplate", "itime":5, "tsl":5, "normalized":True, "delay":0, "offset":0}, handle)

    # Callbacks 
    wandb_config = dict (
        entity="leobaro_",
        dataset_id = dataset_id,
        machine = "agilehost3",
        job_type="train",
        batch_size = batch_size,
        model = model_name
    )
    run = wandb.init(
    project=f"phd-prod5-29-december-2022-run-{strftime('%Y%m%d-%H%M%S')}", config=wandb_config)

    callbacks.append(WandbCallback())

    clc = CustomLogCallback(
            [1, 5, 10, 20],
            validation_data=(val_x, val_y), 
            out_dir_root=outDirBase, 
            wandb_run=None, 
            metadata={"dataset_id": dataset_id, "model": model_name, "training": "heavy"}
    )    
    callbacks.append(clc)


    ea = EarlyStopping(monitor="val_loss", patience=EARLY_STOPPING_PATIENCE, mode="min")
    callbacks.append(ea)

    anomalyDetector = AnomalyDetectorBuilder.getAnomalyDetector(model_name, timesteps, nfeatures)
    anomalyDetector.model.compile(optimizer='adam', loss=CustomMSE(nfeatures, output_dir=outDirBase)) # 
    anomalyDetector.model.summary()
    anomalyDetector.store_parameters(outDirBase)

    history = anomalyDetector.model.fit(train_x, train_x, verbose=0, epochs=EPOCHS, batch_size=batch_size, validation_data=(val_x, val_x), callbacks=callbacks)
    clc.on_epoch_end(None, force=True)
    loss_plot(history.history["loss"], history.history["val_loss"], model_name=model_name, title=f"Training loss", outputDir=outDirBase, figName="train_val_loss.png", showFig=False)




********************* AnomalyDetector_cnn_l2_u32 training *********************


Storing scaler to /data01/homes/baroncelli/phd/rtapipe/notebooks/run_20230126-164521/model_AnomalyDetector_cnn_l2_u32_dataset_train_itime_1_a_tsl_5_nbins_3_tsl_500/fitted_scaler_minmax_itime_1_tsl_500.pickle


[34m[1mwandb[0m: Currently logged in as: [33mleobaro_[0m. Use [1m`wandb login --relogin`[0m to force relogin




AnomalyDetector_cnn_l2_u32 - input shape: (5,3)
Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv1d (Conv1D)              (None, 3, 32)             320       
_________________________________________________________________
dropout (Dropout)            (None, 3, 32)             0         
_________________________________________________________________
max_pooling1d (MaxPooling1D) (None, 2, 32)             0         
_________________________________________________________________
conv1d_transpose (Conv1DTran (None, 5, 3)              291       
_________________________________________________________________
dropout_1 (Dropout)          (None, 5, 3)              0         
Total params: 611
Trainable params: 611
Non-trainable params: 0
_________________________________________________________________
INFO:tensorflow:Assets written to: /data01/homes/baroncelli/phd/rtapipe/note

[34m[1mwandb[0m: Adding directory to artifact (/data01/homes/baroncelli/phd/rtapipe/notebooks/wandb/run-20230126_164523-2vu469x1/files/model-best)... Done. 0.1s




----------------- Checkpoint! Saving data at epoch 1 (Triggered by Early Stopping=False) -----------------
INFO:tensorflow:Assets written to: /data01/homes/baroncelli/phd/rtapipe/notebooks/run_20230126-164521/model_AnomalyDetector_cnn_l2_u32_dataset_train_itime_1_a_tsl_5_nbins_3_tsl_500/epochs/epoch_1/trained_model/assets
INFO:tensorflow:Assets written to: /data01/homes/baroncelli/phd/rtapipe/notebooks/wandb/run-20230126_164523-2vu469x1/files/model-best/assets


[34m[1mwandb[0m: Adding directory to artifact (/data01/homes/baroncelli/phd/rtapipe/notebooks/wandb/run-20230126_164523-2vu469x1/files/model-best)... Done. 0.0s


INFO:tensorflow:Assets written to: /data01/homes/baroncelli/phd/rtapipe/notebooks/wandb/run-20230126_164523-2vu469x1/files/model-best/assets


[34m[1mwandb[0m: Adding directory to artifact (/data01/homes/baroncelli/phd/rtapipe/notebooks/wandb/run-20230126_164523-2vu469x1/files/model-best)... Done. 0.0s


INFO:tensorflow:Assets written to: /data01/homes/baroncelli/phd/rtapipe/notebooks/wandb/run-20230126_164523-2vu469x1/files/model-best/assets


[34m[1mwandb[0m: Adding directory to artifact (/data01/homes/baroncelli/phd/rtapipe/notebooks/wandb/run-20230126_164523-2vu469x1/files/model-best)... Done. 0.0s




----------------- Checkpoint! Saving data at epoch 5 (Triggered by Early Stopping=False) -----------------
INFO:tensorflow:Assets written to: /data01/homes/baroncelli/phd/rtapipe/notebooks/run_20230126-164521/model_AnomalyDetector_cnn_l2_u32_dataset_train_itime_1_a_tsl_5_nbins_3_tsl_500/epochs/epoch_5/trained_model/assets
INFO:tensorflow:Assets written to: /data01/homes/baroncelli/phd/rtapipe/notebooks/wandb/run-20230126_164523-2vu469x1/files/model-best/assets


[34m[1mwandb[0m: Adding directory to artifact (/data01/homes/baroncelli/phd/rtapipe/notebooks/wandb/run-20230126_164523-2vu469x1/files/model-best)... Done. 0.0s


INFO:tensorflow:Assets written to: /data01/homes/baroncelli/phd/rtapipe/notebooks/wandb/run-20230126_164523-2vu469x1/files/model-best/assets


[34m[1mwandb[0m: Adding directory to artifact (/data01/homes/baroncelli/phd/rtapipe/notebooks/wandb/run-20230126_164523-2vu469x1/files/model-best)... Done. 0.0s


INFO:tensorflow:Assets written to: /data01/homes/baroncelli/phd/rtapipe/notebooks/wandb/run-20230126_164523-2vu469x1/files/model-best/assets


[34m[1mwandb[0m: Adding directory to artifact (/data01/homes/baroncelli/phd/rtapipe/notebooks/wandb/run-20230126_164523-2vu469x1/files/model-best)... Done. 0.0s




----------------- Checkpoint! Saving data at epoch 10 (Triggered by Early Stopping=False) -----------------
INFO:tensorflow:Assets written to: /data01/homes/baroncelli/phd/rtapipe/notebooks/run_20230126-164521/model_AnomalyDetector_cnn_l2_u32_dataset_train_itime_1_a_tsl_5_nbins_3_tsl_500/epochs/epoch_10/trained_model/assets


----------------- Checkpoint! Saving data at epoch 11 (Triggered by Early Stopping=True) -----------------
INFO:tensorflow:Assets written to: /data01/homes/baroncelli/phd/rtapipe/notebooks/run_20230126-164521/model_AnomalyDetector_cnn_l2_u32_dataset_train_itime_1_a_tsl_5_nbins_3_tsl_500/epochs/epoch_11/trained_model/assets
Plot /data01/homes/baroncelli/phd/rtapipe/notebooks/run_20230126-164521/model_AnomalyDetector_cnn_l2_u32_dataset_train_itime_1_a_tsl_5_nbins_3_tsl_500/AnomalyDetector_cnn_l2_u32_train_val_loss.png created.



********************* AnomalyDetector_rnn_l2_u32 training *********************


Storing scaler to /data01/homes/baroncelli/phd/rtapipe

0,1
epoch,▁▂▃▃▄▅▆▆▇█
loss,█▃▂▂▁▁▁▂▁▁
val_loss,█▅▃▅▂▂▃▂▂▁

0,1
best_epoch,9.0
best_val_loss,0.00064
epoch,9.0
loss,0.00211
val_loss,0.00064


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.016670500766485928, max=1.0…

AnomalyDetector_rnn_l2_u32 - input shape: (5,3)
Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
simple_rnn (SimpleRNN)       (None, 32)                1152      
_________________________________________________________________
dropout_2 (Dropout)          (None, 32)                0         
_________________________________________________________________
repeat_vector (RepeatVector) (None, 5, 32)             0         
_________________________________________________________________
simple_rnn_1 (SimpleRNN)     (None, 5, 32)             2080      
_________________________________________________________________
dropout_3 (Dropout)          (None, 5, 32)             0         
_________________________________________________________________
time_distributed (TimeDistri (None, 5, 3)              99        
Total params: 3,331
Trainable params: 3,331
Non-trainable params: 0
____

[34m[1mwandb[0m: Adding directory to artifact (/data01/homes/baroncelli/phd/rtapipe/notebooks/wandb/run-20230126_164523-2vu469x1/files/model-best)... Done. 0.0s


INFO:tensorflow:Assets written to: /data01/homes/baroncelli/phd/rtapipe/notebooks/wandb/run-20230126_164559-s3upibng/files/model-best/assets


[34m[1mwandb[0m: Adding directory to artifact (/data01/homes/baroncelli/phd/rtapipe/notebooks/wandb/run-20230126_164559-s3upibng/files/model-best)... Done. 0.0s




----------------- Checkpoint! Saving data at epoch 1 (Triggered by Early Stopping=False) -----------------
INFO:tensorflow:Assets written to: /data01/homes/baroncelli/phd/rtapipe/notebooks/run_20230126-164521/model_AnomalyDetector_rnn_l2_u32_dataset_train_itime_1_a_tsl_5_nbins_3_tsl_500/epochs/epoch_1/trained_model/assets
INFO:tensorflow:Assets written to: /data01/homes/baroncelli/phd/rtapipe/notebooks/wandb/run-20230126_164523-2vu469x1/files/model-best/assets


[34m[1mwandb[0m: Adding directory to artifact (/data01/homes/baroncelli/phd/rtapipe/notebooks/wandb/run-20230126_164523-2vu469x1/files/model-best)... Done. 0.0s


INFO:tensorflow:Assets written to: /data01/homes/baroncelli/phd/rtapipe/notebooks/wandb/run-20230126_164559-s3upibng/files/model-best/assets


[34m[1mwandb[0m: Adding directory to artifact (/data01/homes/baroncelli/phd/rtapipe/notebooks/wandb/run-20230126_164559-s3upibng/files/model-best)... Done. 0.0s


INFO:tensorflow:Assets written to: /data01/homes/baroncelli/phd/rtapipe/notebooks/wandb/run-20230126_164523-2vu469x1/files/model-best/assets


[34m[1mwandb[0m: Adding directory to artifact (/data01/homes/baroncelli/phd/rtapipe/notebooks/wandb/run-20230126_164523-2vu469x1/files/model-best)... Done. 0.0s


INFO:tensorflow:Assets written to: /data01/homes/baroncelli/phd/rtapipe/notebooks/wandb/run-20230126_164559-s3upibng/files/model-best/assets


[34m[1mwandb[0m: Adding directory to artifact (/data01/homes/baroncelli/phd/rtapipe/notebooks/wandb/run-20230126_164559-s3upibng/files/model-best)... Done. 0.0s


INFO:tensorflow:Assets written to: /data01/homes/baroncelli/phd/rtapipe/notebooks/wandb/run-20230126_164523-2vu469x1/files/model-best/assets


[34m[1mwandb[0m: Adding directory to artifact (/data01/homes/baroncelli/phd/rtapipe/notebooks/wandb/run-20230126_164523-2vu469x1/files/model-best)... Done. 0.0s


INFO:tensorflow:Assets written to: /data01/homes/baroncelli/phd/rtapipe/notebooks/wandb/run-20230126_164559-s3upibng/files/model-best/assets


[34m[1mwandb[0m: Adding directory to artifact (/data01/homes/baroncelli/phd/rtapipe/notebooks/wandb/run-20230126_164559-s3upibng/files/model-best)... Done. 0.0s


INFO:tensorflow:Assets written to: /data01/homes/baroncelli/phd/rtapipe/notebooks/wandb/run-20230126_164523-2vu469x1/files/model-best/assets


[34m[1mwandb[0m: Adding directory to artifact (/data01/homes/baroncelli/phd/rtapipe/notebooks/wandb/run-20230126_164523-2vu469x1/files/model-best)... Done. 0.0s


INFO:tensorflow:Assets written to: /data01/homes/baroncelli/phd/rtapipe/notebooks/wandb/run-20230126_164559-s3upibng/files/model-best/assets


[34m[1mwandb[0m: Adding directory to artifact (/data01/homes/baroncelli/phd/rtapipe/notebooks/wandb/run-20230126_164559-s3upibng/files/model-best)... Done. 0.0s




----------------- Checkpoint! Saving data at epoch 5 (Triggered by Early Stopping=False) -----------------
INFO:tensorflow:Assets written to: /data01/homes/baroncelli/phd/rtapipe/notebooks/run_20230126-164521/model_AnomalyDetector_rnn_l2_u32_dataset_train_itime_1_a_tsl_5_nbins_3_tsl_500/epochs/epoch_5/trained_model/assets
INFO:tensorflow:Assets written to: /data01/homes/baroncelli/phd/rtapipe/notebooks/wandb/run-20230126_164523-2vu469x1/files/model-best/assets


[34m[1mwandb[0m: Adding directory to artifact (/data01/homes/baroncelli/phd/rtapipe/notebooks/wandb/run-20230126_164523-2vu469x1/files/model-best)... Done. 0.0s


INFO:tensorflow:Assets written to: /data01/homes/baroncelli/phd/rtapipe/notebooks/wandb/run-20230126_164559-s3upibng/files/model-best/assets


[34m[1mwandb[0m: Adding directory to artifact (/data01/homes/baroncelli/phd/rtapipe/notebooks/wandb/run-20230126_164559-s3upibng/files/model-best)... Done. 0.0s


INFO:tensorflow:Assets written to: /data01/homes/baroncelli/phd/rtapipe/notebooks/wandb/run-20230126_164523-2vu469x1/files/model-best/assets


[34m[1mwandb[0m: Adding directory to artifact (/data01/homes/baroncelli/phd/rtapipe/notebooks/wandb/run-20230126_164523-2vu469x1/files/model-best)... Done. 0.0s


INFO:tensorflow:Assets written to: /data01/homes/baroncelli/phd/rtapipe/notebooks/wandb/run-20230126_164559-s3upibng/files/model-best/assets


[34m[1mwandb[0m: Adding directory to artifact (/data01/homes/baroncelli/phd/rtapipe/notebooks/wandb/run-20230126_164559-s3upibng/files/model-best)... Done. 0.0s


INFO:tensorflow:Assets written to: /data01/homes/baroncelli/phd/rtapipe/notebooks/wandb/run-20230126_164523-2vu469x1/files/model-best/assets


[34m[1mwandb[0m: Adding directory to artifact (/data01/homes/baroncelli/phd/rtapipe/notebooks/wandb/run-20230126_164523-2vu469x1/files/model-best)... Done. 0.0s


INFO:tensorflow:Assets written to: /data01/homes/baroncelli/phd/rtapipe/notebooks/wandb/run-20230126_164559-s3upibng/files/model-best/assets


[34m[1mwandb[0m: Adding directory to artifact (/data01/homes/baroncelli/phd/rtapipe/notebooks/wandb/run-20230126_164559-s3upibng/files/model-best)... Done. 0.0s




----------------- Checkpoint! Saving data at epoch 20 (Triggered by Early Stopping=False) -----------------
INFO:tensorflow:Assets written to: /data01/homes/baroncelli/phd/rtapipe/notebooks/run_20230126-164521/model_AnomalyDetector_cnn_l2_u32_dataset_train_itime_1_a_tsl_5_nbins_3_tsl_500/epochs/epoch_20/trained_model/assets


----------------- Checkpoint! Saving data at epoch 10 (Triggered by Early Stopping=False) -----------------
INFO:tensorflow:Assets written to: /data01/homes/baroncelli/phd/rtapipe/notebooks/run_20230126-164521/model_AnomalyDetector_rnn_l2_u32_dataset_train_itime_1_a_tsl_5_nbins_3_tsl_500/epochs/epoch_10/trained_model/assets


----------------- Checkpoint! Saving data at epoch 11 (Triggered by Early Stopping=True) -----------------
INFO:tensorflow:Assets written to: /data01/homes/baroncelli/phd/rtapipe/notebooks/run_20230126-164521/model_AnomalyDetector_rnn_l2_u32_dataset_train_itime_1_a_tsl_5_nbins_3_tsl_500/epochs/epoch_11/trained_model/assets
Plot /data01/home