# Run model module locally

## Train reconstruction variables

### Dense Autoencoder

In [1]:
%%bash
# Stage 1 (dense autoencoder): train the reconstruction variables on
# data/train_norm_seq.csv, evaluating on data/val_norm_1_seq.csv.
# The output directory is removed first; presumably this keeps the run from
# picking up checkpoints left by an earlier run -- confirm against trainer.task.
rm -rf -- "trained_model/dense"
# Append the module directory to PYTHONPATH so `python3 -m trainer.task` can be
# resolved (the trainer package presumably lives there). The ':' separator is
# only emitted when PYTHONPATH is already set, so no empty entry is created.
export PYTHONPATH="${PYTHONPATH:+${PYTHONPATH}:}${PWD}/anomaly_detection_module"
# Expansions are quoted so a working directory containing spaces or glob
# characters is passed through as a single argument.
python3 -m trainer.task \
  --train_file_pattern="data/train_norm_seq.csv" \
  --eval_file_pattern="data/val_norm_1_seq.csv" \
  --output_dir="${PWD}/trained_model/dense" \
  --job-dir=./tmp \
  --seq_len=30 \
  --train_batch_size=32 \
  --eval_batch_size=32 \
  --train_steps=2000 \
  --learning_rate=0.1 \
  --start_delay_secs=60 \
  --throttle_secs=120 \
  --model_type="dense_autoencoder" \
  --enc_dnn_hidden_units="64 32 16" \
  --latent_vector_size=8 \
  --dec_dnn_hidden_units="16 32 64" \
  --time_loss_weight=1.0 \
  --feat_loss_weight=1.0 \
  --training_mode="reconstruction" \
  --labeled_tune_thresh=True \
  --num_time_anom_thresh=300 \
  --num_feat_anom_thresh=300


For more information, please see:
  * https://github.com/tensorflow/community/blob/master/rfcs/20180907-contrib-sunset.md
  * https://github.com/tensorflow/addons
If you depend on functionality not listed there, please file an issue.


anomaly_detection: features = 
{'tag_0': <tf.Tensor 'IteratorGetNext:0' shape=(?, 30) dtype=float64>, 'tag_4': <tf.Tensor 'IteratorGetNext:4' shape=(?, 30) dtype=float64>, 'tag_1': <tf.Tensor 'IteratorGetNext:1' shape=(?, 30) dtype=float64>, 'tag_3': <tf.Tensor 'IteratorGetNext:3' shape=(?, 30) dtype=float64>, 'tag_2': <tf.Tensor 'IteratorGetNext:2' shape=(?, 30) dtype=float64>}
anomaly_detection: labels = 
None
anomaly_detection: mode = 
train
anomaly_detection: params = 
{'model_type': 'dense_autoencoder', 'feat_anom_thresh': None, 'eps': 1e-12, 'dnn_hidden_units': [1024, 256, 64], 'learning_rate': 0.1, 'reverse_labels_sequence': True, 'train_batch_size': 32, 'k_principal_components': 3, 'eval_file_pattern': 'data/val_norm_1_seq.csv', 'min_feat_anom_t

INFO:tensorflow:Using default config.
INFO:tensorflow:Using config: {'_is_chief': True, '_task_type': 'worker', '_save_checkpoints_secs': 600, '_num_ps_replicas': 0, '_log_step_count_steps': 100, '_tf_random_seed': None, '_model_dir': '/home/jupyter/artificial_intelligence/machine_learning/anomaly_detection/tf_anomaly_detection_model_selection/trained_model/dense/', '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_save_summary_steps': 100, '_session_config': allow_soft_placement: true
graph_options {
  rewrite_options {
    meta_optimizer_iterations: ONE
  }
}
, '_train_distribute': None, '_task_id': 0, '_device_fn': None, '_save_checkpoints_steps': None, '_eval_distribute': None, '_num_worker_replicas': 1, '_protocol': None, '_experimental_distribute': None, '_evaluation_master': '', '_master': '', '_global_id_in_cluster': 0, '_service': None, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x7f00b85fdc88>}
INFO:tensorflow:Not using D

### LSTM Autoencoder

In [2]:
%%bash
# Stage 1 (LSTM encoder-decoder autoencoder): train the reconstruction
# variables on data/train_norm_seq.csv, evaluating on data/val_norm_1_seq.csv.
# The output directory is removed first; presumably this keeps the run from
# picking up checkpoints left by an earlier run -- confirm against trainer.task.
rm -rf -- "trained_model/lstm"
# Append the module directory to PYTHONPATH so `python3 -m trainer.task` can be
# resolved (the trainer package presumably lives there). The ':' separator is
# only emitted when PYTHONPATH is already set, so no empty entry is created.
export PYTHONPATH="${PYTHONPATH:+${PYTHONPATH}:}${PWD}/anomaly_detection_module"
# Expansions are quoted so a working directory containing spaces or glob
# characters is passed through as a single argument.
python3 -m trainer.task \
  --train_file_pattern="data/train_norm_seq.csv" \
  --eval_file_pattern="data/val_norm_1_seq.csv" \
  --output_dir="${PWD}/trained_model/lstm" \
  --job-dir=./tmp \
  --seq_len=30 \
  --train_batch_size=32 \
  --eval_batch_size=32 \
  --train_steps=2000 \
  --learning_rate=0.1 \
  --start_delay_secs=60 \
  --throttle_secs=120 \
  --model_type="lstm_enc_dec_autoencoder" \
  --reverse_labels_sequence=True \
  --enc_lstm_hidden_units="64 32 16" \
  --dec_lstm_hidden_units="16 32 64" \
  --lstm_dropout_output_keep_probs="0.9 0.95 1.0" \
  --dnn_hidden_units="1024 256 64" \
  --training_mode="reconstruction" \
  --labeled_tune_thresh=True \
  --num_time_anom_thresh=300 \
  --num_feat_anom_thresh=300


For more information, please see:
  * https://github.com/tensorflow/community/blob/master/rfcs/20180907-contrib-sunset.md
  * https://github.com/tensorflow/addons
If you depend on functionality not listed there, please file an issue.


anomaly_detection: features = 
{'tag_3': <tf.Tensor 'IteratorGetNext:3' shape=(?, 30) dtype=float64>, 'tag_0': <tf.Tensor 'IteratorGetNext:0' shape=(?, 30) dtype=float64>, 'tag_2': <tf.Tensor 'IteratorGetNext:2' shape=(?, 30) dtype=float64>, 'tag_4': <tf.Tensor 'IteratorGetNext:4' shape=(?, 30) dtype=float64>, 'tag_1': <tf.Tensor 'IteratorGetNext:1' shape=(?, 30) dtype=float64>}
anomaly_detection: labels = 
None
anomaly_detection: mode = 
train
anomaly_detection: params = 
{'seq_len': 30, 'num_time_anom_thresh': 300, 'latent_vector_size': 8, 'start_delay_secs': 60, 'eps': 1e-12, 'output_dir': '/home/jupyter/artificial_intelligence/machine_learning/anomaly_detection/tf_anomaly_detection_model_selection/trained_model/lstm/', 'training_mode': 'reconstructi

INFO:tensorflow:Using default config.
INFO:tensorflow:Using config: {'_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x7fd8087b3c50>, '_save_checkpoints_steps': None, '_num_ps_replicas': 0, '_save_checkpoints_secs': 600, '_keep_checkpoint_every_n_hours': 10000, '_num_worker_replicas': 1, '_protocol': None, '_device_fn': None, '_model_dir': '/home/jupyter/artificial_intelligence/machine_learning/anomaly_detection/tf_anomaly_detection_model_selection/trained_model/lstm/', '_master': '', '_task_type': 'worker', '_session_config': allow_soft_placement: true
graph_options {
  rewrite_options {
    meta_optimizer_iterations: ONE
  }
}
, '_global_id_in_cluster': 0, '_eval_distribute': None, '_evaluation_master': '', '_keep_checkpoint_max': 5, '_tf_random_seed': None, '_task_id': 0, '_save_summary_steps': 100, '_log_step_count_steps': 100, '_train_distribute': None, '_is_chief': True, '_service': None, '_experimental_distribute': None}
INFO:tensorflow:Not using Di

### PCA Autoencoder

In [3]:
%%bash
# Stage 1 (PCA): train the reconstruction variables on data/train_norm_seq.csv
# with 3 principal components, evaluating on data/val_norm_1_seq.csv.
# The output directory is removed first; presumably this keeps the run from
# picking up checkpoints left by an earlier run -- confirm against trainer.task.
rm -rf -- "trained_model/pca"
# Append the module directory to PYTHONPATH so `python3 -m trainer.task` can be
# resolved (the trainer package presumably lives there). The ':' separator is
# only emitted when PYTHONPATH is already set, so no empty entry is created.
export PYTHONPATH="${PYTHONPATH:+${PYTHONPATH}:}${PWD}/anomaly_detection_module"
# Expansions are quoted so a working directory containing spaces or glob
# characters is passed through as a single argument.
python3 -m trainer.task \
  --train_file_pattern="data/train_norm_seq.csv" \
  --eval_file_pattern="data/val_norm_1_seq.csv" \
  --output_dir="${PWD}/trained_model/pca" \
  --job-dir=./tmp \
  --seq_len=30 \
  --train_batch_size=32 \
  --eval_batch_size=32 \
  --train_steps=2000 \
  --start_delay_secs=60 \
  --throttle_secs=120 \
  --model_type="pca" \
  --k_principal_components=3 \
  --training_mode="reconstruction" \
  --labeled_tune_thresh=True \
  --num_time_anom_thresh=300 \
  --num_feat_anom_thresh=300


anomaly_detection: features = 
{'tag_2': <tf.Tensor 'IteratorGetNext:2' shape=(?, 30) dtype=float64>, 'tag_3': <tf.Tensor 'IteratorGetNext:3' shape=(?, 30) dtype=float64>, 'tag_4': <tf.Tensor 'IteratorGetNext:4' shape=(?, 30) dtype=float64>, 'tag_0': <tf.Tensor 'IteratorGetNext:0' shape=(?, 30) dtype=float64>, 'tag_1': <tf.Tensor 'IteratorGetNext:1' shape=(?, 30) dtype=float64>}
anomaly_detection: labels = 
None
anomaly_detection: mode = 
train
anomaly_detection: params = 
{'enc_lstm_hidden_units': [64, 32, 16], 'max_time_anom_thresh': 2000.0, 'labeled_tune_thresh': True, 'eps': 1e-12, 'feat_anom_thresh': None, 'training_mode': 'reconstruction', 'dnn_hidden_units': [1024, 256, 64], 'eval_file_pattern': 'data/val_norm_1_seq.csv', 'output_dir': '/home/jupyter/artificial_intelligence/machine_learning/anomaly_detection/tf_anomaly_detection_model_selection/trained_model/pca/', 'latent_vector_size': 8, 'dec_dnn_hidden_units': [64, 256, 1024], 'num_time_anom_thresh': 300, 'train_batch_size':

INFO:tensorflow:Using default config.
INFO:tensorflow:Using config: {'_keep_checkpoint_every_n_hours': 10000, '_task_type': 'worker', '_save_checkpoints_steps': None, '_task_id': 0, '_num_worker_replicas': 1, '_train_distribute': None, '_eval_distribute': None, '_protocol': None, '_save_summary_steps': 100, '_model_dir': '/home/jupyter/artificial_intelligence/machine_learning/anomaly_detection/tf_anomaly_detection_model_selection/trained_model/pca/', '_log_step_count_steps': 100, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x7f3b924fac50>, '_tf_random_seed': None, '_service': None, '_session_config': allow_soft_placement: true
graph_options {
  rewrite_options {
    meta_optimizer_iterations: ONE
  }
}
, '_master': '', '_global_id_in_cluster': 0, '_is_chief': True, '_device_fn': None, '_experimental_distribute': None, '_keep_checkpoint_max': 5, '_num_ps_replicas': 0, '_evaluation_master': '', '_save_checkpoints_secs': 600}
INFO:tensorflow:Not using Dis

## Train error distribution statistics variables

### Dense Autoencoder

In [4]:
%%bash
# Stage 2 (dense autoencoder): run the trainer in
# "calculate_error_distribution_statistics" mode, reading
# data/val_norm_1_seq.csv for both training and evaluation.
# The output directory is NOT cleared here, and train_steps is 2200 (200 more
# than the reconstruction cell's 2000) -- presumably so this run continues from
# the stage-1 checkpoint; confirm against trainer.task.
# The ':' separator is only emitted when PYTHONPATH is already set, so no empty
# entry is added to the module search path.
export PYTHONPATH="${PYTHONPATH:+${PYTHONPATH}:}${PWD}/anomaly_detection_module"
# Expansions are quoted so a working directory containing spaces or glob
# characters is passed through as a single argument.
python3 -m trainer.task \
  --train_file_pattern="data/val_norm_1_seq.csv" \
  --eval_file_pattern="data/val_norm_1_seq.csv" \
  --output_dir="${PWD}/trained_model/dense" \
  --job-dir=./tmp \
  --seq_len=30 \
  --train_batch_size=32 \
  --eval_batch_size=32 \
  --train_steps=2200 \
  --start_delay_secs=60 \
  --throttle_secs=120 \
  --model_type="dense_autoencoder" \
  --enc_dnn_hidden_units="64 32 16" \
  --latent_vector_size=8 \
  --dec_dnn_hidden_units="16 32 64" \
  --time_loss_weight=1.0 \
  --feat_loss_weight=1.0 \
  --training_mode="calculate_error_distribution_statistics" \
  --labeled_tune_thresh=True \
  --eps="1e-12" \
  --num_time_anom_thresh=300 \
  --num_feat_anom_thresh=300


anomaly_detection: features = 
{'tag_2': <tf.Tensor 'IteratorGetNext:2' shape=(?, 30) dtype=float64>, 'tag_0': <tf.Tensor 'IteratorGetNext:0' shape=(?, 30) dtype=float64>, 'tag_1': <tf.Tensor 'IteratorGetNext:1' shape=(?, 30) dtype=float64>, 'tag_4': <tf.Tensor 'IteratorGetNext:4' shape=(?, 30) dtype=float64>, 'tag_3': <tf.Tensor 'IteratorGetNext:3' shape=(?, 30) dtype=float64>}
anomaly_detection: labels = 
None
anomaly_detection: mode = 
train
anomaly_detection: params = 
{'f_score_beta': 0.05, 'k_principal_components': 3, 'training_mode': 'calculate_error_distribution_statistics', 'train_batch_size': 32, 'enc_lstm_hidden_units': [64, 32, 16], 'feat_anom_thresh': None, 'train_steps': 2200, 'lstm_dropout_output_keep_probs': [1.0, 1.0, 1.0], 'model_type': 'dense_autoencoder', 'eval_file_pattern': 'data/val_norm_1_seq.csv', 'labeled_tune_thresh': True, 'min_time_anom_thresh': 100.0, 'feat_loss_weight': 1.0, 'seq_len': 30, 'dec_lstm_hidden_units': [16, 32, 64], 'time_thresh_scl': 2.0, 'd

INFO:tensorflow:Using default config.
INFO:tensorflow:Using config: {'_global_id_in_cluster': 0, '_num_worker_replicas': 1, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_model_dir': '/home/jupyter/artificial_intelligence/machine_learning/anomaly_detection/tf_anomaly_detection_model_selection/trained_model/dense/', '_protocol': None, '_task_type': 'worker', '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x7efbe1a73a58>, '_save_checkpoints_secs': 600, '_experimental_distribute': None, '_num_ps_replicas': 0, '_evaluation_master': '', '_device_fn': None, '_session_config': allow_soft_placement: true
graph_options {
  rewrite_options {
    meta_optimizer_iterations: ONE
  }
}
, '_log_step_count_steps': 100, '_save_summary_steps': 100, '_train_distribute': None, '_service': None, '_task_id': 0, '_is_chief': True, '_save_checkpoints_steps': None, '_tf_random_seed': None, '_master': '', '_eval_distribute': None}
INFO:tensorflow:Not using D

### LSTM Autoencoder

In [23]:
%%bash
# Stage 2 (LSTM encoder-decoder autoencoder): run the trainer in
# "calculate_error_distribution_statistics" mode, reading
# data/val_norm_1_seq.csv for both training and evaluation.
# The output directory is NOT cleared here, and train_steps is 2200 (200 more
# than the reconstruction cell's 2000) -- presumably so this run continues from
# the stage-1 checkpoint; confirm against trainer.task.
# The ':' separator is only emitted when PYTHONPATH is already set, so no empty
# entry is added to the module search path.
export PYTHONPATH="${PYTHONPATH:+${PYTHONPATH}:}${PWD}/anomaly_detection_module"
# Expansions are quoted so a working directory containing spaces or glob
# characters is passed through as a single argument.
python3 -m trainer.task \
  --train_file_pattern="data/val_norm_1_seq.csv" \
  --eval_file_pattern="data/val_norm_1_seq.csv" \
  --output_dir="${PWD}/trained_model/lstm" \
  --job-dir=./tmp \
  --seq_len=30 \
  --train_batch_size=32 \
  --eval_batch_size=32 \
  --train_steps=2200 \
  --model_type="lstm_enc_dec_autoencoder" \
  --reverse_labels_sequence=True \
  --enc_lstm_hidden_units="64 32 16" \
  --dec_lstm_hidden_units="16 32 64" \
  --lstm_dropout_output_keep_probs="0.9 0.95 1.0" \
  --dnn_hidden_units="1024 256 64" \
  --training_mode="calculate_error_distribution_statistics" \
  --labeled_tune_thresh=True \
  --eps="1e-12" \
  --num_time_anom_thresh=300 \
  --num_feat_anom_thresh=300


anomaly_detection: features = 
{'tag_2': <tf.Tensor 'IteratorGetNext:2' shape=(?, 30) dtype=float64>, 'tag_1': <tf.Tensor 'IteratorGetNext:1' shape=(?, 30) dtype=float64>, 'tag_3': <tf.Tensor 'IteratorGetNext:3' shape=(?, 30) dtype=float64>, 'tag_0': <tf.Tensor 'IteratorGetNext:0' shape=(?, 30) dtype=float64>, 'tag_4': <tf.Tensor 'IteratorGetNext:4' shape=(?, 30) dtype=float64>}
anomaly_detection: labels = 
None
anomaly_detection: mode = 
train
anomaly_detection: params = 
{'dec_dnn_hidden_units': [64, 256, 1024], 'eval_batch_size': 32, 'start_delay_secs': 60, 'learning_rate': 0.1, 'model_type': 'lstm_enc_dec_autoencoder', 'max_time_anom_thresh': 2000.0, 'time_loss_weight': 1.0, 'throttle_secs': 120, 'train_batch_size': 32, 'min_feat_anom_thresh': 100.0, 'output_dir': '/home/jupyter/artificial_intelligence/machine_learning/anomaly_detection/tf_anomaly_detection_model_selection/trained_model/lstm/', 'min_time_anom_thresh': 100.0, 'max_feat_anom_thresh': 2000.0, 'training_mode': 'calcul

INFO:tensorflow:Using default config.
INFO:tensorflow:Using config: {'_save_checkpoints_steps': None, '_task_type': 'worker', '_device_fn': None, '_experimental_distribute': None, '_train_distribute': None, '_is_chief': True, '_evaluation_master': '', '_master': '', '_num_ps_replicas': 0, '_service': None, '_model_dir': '/home/jupyter/artificial_intelligence/machine_learning/anomaly_detection/tf_anomaly_detection_model_selection/trained_model/lstm/', '_session_config': allow_soft_placement: true
graph_options {
  rewrite_options {
    meta_optimizer_iterations: ONE
  }
}
, '_keep_checkpoint_max': 5, '_log_step_count_steps': 100, '_save_summary_steps': 100, '_protocol': None, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x7f65502e9a58>, '_keep_checkpoint_every_n_hours': 10000, '_save_checkpoints_secs': 600, '_num_worker_replicas': 1, '_global_id_in_cluster': 0, '_task_id': 0, '_tf_random_seed': None, '_eval_distribute': None}
INFO:tensorflow:Not using Di

### PCA Autoencoder

In [6]:
%%bash
# Stage 2 (PCA): run the trainer in "calculate_error_distribution_statistics"
# mode, reading data/val_norm_1_seq.csv for both training and evaluation.
# The output directory is NOT cleared here, and train_steps is 2200 (200 more
# than the reconstruction cell's 2000) -- presumably so this run continues from
# the stage-1 checkpoint; confirm against trainer.task.
# The ':' separator is only emitted when PYTHONPATH is already set, so no empty
# entry is added to the module search path.
export PYTHONPATH="${PYTHONPATH:+${PYTHONPATH}:}${PWD}/anomaly_detection_module"
# Expansions are quoted so a working directory containing spaces or glob
# characters is passed through as a single argument.
python3 -m trainer.task \
  --train_file_pattern="data/val_norm_1_seq.csv" \
  --eval_file_pattern="data/val_norm_1_seq.csv" \
  --output_dir="${PWD}/trained_model/pca" \
  --job-dir=./tmp \
  --seq_len=30 \
  --train_batch_size=32 \
  --eval_batch_size=32 \
  --train_steps=2200 \
  --model_type="pca" \
  --k_principal_components=3 \
  --training_mode="calculate_error_distribution_statistics" \
  --labeled_tune_thresh=True \
  --eps="1e-12" \
  --num_time_anom_thresh=300 \
  --num_feat_anom_thresh=300


anomaly_detection: features = 
{'tag_3': <tf.Tensor 'IteratorGetNext:3' shape=(?, 30) dtype=float64>, 'tag_0': <tf.Tensor 'IteratorGetNext:0' shape=(?, 30) dtype=float64>, 'tag_4': <tf.Tensor 'IteratorGetNext:4' shape=(?, 30) dtype=float64>, 'tag_1': <tf.Tensor 'IteratorGetNext:1' shape=(?, 30) dtype=float64>, 'tag_2': <tf.Tensor 'IteratorGetNext:2' shape=(?, 30) dtype=float64>}
anomaly_detection: labels = 
None
anomaly_detection: mode = 
train
anomaly_detection: params = 
{'lstm_dropout_output_keep_probs': [1.0, 1.0, 1.0], 'feat_anom_thresh': None, 'enc_dnn_hidden_units': [1024, 256, 64], 'feat_thresh_scl': 2.0, 'num_time_anom_thresh': 300, 'train_file_pattern': 'data/val_norm_1_seq.csv', 'labeled_tune_thresh': True, 'dnn_hidden_units': [1024, 256, 64], 'num_feat_anom_thresh': 300, 'dec_dnn_hidden_units': [64, 256, 1024], 'train_steps': 2200, 'enc_lstm_hidden_units': [64, 32, 16], 'reverse_labels_sequence': True, 'dec_lstm_hidden_units': [16, 32, 64], 'min_feat_anom_thresh': 100.0, '

INFO:tensorflow:Using default config.
INFO:tensorflow:Using config: {'_global_id_in_cluster': 0, '_log_step_count_steps': 100, '_eval_distribute': None, '_model_dir': '/home/jupyter/artificial_intelligence/machine_learning/anomaly_detection/tf_anomaly_detection_model_selection/trained_model/pca/', '_protocol': None, '_save_summary_steps': 100, '_evaluation_master': '', '_service': None, '_is_chief': True, '_num_ps_replicas': 0, '_device_fn': None, '_master': '', '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x7fcde1d4ca58>, '_keep_checkpoint_every_n_hours': 10000, '_num_worker_replicas': 1, '_tf_random_seed': None, '_task_id': 0, '_keep_checkpoint_max': 5, '_train_distribute': None, '_experimental_distribute': None, '_save_checkpoints_steps': None, '_task_type': 'worker', '_session_config': allow_soft_placement: true
graph_options {
  rewrite_options {
    meta_optimizer_iterations: ONE
  }
}
, '_save_checkpoints_secs': 600}
INFO:tensorflow:Not using Dis

## Tune anomaly thresholds

### Dense Autoencoder

In [7]:
%%bash
# Stage 3 (dense autoencoder): run the trainer in "tune_anomaly_thresholds"
# mode on the labeled file data/labeled_val_mixed_seq.csv (used for both
# training and evaluation). 300 candidate thresholds are requested per axis,
# over [1.0, 20.0] for time and [20.0, 80.0] for features, with
# f_score_beta=0.05.
# The output directory is NOT cleared here, and train_steps is 2400 (200 more
# than the error-statistics cell's 2200) -- presumably so this run continues
# from the previous stage's checkpoint; confirm against trainer.task.
# The ':' separator is only emitted when PYTHONPATH is already set, so no empty
# entry is added to the module search path.
export PYTHONPATH="${PYTHONPATH:+${PYTHONPATH}:}${PWD}/anomaly_detection_module"
# Expansions are quoted so a working directory containing spaces or glob
# characters is passed through as a single argument.
python3 -m trainer.task \
  --train_file_pattern="data/labeled_val_mixed_seq.csv" \
  --eval_file_pattern="data/labeled_val_mixed_seq.csv" \
  --output_dir="${PWD}/trained_model/dense" \
  --job-dir=./tmp \
  --seq_len=30 \
  --train_batch_size=32 \
  --eval_batch_size=32 \
  --train_steps=2400 \
  --model_type="dense_autoencoder" \
  --enc_dnn_hidden_units="64 32 16" \
  --latent_vector_size=8 \
  --dec_dnn_hidden_units="16 32 64" \
  --time_loss_weight=1.0 \
  --feat_loss_weight=1.0 \
  --training_mode="tune_anomaly_thresholds" \
  --labeled_tune_thresh=True \
  --num_time_anom_thresh=300 \
  --num_feat_anom_thresh=300 \
  --min_time_anom_thresh=1.0 \
  --max_time_anom_thresh=20.0 \
  --min_feat_anom_thresh=20.0 \
  --max_feat_anom_thresh=80.0 \
  --f_score_beta=0.05


anomaly_detection: features = 
{'tag_4': <tf.Tensor 'IteratorGetNext:4' shape=(?, 30) dtype=float64>, 'tag_3': <tf.Tensor 'IteratorGetNext:3' shape=(?, 30) dtype=float64>, 'tag_0': <tf.Tensor 'IteratorGetNext:0' shape=(?, 30) dtype=float64>, 'tag_1': <tf.Tensor 'IteratorGetNext:1' shape=(?, 30) dtype=float64>, 'tag_2': <tf.Tensor 'IteratorGetNext:2' shape=(?, 30) dtype=float64>}
anomaly_detection: labels = 
Tensor("IteratorGetNext:5", shape=(?,), dtype=float64, device=/device:CPU:0)
anomaly_detection: mode = 
train
anomaly_detection: params = 
{'enc_dnn_hidden_units': [64, 32, 16], 'dec_lstm_hidden_units': [16, 32, 64], 'training_mode': 'tune_anomaly_thresholds', 'min_feat_anom_thresh': 20.0, 'model_type': 'dense_autoencoder', 'learning_rate': 0.1, 'dnn_hidden_units': [1024, 256, 64], 'enc_lstm_hidden_units': [64, 32, 16], 'start_delay_secs': 60, 'max_feat_anom_thresh': 80.0, 'train_steps': 2400, 'max_time_anom_thresh': 20.0, 'reverse_labels_sequence': True, 'min_time_anom_thresh': 1.

INFO:tensorflow:Using default config.
INFO:tensorflow:Using config: {'_master': '', '_model_dir': '/home/jupyter/artificial_intelligence/machine_learning/anomaly_detection/tf_anomaly_detection_model_selection/trained_model/dense/', '_log_step_count_steps': 100, '_service': None, '_task_id': 0, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x7ff3ddb11cf8>, '_save_checkpoints_steps': None, '_tf_random_seed': None, '_save_checkpoints_secs': 600, '_task_type': 'worker', '_protocol': None, '_num_ps_replicas': 0, '_num_worker_replicas': 1, '_keep_checkpoint_max': 5, '_train_distribute': None, '_keep_checkpoint_every_n_hours': 10000, '_session_config': allow_soft_placement: true
graph_options {
  rewrite_options {
    meta_optimizer_iterations: ONE
  }
}
, '_device_fn': None, '_evaluation_master': '', '_save_summary_steps': 100, '_experimental_distribute': None, '_eval_distribute': None, '_is_chief': True, '_global_id_in_cluster': 0}
INFO:tensorflow:Not using D

### LSTM Autoencoder

In [24]:
%%bash
# Stage 3 (LSTM encoder-decoder autoencoder): run the trainer in
# "tune_anomaly_thresholds" mode on the labeled file
# data/labeled_val_mixed_seq.csv (used for both training and evaluation).
# 300 candidate thresholds are requested per axis, over [1.0, 20.0] for time
# and [20.0, 80.0] for features, with f_score_beta=0.05.
# The output directory is NOT cleared here, and train_steps is 2400 (200 more
# than the error-statistics cell's 2200) -- presumably so this run continues
# from the previous stage's checkpoint; confirm against trainer.task.
# The ':' separator is only emitted when PYTHONPATH is already set, so no empty
# entry is added to the module search path.
export PYTHONPATH="${PYTHONPATH:+${PYTHONPATH}:}${PWD}/anomaly_detection_module"
# Expansions are quoted so a working directory containing spaces or glob
# characters is passed through as a single argument.
python3 -m trainer.task \
  --train_file_pattern="data/labeled_val_mixed_seq.csv" \
  --eval_file_pattern="data/labeled_val_mixed_seq.csv" \
  --output_dir="${PWD}/trained_model/lstm" \
  --job-dir=./tmp \
  --seq_len=30 \
  --train_batch_size=32 \
  --eval_batch_size=32 \
  --train_steps=2400 \
  --model_type="lstm_enc_dec_autoencoder" \
  --reverse_labels_sequence=True \
  --enc_lstm_hidden_units="64 32 16" \
  --dec_lstm_hidden_units="16 32 64" \
  --lstm_dropout_output_keep_probs="0.9 0.95 1.0" \
  --dnn_hidden_units="1024 256 64" \
  --training_mode="tune_anomaly_thresholds" \
  --labeled_tune_thresh=True \
  --num_time_anom_thresh=300 \
  --num_feat_anom_thresh=300 \
  --min_time_anom_thresh=1.0 \
  --max_time_anom_thresh=20.0 \
  --min_feat_anom_thresh=20.0 \
  --max_feat_anom_thresh=80.0 \
  --f_score_beta=0.05


anomaly_detection: features = 
{'tag_3': <tf.Tensor 'IteratorGetNext:3' shape=(?, 30) dtype=float64>, 'tag_2': <tf.Tensor 'IteratorGetNext:2' shape=(?, 30) dtype=float64>, 'tag_1': <tf.Tensor 'IteratorGetNext:1' shape=(?, 30) dtype=float64>, 'tag_4': <tf.Tensor 'IteratorGetNext:4' shape=(?, 30) dtype=float64>, 'tag_0': <tf.Tensor 'IteratorGetNext:0' shape=(?, 30) dtype=float64>}
anomaly_detection: labels = 
Tensor("IteratorGetNext:5", shape=(?,), dtype=float64, device=/device:CPU:0)
anomaly_detection: mode = 
train
anomaly_detection: params = 
{'k_principal_components': 3, 'train_batch_size': 32, 'f_score_beta': 0.05, 'throttle_secs': 120, 'feat_thresh_scl': 2.0, 'train_file_pattern': 'data/labeled_val_mixed_seq.csv', 'num_time_anom_thresh': 300, 'dnn_hidden_units': [1024, 256, 64], 'max_feat_anom_thresh': 80.0, 'num_feat_anom_thresh': 300, 'training_mode': 'tune_anomaly_thresholds', 'start_delay_secs': 60, 'time_loss_weight': 1.0, 'enc_lstm_hidden_units': [64, 32, 16], 'train_steps':

INFO:tensorflow:Using default config.
INFO:tensorflow:Using config: {'_train_distribute': None, '_device_fn': None, '_num_ps_replicas': 0, '_save_checkpoints_secs': 600, '_log_step_count_steps': 100, '_task_type': 'worker', '_keep_checkpoint_max': 5, '_num_worker_replicas': 1, '_task_id': 0, '_evaluation_master': '', '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x7fdf80b6ccc0>, '_eval_distribute': None, '_tf_random_seed': None, '_experimental_distribute': None, '_save_summary_steps': 100, '_service': None, '_protocol': None, '_session_config': allow_soft_placement: true
graph_options {
  rewrite_options {
    meta_optimizer_iterations: ONE
  }
}
, '_global_id_in_cluster': 0, '_save_checkpoints_steps': None, '_model_dir': '/home/jupyter/artificial_intelligence/machine_learning/anomaly_detection/tf_anomaly_detection_model_selection/trained_model/lstm/', '_is_chief': True, '_master': '', '_keep_checkpoint_every_n_hours': 10000}
INFO:tensorflow:Not using Di

### PCA Autoencoder

In [9]:
%%bash
# Stage 3 (PCA): run the trainer in "tune_anomaly_thresholds" mode on the
# labeled file data/labeled_val_mixed_seq.csv (used for both training and
# evaluation). 300 candidate thresholds are requested per axis, over
# [1.0, 20.0] for time and [20.0, 80.0] for features, with f_score_beta=0.05.
# The output directory is NOT cleared here, and train_steps is 2400 (200 more
# than the error-statistics cell's 2200) -- presumably so this run continues
# from the previous stage's checkpoint; confirm against trainer.task.
# The ':' separator is only emitted when PYTHONPATH is already set, so no empty
# entry is added to the module search path.
export PYTHONPATH="${PYTHONPATH:+${PYTHONPATH}:}${PWD}/anomaly_detection_module"
# Expansions are quoted so a working directory containing spaces or glob
# characters is passed through as a single argument.
python3 -m trainer.task \
  --train_file_pattern="data/labeled_val_mixed_seq.csv" \
  --eval_file_pattern="data/labeled_val_mixed_seq.csv" \
  --output_dir="${PWD}/trained_model/pca" \
  --job-dir=./tmp \
  --seq_len=30 \
  --train_batch_size=32 \
  --eval_batch_size=32 \
  --train_steps=2400 \
  --model_type="pca" \
  --k_principal_components=3 \
  --training_mode="tune_anomaly_thresholds" \
  --labeled_tune_thresh=True \
  --num_time_anom_thresh=300 \
  --num_feat_anom_thresh=300 \
  --min_time_anom_thresh=1.0 \
  --max_time_anom_thresh=20.0 \
  --min_feat_anom_thresh=20.0 \
  --max_feat_anom_thresh=80.0 \
  --f_score_beta=0.05


anomaly_detection: features = 
{'tag_1': <tf.Tensor 'IteratorGetNext:1' shape=(?, 30) dtype=float64>, 'tag_3': <tf.Tensor 'IteratorGetNext:3' shape=(?, 30) dtype=float64>, 'tag_4': <tf.Tensor 'IteratorGetNext:4' shape=(?, 30) dtype=float64>, 'tag_2': <tf.Tensor 'IteratorGetNext:2' shape=(?, 30) dtype=float64>, 'tag_0': <tf.Tensor 'IteratorGetNext:0' shape=(?, 30) dtype=float64>}
anomaly_detection: labels = 
Tensor("IteratorGetNext:5", shape=(?,), dtype=float64, device=/device:CPU:0)
anomaly_detection: mode = 
train
anomaly_detection: params = 
{'dec_lstm_hidden_units': [16, 32, 64], 'training_mode': 'tune_anomaly_thresholds', 'output_dir': '/home/jupyter/artificial_intelligence/machine_learning/anomaly_detection/tf_anomaly_detection_model_selection/trained_model/pca/', 'throttle_secs': 120, 'seq_len': 30, 'train_batch_size': 32, 'train_file_pattern': 'data/labeled_val_mixed_seq.csv', 'latent_vector_size': 8, 'eps': 1e-12, 'max_time_anom_thresh': 20.0, 'learning_rate': 0.1, 'start_dela

INFO:tensorflow:Using default config.
INFO:tensorflow:Using config: {'_eval_distribute': None, '_num_ps_replicas': 0, '_train_distribute': None, '_experimental_distribute': None, '_global_id_in_cluster': 0, '_log_step_count_steps': 100, '_keep_checkpoint_max': 5, '_model_dir': '/home/jupyter/artificial_intelligence/machine_learning/anomaly_detection/tf_anomaly_detection_model_selection/trained_model/pca/', '_service': None, '_keep_checkpoint_every_n_hours': 10000, '_session_config': allow_soft_placement: true
graph_options {
  rewrite_options {
    meta_optimizer_iterations: ONE
  }
}
, '_evaluation_master': '', '_save_summary_steps': 100, '_task_id': 0, '_master': '', '_tf_random_seed': None, '_protocol': None, '_save_checkpoints_secs': 600, '_task_type': 'worker', '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x7ff8a2336cc0>, '_save_checkpoints_steps': None, '_device_fn': None, '_num_worker_replicas': 1, '_is_chief': True}
INFO:tensorflow:Not using Dis

## Prediction

In [10]:
# Feature column names for the unlabeled data: "tag_0" through "tag_4".
UNLABELED_CSV_COLUMNS = ["tag_" + str(tag_index) for tag_index in range(5)]

In [11]:
import numpy as np

# Read the labeled mixed test set as a 2-D array of strings (comma-delimited),
# then report its shape.
labeled_test_mixed_sequences_array = np.loadtxt(
    "data/labeled_test_mixed_seq.csv",
    dtype=str,
    delimiter=",",
)
shape_message = "labeled_test_mixed_sequences_array.shape = {}"
print(shape_message.format(labeled_test_mixed_sequences_array.shape))

labeled_test_mixed_sequences_array.shape = (12800, 6)


In [12]:
# How many leading rows of the test array are used for the prediction demo.
number_of_prediction_instances = 10
# The label is the last column of each row; show it for the selected rows.
print("labels = {}".format(
    labeled_test_mixed_sequences_array[:number_of_prediction_instances, -1]))

labels = ['1' '1' '1' '0' '0' '0' '1' '1' '1' '1']


### Local prediction from local model

In [13]:
import json
from collections import OrderedDict

# Write the first `number_of_prediction_instances` rows of the test array to
# test_sequences.json as newline-delimited JSON: one object per row, mapping
# each name in UNLABELED_CSV_COLUMNS to that row's value in the same position.
# Only len(UNLABELED_CSV_COLUMNS) columns are read from each row, so the
# trailing label column is not written.
#
# json.dumps does the quoting/escaping and, with its default separators, emits
# the same '{"k": "v", "k2": "v2"}' layout as the previous hand-built string,
# without globally rewriting spaces, colons, or commas inside the values.
# OrderedDict keeps the keys in column order on every Python version.
test_data_normal_string_list = labeled_test_mixed_sequences_array.tolist()[0:number_of_prediction_instances]
json_string = ""
for example in test_data_normal_string_list:
  json_string += json.dumps(OrderedDict(
      (column_name, example[i])
      for i, column_name in enumerate(UNLABELED_CSV_COLUMNS))) + "\n"
print(json_string)
# The payload is fully built before the file is opened, so a failure above
# does not leave a truncated test_sequences.json behind.
with open('test_sequences.json', 'w') as outfile:
  outfile.write(json_string)

{"tag_0": "0.69531315;1.13063381;1.46212831;0.72515986;-0.72285522;-0.48608379;0.66008214;1.39027465;0.84784508;0.15064029;-0.86813038;-0.19376341;1.33326618;1.22342092;0.73982218;-0.2687485;0.09466024;0.62380527;1.9415848;1.20261854;0.2626776;10.47418518;-0.73694674;8.68714408;-17.87462629;-8.19666606;-6.97080639;16.66439155;9.85067447;12.07285213", "tag_1": "0.46246721;1.98239347;-0.1224569;-0.38657805;1.81182516;1.03408165;-1.48166642;1.27670001;1.91766873;-0.58962765;-0.08759287;1.97567123;-0.44497447;-1.07307304;1.57404985;1.01763811;-0.83888524;1.25820476;2.03154979;-1.02087098;-0.16190532;-22.80037943;4.01040199;-10.56761472;21.24277391;-28.76669081;-9.91877985;8.1263494;17.63656283;6.75155559", "tag_2": "0.83586452;1.42071377;0.27984876;0.14046254;1.84246963;0.843375;-1.07942606;0.4688217;1.26009013;-0.37743756;0.0258541;1.16555777;0.84946758;-0.72737033;0.74098267;1.80826962;0.06362211;-0.43868715;1.2166949;0.28497154;-0.26348582;-13.79850614;11.03782568;-6.36478403;-13.461771

### Dense Autoencoder

In [14]:
%%bash
# Run a local prediction with the dense autoencoder's exported model, feeding
# it the instances in ./test_sequences.json.
# The last directory (in the shell's sorted glob order) under export/exporter
# is used; presumably these directories are timestamp-named so the last one is
# the newest export -- confirm against the trainer's exporter.
# A glob replaces parsing `ls` output, and every expansion is quoted so paths
# containing spaces survive intact.
export_root="${PWD}/trained_model/dense/export/exporter"
latest_export=""
for candidate in "${export_root}"/*/; do
  # With no match the pattern stays literal, so check it is a real directory.
  if [[ -d "${candidate}" ]]; then
    latest_export="${candidate%/}"
  fi
done
# Fail with a clear message instead of handing gcloud a non-existent path.
if [[ -z "${latest_export}" ]]; then
  printf 'No exported model found under %s\n' "${export_root}" >&2
  exit 1
fi
gcloud ml-engine local predict \
  --model-dir="${latest_export}" \
  --json-instances=./test_sequences.json

X_FEAT_ABS_RECON_ERR                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                    

2019-07-04 03:06:02.806050: I tensorflow/core/platform/profile_utils/cpu_utils.cc:94] CPU Frequency: 2000189999 Hz
2019-07-04 03:06:02.818459: I tensorflow/compiler/xla/service/service.cc:150] XLA service 0x5589f89e0910 executing computations on platform Host. Devices:
2019-07-04 03:06:02.818494: I tensorflow/compiler/xla/service/service.cc:158]   StreamExecutor device (0): <undefined>, <undefined>
2019-07-04 03:06:02.830056: I tensorflow/core/common_runtime/process_util.cc:71] Creating new thread pool with default inter op setting: 2. Tune using inter_op_parallelism_threads for best performance.
Instructions for updating:
This function will only be available through the v1 compatibility library as tf.compat.v1.saved_model.loader.load or tf.compat.v1.saved_model.load. There will be a new function for importing SavedModels in Tensorflow 2.0.
Instructions for updating:
This function will only be available through the v1 compatibility library as tf.compat.v1.saved_model.loader.load or tf.

### LSTM Autoencoder

In [25]:
%%bash
# Run a local prediction with the LSTM autoencoder's exported model, feeding
# it the instances in ./test_sequences.json.
# The last directory (in the shell's sorted glob order) under export/exporter
# is used; presumably these directories are timestamp-named so the last one is
# the newest export -- confirm against the trainer's exporter.
# A glob replaces parsing `ls` output, and every expansion is quoted so paths
# containing spaces survive intact.
export_root="${PWD}/trained_model/lstm/export/exporter"
latest_export=""
for candidate in "${export_root}"/*/; do
  # With no match the pattern stays literal, so check it is a real directory.
  if [[ -d "${candidate}" ]]; then
    latest_export="${candidate%/}"
  fi
done
# Fail with a clear message instead of handing gcloud a non-existent path.
if [[ -z "${latest_export}" ]]; then
  printf 'No exported model found under %s\n' "${export_root}" >&2
  exit 1
fi
gcloud ml-engine local predict \
  --model-dir="${latest_export}" \
  --json-instances=./test_sequences.json

X_FEAT_ABS_RECON_ERR                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                    

2019-07-04 03:24:20.374915: I tensorflow/core/platform/profile_utils/cpu_utils.cc:94] CPU Frequency: 2000189999 Hz
2019-07-04 03:24:20.388719: I tensorflow/compiler/xla/service/service.cc:150] XLA service 0x56250cbcd700 executing computations on platform Host. Devices:
2019-07-04 03:24:20.388768: I tensorflow/compiler/xla/service/service.cc:158]   StreamExecutor device (0): <undefined>, <undefined>
2019-07-04 03:24:20.400799: I tensorflow/core/common_runtime/process_util.cc:71] Creating new thread pool with default inter op setting: 2. Tune using inter_op_parallelism_threads for best performance.
Instructions for updating:
This function will only be available through the v1 compatibility library as tf.compat.v1.saved_model.loader.load or tf.compat.v1.saved_model.load. There will be a new function for importing SavedModels in Tensorflow 2.0.
Instructions for updating:
This function will only be available through the v1 compatibility library as tf.compat.v1.saved_model.loader.load or tf.

### PCA Autoencoder

In [16]:
%%bash
# Run a local prediction with the PCA model's export, feeding it the instances
# in ./test_sequences.json.
# The last directory (in the shell's sorted glob order) under export/exporter
# is used; presumably these directories are timestamp-named so the last one is
# the newest export -- confirm against the trainer's exporter.
# A glob replaces parsing `ls` output, and every expansion is quoted so paths
# containing spaces survive intact.
export_root="${PWD}/trained_model/pca/export/exporter"
latest_export=""
for candidate in "${export_root}"/*/; do
  # With no match the pattern stays literal, so check it is a real directory.
  if [[ -d "${candidate}" ]]; then
    latest_export="${candidate%/}"
  fi
done
# Fail with a clear message instead of handing gcloud a non-existent path.
if [[ -z "${latest_export}" ]]; then
  printf 'No exported model found under %s\n' "${export_root}" >&2
  exit 1
fi
gcloud ml-engine local predict \
  --model-dir="${latest_export}" \
  --json-instances=./test_sequences.json

X_FEAT_ABS_RECON_ERR                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                    

2019-07-04 03:09:47.625244: I tensorflow/core/platform/profile_utils/cpu_utils.cc:94] CPU Frequency: 2000189999 Hz
2019-07-04 03:09:47.636998: I tensorflow/compiler/xla/service/service.cc:150] XLA service 0x55f264ecd940 executing computations on platform Host. Devices:
2019-07-04 03:09:47.637053: I tensorflow/compiler/xla/service/service.cc:158]   StreamExecutor device (0): <undefined>, <undefined>
2019-07-04 03:09:47.649416: I tensorflow/core/common_runtime/process_util.cc:71] Creating new thread pool with default inter op setting: 2. Tune using inter_op_parallelism_threads for best performance.
Instructions for updating:
This function will only be available through the v1 compatibility library as tf.compat.v1.saved_model.loader.load or tf.compat.v1.saved_model.load. There will be a new function for importing SavedModels in Tensorflow 2.0.
Instructions for updating:
This function will only be available through the v1 compatibility library as tf.compat.v1.saved_model.loader.load or tf.