# Run model module on GCP with labeled threshold tuning

In [None]:
import os
# Deployment settings -- edit these three values before running the notebook.
PROJECT = "PROJECT"  # REPLACE WITH YOUR PROJECT ID
BUCKET = "BUCKET"  # REPLACE WITH A BUCKET NAME
REGION = "us-central1"  # REPLACE WITH YOUR REGION e.g. us-central1

# Export the settings as environment variables so the %%bash cells can read
# them as shell variables. TFVERSION is handed to gcloud as --runtime-version.
os.environ.update({
    "PROJECT": PROJECT,
    "BUCKET": BUCKET,
    "REGION": REGION,
    "TFVERSION": "1.13",
})

Copy the local data files over to the Cloud Storage bucket.

In [None]:
%%bash
# Upload everything under the local data/ directory to the bucket's
# anomaly_detection/data prefix, where the file patterns used by the
# training cells point.
gcloud storage cp --recursive data/* "gs://${BUCKET}/anomaly_detection/data"

In [None]:
# Export the hyperparameters shared by every job as environment variables so
# the %%bash cells can forward them to the trainer. All values are strings.

# Sequence shape: SEQ_LEN and NUM_FEAT (must be set first; the feature
# settings below are derived from them).
os.environ.update({"SEQ_LEN": "30", "NUM_FEAT": "5"})

# Feature names: "tag_0,tag_1,..." with one name per feature.
os.environ["FEAT_NAMES"] = ",".join(
    "tag_{}".format(feat_idx)
    for feat_idx in range(int(os.environ["NUM_FEAT"])))

# Feature defaults: for each feature, SEQ_LEN copies of "0.0" joined by ";",
# with the per-feature strings joined by ",".
os.environ["FEAT_DEFAULTS"] = ",".join(
    ";".join("0.0" for _step in range(int(os.environ["SEQ_LEN"])))
    for _feat in range(int(os.environ["NUM_FEAT"])))

# Training schedule hyperparameters.
os.environ.update({
    "START_DELAY_SECS": "60",
    "THROTTLE_SECS": "120",
})

# Threshold hyperparameters.
os.environ.update({
    "LABELED_TUNE_THRESH": "True",
    "NUM_TIME_ANOM_THRESH": "300",
    "NUM_FEAT_ANOM_THRESH": "300",
})

# Dense autoencoder hyperparameters.
os.environ.update({
    "ENC_DNN_HIDDEN_UNITS": "64,32,16",
    "LATENT_VECTOR_SIZE": "8",
    "DEC_DNN_HIDDEN_UNITS": "16,32,64",
    "TIME_LOSS_WEIGHT": "1.0",
    "FEAT_LOSS_WEIGHT": "1.0",
})

# LSTM autoencoder hyperparameters.
os.environ.update({
    "REVERSE_LABELS_SEQUENCE": "True",
    "ENC_LSTM_HIDDEN_UNITS": "64,32,16",
    "DEC_LSTM_HIDDEN_UNITS": "16,32,64",
    "LSTM_DROPOUT_OUTPUT_KEEP_PROBS": "0.9,0.95,1.0",
    "DNN_HIDDEN_UNITS": "1024,256,64",
})

## Train reconstruction variables

In [None]:
# Export the inputs and hyperparameters for the reconstruction training phase.
# Several of these variables (TRAIN_FILE_PATTERN, EVAL_FILE_PATTERN,
# PREVIOUS_TRAIN_STEPS, TRAIN_EXAMPLES, TRAINING_MODE) are overwritten by the
# configuration cells of the later phases, so re-run this cell before
# re-submitting any reconstruction job.
os.environ.update({
    "TRAIN_FILE_PATTERN": "gs://{}/anomaly_detection/data/train_norm_seq.csv".format(BUCKET),
    "EVAL_FILE_PATTERN": "gs://{}/anomaly_detection/data/val_norm_1_seq.csv".format(BUCKET),
    "PREVIOUS_TRAIN_STEPS": "0",
    "RECONSTRUCTION_EPOCHS": "1.0",
    "TRAIN_EXAMPLES": "64000",
    "LEARNING_RATE": "0.1",
    "TRAINING_MODE": "reconstruction",
})

### Dense Autoencoder

In [None]:
%%bash
# Submit the dense autoencoder reconstruction training job.
#
# Reads BUCKET, REGION, TFVERSION and the hyperparameter variables exported by
# the Python configuration cells. Later phases overwrite TRAIN_FILE_PATTERN,
# TRAINING_MODE, etc., so run the reconstruction configuration cell right
# before this one.
#
# Every expansion is double-quoted so that a value containing whitespace or
# glob characters (for example a notebook directory with a space in $PWD, or a
# wildcard in a *_file_pattern) reaches gcloud as one unmodified argument.
OUTDIR="gs://${BUCKET}/anomaly_detection/trained_model/dense_labeled"
JOBNAME="job_anomaly_detection_reconstruction_dense_labeled_$(date -u +%y%m%d_%H%M%S)"
echo "${OUTDIR} ${REGION} ${JOBNAME}"
# Delete any existing contents of the output directory before training.
gcloud storage rm --recursive --continue-on-error "${OUTDIR}"
# Flags before the bare "--" configure the job itself; flags after it are
# forwarded to trainer.task.
gcloud ml-engine jobs submit training "${JOBNAME}" \
  --region="${REGION}" \
  --module-name=trainer.task \
  --package-path="$PWD/anomaly_detection_module/trainer" \
  --job-dir="${OUTDIR}" \
  --staging-bucket="gs://${BUCKET}" \
  --scale-tier=STANDARD_1 \
  --runtime-version="${TFVERSION}" \
  -- \
  --train_file_pattern="${TRAIN_FILE_PATTERN}" \
  --eval_file_pattern="${EVAL_FILE_PATTERN}" \
  --output_dir="${OUTDIR}" \
  --job-dir=./tmp \
  --seq_len="${SEQ_LEN}" \
  --num_feat="${NUM_FEAT}" \
  --feat_names="${FEAT_NAMES}" \
  --feat_defaults="${FEAT_DEFAULTS}" \
  --train_batch_size=32 \
  --eval_batch_size=32 \
  --previous_train_steps="${PREVIOUS_TRAIN_STEPS}" \
  --reconstruction_epochs="${RECONSTRUCTION_EPOCHS}" \
  --train_examples="${TRAIN_EXAMPLES}" \
  --learning_rate="${LEARNING_RATE}" \
  --start_delay_secs="${START_DELAY_SECS}" \
  --throttle_secs="${THROTTLE_SECS}" \
  --model_type="dense_autoencoder" \
  --enc_dnn_hidden_units="${ENC_DNN_HIDDEN_UNITS}" \
  --latent_vector_size="${LATENT_VECTOR_SIZE}" \
  --dec_dnn_hidden_units="${DEC_DNN_HIDDEN_UNITS}" \
  --time_loss_weight="${TIME_LOSS_WEIGHT}" \
  --feat_loss_weight="${FEAT_LOSS_WEIGHT}" \
  --training_mode="${TRAINING_MODE}" \
  --labeled_tune_thresh="${LABELED_TUNE_THRESH}" \
  --num_time_anom_thresh="${NUM_TIME_ANOM_THRESH}" \
  --num_feat_anom_thresh="${NUM_FEAT_ANOM_THRESH}"

### LSTM Autoencoder

In [None]:
%%bash
# Submit the LSTM encoder-decoder autoencoder reconstruction training job.
#
# Reads BUCKET, REGION, TFVERSION and the hyperparameter variables exported by
# the Python configuration cells. Later phases overwrite TRAIN_FILE_PATTERN,
# TRAINING_MODE, etc., so run the reconstruction configuration cell right
# before this one.
#
# Every expansion is double-quoted so that a value containing whitespace or
# glob characters (for example a notebook directory with a space in $PWD, or a
# wildcard in a *_file_pattern) reaches gcloud as one unmodified argument.
OUTDIR="gs://${BUCKET}/anomaly_detection/trained_model/lstm_labeled"
JOBNAME="job_anomaly_detection_reconstruction_lstm_labeled_$(date -u +%y%m%d_%H%M%S)"
echo "${OUTDIR} ${REGION} ${JOBNAME}"
# Delete any existing contents of the output directory before training.
gcloud storage rm --recursive --continue-on-error "${OUTDIR}"
# Flags before the bare "--" configure the job itself; flags after it are
# forwarded to trainer.task.
gcloud ml-engine jobs submit training "${JOBNAME}" \
  --region="${REGION}" \
  --module-name=trainer.task \
  --package-path="$PWD/anomaly_detection_module/trainer" \
  --job-dir="${OUTDIR}" \
  --staging-bucket="gs://${BUCKET}" \
  --scale-tier=STANDARD_1 \
  --runtime-version="${TFVERSION}" \
  -- \
  --train_file_pattern="${TRAIN_FILE_PATTERN}" \
  --eval_file_pattern="${EVAL_FILE_PATTERN}" \
  --output_dir="${OUTDIR}" \
  --job-dir=./tmp \
  --seq_len="${SEQ_LEN}" \
  --num_feat="${NUM_FEAT}" \
  --feat_names="${FEAT_NAMES}" \
  --feat_defaults="${FEAT_DEFAULTS}" \
  --train_batch_size=32 \
  --eval_batch_size=32 \
  --previous_train_steps="${PREVIOUS_TRAIN_STEPS}" \
  --reconstruction_epochs="${RECONSTRUCTION_EPOCHS}" \
  --train_examples="${TRAIN_EXAMPLES}" \
  --learning_rate="${LEARNING_RATE}" \
  --start_delay_secs="${START_DELAY_SECS}" \
  --throttle_secs="${THROTTLE_SECS}" \
  --model_type="lstm_enc_dec_autoencoder" \
  --reverse_labels_sequence="${REVERSE_LABELS_SEQUENCE}" \
  --enc_lstm_hidden_units="${ENC_LSTM_HIDDEN_UNITS}" \
  --dec_lstm_hidden_units="${DEC_LSTM_HIDDEN_UNITS}" \
  --lstm_dropout_output_keep_probs="${LSTM_DROPOUT_OUTPUT_KEEP_PROBS}" \
  --dnn_hidden_units="${DNN_HIDDEN_UNITS}" \
  --training_mode="${TRAINING_MODE}" \
  --labeled_tune_thresh="${LABELED_TUNE_THRESH}" \
  --num_time_anom_thresh="${NUM_TIME_ANOM_THRESH}" \
  --num_feat_anom_thresh="${NUM_FEAT_ANOM_THRESH}"

### PCA Autoencoder

Reconstruction

In [None]:
%%bash
# Submit the PCA reconstruction training job with principal-component
# autotuning switched off.
#
# Reads BUCKET, REGION, TFVERSION and the hyperparameter variables exported by
# the Python configuration cells. Later phases overwrite TRAIN_FILE_PATTERN,
# TRAINING_MODE, etc., so run the reconstruction configuration cell right
# before this one.
#
# Every expansion is double-quoted so that a value containing whitespace or
# glob characters (for example a notebook directory with a space in $PWD, or a
# wildcard in a *_file_pattern) reaches gcloud as one unmodified argument.
OUTDIR="gs://${BUCKET}/anomaly_detection/trained_model/pca_labeled"
JOBNAME="job_anomaly_detection_reconstruction_pca_labeled_$(date -u +%y%m%d_%H%M%S)"
echo "${OUTDIR} ${REGION} ${JOBNAME}"
# Delete any existing contents of the output directory before training.
gcloud storage rm --recursive --continue-on-error "${OUTDIR}"
# Flags before the bare "--" configure the job itself; flags after it are
# forwarded to trainer.task.
gcloud ml-engine jobs submit training "${JOBNAME}" \
  --region="${REGION}" \
  --module-name=trainer.task \
  --package-path="$PWD/anomaly_detection_module/trainer" \
  --job-dir="${OUTDIR}" \
  --staging-bucket="gs://${BUCKET}" \
  --scale-tier=STANDARD_1 \
  --runtime-version="${TFVERSION}" \
  -- \
  --train_file_pattern="${TRAIN_FILE_PATTERN}" \
  --eval_file_pattern="${EVAL_FILE_PATTERN}" \
  --output_dir="${OUTDIR}" \
  --job-dir=./tmp \
  --seq_len="${SEQ_LEN}" \
  --num_feat="${NUM_FEAT}" \
  --feat_names="${FEAT_NAMES}" \
  --feat_defaults="${FEAT_DEFAULTS}" \
  --train_batch_size=32 \
  --eval_batch_size=32 \
  --previous_train_steps="${PREVIOUS_TRAIN_STEPS}" \
  --reconstruction_epochs=1.0 \
  --train_examples="${TRAIN_EXAMPLES}" \
  --eval_examples=6400 \
  --start_delay_secs="${START_DELAY_SECS}" \
  --throttle_secs="${THROTTLE_SECS}" \
  --model_type="pca" \
  --training_mode="${TRAINING_MODE}" \
  --autotune_principal_components="False" \
  --labeled_tune_thresh="${LABELED_TUNE_THRESH}" \
  --num_time_anom_thresh="${NUM_TIME_ANOM_THRESH}" \
  --num_feat_anom_thresh="${NUM_FEAT_ANOM_THRESH}"

Autotune principal components

In [None]:
%%bash
# Submit the PCA job again with --autotune_principal_components="True".
#
# Unlike the reconstruction job, this cell does NOT clear the output
# directory: it reuses the pca_labeled directory written by the previous job.
# Both the train and the eval pattern intentionally point at
# EVAL_FILE_PATTERN.
# NOTE(review): --previous_train_steps=2000 presumably equals the steps taken
# by the reconstruction job (64000 examples / batch size 32) -- confirm
# against the trainer.
#
# Every expansion is double-quoted so that a value containing whitespace or
# glob characters (for example a notebook directory with a space in $PWD, or a
# wildcard in a *_file_pattern) reaches gcloud as one unmodified argument.
OUTDIR="gs://${BUCKET}/anomaly_detection/trained_model/pca_labeled"
JOBNAME="job_anomaly_detection_reconstruction_pca_labeled_$(date -u +%y%m%d_%H%M%S)"
echo "${OUTDIR} ${REGION} ${JOBNAME}"
# Flags before the bare "--" configure the job itself; flags after it are
# forwarded to trainer.task.
gcloud ml-engine jobs submit training "${JOBNAME}" \
  --region="${REGION}" \
  --module-name=trainer.task \
  --package-path="$PWD/anomaly_detection_module/trainer" \
  --job-dir="${OUTDIR}" \
  --staging-bucket="gs://${BUCKET}" \
  --scale-tier=STANDARD_1 \
  --runtime-version="${TFVERSION}" \
  -- \
  --train_file_pattern="${EVAL_FILE_PATTERN}" \
  --eval_file_pattern="${EVAL_FILE_PATTERN}" \
  --output_dir="${OUTDIR}" \
  --job-dir=./tmp \
  --seq_len="${SEQ_LEN}" \
  --num_feat="${NUM_FEAT}" \
  --feat_names="${FEAT_NAMES}" \
  --feat_defaults="${FEAT_DEFAULTS}" \
  --train_batch_size=32 \
  --eval_batch_size=32 \
  --previous_train_steps=2000 \
  --reconstruction_epochs=1.0 \
  --train_examples=6400 \
  --eval_examples=6400 \
  --start_delay_secs="${START_DELAY_SECS}" \
  --throttle_secs="${THROTTLE_SECS}" \
  --model_type="pca" \
  --training_mode="${TRAINING_MODE}" \
  --autotune_principal_components="True" \
  --labeled_tune_thresh="${LABELED_TUNE_THRESH}" \
  --num_time_anom_thresh="${NUM_TIME_ANOM_THRESH}" \
  --num_feat_anom_thresh="${NUM_FEAT_ANOM_THRESH}"

## Hyperparameter tuning of reconstruction hyperparameters

### Dense Autoencoder

In [None]:
%%writefile hyperparam_reconstruction_dense.yaml
# Hyperparameter tuning spec for the dense autoencoder reconstruction job.
# Minimizes the metric tagged "rmse" over at most 30 trials, one at a time.
trainingInput:
  scaleTier: STANDARD_1
  hyperparameters:
    goal: MINIMIZE
    hyperparameterMetricTag: rmse
    maxTrials: 30
    maxParallelTrials: 1
    params:
      # NOTE(review): the direct training cells in this notebook pass the
      # hidden-unit lists comma-separated (64,32,16); the values here are
      # space-separated. Confirm the trainer parses this form.
      - parameterName: enc_dnn_hidden_units
        type: CATEGORICAL
        categoricalValues:
          - "64 32 16"
          - "256 128 16"
          - "64 64 64"
      - parameterName: latent_vector_size
        type: INTEGER
        minValue: 8
        maxValue: 16
        scaleType: UNIT_LINEAR_SCALE
      - parameterName: dec_dnn_hidden_units
        type: CATEGORICAL
        categoricalValues:
          - "16 32 64"
          - "16 128 256"
          - "64 64 64"
      - parameterName: train_batch_size
        type: INTEGER
        minValue: 8
        maxValue: 512
        scaleType: UNIT_LOG_SCALE
      - parameterName: learning_rate
        type: DOUBLE
        minValue: 0.001
        maxValue: 0.1
        scaleType: UNIT_LINEAR_SCALE

In [None]:
%%bash
# Submit the hyperparameter tuning job for the dense autoencoder, driven by
# hyperparam_reconstruction_dense.yaml.
#
# --model_type is passed explicitly. The direct training cells always name
# their model type; leaving it out here would make this job depend on the
# trainer's default model type.
#
# Phase-specific values (file patterns, previous_train_steps, train_examples,
# training_mode) stay hard-coded because the matching environment variables
# are overwritten by the later phase configuration cells. The values that are
# set only once (sequence shape, schedule, thresholds) are read from the
# environment so they cannot drift from FEAT_NAMES / FEAT_DEFAULTS.
#
# Every expansion is double-quoted so that a value containing whitespace or
# glob characters reaches gcloud as one unmodified argument.
OUTDIR="gs://${BUCKET}/anomaly_detection/hyperparam_reconstruction_dense_labeled"
JOBNAME="job_anomaly_detection_hyperparam_reconstruction_dense_labeled_$(date -u +%y%m%d_%H%M%S)"
echo "${OUTDIR} ${REGION} ${JOBNAME}"
# Delete any existing contents of the output directory before tuning.
gcloud storage rm --recursive --continue-on-error "${OUTDIR}"
# Flags before the bare "--" configure the job itself; flags after it are
# forwarded to trainer.task.
gcloud ml-engine jobs submit training "${JOBNAME}" \
  --region="${REGION}" \
  --module-name=trainer.task \
  --package-path="$PWD/anomaly_detection_module/trainer" \
  --job-dir="${OUTDIR}" \
  --staging-bucket="gs://${BUCKET}" \
  --scale-tier=STANDARD_1 \
  --config=hyperparam_reconstruction_dense.yaml \
  --runtime-version="${TFVERSION}" \
  -- \
  --train_file_pattern="gs://${BUCKET}/anomaly_detection/data/train_norm_seq.csv" \
  --eval_file_pattern="gs://${BUCKET}/anomaly_detection/data/val_norm_1_seq.csv" \
  --output_dir="${OUTDIR}" \
  --job-dir="${OUTDIR}" \
  --seq_len="${SEQ_LEN}" \
  --num_feat="${NUM_FEAT}" \
  --feat_names="${FEAT_NAMES}" \
  --feat_defaults="${FEAT_DEFAULTS}" \
  --train_batch_size=32 \
  --eval_batch_size=32 \
  --previous_train_steps=0 \
  --reconstruction_epochs=1.0 \
  --train_examples=64000 \
  --start_delay_secs="${START_DELAY_SECS}" \
  --throttle_secs="${THROTTLE_SECS}" \
  --model_type="dense_autoencoder" \
  --training_mode="reconstruction" \
  --labeled_tune_thresh="${LABELED_TUNE_THRESH}" \
  --num_time_anom_thresh="${NUM_TIME_ANOM_THRESH}" \
  --num_feat_anom_thresh="${NUM_FEAT_ANOM_THRESH}"

### LSTM Autoencoder

In [None]:
%%writefile hyperparam_reconstruction_lstm.yaml
# Hyperparameter tuning spec for the LSTM autoencoder reconstruction job.
# Minimizes the metric tagged "rmse" over at most 30 trials, one at a time.
trainingInput:
  scaleTier: STANDARD_1
  hyperparameters:
    goal: MINIMIZE
    hyperparameterMetricTag: rmse
    maxTrials: 30
    maxParallelTrials: 1
    params:
      # NOTE(review): the direct training cells in this notebook pass these
      # list-valued flags comma-separated (64,32,16); the values here are
      # space-separated. Confirm the trainer parses this form.
      - parameterName: enc_lstm_hidden_units
        type: CATEGORICAL
        categoricalValues:
          - "64 32 16"
          - "256 128 16"
          - "64 64 64"
      - parameterName: dec_lstm_hidden_units
        type: CATEGORICAL
        categoricalValues:
          - "16 32 64"
          - "16 128 256"
          - "64 64 64"
      - parameterName: lstm_dropout_output_keep_probs
        type: CATEGORICAL
        categoricalValues:
          - "0.9 1.0 1.0"
          - "0.95 0.95 1.0"
          - "0.95 0.95 0.95"
      - parameterName: dnn_hidden_units
        type: CATEGORICAL
        categoricalValues:
          - "256 128 64"
          - "256 128 16"
          - "64 64 64"
      - parameterName: train_batch_size
        type: INTEGER
        minValue: 8
        maxValue: 512
        scaleType: UNIT_LOG_SCALE
      - parameterName: learning_rate
        type: DOUBLE
        minValue: 0.001
        maxValue: 0.1
        scaleType: UNIT_LINEAR_SCALE

In [None]:
%%bash
# Submit the hyperparameter tuning job for the LSTM autoencoder, driven by
# hyperparam_reconstruction_lstm.yaml.
#
# --model_type is passed explicitly. The spec tunes LSTM-only parameters
# (enc_lstm_hidden_units, ...), and every other LSTM cell in this notebook
# names "lstm_enc_dec_autoencoder"; leaving it out would make this job depend
# on the trainer's default model type.
#
# Phase-specific values (file patterns, previous_train_steps, train_examples,
# training_mode) stay hard-coded because the matching environment variables
# are overwritten by the later phase configuration cells. The values that are
# set only once (sequence shape, schedule, thresholds) are read from the
# environment so they cannot drift from FEAT_NAMES / FEAT_DEFAULTS.
#
# Every expansion is double-quoted so that a value containing whitespace or
# glob characters reaches gcloud as one unmodified argument.
OUTDIR="gs://${BUCKET}/anomaly_detection/hyperparam_reconstruction_lstm_labeled"
JOBNAME="job_anomaly_detection_hyperparam_reconstruction_lstm_labeled_$(date -u +%y%m%d_%H%M%S)"
echo "${OUTDIR} ${REGION} ${JOBNAME}"
# Delete any existing contents of the output directory before tuning.
gcloud storage rm --recursive --continue-on-error "${OUTDIR}"
# Flags before the bare "--" configure the job itself; flags after it are
# forwarded to trainer.task.
gcloud ml-engine jobs submit training "${JOBNAME}" \
  --region="${REGION}" \
  --module-name=trainer.task \
  --package-path="$PWD/anomaly_detection_module/trainer" \
  --job-dir="${OUTDIR}" \
  --staging-bucket="gs://${BUCKET}" \
  --scale-tier=STANDARD_1 \
  --config=hyperparam_reconstruction_lstm.yaml \
  --runtime-version="${TFVERSION}" \
  -- \
  --train_file_pattern="gs://${BUCKET}/anomaly_detection/data/train_norm_seq.csv" \
  --eval_file_pattern="gs://${BUCKET}/anomaly_detection/data/val_norm_1_seq.csv" \
  --output_dir="${OUTDIR}" \
  --job-dir="${OUTDIR}" \
  --seq_len="${SEQ_LEN}" \
  --num_feat="${NUM_FEAT}" \
  --feat_names="${FEAT_NAMES}" \
  --feat_defaults="${FEAT_DEFAULTS}" \
  --train_batch_size=32 \
  --eval_batch_size=32 \
  --previous_train_steps=0 \
  --reconstruction_epochs=1.0 \
  --train_examples=64000 \
  --start_delay_secs="${START_DELAY_SECS}" \
  --throttle_secs="${THROTTLE_SECS}" \
  --model_type="lstm_enc_dec_autoencoder" \
  --training_mode="reconstruction" \
  --labeled_tune_thresh="${LABELED_TUNE_THRESH}" \
  --num_time_anom_thresh="${NUM_TIME_ANOM_THRESH}" \
  --num_feat_anom_thresh="${NUM_FEAT_ANOM_THRESH}"

### PCA Autoencoder

In [None]:
%%writefile hyperparam_reconstruction_pca.yaml
# Hyperparameter tuning spec for the PCA reconstruction job.
# Minimizes the metric tagged "rmse" over at most 30 trials, one at a time.
trainingInput:
  scaleTier: STANDARD_1
  hyperparameters:
    goal: MINIMIZE
    hyperparameterMetricTag: rmse
    maxTrials: 30
    maxParallelTrials: 1
    params:
      # NOTE(review): this notebook trains with 5 features and 30 time steps;
      # confirm how the trainer handles a component count larger than the
      # dimension it applies to.
      - parameterName: k_principal_components_time
        type: INTEGER
        minValue: 2
        maxValue: 10
        scaleType: UNIT_LINEAR_SCALE
      - parameterName: k_principal_components_feat
        type: INTEGER
        minValue: 2
        maxValue: 10
        scaleType: UNIT_LINEAR_SCALE

In [None]:
%%bash
# Submit the hyperparameter tuning job for the PCA model, driven by
# hyperparam_reconstruction_pca.yaml.
#
# --model_type is passed explicitly. The spec tunes PCA-only parameters
# (k_principal_components_*), and every other PCA cell in this notebook names
# "pca"; leaving it out would make this job depend on the trainer's default
# model type.
#
# Phase-specific values (file patterns, previous_train_steps, train_examples,
# eval_examples, training_mode) stay hard-coded because the matching
# environment variables are overwritten by the later phase configuration
# cells. The values that are set only once (sequence shape, schedule,
# thresholds) are read from the environment so they cannot drift from
# FEAT_NAMES / FEAT_DEFAULTS.
#
# Every expansion is double-quoted so that a value containing whitespace or
# glob characters reaches gcloud as one unmodified argument.
OUTDIR="gs://${BUCKET}/anomaly_detection/hyperparam_reconstruction_pca_labeled"
JOBNAME="job_anomaly_detection_hyperparam_reconstruction_pca_labeled_$(date -u +%y%m%d_%H%M%S)"
echo "${OUTDIR} ${REGION} ${JOBNAME}"
# Delete any existing contents of the output directory before tuning.
gcloud storage rm --recursive --continue-on-error "${OUTDIR}"
# Flags before the bare "--" configure the job itself; flags after it are
# forwarded to trainer.task.
gcloud ml-engine jobs submit training "${JOBNAME}" \
  --region="${REGION}" \
  --module-name=trainer.task \
  --package-path="$PWD/anomaly_detection_module/trainer" \
  --job-dir="${OUTDIR}" \
  --staging-bucket="gs://${BUCKET}" \
  --scale-tier=STANDARD_1 \
  --config=hyperparam_reconstruction_pca.yaml \
  --runtime-version="${TFVERSION}" \
  -- \
  --train_file_pattern="gs://${BUCKET}/anomaly_detection/data/train_norm_seq.csv" \
  --eval_file_pattern="gs://${BUCKET}/anomaly_detection/data/val_norm_1_seq.csv" \
  --output_dir="${OUTDIR}" \
  --job-dir="${OUTDIR}" \
  --seq_len="${SEQ_LEN}" \
  --num_feat="${NUM_FEAT}" \
  --feat_names="${FEAT_NAMES}" \
  --feat_defaults="${FEAT_DEFAULTS}" \
  --train_batch_size=32 \
  --eval_batch_size=32 \
  --previous_train_steps=0 \
  --reconstruction_epochs=1.0 \
  --train_examples=64000 \
  --eval_examples=6400 \
  --start_delay_secs="${START_DELAY_SECS}" \
  --throttle_secs="${THROTTLE_SECS}" \
  --model_type="pca" \
  --training_mode="reconstruction" \
  --labeled_tune_thresh="${LABELED_TUNE_THRESH}" \
  --num_time_anom_thresh="${NUM_TIME_ANOM_THRESH}" \
  --num_feat_anom_thresh="${NUM_FEAT_ANOM_THRESH}"

## Train error distribution variables

In [None]:
# Export the inputs and hyperparameters for the error-distribution phase.
# This overwrites the file patterns, step/example counts and training mode
# set for the reconstruction phase; both patterns point at the same file.
os.environ.update({
    "TRAIN_FILE_PATTERN": "gs://{}/anomaly_detection/data/val_norm_1_seq.csv".format(BUCKET),
    "EVAL_FILE_PATTERN": "gs://{}/anomaly_detection/data/val_norm_1_seq.csv".format(BUCKET),
    "PREVIOUS_TRAIN_STEPS": "2000",
    "TRAIN_EXAMPLES": "6400",
    "TRAINING_MODE": "calculate_error_distribution_statistics",
    "EPS": "1e-12",
})

### Dense Autoencoder

In [None]:
%%bash
# Submit the error-distribution statistics job for the dense autoencoder.
#
# The output directory is NOT cleared: the job reuses the dense_labeled
# directory written by the reconstruction job. Run the error-distribution
# configuration cell first so TRAINING_MODE, the file patterns and EPS hold
# this phase's values.
#
# Every expansion is double-quoted so that a value containing whitespace or
# glob characters (for example a notebook directory with a space in $PWD, or a
# wildcard in a *_file_pattern) reaches gcloud as one unmodified argument.
OUTDIR="gs://${BUCKET}/anomaly_detection/trained_model/dense_labeled"
JOBNAME="job_anomaly_detection_calculate_error_distribution_statistics_dense_labeled_$(date -u +%y%m%d_%H%M%S)"
echo "${OUTDIR} ${REGION} ${JOBNAME}"
# Flags before the bare "--" configure the job itself; flags after it are
# forwarded to trainer.task.
gcloud ml-engine jobs submit training "${JOBNAME}" \
  --region="${REGION}" \
  --module-name=trainer.task \
  --package-path="$PWD/anomaly_detection_module/trainer" \
  --job-dir="${OUTDIR}" \
  --staging-bucket="gs://${BUCKET}" \
  --scale-tier=STANDARD_1 \
  --runtime-version="${TFVERSION}" \
  -- \
  --train_file_pattern="${TRAIN_FILE_PATTERN}" \
  --eval_file_pattern="${EVAL_FILE_PATTERN}" \
  --output_dir="${OUTDIR}" \
  --job-dir=./tmp \
  --seq_len="${SEQ_LEN}" \
  --num_feat="${NUM_FEAT}" \
  --feat_names="${FEAT_NAMES}" \
  --feat_defaults="${FEAT_DEFAULTS}" \
  --train_batch_size=32 \
  --eval_batch_size=32 \
  --previous_train_steps="${PREVIOUS_TRAIN_STEPS}" \
  --train_examples="${TRAIN_EXAMPLES}" \
  --start_delay_secs="${START_DELAY_SECS}" \
  --throttle_secs="${THROTTLE_SECS}" \
  --model_type="dense_autoencoder" \
  --enc_dnn_hidden_units="${ENC_DNN_HIDDEN_UNITS}" \
  --latent_vector_size="${LATENT_VECTOR_SIZE}" \
  --dec_dnn_hidden_units="${DEC_DNN_HIDDEN_UNITS}" \
  --time_loss_weight="${TIME_LOSS_WEIGHT}" \
  --feat_loss_weight="${FEAT_LOSS_WEIGHT}" \
  --training_mode="${TRAINING_MODE}" \
  --labeled_tune_thresh="${LABELED_TUNE_THRESH}" \
  --eps="${EPS}" \
  --num_time_anom_thresh="${NUM_TIME_ANOM_THRESH}" \
  --num_feat_anom_thresh="${NUM_FEAT_ANOM_THRESH}"

### LSTM Autoencoder

In [None]:
%%bash
# Submit the error-distribution statistics job for the LSTM autoencoder.
#
# The output directory is NOT cleared: the job reuses the lstm_labeled
# directory written by the reconstruction job. Run the error-distribution
# configuration cell first so TRAINING_MODE, the file patterns and EPS hold
# this phase's values.
#
# Every expansion is double-quoted so that a value containing whitespace or
# glob characters (for example a notebook directory with a space in $PWD, or a
# wildcard in a *_file_pattern) reaches gcloud as one unmodified argument.
OUTDIR="gs://${BUCKET}/anomaly_detection/trained_model/lstm_labeled"
JOBNAME="job_anomaly_detection_calculate_error_distribution_statistics_lstm_labeled_$(date -u +%y%m%d_%H%M%S)"
echo "${OUTDIR} ${REGION} ${JOBNAME}"
# Flags before the bare "--" configure the job itself; flags after it are
# forwarded to trainer.task.
gcloud ml-engine jobs submit training "${JOBNAME}" \
  --region="${REGION}" \
  --module-name=trainer.task \
  --package-path="$PWD/anomaly_detection_module/trainer" \
  --job-dir="${OUTDIR}" \
  --staging-bucket="gs://${BUCKET}" \
  --scale-tier=STANDARD_1 \
  --runtime-version="${TFVERSION}" \
  -- \
  --train_file_pattern="${TRAIN_FILE_PATTERN}" \
  --eval_file_pattern="${EVAL_FILE_PATTERN}" \
  --output_dir="${OUTDIR}" \
  --job-dir=./tmp \
  --seq_len="${SEQ_LEN}" \
  --num_feat="${NUM_FEAT}" \
  --feat_names="${FEAT_NAMES}" \
  --feat_defaults="${FEAT_DEFAULTS}" \
  --train_batch_size=32 \
  --eval_batch_size=32 \
  --previous_train_steps="${PREVIOUS_TRAIN_STEPS}" \
  --train_examples="${TRAIN_EXAMPLES}" \
  --start_delay_secs="${START_DELAY_SECS}" \
  --throttle_secs="${THROTTLE_SECS}" \
  --model_type="lstm_enc_dec_autoencoder" \
  --reverse_labels_sequence="${REVERSE_LABELS_SEQUENCE}" \
  --enc_lstm_hidden_units="${ENC_LSTM_HIDDEN_UNITS}" \
  --dec_lstm_hidden_units="${DEC_LSTM_HIDDEN_UNITS}" \
  --lstm_dropout_output_keep_probs="${LSTM_DROPOUT_OUTPUT_KEEP_PROBS}" \
  --dnn_hidden_units="${DNN_HIDDEN_UNITS}" \
  --training_mode="${TRAINING_MODE}" \
  --labeled_tune_thresh="${LABELED_TUNE_THRESH}" \
  --eps="${EPS}" \
  --num_time_anom_thresh="${NUM_TIME_ANOM_THRESH}" \
  --num_feat_anom_thresh="${NUM_FEAT_ANOM_THRESH}"

### PCA Autoencoder

In [None]:
%%bash
# Submit the error-distribution statistics job for the PCA model.
#
# The output directory is NOT cleared: the job reuses the pca_labeled
# directory written by the earlier PCA jobs. Run the error-distribution
# configuration cell first so TRAINING_MODE, the file patterns and EPS hold
# this phase's values.
# NOTE(review): --previous_train_steps is hard-coded to 2200 rather than
# PREVIOUS_TRAIN_STEPS (2000), presumably to account for the extra PCA
# autotune job (6400 examples / batch size 32 = 200 steps) -- confirm.
#
# Every expansion is double-quoted so that a value containing whitespace or
# glob characters (for example a notebook directory with a space in $PWD, or a
# wildcard in a *_file_pattern) reaches gcloud as one unmodified argument.
OUTDIR="gs://${BUCKET}/anomaly_detection/trained_model/pca_labeled"
JOBNAME="job_anomaly_detection_calculate_error_distribution_statistics_pca_labeled_$(date -u +%y%m%d_%H%M%S)"
echo "${OUTDIR} ${REGION} ${JOBNAME}"
# Flags before the bare "--" configure the job itself; flags after it are
# forwarded to trainer.task.
gcloud ml-engine jobs submit training "${JOBNAME}" \
  --region="${REGION}" \
  --module-name=trainer.task \
  --package-path="$PWD/anomaly_detection_module/trainer" \
  --job-dir="${OUTDIR}" \
  --staging-bucket="gs://${BUCKET}" \
  --scale-tier=STANDARD_1 \
  --runtime-version="${TFVERSION}" \
  -- \
  --train_file_pattern="${TRAIN_FILE_PATTERN}" \
  --eval_file_pattern="${EVAL_FILE_PATTERN}" \
  --output_dir="${OUTDIR}" \
  --job-dir=./tmp \
  --seq_len="${SEQ_LEN}" \
  --num_feat="${NUM_FEAT}" \
  --feat_names="${FEAT_NAMES}" \
  --feat_defaults="${FEAT_DEFAULTS}" \
  --train_batch_size=32 \
  --eval_batch_size=32 \
  --previous_train_steps=2200 \
  --train_examples="${TRAIN_EXAMPLES}" \
  --start_delay_secs="${START_DELAY_SECS}" \
  --throttle_secs="${THROTTLE_SECS}" \
  --model_type="pca" \
  --training_mode="${TRAINING_MODE}" \
  --labeled_tune_thresh="${LABELED_TUNE_THRESH}" \
  --eps="${EPS}" \
  --num_time_anom_thresh="${NUM_TIME_ANOM_THRESH}" \
  --num_feat_anom_thresh="${NUM_FEAT_ANOM_THRESH}"

## Tune anomaly thresholds

In [None]:
# Export the step/example counts and training mode for the anomaly-threshold
# tuning phase, replacing the values set for the previous phase.
os.environ.update({
    "PREVIOUS_TRAIN_STEPS": "2200",
    "TRAIN_EXAMPLES": "12800",
    "TRAINING_MODE": "tune_anomaly_thresholds",
})

## Labeled threshold tuning

In [None]:
# Export the inputs for labeled threshold tuning: the labeled file used for
# both training and evaluation, the threshold ranges for the time and feature
# anomaly thresholds, and the beta of the F-score.
os.environ.update({
    "TRAIN_FILE_PATTERN": "gs://{}/anomaly_detection/data/labeled_val_mixed_seq.csv".format(BUCKET),
    "EVAL_FILE_PATTERN": "gs://{}/anomaly_detection/data/labeled_val_mixed_seq.csv".format(BUCKET),
    "MIN_TIME_ANOM_THRESH": "1.0",
    "MAX_TIME_ANOM_THRESH": "20.0",
    "MIN_FEAT_ANOM_THRESH": "20.0",
    "MAX_FEAT_ANOM_THRESH": "80.0",
    "F_SCORE_BETA": "0.05",
})

### Dense Autoencoder

In [None]:
%%bash
# Submit the anomaly-threshold tuning job for the dense autoencoder.
#
# The output directory is NOT cleared: the job reuses the dense_labeled
# directory written by the earlier phases. Run both threshold-tuning
# configuration cells first so TRAINING_MODE, the file patterns and the
# threshold ranges hold this phase's values.
#
# Every expansion is double-quoted so that a value containing whitespace or
# glob characters (for example a notebook directory with a space in $PWD, or a
# wildcard in a *_file_pattern) reaches gcloud as one unmodified argument.
OUTDIR="gs://${BUCKET}/anomaly_detection/trained_model/dense_labeled"
JOBNAME="job_anomaly_detection_tune_anomaly_thresholds_dense_labeled_$(date -u +%y%m%d_%H%M%S)"
echo "${OUTDIR} ${REGION} ${JOBNAME}"
# Flags before the bare "--" configure the job itself; flags after it are
# forwarded to trainer.task.
gcloud ml-engine jobs submit training "${JOBNAME}" \
  --region="${REGION}" \
  --module-name=trainer.task \
  --package-path="$PWD/anomaly_detection_module/trainer" \
  --job-dir="${OUTDIR}" \
  --staging-bucket="gs://${BUCKET}" \
  --scale-tier=STANDARD_1 \
  --runtime-version="${TFVERSION}" \
  -- \
  --train_file_pattern="${TRAIN_FILE_PATTERN}" \
  --eval_file_pattern="${EVAL_FILE_PATTERN}" \
  --output_dir="${OUTDIR}" \
  --job-dir=./tmp \
  --seq_len="${SEQ_LEN}" \
  --num_feat="${NUM_FEAT}" \
  --feat_names="${FEAT_NAMES}" \
  --feat_defaults="${FEAT_DEFAULTS}" \
  --train_batch_size=32 \
  --eval_batch_size=32 \
  --previous_train_steps="${PREVIOUS_TRAIN_STEPS}" \
  --train_examples="${TRAIN_EXAMPLES}" \
  --start_delay_secs="${START_DELAY_SECS}" \
  --throttle_secs="${THROTTLE_SECS}" \
  --model_type="dense_autoencoder" \
  --enc_dnn_hidden_units="${ENC_DNN_HIDDEN_UNITS}" \
  --latent_vector_size="${LATENT_VECTOR_SIZE}" \
  --dec_dnn_hidden_units="${DEC_DNN_HIDDEN_UNITS}" \
  --time_loss_weight="${TIME_LOSS_WEIGHT}" \
  --feat_loss_weight="${FEAT_LOSS_WEIGHT}" \
  --training_mode="${TRAINING_MODE}" \
  --labeled_tune_thresh="${LABELED_TUNE_THRESH}" \
  --num_time_anom_thresh="${NUM_TIME_ANOM_THRESH}" \
  --num_feat_anom_thresh="${NUM_FEAT_ANOM_THRESH}" \
  --min_time_anom_thresh="${MIN_TIME_ANOM_THRESH}" \
  --max_time_anom_thresh="${MAX_TIME_ANOM_THRESH}" \
  --min_feat_anom_thresh="${MIN_FEAT_ANOM_THRESH}" \
  --max_feat_anom_thresh="${MAX_FEAT_ANOM_THRESH}" \
  --f_score_beta="${F_SCORE_BETA}"

### LSTM Autoencoder

In [None]:
%%bash
# Submit the anomaly-threshold tuning job for the LSTM autoencoder.
#
# The output directory is NOT cleared: the job reuses the lstm_labeled
# directory written by the earlier phases. Run both threshold-tuning
# configuration cells first so TRAINING_MODE, the file patterns and the
# threshold ranges hold this phase's values.
#
# Every expansion is double-quoted so that a value containing whitespace or
# glob characters (for example a notebook directory with a space in $PWD, or a
# wildcard in a *_file_pattern) reaches gcloud as one unmodified argument.
OUTDIR="gs://${BUCKET}/anomaly_detection/trained_model/lstm_labeled"
JOBNAME="job_anomaly_detection_tune_anomaly_thresholds_lstm_labeled_$(date -u +%y%m%d_%H%M%S)"
echo "${OUTDIR} ${REGION} ${JOBNAME}"
# Flags before the bare "--" configure the job itself; flags after it are
# forwarded to trainer.task.
gcloud ml-engine jobs submit training "${JOBNAME}" \
  --region="${REGION}" \
  --module-name=trainer.task \
  --package-path="$PWD/anomaly_detection_module/trainer" \
  --job-dir="${OUTDIR}" \
  --staging-bucket="gs://${BUCKET}" \
  --scale-tier=STANDARD_1 \
  --runtime-version="${TFVERSION}" \
  -- \
  --train_file_pattern="${TRAIN_FILE_PATTERN}" \
  --eval_file_pattern="${EVAL_FILE_PATTERN}" \
  --output_dir="${OUTDIR}" \
  --job-dir=./tmp \
  --seq_len="${SEQ_LEN}" \
  --num_feat="${NUM_FEAT}" \
  --feat_names="${FEAT_NAMES}" \
  --feat_defaults="${FEAT_DEFAULTS}" \
  --train_batch_size=32 \
  --eval_batch_size=32 \
  --previous_train_steps="${PREVIOUS_TRAIN_STEPS}" \
  --train_examples="${TRAIN_EXAMPLES}" \
  --start_delay_secs="${START_DELAY_SECS}" \
  --throttle_secs="${THROTTLE_SECS}" \
  --model_type="lstm_enc_dec_autoencoder" \
  --reverse_labels_sequence="${REVERSE_LABELS_SEQUENCE}" \
  --enc_lstm_hidden_units="${ENC_LSTM_HIDDEN_UNITS}" \
  --dec_lstm_hidden_units="${DEC_LSTM_HIDDEN_UNITS}" \
  --lstm_dropout_output_keep_probs="${LSTM_DROPOUT_OUTPUT_KEEP_PROBS}" \
  --dnn_hidden_units="${DNN_HIDDEN_UNITS}" \
  --training_mode="${TRAINING_MODE}" \
  --labeled_tune_thresh="${LABELED_TUNE_THRESH}" \
  --num_time_anom_thresh="${NUM_TIME_ANOM_THRESH}" \
  --num_feat_anom_thresh="${NUM_FEAT_ANOM_THRESH}" \
  --min_time_anom_thresh="${MIN_TIME_ANOM_THRESH}" \
  --max_time_anom_thresh="${MAX_TIME_ANOM_THRESH}" \
  --min_feat_anom_thresh="${MIN_FEAT_ANOM_THRESH}" \
  --max_feat_anom_thresh="${MAX_FEAT_ANOM_THRESH}" \
  --f_score_beta="${F_SCORE_BETA}"

### PCA Autoencoder

In [None]:
%%bash
# Submit the anomaly-threshold tuning job for the PCA model.
#
# The output directory is NOT cleared: the job reuses the pca_labeled
# directory written by the earlier phases. Run both threshold-tuning
# configuration cells first so TRAINING_MODE, the file patterns and the
# threshold ranges hold this phase's values.
# NOTE(review): --previous_train_steps is hard-coded to 2400 rather than
# PREVIOUS_TRAIN_STEPS (2200), presumably to account for the extra PCA
# autotune job (200 steps) -- confirm.
#
# Every expansion is double-quoted so that a value containing whitespace or
# glob characters (for example a notebook directory with a space in $PWD, or a
# wildcard in a *_file_pattern) reaches gcloud as one unmodified argument.
OUTDIR="gs://${BUCKET}/anomaly_detection/trained_model/pca_labeled"
JOBNAME="job_anomaly_detection_tune_anomaly_thresholds_pca_labeled_$(date -u +%y%m%d_%H%M%S)"
echo "${OUTDIR} ${REGION} ${JOBNAME}"
# Flags before the bare "--" configure the job itself; flags after it are
# forwarded to trainer.task.
gcloud ml-engine jobs submit training "${JOBNAME}" \
  --region="${REGION}" \
  --module-name=trainer.task \
  --package-path="$PWD/anomaly_detection_module/trainer" \
  --job-dir="${OUTDIR}" \
  --staging-bucket="gs://${BUCKET}" \
  --scale-tier=STANDARD_1 \
  --runtime-version="${TFVERSION}" \
  -- \
  --train_file_pattern="${TRAIN_FILE_PATTERN}" \
  --eval_file_pattern="${EVAL_FILE_PATTERN}" \
  --output_dir="${OUTDIR}" \
  --job-dir=./tmp \
  --seq_len="${SEQ_LEN}" \
  --num_feat="${NUM_FEAT}" \
  --feat_names="${FEAT_NAMES}" \
  --feat_defaults="${FEAT_DEFAULTS}" \
  --train_batch_size=32 \
  --eval_batch_size=32 \
  --previous_train_steps=2400 \
  --train_examples="${TRAIN_EXAMPLES}" \
  --start_delay_secs="${START_DELAY_SECS}" \
  --throttle_secs="${THROTTLE_SECS}" \
  --model_type="pca" \
  --training_mode="${TRAINING_MODE}" \
  --labeled_tune_thresh="${LABELED_TUNE_THRESH}" \
  --num_time_anom_thresh="${NUM_TIME_ANOM_THRESH}" \
  --num_feat_anom_thresh="${NUM_FEAT_ANOM_THRESH}" \
  --min_time_anom_thresh="${MIN_TIME_ANOM_THRESH}" \
  --max_time_anom_thresh="${MAX_TIME_ANOM_THRESH}" \
  --min_feat_anom_thresh="${MIN_FEAT_ANOM_THRESH}" \
  --max_feat_anom_thresh="${MAX_FEAT_ANOM_THRESH}" \
  --f_score_beta="${F_SCORE_BETA}"

## Deploy

### Dense Autoencoder

In [None]:
%%bash
# Deploy the dense autoencoder export as version v1 of the
# anomaly_detection_dense_labeled model.
MODEL_NAME="anomaly_detection_dense_labeled"
MODEL_VERSION="v1"
EXPORT_DIR="gs://${BUCKET}/anomaly_detection/trained_model/dense_labeled/export/exporter/"
# The last entry listed under the exporter directory is the one deployed.
MODEL_LOCATION=$(gcloud storage ls "${EXPORT_DIR}" | tail -1)
# Stop early when nothing was exported; otherwise an empty --origin would be
# passed to "versions create" and fail with a confusing argument error.
if [ -z "${MODEL_LOCATION}" ]; then
  echo "No exported model found under ${EXPORT_DIR}; run the training cells first." >&2
  exit 1
fi
echo "Deleting and deploying $MODEL_NAME $MODEL_VERSION from $MODEL_LOCATION ... this will take a few minutes"
# To replace an existing deployment, uncomment the two delete commands below.
#gcloud ml-engine versions delete ${MODEL_VERSION} --model ${MODEL_NAME}
#gcloud ml-engine models delete ${MODEL_NAME}
gcloud ml-engine models create "${MODEL_NAME}" --regions "${REGION}"
gcloud ml-engine versions create "${MODEL_VERSION}" --model "${MODEL_NAME}" --origin "${MODEL_LOCATION}" --runtime-version "${TFVERSION}"

### LSTM Autoencoder

In [None]:
%%bash
# Deploy the LSTM autoencoder export as version v1 of the
# anomaly_detection_lstm_labeled model.
MODEL_NAME="anomaly_detection_lstm_labeled"
MODEL_VERSION="v1"
EXPORT_DIR="gs://${BUCKET}/anomaly_detection/trained_model/lstm_labeled/export/exporter/"
# The last entry listed under the exporter directory is the one deployed.
MODEL_LOCATION=$(gcloud storage ls "${EXPORT_DIR}" | tail -1)
# Stop early when nothing was exported; otherwise an empty --origin would be
# passed to "versions create" and fail with a confusing argument error.
if [ -z "${MODEL_LOCATION}" ]; then
  echo "No exported model found under ${EXPORT_DIR}; run the training cells first." >&2
  exit 1
fi
echo "Deleting and deploying $MODEL_NAME $MODEL_VERSION from $MODEL_LOCATION ... this will take a few minutes"
# To replace an existing deployment, uncomment the two delete commands below.
#gcloud ml-engine versions delete ${MODEL_VERSION} --model ${MODEL_NAME}
#gcloud ml-engine models delete ${MODEL_NAME}
gcloud ml-engine models create "${MODEL_NAME}" --regions "${REGION}"
gcloud ml-engine versions create "${MODEL_VERSION}" --model "${MODEL_NAME}" --origin "${MODEL_LOCATION}" --runtime-version "${TFVERSION}"

### PCA Autoencoder

In [None]:
%%bash
# Deploy the PCA export as version v1 of the anomaly_detection_pca_labeled
# model.
MODEL_NAME="anomaly_detection_pca_labeled"
MODEL_VERSION="v1"
EXPORT_DIR="gs://${BUCKET}/anomaly_detection/trained_model/pca_labeled/export/exporter/"
# The last entry listed under the exporter directory is the one deployed.
MODEL_LOCATION=$(gcloud storage ls "${EXPORT_DIR}" | tail -1)
# Stop early when nothing was exported; otherwise an empty --origin would be
# passed to "versions create" and fail with a confusing argument error.
if [ -z "${MODEL_LOCATION}" ]; then
  echo "No exported model found under ${EXPORT_DIR}; run the training cells first." >&2
  exit 1
fi
echo "Deleting and deploying $MODEL_NAME $MODEL_VERSION from $MODEL_LOCATION ... this will take a few minutes"
# To replace an existing deployment, uncomment the two delete commands below.
#gcloud ml-engine versions delete ${MODEL_VERSION} --model ${MODEL_NAME}
#gcloud ml-engine models delete ${MODEL_NAME}
gcloud ml-engine models create "${MODEL_NAME}" --regions "${REGION}"
gcloud ml-engine versions create "${MODEL_VERSION}" --model "${MODEL_NAME}" --origin "${MODEL_LOCATION}" --runtime-version "${TFVERSION}"

## Prediction

In [29]:
# Names of the five feature columns ("tag_0" .. "tag_4"), used as the keys of
# each prediction instance.
UNLABELED_CSV_COLUMNS = [
    "tag_{0}".format(column_index) for column_index in range(5)
]

In [30]:
import numpy as np
# Load the labeled test CSV as an array of strings. Values are kept as text
# (dtype=str) because they are later placed into the prediction request
# unchanged.
labeled_test_mixed_sequences_array = np.loadtxt(
    "data/labeled_test_mixed_seq.csv", delimiter=",", dtype=str)
print(
    "labeled_test_mixed_sequences_array.shape = {}".format(
        labeled_test_mixed_sequences_array.shape))

labeled_test_mixed_sequences_array.shape = (12800, 6)


In [31]:
# Number of leading rows of the test array to send for online prediction.
number_of_prediction_instances = 10

# Show the last column (the label) of those rows.
print("labels = {}".format(
    labeled_test_mixed_sequences_array[:number_of_prediction_instances, -1]))

labels = ['0' '0' '0' '0' '0' '0' '1' '1' '1' '0']


### GCloud ML-Engine prediction from deployed model

In [32]:
# First number_of_prediction_instances rows of the test array as nested
# Python lists of strings.
# NOTE(review): no visible cell reads this variable; the instances cell
# rebuilds the same slice itself.
test_data_normal_string_list = (
    labeled_test_mixed_sequences_array[:number_of_prediction_instances].tolist())

In [33]:
# Format dataframe to instances list to get sent to ML-Engine
instances = [{UNLABELED_CSV_COLUMNS[i]: example[i]
              for i in range(len(UNLABELED_CSV_COLUMNS))} 
             for example in labeled_test_mixed_sequences_array.tolist()[0:number_of_prediction_instances]]

In [34]:
# Display the instances payload to check it before it is sent for prediction.
instances

[{'tag_0': '0.66491856;1.9145195;1.75234653;-0.78400576;-0.75609776;0.84197907;2.33069302;0.67758695;-1.60418555;-0.16573821;1.65752209;1.38322258;-0.65379731;-0.98907577;0.85974301;2.28947191;1.30827902;-1.28121539;-0.84517686;1.16128253;2.54710653;-0.07715313;-0.99222116;-0.14657169;2.05595855;1.51383899;-0.53623582;-0.75514892;0.48173323;2.3100255',
  'tag_1': '0.14611996;1.91715205;-0.37882894;0.21415584;2.12791802;-0.0619942;0.11572278;1.93012199;0.6541758;-0.68204207;1.56716207;0.75419849;-0.37778253;1.57716806;1.37025903;-1.14077495;1.15195495;1.68996712;-0.81952461;0.85177925;1.21750258;-0.15392611;-0.18813038;2.1498899;0.22421678;-0.30835851;1.58292183;-0.08034281;-0.51606627;2.0111497',
  'tag_2': '0.93582327;1.42382616;2.07416534;0.66961254;-0.45530108;-0.33779627;1.29564858;1.57718875;1.62194094;-0.50371325;-0.68795334;-0.21823157;1.67758936;1.87901361;0.89202323;-1.15923235;-0.39328089;1.03425895;1.62499189;1.47040884;0.40736898;-1.1339056;-0.29514493;1.74666228;2.22234859

### Dense Autoencoder

In [35]:
# Send instance dictionary to receive response from ML-Engine for online prediction
from googleapiclient import discovery
from oauth2client.client import GoogleCredentials
import json

# Build a client for the "ml" v1 API using the application default
# credentials.
credentials = GoogleCredentials.get_application_default()
api = discovery.build("ml", "v1", credentials=credentials)

# Request body for online prediction: the instance dictionaries built earlier.
request_data = dict(instances=instances)

# Resource name of version v1 of the deployed dense autoencoder model.
parent = "projects/%s/models/%s/versions/%s" % (
    PROJECT,
    "anomaly_detection_dense_labeled",
    "v1",
)
response = api.projects().predict(name=parent, body=request_data).execute()
print("response = {}".format(response))

response = {'predictions': [{'time_anom_flags': 0, 'X_time_abs_recon_err': [[0.66491856, 0.14611996, 0.93582327, 0.55496827, 0.09135211], [1.9145195, 1.91715205, 1.42382616, 1.63017531, 2.01834776], [1.75234653, 0.37882894, 2.07416534, 0.17487062, 0.97056041], [0.78400576, 0.21415584, 0.66961254, 0.14967874, 0.1594919], [0.75609776, 2.12791802, 0.45530108, 2.12154598, 0.62361487], [0.84197907, 0.0619942, 0.33779627, 0.98646297, 0.47084415], [2.33069302, 0.11572278, 1.29564858, 0.89614069, 1.44724272], [0.67758695, 1.93012199, 1.57718875, 0.16462197, 1.15545605], [1.60418555, 0.6541758, 1.62194094, 1.48088977, 0.64437793], [0.16573821, 0.68204207, 0.50371325, 0.2009814, 0.18398029], [1.65752209, 1.56716207, 0.68795334, 0.7492661, 1.1087019], [1.38322258, 0.75419849, 0.21823157, 1.41074009, 2.06215052], [0.65379731, 0.37778253, 1.67758936, 1.49459742, 0.45822295], [0.98907577, 1.57716806, 1.87901361, 0.78390433, 0.61682919], [0.85974301, 1.37025903, 0.89202323, 0.96362317, 0.29801659], [

### LSTM Autoencoder

In [36]:
# Send instance dictionary to receive response from ML-Engine for online prediction
from googleapiclient import discovery
from oauth2client.client import GoogleCredentials
import json

# Build a client for the "ml" v1 API using the application default
# credentials.
credentials = GoogleCredentials.get_application_default()
api = discovery.build("ml", "v1", credentials=credentials)

# Request body for online prediction: the instance dictionaries built earlier.
request_data = dict(instances=instances)

# Resource name of version v1 of the deployed LSTM autoencoder model.
parent = "projects/%s/models/%s/versions/%s" % (
    PROJECT,
    "anomaly_detection_lstm_labeled",
    "v1",
)
response = api.projects().predict(name=parent, body=request_data).execute()
print("response = {}".format(response))

response = {'predictions': [{'time_anom_flags': 0, 'X_time_abs_recon_err': [[0.41491645314022535, 0.17814782047161393, 0.6791140559628012, 0.1871596458471131, 0.3532771634912568], [1.6645173931402253, 1.5928842695283862, 1.167116945962801, 1.262366685847113, 1.5737184865087435], [1.5023444231402254, 0.7030967204716139, 1.817456125962801, 0.5426792441528869, 0.5259311365087432], [1.0340078668597747, 0.11011194047161393, 0.4129033259628012, 0.517487364152887, 0.6041211734912568], [1.0060998668597747, 1.8036502395283862, 0.7120102940371988, 1.7537373558471132, 1.0682441434912568], [0.5919769631402254, 0.3862619804716139, 0.5945054840371988, 0.6186543458471131, 0.026214876508743212], [2.0806909131402254, 0.20854500047161392, 1.038939365962801, 1.263949314152887, 1.002613446508743], [0.4275848431402253, 1.605854209528386, 1.320479535962801, 0.20318665415288695, 0.7108267765087432], [1.8541876568597746, 0.32990801952838605, 1.365231725962801, 1.1130811458471133, 1.0890072034912568], [0.41574

### PCA Autoencoder

In [37]:
# Send instance dictionary to receive response from ML-Engine for online prediction
from googleapiclient import discovery
from oauth2client.client import GoogleCredentials
import json

# Build a client for the "ml" v1 API using the application default
# credentials.
credentials = GoogleCredentials.get_application_default()
api = discovery.build("ml", "v1", credentials=credentials)

# Request body for online prediction: the instance dictionaries built earlier.
request_data = dict(instances=instances)

# Resource name of version v1 of the deployed PCA model.
parent = "projects/%s/models/%s/versions/%s" % (
    PROJECT,
    "anomaly_detection_pca_labeled",
    "v1",
)
response = api.projects().predict(name=parent, body=request_data).execute()
print("response = {}".format(response))

response = {'predictions': [{'time_anom_flags': 0, 'X_time_abs_recon_err': [[0.1728792878886382, 0.38050960188316113, 0.4029056164752808, 0.014843864134493515, 0.37395442642699095], [0.4715622077460193, 1.347448091416575, 0.785070509241718, 0.8864118912527625, 0.5991388918543972], [0.07542798900316616, 0.9324050785330078, 1.4750373173605118, 0.842387971324521, 0.09149046337009026], [0.03565646553488011, 0.28264294850716104, 0.20999200847594207, 0.548774838816692, 0.03581776391927072], [0.04857788467463031, 1.6307784060092017, 0.9157590530057195, 1.7208385907098551, 0.43585294321592793], [0.08302349375710694, 0.594693984257074, 0.8856290524187558, 0.4176409110762759, 0.12889025356158673], [0.09530165447389516, 0.45055417216711735, 0.665313418520593, 1.623702553827909, 0.10392648557085626], [0.2739479466248842, 1.3930425583357011, 1.0185947463993117, 0.4249053603401392, 0.45873233970642757], [0.13365573982912693, 0.17217907084086995, 1.1986905131399577, 1.1517720570728396, 0.121269851988