# Run model module on GCP with unlabeled threshold tuning

In [None]:
import os
# User-specific GCP settings: edit these three values before running.
PROJECT = "PROJECT"  # REPLACE WITH YOUR PROJECT ID
BUCKET = "BUCKET"  # REPLACE WITH A BUCKET NAME
REGION = "us-central1"  # REPLACE WITH YOUR REGION e.g. us-central1

# Export the settings as environment variables so the %%bash cells
# can reference them as $PROJECT, $BUCKET, $REGION and $TFVERSION.
os.environ.update({
    "PROJECT": PROJECT,
    "BUCKET": BUCKET,
    "REGION": REGION,
    "TFVERSION": "1.13",
})

Copy data over to bucket

In [None]:
%%bash
# Recursively copy everything under the local data/ directory into the bucket.
gsutil -m cp -r data/* gs://${BUCKET}/anomaly_detection/data

In [None]:
# Hyperparameters shared by the jobs below, exported as environment
# variables (always strings) so the %%bash cells can read them.
os.environ.update({
    # Sequence shape
    "SEQ_LEN": "30",
    "NUM_FEAT": "5",

    # Training schedule
    "START_DELAY_SECS": "60",
    "THROTTLE_SECS": "120",

    # Threshold tuning: this notebook tunes without labels
    "LABELED_TUNE_THRESH": "False",

    # Dense autoencoder
    "ENC_DNN_HIDDEN_UNITS": "64,32,16",
    "LATENT_VECTOR_SIZE": "8",
    "DEC_DNN_HIDDEN_UNITS": "16,32,64",
    "TIME_LOSS_WEIGHT": "1.0",
    "FEAT_LOSS_WEIGHT": "1.0",

    # LSTM encoder-decoder autoencoder
    "REVERSE_LABELS_SEQUENCE": "True",
    "ENC_LSTM_HIDDEN_UNITS": "64,32,16",
    "DEC_LSTM_HIDDEN_UNITS": "16,32,64",
    "LSTM_DROPOUT_OUTPUT_KEEP_PROBS": "0.9,0.95,1.0",
    "DNN_HIDDEN_UNITS": "1024,256,64",
})

## Train reconstruction variables

In [None]:
# Settings for the reconstruction training phase, exported as environment
# variables (strings) for the %%bash job cells in this section.
os.environ.update({
    "TRAIN_FILE_PATTERN":
        "gs://{}/anomaly_detection/data/train_norm_seq.csv".format(BUCKET),
    "EVAL_FILE_PATTERN":
        "gs://{}/anomaly_detection/data/val_norm_1_seq.csv".format(BUCKET),
    "PREVIOUS_TRAIN_STEPS": "0",
    "RECONSTRUCTION_EPOCHS": "1.0",
    "TRAIN_EXAMPLES": "64000",
    "LEARNING_RATE": "0.1",
    "TRAINING_MODE": "reconstruction",
})

### Dense Autoencoder

In [None]:
%%bash
# Submit an ML Engine job that trains the dense autoencoder's
# reconstruction variables.
OUTDIR=gs://${BUCKET}/anomaly_detection/trained_model/dense_unlabeled
JOBNAME=job_anomaly_detection_reconstruction_dense_unlabeled_$(date -u +%y%m%d_%H%M%S)
echo ${OUTDIR} ${REGION} ${JOBNAME}

# Remove any previous output so this run starts from an empty directory.
gsutil -m rm -rf ${OUTDIR}

# Flags interpreted by gcloud itself.
SUBMIT_FLAGS=(
  --region=${REGION}
  --module-name=trainer.task
  --package-path=${PWD}/anomaly_detection_module/trainer
  --job-dir=${OUTDIR}
  --staging-bucket=gs://${BUCKET}
  --scale-tier=STANDARD_1
  --runtime-version=1.13
)

# Flags passed through to trainer.task (everything after the bare "--").
TRAINER_FLAGS=(
  # Data and output locations
  --train_file_pattern=${TRAIN_FILE_PATTERN}
  --eval_file_pattern=${EVAL_FILE_PATTERN}
  --output_dir=${OUTDIR}
  --job-dir=./tmp
  # Sequence shape and batching
  --seq_len=${SEQ_LEN}
  --num_feat=${NUM_FEAT}
  --train_batch_size=32
  --eval_batch_size=32
  # Training schedule
  --previous_train_steps=${PREVIOUS_TRAIN_STEPS}
  --reconstruction_epochs=${RECONSTRUCTION_EPOCHS}
  --train_examples=${TRAIN_EXAMPLES}
  --learning_rate=${LEARNING_RATE}
  --start_delay_secs=${START_DELAY_SECS}
  --throttle_secs=${THROTTLE_SECS}
  # Dense autoencoder architecture
  --model_type="dense_autoencoder"
  --enc_dnn_hidden_units=${ENC_DNN_HIDDEN_UNITS}
  --latent_vector_size=${LATENT_VECTOR_SIZE}
  --dec_dnn_hidden_units=${DEC_DNN_HIDDEN_UNITS}
  --time_loss_weight=${TIME_LOSS_WEIGHT}
  --feat_loss_weight=${FEAT_LOSS_WEIGHT}
  # Phase selection
  --training_mode=${TRAINING_MODE}
  --labeled_tune_thresh=${LABELED_TUNE_THRESH}
)

gcloud ml-engine jobs submit training ${JOBNAME} \
  "${SUBMIT_FLAGS[@]}" \
  -- \
  "${TRAINER_FLAGS[@]}"

### LSTM Autoencoder

In [None]:
%%bash
# Submit an ML Engine job that trains the LSTM encoder-decoder
# autoencoder's reconstruction variables.
OUTDIR=gs://${BUCKET}/anomaly_detection/trained_model/lstm_unlabeled
JOBNAME=job_anomaly_detection_reconstruction_lstm_unlabeled_$(date -u +%y%m%d_%H%M%S)
echo ${OUTDIR} ${REGION} ${JOBNAME}

# Remove any previous output so this run starts from an empty directory.
gsutil -m rm -rf ${OUTDIR}

# Flags interpreted by gcloud itself.
SUBMIT_FLAGS=(
  --region=${REGION}
  --module-name=trainer.task
  --package-path=${PWD}/anomaly_detection_module/trainer
  --job-dir=${OUTDIR}
  --staging-bucket=gs://${BUCKET}
  --scale-tier=STANDARD_1
  --runtime-version=1.13
)

# Flags passed through to trainer.task (everything after the bare "--").
TRAINER_FLAGS=(
  # Data and output locations
  --train_file_pattern=${TRAIN_FILE_PATTERN}
  --eval_file_pattern=${EVAL_FILE_PATTERN}
  --output_dir=${OUTDIR}
  --job-dir=./tmp
  # Sequence shape and batching
  --seq_len=${SEQ_LEN}
  --num_feat=${NUM_FEAT}
  --train_batch_size=32
  --eval_batch_size=32
  # Training schedule
  --previous_train_steps=${PREVIOUS_TRAIN_STEPS}
  --reconstruction_epochs=${RECONSTRUCTION_EPOCHS}
  --train_examples=${TRAIN_EXAMPLES}
  --learning_rate=${LEARNING_RATE}
  --start_delay_secs=${START_DELAY_SECS}
  --throttle_secs=${THROTTLE_SECS}
  # LSTM autoencoder architecture
  --model_type="lstm_enc_dec_autoencoder"
  --reverse_labels_sequence=${REVERSE_LABELS_SEQUENCE}
  --enc_lstm_hidden_units=${ENC_LSTM_HIDDEN_UNITS}
  --dec_lstm_hidden_units=${DEC_LSTM_HIDDEN_UNITS}
  --lstm_dropout_output_keep_probs=${LSTM_DROPOUT_OUTPUT_KEEP_PROBS}
  --dnn_hidden_units=${DNN_HIDDEN_UNITS}
  # Phase selection
  --training_mode=${TRAINING_MODE}
  --labeled_tune_thresh=${LABELED_TUNE_THRESH}
)

gcloud ml-engine jobs submit training ${JOBNAME} \
  "${SUBMIT_FLAGS[@]}" \
  -- \
  "${TRAINER_FLAGS[@]}"

### PCA Autoencoder

In [None]:
%%bash
# Submit an ML Engine job that trains the PCA model's reconstruction
# variables.
OUTDIR=gs://${BUCKET}/anomaly_detection/trained_model/pca_unlabeled
JOBNAME=job_anomaly_detection_reconstruction_pca_unlabeled_$(date -u +%y%m%d_%H%M%S)
echo ${OUTDIR} ${REGION} ${JOBNAME}

# Remove any previous output so this run starts from an empty directory.
gsutil -m rm -rf ${OUTDIR}

# Flags interpreted by gcloud itself.
SUBMIT_FLAGS=(
  --region=${REGION}
  --module-name=trainer.task
  --package-path=${PWD}/anomaly_detection_module/trainer
  --job-dir=${OUTDIR}
  --staging-bucket=gs://${BUCKET}
  --scale-tier=STANDARD_1
  --runtime-version=1.13
)

# Flags passed through to trainer.task (everything after the bare "--").
TRAINER_FLAGS=(
  # Data and output locations
  --train_file_pattern=${TRAIN_FILE_PATTERN}
  --eval_file_pattern=${EVAL_FILE_PATTERN}
  --output_dir=${OUTDIR}
  --job-dir=./tmp
  # Sequence shape and batching
  --seq_len=${SEQ_LEN}
  --num_feat=${NUM_FEAT}
  --train_batch_size=32
  --eval_batch_size=32
  # Training schedule (epochs and eval examples are fixed in this cell)
  --previous_train_steps=${PREVIOUS_TRAIN_STEPS}
  --reconstruction_epochs=1.0
  --train_examples=${TRAIN_EXAMPLES}
  --eval_examples=6400
  --start_delay_secs=${START_DELAY_SECS}
  --throttle_secs=${THROTTLE_SECS}
  # Model and phase selection
  --model_type="pca"
  --training_mode=${TRAINING_MODE}
  --labeled_tune_thresh=${LABELED_TUNE_THRESH}
)

gcloud ml-engine jobs submit training ${JOBNAME} \
  "${SUBMIT_FLAGS[@]}" \
  -- \
  "${TRAINER_FLAGS[@]}"

## Hyperparameter tuning of reconstruction hyperparameters

### Dense Autoencoder

In [None]:
%%writefile hyperparam_reconstruction_dense.yaml
# Search space for tuning the dense autoencoder's reconstruction
# hyperparameters. Trials are scored on the "rmse" metric, which is minimized.
# NOTE(review): the list-valued candidates here are space-separated, while the
# training cells pass comma-separated lists ("64,32,16"); confirm that
# trainer.task accepts this form.
trainingInput:
  scaleTier: STANDARD_1
  hyperparameters:
    goal: MINIMIZE
    hyperparameterMetricTag: rmse
    maxTrials: 30
    maxParallelTrials: 1  # trials run one at a time
    params:
      - parameterName: enc_dnn_hidden_units
        type: CATEGORICAL
        categoricalValues:
          - "64 32 16"
          - "256 128 16"
          - "64 64 64"
      - parameterName: latent_vector_size
        type: INTEGER
        scaleType: UNIT_LINEAR_SCALE
        minValue: 8
        maxValue: 16
      - parameterName: dec_dnn_hidden_units
        type: CATEGORICAL
        categoricalValues:
          - "16 32 64"
          - "16 128 256"
          - "64 64 64"
      - parameterName: train_batch_size
        type: INTEGER
        scaleType: UNIT_LOG_SCALE
        minValue: 8
        maxValue: 512
      - parameterName: learning_rate
        type: DOUBLE
        scaleType: UNIT_LINEAR_SCALE
        minValue: 0.001
        maxValue: 0.1

In [None]:
%%bash
# Submit a hyperparameter tuning job for the dense autoencoder's
# reconstruction phase, using the search space written to
# hyperparam_reconstruction_dense.yaml.
#
# Trainer flags are spelled out literally rather than read from the
# environment, so this cell does not depend on the os.environ cells having run
# (only $BUCKET and $REGION are needed).
#
# --model_type is passed explicitly so the tuned parameters apply to the dense
# autoencoder rather than to whatever trainer.task defaults to, and
# --labeled_tune_thresh is False to match every other job in this unlabeled
# notebook.
OUTDIR=gs://$BUCKET/anomaly_detection/hyperparam_reconstruction_dense_unlabeled
JOBNAME=job_anomaly_detection_hyperparam_reconstruction_dense_unlabeled_$(date -u +%y%m%d_%H%M%S)
echo $OUTDIR $REGION $JOBNAME
# Remove any previous tuning output before starting.
gsutil -m rm -rf $OUTDIR
gcloud ml-engine jobs submit training $JOBNAME \
  --region=$REGION \
  --module-name=trainer.task \
  --package-path=$PWD/anomaly_detection_module/trainer \
  --job-dir=$OUTDIR \
  --staging-bucket=gs://$BUCKET \
  --scale-tier=STANDARD_1 \
  --config=hyperparam_reconstruction_dense.yaml \
  --runtime-version=1.13 \
  -- \
  --train_file_pattern=gs://$BUCKET/anomaly_detection/data/train_norm_seq.csv \
  --eval_file_pattern=gs://$BUCKET/anomaly_detection/data/val_norm_1_seq.csv \
  --output_dir=$OUTDIR \
  --job-dir=$OUTDIR \
  --seq_len=30 \
  --num_feat=5 \
  --train_batch_size=32 \
  --eval_batch_size=32 \
  --previous_train_steps=0 \
  --reconstruction_epochs=1.0 \
  --train_examples=64000 \
  --start_delay_secs=60 \
  --throttle_secs=120 \
  --model_type="dense_autoencoder" \
  --training_mode="reconstruction" \
  --labeled_tune_thresh=False \
  --num_time_anom_thresh=300 \
  --num_feat_anom_thresh=300

### LSTM Autoencoder

In [None]:
%%writefile hyperparam_reconstruction_lstm.yaml
# Search space for tuning the LSTM autoencoder's reconstruction
# hyperparameters. Trials are scored on the "rmse" metric, which is minimized.
# NOTE(review): the list-valued candidates here are space-separated, while the
# training cells pass comma-separated lists ("64,32,16"); confirm that
# trainer.task accepts this form.
trainingInput:
  scaleTier: STANDARD_1
  hyperparameters:
    goal: MINIMIZE
    hyperparameterMetricTag: rmse
    maxTrials: 30
    maxParallelTrials: 1  # trials run one at a time
    params:
      - parameterName: enc_lstm_hidden_units
        type: CATEGORICAL
        categoricalValues:
          - "64 32 16"
          - "256 128 16"
          - "64 64 64"
      - parameterName: dec_lstm_hidden_units
        type: CATEGORICAL
        categoricalValues:
          - "16 32 64"
          - "16 128 256"
          - "64 64 64"
      - parameterName: lstm_dropout_output_keep_probs
        type: CATEGORICAL
        categoricalValues:
          - "0.9 1.0 1.0"
          - "0.95 0.95 1.0"
          - "0.95 0.95 0.95"
      - parameterName: dnn_hidden_units
        type: CATEGORICAL
        categoricalValues:
          - "256 128 64"
          - "256 128 16"
          - "64 64 64"
      - parameterName: train_batch_size
        type: INTEGER
        scaleType: UNIT_LOG_SCALE
        minValue: 8
        maxValue: 512
      - parameterName: learning_rate
        type: DOUBLE
        scaleType: UNIT_LINEAR_SCALE
        minValue: 0.001
        maxValue: 0.1

In [None]:
%%bash
# Submit a hyperparameter tuning job for the LSTM autoencoder's
# reconstruction phase, using the search space written to
# hyperparam_reconstruction_lstm.yaml.
#
# Trainer flags are spelled out literally rather than read from the
# environment, so this cell does not depend on the os.environ cells having run
# (only $BUCKET and $REGION are needed).
#
# --model_type is passed explicitly: without it the trainer would fall back to
# its default model type, and the LSTM parameters being tuned would only take
# effect if that default happens to be the LSTM model. --labeled_tune_thresh
# is False to match every other job in this unlabeled notebook.
OUTDIR=gs://$BUCKET/anomaly_detection/hyperparam_reconstruction_lstm_unlabeled
JOBNAME=job_anomaly_detection_hyperparam_reconstruction_lstm_unlabeled_$(date -u +%y%m%d_%H%M%S)
echo $OUTDIR $REGION $JOBNAME
# Remove any previous tuning output before starting.
gsutil -m rm -rf $OUTDIR
gcloud ml-engine jobs submit training $JOBNAME \
  --region=$REGION \
  --module-name=trainer.task \
  --package-path=$PWD/anomaly_detection_module/trainer \
  --job-dir=$OUTDIR \
  --staging-bucket=gs://$BUCKET \
  --scale-tier=STANDARD_1 \
  --config=hyperparam_reconstruction_lstm.yaml \
  --runtime-version=1.13 \
  -- \
  --train_file_pattern=gs://$BUCKET/anomaly_detection/data/train_norm_seq.csv \
  --eval_file_pattern=gs://$BUCKET/anomaly_detection/data/val_norm_1_seq.csv \
  --output_dir=$OUTDIR \
  --job-dir=$OUTDIR \
  --seq_len=30 \
  --num_feat=5 \
  --train_batch_size=32 \
  --eval_batch_size=32 \
  --previous_train_steps=0 \
  --reconstruction_epochs=1.0 \
  --train_examples=64000 \
  --start_delay_secs=60 \
  --throttle_secs=120 \
  --model_type="lstm_enc_dec_autoencoder" \
  --training_mode="reconstruction" \
  --labeled_tune_thresh=False \
  --num_time_anom_thresh=300 \
  --num_feat_anom_thresh=300

### PCA Autoencoder

In [None]:
%%writefile hyperparam_reconstruction_pca.yaml
# Search space for tuning the PCA model's reconstruction hyperparameters.
# Trials are scored on the "rmse" metric, which is minimized.
# NOTE(review): the notebook uses SEQ_LEN=30 and NUM_FEAT=5; confirm that a
# maxValue of 10 principal components is valid for both parameters below.
trainingInput:
  scaleTier: STANDARD_1
  hyperparameters:
    goal: MINIMIZE
    hyperparameterMetricTag: rmse
    maxTrials: 30
    maxParallelTrials: 1  # trials run one at a time
    params:
      - parameterName: k_principal_components_time
        type: INTEGER
        scaleType: UNIT_LINEAR_SCALE
        minValue: 2
        maxValue: 10
      - parameterName: k_principal_components_feat
        type: INTEGER
        scaleType: UNIT_LINEAR_SCALE
        minValue: 2
        maxValue: 10

In [None]:
%%bash
# Submit a hyperparameter tuning job for the PCA model's reconstruction phase,
# using the search space written to hyperparam_reconstruction_pca.yaml.
#
# Trainer flags are spelled out literally rather than read from the
# environment, so this cell does not depend on the os.environ cells having run
# (only $BUCKET and $REGION are needed).
#
# --model_type is passed explicitly: without it the trainer would fall back to
# its default model type, and the PCA parameters being tuned would only take
# effect if that default happens to be the PCA model. --labeled_tune_thresh is
# False to match every other job in this unlabeled notebook.
OUTDIR=gs://$BUCKET/anomaly_detection/hyperparam_reconstruction_pca_unlabeled
JOBNAME=job_anomaly_detection_hyperparam_reconstruction_pca_unlabeled_$(date -u +%y%m%d_%H%M%S)
echo $OUTDIR $REGION $JOBNAME
# Remove any previous tuning output before starting.
gsutil -m rm -rf $OUTDIR
gcloud ml-engine jobs submit training $JOBNAME \
  --region=$REGION \
  --module-name=trainer.task \
  --package-path=$PWD/anomaly_detection_module/trainer \
  --job-dir=$OUTDIR \
  --staging-bucket=gs://$BUCKET \
  --scale-tier=STANDARD_1 \
  --config=hyperparam_reconstruction_pca.yaml \
  --runtime-version=1.13 \
  -- \
  --train_file_pattern=gs://$BUCKET/anomaly_detection/data/train_norm_seq.csv \
  --eval_file_pattern=gs://$BUCKET/anomaly_detection/data/val_norm_1_seq.csv \
  --output_dir=$OUTDIR \
  --job-dir=$OUTDIR \
  --seq_len=30 \
  --num_feat=5 \
  --train_batch_size=32 \
  --eval_batch_size=32 \
  --previous_train_steps=0 \
  --reconstruction_epochs=1.0 \
  --train_examples=64000 \
  --eval_examples=6400 \
  --start_delay_secs=60 \
  --throttle_secs=120 \
  --model_type="pca" \
  --training_mode="reconstruction" \
  --labeled_tune_thresh=False \
  --num_time_anom_thresh=300 \
  --num_feat_anom_thresh=300

## Train error distribution variables

In [None]:
# Settings for the error-distribution-statistics phase, exported as
# environment variables (strings) for the %%bash job cells in this section.
# The same validation file is used for both the train and eval patterns.
os.environ.update({
    "TRAIN_FILE_PATTERN":
        "gs://{}/anomaly_detection/data/val_norm_1_seq.csv".format(BUCKET),
    "EVAL_FILE_PATTERN":
        "gs://{}/anomaly_detection/data/val_norm_1_seq.csv".format(BUCKET),
    "PREVIOUS_TRAIN_STEPS": "2000",
    "TRAIN_EXAMPLES": "6400",
    "TRAINING_MODE": "calculate_error_distribution_statistics",
    "EPS": "1e-12",
})

### Dense Autoencoder

In [None]:
%%bash
# Submit an ML Engine job that calculates error distribution statistics for
# the dense autoencoder.
#
# $OUTDIR is deliberately not cleared: it is the directory the reconstruction
# job wrote to, and this phase presumably continues from that output.
#
# --output_dir must be the GCS path. $PWD is expanded on the machine that
# submits the job, so a $PWD-based path would point the remote worker at a
# local directory it does not share with the notebook or the bucket.
OUTDIR=gs://$BUCKET/anomaly_detection/trained_model/dense_unlabeled
JOBNAME=job_anomaly_detection_calculate_error_distribution_statistics_dense_unlabeled_$(date -u +%y%m%d_%H%M%S)
echo $OUTDIR $REGION $JOBNAME
gcloud ml-engine jobs submit training $JOBNAME \
  --region=$REGION \
  --module-name=trainer.task \
  --package-path=$PWD/anomaly_detection_module/trainer \
  --job-dir=$OUTDIR \
  --staging-bucket=gs://$BUCKET \
  --scale-tier=STANDARD_1 \
  --runtime-version=1.13 \
  -- \
  --train_file_pattern=$TRAIN_FILE_PATTERN \
  --eval_file_pattern=$EVAL_FILE_PATTERN \
  --output_dir=$OUTDIR \
  --job-dir=./tmp \
  --seq_len=$SEQ_LEN \
  --num_feat=$NUM_FEAT \
  --train_batch_size=32 \
  --eval_batch_size=32 \
  --previous_train_steps=$PREVIOUS_TRAIN_STEPS \
  --train_examples=$TRAIN_EXAMPLES \
  --start_delay_secs=$START_DELAY_SECS \
  --throttle_secs=$THROTTLE_SECS \
  --model_type="dense_autoencoder" \
  --enc_dnn_hidden_units=$ENC_DNN_HIDDEN_UNITS \
  --latent_vector_size=$LATENT_VECTOR_SIZE \
  --dec_dnn_hidden_units=$DEC_DNN_HIDDEN_UNITS \
  --time_loss_weight=$TIME_LOSS_WEIGHT \
  --feat_loss_weight=$FEAT_LOSS_WEIGHT \
  --training_mode=$TRAINING_MODE \
  --labeled_tune_thresh=$LABELED_TUNE_THRESH \
  --eps=$EPS

### LSTM Autoencoder

In [None]:
%%bash
# Submit an ML Engine job that calculates error distribution statistics for
# the LSTM encoder-decoder autoencoder.
#
# $OUTDIR is deliberately not cleared: it is the directory the reconstruction
# job wrote to, and this phase presumably continues from that output.
#
# --output_dir must be the GCS path. $PWD is expanded on the machine that
# submits the job, so a $PWD-based path would point the remote worker at a
# local directory it does not share with the notebook or the bucket.
OUTDIR=gs://$BUCKET/anomaly_detection/trained_model/lstm_unlabeled
JOBNAME=job_anomaly_detection_calculate_error_distribution_statistics_lstm_unlabeled_$(date -u +%y%m%d_%H%M%S)
echo $OUTDIR $REGION $JOBNAME
gcloud ml-engine jobs submit training $JOBNAME \
  --region=$REGION \
  --module-name=trainer.task \
  --package-path=$PWD/anomaly_detection_module/trainer \
  --job-dir=$OUTDIR \
  --staging-bucket=gs://$BUCKET \
  --scale-tier=STANDARD_1 \
  --runtime-version=1.13 \
  -- \
  --train_file_pattern=$TRAIN_FILE_PATTERN \
  --eval_file_pattern=$EVAL_FILE_PATTERN \
  --output_dir=$OUTDIR \
  --job-dir=./tmp \
  --seq_len=$SEQ_LEN \
  --num_feat=$NUM_FEAT \
  --train_batch_size=32 \
  --eval_batch_size=32 \
  --previous_train_steps=$PREVIOUS_TRAIN_STEPS \
  --train_examples=$TRAIN_EXAMPLES \
  --start_delay_secs=$START_DELAY_SECS \
  --throttle_secs=$THROTTLE_SECS \
  --model_type="lstm_enc_dec_autoencoder" \
  --reverse_labels_sequence=$REVERSE_LABELS_SEQUENCE \
  --enc_lstm_hidden_units=$ENC_LSTM_HIDDEN_UNITS \
  --dec_lstm_hidden_units=$DEC_LSTM_HIDDEN_UNITS \
  --lstm_dropout_output_keep_probs=$LSTM_DROPOUT_OUTPUT_KEEP_PROBS \
  --dnn_hidden_units=$DNN_HIDDEN_UNITS \
  --training_mode=$TRAINING_MODE \
  --labeled_tune_thresh=$LABELED_TUNE_THRESH \
  --eps=$EPS

### PCA Autoencoder

In [None]:
%%bash
# Submit an ML Engine job that calculates error distribution statistics for
# the PCA model.
#
# $OUTDIR is deliberately not cleared: it is the directory the reconstruction
# job wrote to, and this phase presumably continues from that output.
#
# --output_dir must be the GCS path. $PWD is expanded on the machine that
# submits the job, so a $PWD-based path would point the remote worker at a
# local directory it does not share with the notebook or the bucket.
#
# --previous_train_steps is fixed at 2200 here instead of using
# $PREVIOUS_TRAIN_STEPS, as in the original cell.
OUTDIR=gs://$BUCKET/anomaly_detection/trained_model/pca_unlabeled
JOBNAME=job_anomaly_detection_calculate_error_distribution_statistics_pca_unlabeled_$(date -u +%y%m%d_%H%M%S)
echo $OUTDIR $REGION $JOBNAME
gcloud ml-engine jobs submit training $JOBNAME \
  --region=$REGION \
  --module-name=trainer.task \
  --package-path=$PWD/anomaly_detection_module/trainer \
  --job-dir=$OUTDIR \
  --staging-bucket=gs://$BUCKET \
  --scale-tier=STANDARD_1 \
  --runtime-version=1.13 \
  -- \
  --train_file_pattern=$TRAIN_FILE_PATTERN \
  --eval_file_pattern=$EVAL_FILE_PATTERN \
  --output_dir=$OUTDIR \
  --job-dir=./tmp \
  --seq_len=$SEQ_LEN \
  --num_feat=$NUM_FEAT \
  --train_batch_size=32 \
  --eval_batch_size=32 \
  --previous_train_steps=2200 \
  --train_examples=$TRAIN_EXAMPLES \
  --start_delay_secs=$START_DELAY_SECS \
  --throttle_secs=$THROTTLE_SECS \
  --model_type="pca" \
  --training_mode=$TRAINING_MODE \
  --labeled_tune_thresh=$LABELED_TUNE_THRESH \
  --eps=$EPS

## Tune anomaly thresholds

In [None]:
# Settings for the anomaly-threshold tuning phase, exported as environment
# variables (strings) for the %%bash job cells in this section.
os.environ.update({
    "PREVIOUS_TRAIN_STEPS": "2200",
    "TRAIN_EXAMPLES": "12800",
    "TRAINING_MODE": "tune_anomaly_thresholds",
})

## Unlabeled

In [None]:
# Settings specific to unlabeled threshold tuning, exported as environment
# variables (strings). The same unlabeled mixed validation file is used for
# both the train and eval patterns.
os.environ.update({
    "TRAIN_FILE_PATTERN":
        "gs://{}/anomaly_detection/data/unlabeled_val_mixed_seq.csv".format(BUCKET),
    "EVAL_FILE_PATTERN":
        "gs://{}/anomaly_detection/data/unlabeled_val_mixed_seq.csv".format(BUCKET),
    "TIME_THRESH_SCL": "2.0",
    "FEAT_THRESH_SCL": "2.0",
})

### Dense Autoencoder

In [None]:
%%bash
# Submit an ML Engine job that tunes the anomaly thresholds for the dense
# autoencoder on the unlabeled mixed validation data.
#
# $OUTDIR is deliberately not cleared: it is the directory the earlier phases
# wrote to, and the deploy cell later reads the export from under it.
#
# --output_dir must be the GCS path. $PWD is expanded on the machine that
# submits the job, so a $PWD-based path would point the remote worker at a
# local directory and nothing would land in the bucket for deployment.
OUTDIR=gs://$BUCKET/anomaly_detection/trained_model/dense_unlabeled
JOBNAME=job_anomaly_detection_tune_anomaly_thresholds_dense_unlabeled_$(date -u +%y%m%d_%H%M%S)
echo $OUTDIR $REGION $JOBNAME
gcloud ml-engine jobs submit training $JOBNAME \
  --region=$REGION \
  --module-name=trainer.task \
  --package-path=$PWD/anomaly_detection_module/trainer \
  --job-dir=$OUTDIR \
  --staging-bucket=gs://$BUCKET \
  --scale-tier=STANDARD_1 \
  --runtime-version=1.13 \
  -- \
  --train_file_pattern=$TRAIN_FILE_PATTERN \
  --eval_file_pattern=$EVAL_FILE_PATTERN \
  --output_dir=$OUTDIR \
  --job-dir=./tmp \
  --seq_len=$SEQ_LEN \
  --num_feat=$NUM_FEAT \
  --train_batch_size=32 \
  --eval_batch_size=32 \
  --previous_train_steps=$PREVIOUS_TRAIN_STEPS \
  --train_examples=$TRAIN_EXAMPLES \
  --start_delay_secs=$START_DELAY_SECS \
  --throttle_secs=$THROTTLE_SECS \
  --model_type="dense_autoencoder" \
  --enc_dnn_hidden_units=$ENC_DNN_HIDDEN_UNITS \
  --latent_vector_size=$LATENT_VECTOR_SIZE \
  --dec_dnn_hidden_units=$DEC_DNN_HIDDEN_UNITS \
  --time_loss_weight=$TIME_LOSS_WEIGHT \
  --feat_loss_weight=$FEAT_LOSS_WEIGHT \
  --training_mode=$TRAINING_MODE \
  --labeled_tune_thresh=$LABELED_TUNE_THRESH \
  --time_thresh_scl=$TIME_THRESH_SCL \
  --feat_thresh_scl=$FEAT_THRESH_SCL

### LSTM Autoencoder

In [None]:
%%bash
# Submit an ML Engine job that tunes the anomaly thresholds for the LSTM
# encoder-decoder autoencoder on the unlabeled mixed validation data.
#
# $OUTDIR is deliberately not cleared: it is the directory the earlier phases
# wrote to, and the deploy cell later reads the export from under it.
#
# --output_dir must be the GCS path. $PWD is expanded on the machine that
# submits the job, so a $PWD-based path would point the remote worker at a
# local directory and nothing would land in the bucket for deployment.
OUTDIR=gs://$BUCKET/anomaly_detection/trained_model/lstm_unlabeled
JOBNAME=job_anomaly_detection_tune_anomaly_thresholds_lstm_unlabeled_$(date -u +%y%m%d_%H%M%S)
echo $OUTDIR $REGION $JOBNAME
gcloud ml-engine jobs submit training $JOBNAME \
  --region=$REGION \
  --module-name=trainer.task \
  --package-path=$PWD/anomaly_detection_module/trainer \
  --job-dir=$OUTDIR \
  --staging-bucket=gs://$BUCKET \
  --scale-tier=STANDARD_1 \
  --runtime-version=1.13 \
  -- \
  --train_file_pattern=$TRAIN_FILE_PATTERN \
  --eval_file_pattern=$EVAL_FILE_PATTERN \
  --output_dir=$OUTDIR \
  --job-dir=./tmp \
  --seq_len=$SEQ_LEN \
  --num_feat=$NUM_FEAT \
  --train_batch_size=32 \
  --eval_batch_size=32 \
  --previous_train_steps=$PREVIOUS_TRAIN_STEPS \
  --train_examples=$TRAIN_EXAMPLES \
  --start_delay_secs=$START_DELAY_SECS \
  --throttle_secs=$THROTTLE_SECS \
  --model_type="lstm_enc_dec_autoencoder" \
  --reverse_labels_sequence=$REVERSE_LABELS_SEQUENCE \
  --enc_lstm_hidden_units=$ENC_LSTM_HIDDEN_UNITS \
  --dec_lstm_hidden_units=$DEC_LSTM_HIDDEN_UNITS \
  --lstm_dropout_output_keep_probs=$LSTM_DROPOUT_OUTPUT_KEEP_PROBS \
  --dnn_hidden_units=$DNN_HIDDEN_UNITS \
  --training_mode=$TRAINING_MODE \
  --labeled_tune_thresh=$LABELED_TUNE_THRESH \
  --time_thresh_scl=$TIME_THRESH_SCL \
  --feat_thresh_scl=$FEAT_THRESH_SCL

### PCA Autoencoder

In [None]:
%%bash
# Submit an ML Engine job that tunes the anomaly thresholds for the PCA model
# on the unlabeled mixed validation data.
#
# $OUTDIR is deliberately not cleared: it is the directory the earlier phases
# wrote to, and the deploy cell later reads the export from under it.
#
# --output_dir must be the GCS path. $PWD is expanded on the machine that
# submits the job, so a $PWD-based path would point the remote worker at a
# local directory and nothing would land in the bucket for deployment.
#
# --previous_train_steps is fixed at 2400 here instead of using
# $PREVIOUS_TRAIN_STEPS, as in the original cell.
OUTDIR=gs://$BUCKET/anomaly_detection/trained_model/pca_unlabeled
JOBNAME=job_anomaly_detection_tune_anomaly_thresholds_pca_unlabeled_$(date -u +%y%m%d_%H%M%S)
echo $OUTDIR $REGION $JOBNAME
gcloud ml-engine jobs submit training $JOBNAME \
  --region=$REGION \
  --module-name=trainer.task \
  --package-path=$PWD/anomaly_detection_module/trainer \
  --job-dir=$OUTDIR \
  --staging-bucket=gs://$BUCKET \
  --scale-tier=STANDARD_1 \
  --runtime-version=1.13 \
  -- \
  --train_file_pattern=$TRAIN_FILE_PATTERN \
  --eval_file_pattern=$EVAL_FILE_PATTERN \
  --output_dir=$OUTDIR \
  --job-dir=./tmp \
  --seq_len=$SEQ_LEN \
  --num_feat=$NUM_FEAT \
  --train_batch_size=32 \
  --eval_batch_size=32 \
  --previous_train_steps=2400 \
  --train_examples=$TRAIN_EXAMPLES \
  --start_delay_secs=$START_DELAY_SECS \
  --throttle_secs=$THROTTLE_SECS \
  --model_type="pca" \
  --training_mode=$TRAINING_MODE \
  --labeled_tune_thresh=$LABELED_TUNE_THRESH \
  --time_thresh_scl=$TIME_THRESH_SCL \
  --feat_thresh_scl=$FEAT_THRESH_SCL

## Deploy

### Dense Autoencoder

In [None]:
%%bash
# Deploy the dense autoencoder export as version v1 of an ML Engine model.
MODEL_NAME="anomaly_detection_dense_unlabeled"
MODEL_VERSION="v1"
EXPORT_DIR=gs://$BUCKET/anomaly_detection/trained_model/dense_unlabeled/export/exporter/
# Use the last entry of the export listing as the model to deploy.
MODEL_LOCATION=$(gsutil ls $EXPORT_DIR | tail -1)
# Stop with a clear message when nothing has been exported; otherwise
# "versions create" would be invoked with an empty --origin.
if [ -z "$MODEL_LOCATION" ]; then
  echo "No exported model found under $EXPORT_DIR" >&2
  exit 1
fi
echo "Deleting and deploying $MODEL_NAME $MODEL_VERSION from $MODEL_LOCATION ... this will take a few minutes"
# Uncomment the next two lines to remove an existing deployment first.
#gcloud ml-engine versions delete ${MODEL_VERSION} --model ${MODEL_NAME}
#gcloud ml-engine models delete ${MODEL_NAME}
gcloud ml-engine models create $MODEL_NAME --regions $REGION
gcloud ml-engine versions create $MODEL_VERSION --model $MODEL_NAME --origin $MODEL_LOCATION --runtime-version 1.13

### LSTM Autoencoder

In [None]:
%%bash
# Deploy the LSTM autoencoder export as version v1 of an ML Engine model.
MODEL_NAME="anomaly_detection_lstm_unlabeled"
MODEL_VERSION="v1"
EXPORT_DIR=gs://$BUCKET/anomaly_detection/trained_model/lstm_unlabeled/export/exporter/
# Use the last entry of the export listing as the model to deploy.
MODEL_LOCATION=$(gsutil ls $EXPORT_DIR | tail -1)
# Stop with a clear message when nothing has been exported; otherwise
# "versions create" would be invoked with an empty --origin.
if [ -z "$MODEL_LOCATION" ]; then
  echo "No exported model found under $EXPORT_DIR" >&2
  exit 1
fi
echo "Deleting and deploying $MODEL_NAME $MODEL_VERSION from $MODEL_LOCATION ... this will take a few minutes"
# Uncomment the next two lines to remove an existing deployment first.
#gcloud ml-engine versions delete ${MODEL_VERSION} --model ${MODEL_NAME}
#gcloud ml-engine models delete ${MODEL_NAME}
gcloud ml-engine models create $MODEL_NAME --regions $REGION
gcloud ml-engine versions create $MODEL_VERSION --model $MODEL_NAME --origin $MODEL_LOCATION --runtime-version 1.13

### PCA Autoencoder

In [None]:
%%bash
# Deploy the PCA model export as version v1 of an ML Engine model.
MODEL_NAME="anomaly_detection_pca_unlabeled"
MODEL_VERSION="v1"
EXPORT_DIR=gs://$BUCKET/anomaly_detection/trained_model/pca_unlabeled/export/exporter/
# Use the last entry of the export listing as the model to deploy.
MODEL_LOCATION=$(gsutil ls $EXPORT_DIR | tail -1)
# Stop with a clear message when nothing has been exported; otherwise
# "versions create" would be invoked with an empty --origin.
if [ -z "$MODEL_LOCATION" ]; then
  echo "No exported model found under $EXPORT_DIR" >&2
  exit 1
fi
echo "Deleting and deploying $MODEL_NAME $MODEL_VERSION from $MODEL_LOCATION ... this will take a few minutes"
# Uncomment the next two lines to remove an existing deployment first.
#gcloud ml-engine versions delete ${MODEL_VERSION} --model ${MODEL_NAME}
#gcloud ml-engine models delete ${MODEL_NAME}
gcloud ml-engine models create $MODEL_NAME --regions $REGION
gcloud ml-engine versions create $MODEL_VERSION --model $MODEL_NAME --origin $MODEL_LOCATION --runtime-version 1.13

## Prediction

In [None]:
# Column names of the unlabeled CSV files: tag_0 through tag_4.
UNLABELED_CSV_COLUMNS = list(map("tag_{0}".format, range(5)))

In [None]:
import numpy as np
# Load the unlabeled mixed test sequences from the local CSV, keeping every
# field as a string, then report the resulting array shape.
unlabeled_test_mixed_sequences_array = np.loadtxt(
    "data/unlabeled_test_mixed_seq.csv", delimiter=",", dtype=str)
print(
    "unlabeled_test_mixed_sequences_array.shape = {}".format(
        unlabeled_test_mixed_sequences_array.shape
    )
)

In [None]:
# Number of leading test sequences to send for online prediction.
number_of_prediction_instances = 10
# The original cell printed a "labels" column from
# labeled_test_mixed_sequences_array, a name this notebook never defines, so it
# raised NameError. Only the unlabeled array is loaded here, so report the
# shape of the rows that will be sent instead.
print("prediction instances shape = {}".format(
    unlabeled_test_mixed_sequences_array[0:number_of_prediction_instances].shape))

### GCloud ML-Engine prediction from deployed model

In [None]:
# First `number_of_prediction_instances` rows of the test array as plain
# Python lists.
test_data_normal_string_list = unlabeled_test_mixed_sequences_array[
    :number_of_prediction_instances].tolist()

In [None]:
# Build the instances list sent to ML-Engine: one dict per test row, mapping
# each column name to the value at the same position in that row.
instances = [
    {column: example[position]
     for position, column in enumerate(UNLABELED_CSV_COLUMNS)}
    for example in unlabeled_test_mixed_sequences_array.tolist()[
        0:number_of_prediction_instances]
]

In [None]:
# Display the request instances for a visual check.
instances

### Dense Autoencoder

In [None]:
# Send instance dictionary to receive response from ML-Engine for online prediction
from googleapiclient import discovery
from oauth2client.client import GoogleCredentials
import json

# Build an ML Engine API client using the application default credentials.
credentials = GoogleCredentials.get_application_default()
api = discovery.build("ml", "v1", credentials=credentials)

# Request body: the instances list built earlier in the notebook.
request_data = {"instances": instances}

# Fully qualified name of the deployed dense autoencoder version.
parent = "projects/{}/models/{}/versions/{}".format(
    PROJECT, "anomaly_detection_dense_unlabeled", "v1")
predict_request = api.projects().predict(body=request_data, name=parent)
response = predict_request.execute()
print("response = {}".format(response))

### LSTM Autoencoder

In [None]:
# Send instance dictionary to receive response from ML-Engine for online prediction
from googleapiclient import discovery
from oauth2client.client import GoogleCredentials
import json

# Build an ML Engine API client using the application default credentials.
credentials = GoogleCredentials.get_application_default()
api = discovery.build("ml", "v1", credentials=credentials)

# Request body: the instances list built earlier in the notebook.
request_data = {"instances": instances}

# Fully qualified name of the deployed LSTM autoencoder version.
parent = "projects/{}/models/{}/versions/{}".format(
    PROJECT, "anomaly_detection_lstm_unlabeled", "v1")
predict_request = api.projects().predict(body=request_data, name=parent)
response = predict_request.execute()
print("response = {}".format(response))

### PCA Autoencoder

In [None]:
# Send instance dictionary to receive response from ML-Engine for online prediction
from googleapiclient import discovery
from oauth2client.client import GoogleCredentials
import json

# Build an ML Engine API client using the application default credentials.
credentials = GoogleCredentials.get_application_default()
api = discovery.build("ml", "v1", credentials=credentials)

# Request body: the instances list built earlier in the notebook.
request_data = {"instances": instances}

# Fully qualified name of the deployed PCA model version.
parent = "projects/{}/models/{}/versions/{}".format(
    PROJECT, "anomaly_detection_pca_unlabeled", "v1")
predict_request = api.projects().predict(body=request_data, name=parent)
response = predict_request.execute()
print("response = {}".format(response))