# Run model module on GCP

In [1]:
import os

# Project-level settings: edit these three values before running the notebook.
PROJECT = "PROJECT"  # REPLACE WITH YOUR PROJECT ID
BUCKET = "BUCKET"  # REPLACE WITH A BUCKET NAME
REGION = "us-central1"  # REPLACE WITH YOUR REGION e.g. us-central1

# Export the settings as environment variables so the %%bash cells can read
# them as $PROJECT, $BUCKET, $REGION and $TFVERSION.
os.environ.update(
    {
        "PROJECT": PROJECT,
        "BUCKET": BUCKET,
        "REGION": REGION,
        "TFVERSION": "1.13",
    }
)

Copy the local data files over to the Cloud Storage bucket.

In [None]:
%%bash
# Upload everything under the local data/ directory to the bucket's
# anomaly_detection/data prefix.
# BUCKET is exported by the notebook's setup cell; fail fast with a clear
# message if it is missing instead of copying to a malformed gs:// URL.
: "${BUCKET:?BUCKET is not set - run the setup cell first}"
gsutil -m cp -r data/* "gs://${BUCKET}/anomaly_detection/data"

## Train reconstruction variables

### Dense Autoencoder

In [None]:
%%bash
# Submit the reconstruction training job for the dense autoencoder.
# BUCKET and REGION are exported by the notebook's setup cell; abort early if
# either is missing so the recursive delete below never sees a malformed path.
: "${BUCKET:?BUCKET is not set - run the setup cell first}"
: "${REGION:?REGION is not set - run the setup cell first}"
OUTDIR="gs://${BUCKET}/anomaly_detection/trained_model/dense"
JOBNAME="job_anomaly_detection_reconstruction_dense_$(date -u +%y%m%d_%H%M%S)"
echo "$OUTDIR" "$REGION" "$JOBNAME"
# Clear any previous output under OUTDIR before submitting the job.
gsutil -m rm -rf "$OUTDIR"
# The runtime version follows TFVERSION from the setup cell (1.13 if unset).
# Everything after the bare "--" is forwarded to trainer.task.
gcloud ml-engine jobs submit training "$JOBNAME" \
  --region="$REGION" \
  --module-name=trainer.task \
  --package-path="$PWD/anomaly_detection_module/trainer" \
  --job-dir="$OUTDIR" \
  --staging-bucket="gs://${BUCKET}" \
  --scale-tier=STANDARD_1 \
  --runtime-version="${TFVERSION:-1.13}" \
  -- \
  --train_file_pattern="gs://${BUCKET}/anomaly_detection/data/train_norm_seq.csv" \
  --eval_file_pattern="gs://${BUCKET}/anomaly_detection/data/val_norm_1_seq.csv" \
  --output_dir="$OUTDIR" \
  --job-dir="$OUTDIR" \
  --seq_len=30 \
  --train_batch_size=32 \
  --eval_batch_size=32 \
  --train_steps=2000 \
  --learning_rate=0.1 \
  --start_delay_secs=60 \
  --throttle_secs=120 \
  --model_type="dense_autoencoder" \
  --enc_dnn_hidden_units="64 32 16" \
  --latent_vector_size=8 \
  --dec_dnn_hidden_units="16 32 64" \
  --time_loss_weight=1.0 \
  --feat_loss_weight=1.0 \
  --training_mode="reconstruction" \
  --labeled_tune_thresh=True \
  --num_time_anom_thresh=300 \
  --num_feat_anom_thresh=300

### LSTM Autoencoder

In [None]:
%%bash
# Submit the reconstruction training job for the LSTM encoder-decoder autoencoder.
# BUCKET and REGION are exported by the notebook's setup cell; abort early if
# either is missing so the recursive delete below never sees a malformed path.
: "${BUCKET:?BUCKET is not set - run the setup cell first}"
: "${REGION:?REGION is not set - run the setup cell first}"
OUTDIR="gs://${BUCKET}/anomaly_detection/trained_model/lstm"
JOBNAME="job_anomaly_detection_reconstruction_lstm_$(date -u +%y%m%d_%H%M%S)"
echo "$OUTDIR" "$REGION" "$JOBNAME"
# Clear any previous output under OUTDIR before submitting the job.
gsutil -m rm -rf "$OUTDIR"
# The runtime version follows TFVERSION from the setup cell (1.13 if unset).
# Everything after the bare "--" is forwarded to trainer.task.
gcloud ml-engine jobs submit training "$JOBNAME" \
  --region="$REGION" \
  --module-name=trainer.task \
  --package-path="$PWD/anomaly_detection_module/trainer" \
  --job-dir="$OUTDIR" \
  --staging-bucket="gs://${BUCKET}" \
  --scale-tier=STANDARD_1 \
  --runtime-version="${TFVERSION:-1.13}" \
  -- \
  --train_file_pattern="gs://${BUCKET}/anomaly_detection/data/train_norm_seq.csv" \
  --eval_file_pattern="gs://${BUCKET}/anomaly_detection/data/val_norm_1_seq.csv" \
  --output_dir="$OUTDIR" \
  --job-dir="$OUTDIR" \
  --seq_len=30 \
  --train_batch_size=32 \
  --eval_batch_size=32 \
  --train_steps=2000 \
  --learning_rate=0.1 \
  --start_delay_secs=60 \
  --throttle_secs=120 \
  --model_type="lstm_enc_dec_autoencoder" \
  --reverse_labels_sequence=True \
  --enc_lstm_hidden_units="64 32 16" \
  --dec_lstm_hidden_units="16 32 64" \
  --lstm_dropout_output_keep_probs="0.9 0.95 1.0" \
  --dnn_hidden_units="1024 256 64" \
  --training_mode="reconstruction" \
  --labeled_tune_thresh=True \
  --num_time_anom_thresh=300 \
  --num_feat_anom_thresh=300

### PCA Autoencoder

In [None]:
%%bash
# Submit the reconstruction training job for the PCA model.
# BUCKET and REGION are exported by the notebook's setup cell; abort early if
# either is missing so the recursive delete below never sees a malformed path.
: "${BUCKET:?BUCKET is not set - run the setup cell first}"
: "${REGION:?REGION is not set - run the setup cell first}"
OUTDIR="gs://${BUCKET}/anomaly_detection/trained_model/pca"
JOBNAME="job_anomaly_detection_reconstruction_pca_$(date -u +%y%m%d_%H%M%S)"
echo "$OUTDIR" "$REGION" "$JOBNAME"
# Clear any previous output under OUTDIR before submitting the job.
gsutil -m rm -rf "$OUTDIR"
# The runtime version follows TFVERSION from the setup cell (1.13 if unset).
# Everything after the bare "--" is forwarded to trainer.task.
gcloud ml-engine jobs submit training "$JOBNAME" \
  --region="$REGION" \
  --module-name=trainer.task \
  --package-path="$PWD/anomaly_detection_module/trainer" \
  --job-dir="$OUTDIR" \
  --staging-bucket="gs://${BUCKET}" \
  --scale-tier=STANDARD_1 \
  --runtime-version="${TFVERSION:-1.13}" \
  -- \
  --train_file_pattern="gs://${BUCKET}/anomaly_detection/data/train_norm_seq.csv" \
  --eval_file_pattern="gs://${BUCKET}/anomaly_detection/data/val_norm_1_seq.csv" \
  --output_dir="$OUTDIR" \
  --job-dir="$OUTDIR" \
  --seq_len=30 \
  --train_batch_size=32 \
  --eval_batch_size=32 \
  --train_steps=2000 \
  --start_delay_secs=60 \
  --throttle_secs=120 \
  --model_type="pca" \
  --k_principal_components=3 \
  --training_mode="reconstruction" \
  --labeled_tune_thresh=True \
  --num_time_anom_thresh=300 \
  --num_feat_anom_thresh=300

## Hyperparameter tuning of reconstruction hyperparameters

### Dense Autoencoder

In [None]:
%%writefile hyperparam_reconstruction_dense.yaml
# Hyperparameter tuning spec for the dense autoencoder reconstruction job.
trainingInput:
  scaleTier: STANDARD_1
  hyperparameters:
    # Objective: minimize the metric reported under the "rmse" tag.
    goal: MINIMIZE
    hyperparameterMetricTag: rmse
    # 30 trials in total, run one at a time.
    maxTrials: 30
    maxParallelTrials: 1
    params:
      # Layer sizes are passed as space-separated strings.
      - parameterName: enc_dnn_hidden_units
        type: CATEGORICAL
        categoricalValues:
          - "64 32 16"
          - "256 128 16"
          - "64 64 64"
      - parameterName: latent_vector_size
        type: INTEGER
        minValue: 8
        maxValue: 16
        scaleType: UNIT_LINEAR_SCALE
      - parameterName: dec_dnn_hidden_units
        type: CATEGORICAL
        categoricalValues:
          - "16 32 64"
          - "16 128 256"
          - "64 64 64"
      - parameterName: train_batch_size
        type: INTEGER
        minValue: 8
        maxValue: 512
        scaleType: UNIT_LOG_SCALE
      - parameterName: learning_rate
        type: DOUBLE
        minValue: 0.001
        maxValue: 0.1
        scaleType: UNIT_LINEAR_SCALE

In [None]:
%%bash
# Submit the hyperparameter tuning job for the dense autoencoder, using the
# search space written to hyperparam_reconstruction_dense.yaml.
# BUCKET and REGION are exported by the notebook's setup cell; abort early if
# either is missing so the recursive delete below never sees a malformed path.
: "${BUCKET:?BUCKET is not set - run the setup cell first}"
: "${REGION:?REGION is not set - run the setup cell first}"
OUTDIR="gs://${BUCKET}/anomaly_detection/hyperparam_reconstruction_dense"
JOBNAME="job_anomaly_detection_hyperparam_reconstruction_dense_$(date -u +%y%m%d_%H%M%S)"
echo "$OUTDIR" "$REGION" "$JOBNAME"
# Clear any previous output under OUTDIR before submitting the job.
gsutil -m rm -rf "$OUTDIR"
# --model_type is passed explicitly (same value as the dense training cell) so
# the tuned dense-specific parameters apply to the intended model rather than
# to whatever default trainer.task uses.
# NOTE(review): --train_batch_size is given here and also tuned by the YAML;
# confirm which value trainer.task ends up using.
gcloud ml-engine jobs submit training "$JOBNAME" \
  --region="$REGION" \
  --module-name=trainer.task \
  --package-path="$PWD/anomaly_detection_module/trainer" \
  --job-dir="$OUTDIR" \
  --staging-bucket="gs://${BUCKET}" \
  --scale-tier=STANDARD_1 \
  --config=hyperparam_reconstruction_dense.yaml \
  --runtime-version="${TFVERSION:-1.13}" \
  -- \
  --train_file_pattern="gs://${BUCKET}/anomaly_detection/data/train_norm_seq.csv" \
  --eval_file_pattern="gs://${BUCKET}/anomaly_detection/data/val_norm_1_seq.csv" \
  --output_dir="$OUTDIR" \
  --job-dir="$OUTDIR" \
  --seq_len=30 \
  --train_batch_size=32 \
  --eval_batch_size=32 \
  --train_steps=2000 \
  --start_delay_secs=60 \
  --throttle_secs=120 \
  --model_type="dense_autoencoder" \
  --training_mode="reconstruction" \
  --labeled_tune_thresh=True \
  --num_time_anom_thresh=300 \
  --num_feat_anom_thresh=300

### LSTM Autoencoder

In [None]:
%%writefile hyperparam_reconstruction_lstm.yaml
# Hyperparameter tuning spec for the LSTM autoencoder reconstruction job.
trainingInput:
  scaleTier: STANDARD_1
  hyperparameters:
    # Objective: minimize the metric reported under the "rmse" tag.
    goal: MINIMIZE
    hyperparameterMetricTag: rmse
    # 30 trials in total, run one at a time.
    maxTrials: 30
    maxParallelTrials: 1
    params:
      # Per-layer values are passed as space-separated strings.
      - parameterName: enc_lstm_hidden_units
        type: CATEGORICAL
        categoricalValues:
          - "64 32 16"
          - "256 128 16"
          - "64 64 64"
      - parameterName: dec_lstm_hidden_units
        type: CATEGORICAL
        categoricalValues:
          - "16 32 64"
          - "16 128 256"
          - "64 64 64"
      - parameterName: lstm_dropout_output_keep_probs
        type: CATEGORICAL
        categoricalValues:
          - "0.9 1.0 1.0"
          - "0.95 0.95 1.0"
          - "0.95 0.95 0.95"
      - parameterName: dnn_hidden_units
        type: CATEGORICAL
        categoricalValues:
          - "256 128 64"
          - "256 128 16"
          - "64 64 64"
      - parameterName: train_batch_size
        type: INTEGER
        minValue: 8
        maxValue: 512
        scaleType: UNIT_LOG_SCALE
      - parameterName: learning_rate
        type: DOUBLE
        minValue: 0.001
        maxValue: 0.1
        scaleType: UNIT_LINEAR_SCALE

In [None]:
%%bash
# Submit the hyperparameter tuning job for the LSTM autoencoder, using the
# search space written to hyperparam_reconstruction_lstm.yaml.
# BUCKET and REGION are exported by the notebook's setup cell; abort early if
# either is missing so the recursive delete below never sees a malformed path.
: "${BUCKET:?BUCKET is not set - run the setup cell first}"
: "${REGION:?REGION is not set - run the setup cell first}"
OUTDIR="gs://${BUCKET}/anomaly_detection/hyperparam_reconstruction_lstm"
JOBNAME="job_anomaly_detection_hyperparam_reconstruction_lstm_$(date -u +%y%m%d_%H%M%S)"
echo "$OUTDIR" "$REGION" "$JOBNAME"
# Clear any previous output under OUTDIR before submitting the job.
gsutil -m rm -rf "$OUTDIR"
# --model_type is passed explicitly (same value as the LSTM training cell) so
# the tuned LSTM-specific parameters apply to the intended model rather than
# to whatever default trainer.task uses.
# NOTE(review): the LSTM training cell also passes --reverse_labels_sequence=True;
# it is not added here because the trainer's default is not visible - confirm.
gcloud ml-engine jobs submit training "$JOBNAME" \
  --region="$REGION" \
  --module-name=trainer.task \
  --package-path="$PWD/anomaly_detection_module/trainer" \
  --job-dir="$OUTDIR" \
  --staging-bucket="gs://${BUCKET}" \
  --scale-tier=STANDARD_1 \
  --config=hyperparam_reconstruction_lstm.yaml \
  --runtime-version="${TFVERSION:-1.13}" \
  -- \
  --train_file_pattern="gs://${BUCKET}/anomaly_detection/data/train_norm_seq.csv" \
  --eval_file_pattern="gs://${BUCKET}/anomaly_detection/data/val_norm_1_seq.csv" \
  --output_dir="$OUTDIR" \
  --job-dir="$OUTDIR" \
  --seq_len=30 \
  --train_batch_size=32 \
  --eval_batch_size=32 \
  --train_steps=2000 \
  --start_delay_secs=60 \
  --throttle_secs=120 \
  --model_type="lstm_enc_dec_autoencoder" \
  --training_mode="reconstruction" \
  --labeled_tune_thresh=True \
  --num_time_anom_thresh=300 \
  --num_feat_anom_thresh=300

### PCA Autoencoder

In [None]:
%%writefile hyperparam_reconstruction_pca.yaml
# Hyperparameter tuning spec for the PCA reconstruction job.
trainingInput:
  scaleTier: STANDARD_1
  hyperparameters:
    # Objective: minimize the metric reported under the "rmse" tag.
    goal: MINIMIZE
    hyperparameterMetricTag: rmse
    # 30 trials in total, run one at a time.
    maxTrials: 30
    maxParallelTrials: 1
    params:
      # Only the number of principal components is searched.
      - parameterName: k_principal_components
        type: INTEGER
        minValue: 2
        maxValue: 10
        scaleType: UNIT_LINEAR_SCALE

In [None]:
%%bash
# Submit the hyperparameter tuning job for the PCA model, using the search
# space written to hyperparam_reconstruction_pca.yaml.
# BUCKET and REGION are exported by the notebook's setup cell; abort early if
# either is missing so the recursive delete below never sees a malformed path.
: "${BUCKET:?BUCKET is not set - run the setup cell first}"
: "${REGION:?REGION is not set - run the setup cell first}"
OUTDIR="gs://${BUCKET}/anomaly_detection/hyperparam_reconstruction_pca"
JOBNAME="job_anomaly_detection_hyperparam_reconstruction_pca_$(date -u +%y%m%d_%H%M%S)"
echo "$OUTDIR" "$REGION" "$JOBNAME"
# Clear any previous output under OUTDIR before submitting the job.
gsutil -m rm -rf "$OUTDIR"
# --model_type is passed explicitly (same value as the PCA training cell) so
# the tuned k_principal_components applies to the intended model rather than
# to whatever default trainer.task uses.
gcloud ml-engine jobs submit training "$JOBNAME" \
  --region="$REGION" \
  --module-name=trainer.task \
  --package-path="$PWD/anomaly_detection_module/trainer" \
  --job-dir="$OUTDIR" \
  --staging-bucket="gs://${BUCKET}" \
  --scale-tier=STANDARD_1 \
  --config=hyperparam_reconstruction_pca.yaml \
  --runtime-version="${TFVERSION:-1.13}" \
  -- \
  --train_file_pattern="gs://${BUCKET}/anomaly_detection/data/train_norm_seq.csv" \
  --eval_file_pattern="gs://${BUCKET}/anomaly_detection/data/val_norm_1_seq.csv" \
  --output_dir="$OUTDIR" \
  --job-dir="$OUTDIR" \
  --seq_len=30 \
  --train_batch_size=32 \
  --eval_batch_size=32 \
  --train_steps=2000 \
  --start_delay_secs=60 \
  --throttle_secs=120 \
  --model_type="pca" \
  --training_mode="reconstruction" \
  --labeled_tune_thresh=True \
  --num_time_anom_thresh=300 \
  --num_feat_anom_thresh=300

## Train error distribution variables

### Dense Autoencoder

In [None]:
%%bash
# Submit the calculate_error_distribution_statistics job for the dense autoencoder.
# OUTDIR is the same directory the dense reconstruction job writes to and is
# not cleared here.
# BUCKET and REGION are exported by the notebook's setup cell; fail fast with
# a clear message if either is missing.
: "${BUCKET:?BUCKET is not set - run the setup cell first}"
: "${REGION:?REGION is not set - run the setup cell first}"
OUTDIR="gs://${BUCKET}/anomaly_detection/trained_model/dense"
JOBNAME="job_anomaly_detection_calculate_error_distribution_statistics_dense_$(date -u +%y%m%d_%H%M%S)"
echo "$OUTDIR" "$REGION" "$JOBNAME"
# The runtime version follows TFVERSION from the setup cell (1.13 if unset).
# Everything after the bare "--" is forwarded to trainer.task.
gcloud ml-engine jobs submit training "$JOBNAME" \
  --region="$REGION" \
  --module-name=trainer.task \
  --package-path="$PWD/anomaly_detection_module/trainer" \
  --job-dir="$OUTDIR" \
  --staging-bucket="gs://${BUCKET}" \
  --scale-tier=STANDARD_1 \
  --runtime-version="${TFVERSION:-1.13}" \
  -- \
  --train_file_pattern="gs://${BUCKET}/anomaly_detection/data/val_norm_1_seq.csv" \
  --eval_file_pattern="gs://${BUCKET}/anomaly_detection/data/val_norm_1_seq.csv" \
  --output_dir="$OUTDIR" \
  --job-dir="$OUTDIR" \
  --seq_len=30 \
  --train_batch_size=32 \
  --eval_batch_size=32 \
  --train_steps=2200 \
  --model_type="dense_autoencoder" \
  --enc_dnn_hidden_units="64 32 16" \
  --latent_vector_size=8 \
  --dec_dnn_hidden_units="16 32 64" \
  --time_loss_weight=1.0 \
  --feat_loss_weight=1.0 \
  --training_mode="calculate_error_distribution_statistics" \
  --labeled_tune_thresh=True \
  --eps="1e-12" \
  --num_time_anom_thresh=300 \
  --num_feat_anom_thresh=300

### LSTM Autoencoder

In [None]:
%%bash
# Submit the calculate_error_distribution_statistics job for the LSTM autoencoder.
# OUTDIR is the same directory the LSTM reconstruction job writes to and is
# not cleared here.
# BUCKET and REGION are exported by the notebook's setup cell; fail fast with
# a clear message if either is missing.
: "${BUCKET:?BUCKET is not set - run the setup cell first}"
: "${REGION:?REGION is not set - run the setup cell first}"
OUTDIR="gs://${BUCKET}/anomaly_detection/trained_model/lstm"
JOBNAME="job_anomaly_detection_calculate_error_distribution_statistics_lstm_$(date -u +%y%m%d_%H%M%S)"
echo "$OUTDIR" "$REGION" "$JOBNAME"
# The runtime version follows TFVERSION from the setup cell (1.13 if unset).
# Everything after the bare "--" is forwarded to trainer.task.
gcloud ml-engine jobs submit training "$JOBNAME" \
  --region="$REGION" \
  --module-name=trainer.task \
  --package-path="$PWD/anomaly_detection_module/trainer" \
  --job-dir="$OUTDIR" \
  --staging-bucket="gs://${BUCKET}" \
  --scale-tier=STANDARD_1 \
  --runtime-version="${TFVERSION:-1.13}" \
  -- \
  --train_file_pattern="gs://${BUCKET}/anomaly_detection/data/val_norm_1_seq.csv" \
  --eval_file_pattern="gs://${BUCKET}/anomaly_detection/data/val_norm_1_seq.csv" \
  --output_dir="$OUTDIR" \
  --job-dir="$OUTDIR" \
  --seq_len=30 \
  --train_batch_size=32 \
  --eval_batch_size=32 \
  --train_steps=2200 \
  --model_type="lstm_enc_dec_autoencoder" \
  --reverse_labels_sequence=True \
  --enc_lstm_hidden_units="64 32 16" \
  --dec_lstm_hidden_units="16 32 64" \
  --lstm_dropout_output_keep_probs="0.9 0.95 1.0" \
  --dnn_hidden_units="1024 256 64" \
  --training_mode="calculate_error_distribution_statistics" \
  --labeled_tune_thresh=True \
  --eps="1e-12" \
  --num_time_anom_thresh=300 \
  --num_feat_anom_thresh=300

### PCA Autoencoder

In [None]:
%%bash
# Submit the calculate_error_distribution_statistics job for the PCA model.
# OUTDIR is the same directory the PCA reconstruction job writes to and is
# not cleared here.
# BUCKET and REGION are exported by the notebook's setup cell; fail fast with
# a clear message if either is missing.
: "${BUCKET:?BUCKET is not set - run the setup cell first}"
: "${REGION:?REGION is not set - run the setup cell first}"
OUTDIR="gs://${BUCKET}/anomaly_detection/trained_model/pca"
JOBNAME="job_anomaly_detection_calculate_error_distribution_statistics_pca_$(date -u +%y%m%d_%H%M%S)"
echo "$OUTDIR" "$REGION" "$JOBNAME"
# The runtime version follows TFVERSION from the setup cell (1.13 if unset).
# Everything after the bare "--" is forwarded to trainer.task.
gcloud ml-engine jobs submit training "$JOBNAME" \
  --region="$REGION" \
  --module-name=trainer.task \
  --package-path="$PWD/anomaly_detection_module/trainer" \
  --job-dir="$OUTDIR" \
  --staging-bucket="gs://${BUCKET}" \
  --scale-tier=STANDARD_1 \
  --runtime-version="${TFVERSION:-1.13}" \
  -- \
  --train_file_pattern="gs://${BUCKET}/anomaly_detection/data/val_norm_1_seq.csv" \
  --eval_file_pattern="gs://${BUCKET}/anomaly_detection/data/val_norm_1_seq.csv" \
  --output_dir="$OUTDIR" \
  --job-dir="$OUTDIR" \
  --seq_len=30 \
  --train_batch_size=32 \
  --eval_batch_size=32 \
  --train_steps=2200 \
  --model_type="pca" \
  --k_principal_components=3 \
  --training_mode="calculate_error_distribution_statistics" \
  --labeled_tune_thresh=True \
  --eps="1e-12" \
  --num_time_anom_thresh=300 \
  --num_feat_anom_thresh=300

## Tune anomaly thresholds

### Dense Autoencoder

In [None]:
%%bash
# Submit the tune_anomaly_thresholds job for the dense autoencoder, using the
# labeled mixed validation sequences as both train and eval input.
# OUTDIR is the same directory the dense reconstruction job writes to and is
# not cleared here.
# BUCKET and REGION are exported by the notebook's setup cell; fail fast with
# a clear message if either is missing.
: "${BUCKET:?BUCKET is not set - run the setup cell first}"
: "${REGION:?REGION is not set - run the setup cell first}"
OUTDIR="gs://${BUCKET}/anomaly_detection/trained_model/dense"
JOBNAME="job_anomaly_detection_tune_anomaly_thresholds_dense_$(date -u +%y%m%d_%H%M%S)"
echo "$OUTDIR" "$REGION" "$JOBNAME"
# The runtime version follows TFVERSION from the setup cell (1.13 if unset).
# Everything after the bare "--" is forwarded to trainer.task.
gcloud ml-engine jobs submit training "$JOBNAME" \
  --region="$REGION" \
  --module-name=trainer.task \
  --package-path="$PWD/anomaly_detection_module/trainer" \
  --job-dir="$OUTDIR" \
  --staging-bucket="gs://${BUCKET}" \
  --scale-tier=STANDARD_1 \
  --runtime-version="${TFVERSION:-1.13}" \
  -- \
  --train_file_pattern="gs://${BUCKET}/anomaly_detection/data/labeled_val_mixed_seq.csv" \
  --eval_file_pattern="gs://${BUCKET}/anomaly_detection/data/labeled_val_mixed_seq.csv" \
  --output_dir="$OUTDIR" \
  --job-dir="$OUTDIR" \
  --seq_len=30 \
  --train_batch_size=32 \
  --eval_batch_size=32 \
  --train_steps=2400 \
  --model_type="dense_autoencoder" \
  --enc_dnn_hidden_units="64 32 16" \
  --latent_vector_size=8 \
  --dec_dnn_hidden_units="16 32 64" \
  --time_loss_weight=1.0 \
  --feat_loss_weight=1.0 \
  --training_mode="tune_anomaly_thresholds" \
  --labeled_tune_thresh=True \
  --num_time_anom_thresh=300 \
  --num_feat_anom_thresh=300 \
  --min_time_anom_thresh=1.0 \
  --max_time_anom_thresh=20.0 \
  --min_feat_anom_thresh=20.0 \
  --max_feat_anom_thresh=80.0 \
  --f_score_beta=0.05

### LSTM Autoencoder

In [None]:
%%bash
# Submit the tune_anomaly_thresholds job for the LSTM autoencoder, using the
# labeled mixed validation sequences as both train and eval input.
# OUTDIR is the same directory the LSTM reconstruction job writes to and is
# not cleared here.
# BUCKET and REGION are exported by the notebook's setup cell; fail fast with
# a clear message if either is missing.
: "${BUCKET:?BUCKET is not set - run the setup cell first}"
: "${REGION:?REGION is not set - run the setup cell first}"
OUTDIR="gs://${BUCKET}/anomaly_detection/trained_model/lstm"
JOBNAME="job_anomaly_detection_tune_anomaly_thresholds_lstm_$(date -u +%y%m%d_%H%M%S)"
echo "$OUTDIR" "$REGION" "$JOBNAME"
# The runtime version follows TFVERSION from the setup cell (1.13 if unset).
# Everything after the bare "--" is forwarded to trainer.task.
gcloud ml-engine jobs submit training "$JOBNAME" \
  --region="$REGION" \
  --module-name=trainer.task \
  --package-path="$PWD/anomaly_detection_module/trainer" \
  --job-dir="$OUTDIR" \
  --staging-bucket="gs://${BUCKET}" \
  --scale-tier=STANDARD_1 \
  --runtime-version="${TFVERSION:-1.13}" \
  -- \
  --train_file_pattern="gs://${BUCKET}/anomaly_detection/data/labeled_val_mixed_seq.csv" \
  --eval_file_pattern="gs://${BUCKET}/anomaly_detection/data/labeled_val_mixed_seq.csv" \
  --output_dir="$OUTDIR" \
  --job-dir="$OUTDIR" \
  --seq_len=30 \
  --train_batch_size=32 \
  --eval_batch_size=32 \
  --train_steps=2400 \
  --model_type="lstm_enc_dec_autoencoder" \
  --reverse_labels_sequence=True \
  --enc_lstm_hidden_units="64 32 16" \
  --dec_lstm_hidden_units="16 32 64" \
  --lstm_dropout_output_keep_probs="0.9 0.95 1.0" \
  --dnn_hidden_units="1024 256 64" \
  --training_mode="tune_anomaly_thresholds" \
  --labeled_tune_thresh=True \
  --num_time_anom_thresh=300 \
  --num_feat_anom_thresh=300 \
  --min_time_anom_thresh=1.0 \
  --max_time_anom_thresh=20.0 \
  --min_feat_anom_thresh=20.0 \
  --max_feat_anom_thresh=80.0 \
  --f_score_beta=0.05

### PCA Autoencoder

In [None]:
%%bash
# Submit the tune_anomaly_thresholds job for the PCA model, using the labeled
# mixed validation sequences as both train and eval input.
# OUTDIR is the same directory the PCA reconstruction job writes to and is
# not cleared here.
# BUCKET and REGION are exported by the notebook's setup cell; fail fast with
# a clear message if either is missing.
: "${BUCKET:?BUCKET is not set - run the setup cell first}"
: "${REGION:?REGION is not set - run the setup cell first}"
OUTDIR="gs://${BUCKET}/anomaly_detection/trained_model/pca"
JOBNAME="job_anomaly_detection_tune_anomaly_thresholds_pca_$(date -u +%y%m%d_%H%M%S)"
echo "$OUTDIR" "$REGION" "$JOBNAME"
# The runtime version follows TFVERSION from the setup cell (1.13 if unset).
# Everything after the bare "--" is forwarded to trainer.task.
gcloud ml-engine jobs submit training "$JOBNAME" \
  --region="$REGION" \
  --module-name=trainer.task \
  --package-path="$PWD/anomaly_detection_module/trainer" \
  --job-dir="$OUTDIR" \
  --staging-bucket="gs://${BUCKET}" \
  --scale-tier=STANDARD_1 \
  --runtime-version="${TFVERSION:-1.13}" \
  -- \
  --train_file_pattern="gs://${BUCKET}/anomaly_detection/data/labeled_val_mixed_seq.csv" \
  --eval_file_pattern="gs://${BUCKET}/anomaly_detection/data/labeled_val_mixed_seq.csv" \
  --output_dir="$OUTDIR" \
  --job-dir="$OUTDIR" \
  --seq_len=30 \
  --train_batch_size=32 \
  --eval_batch_size=32 \
  --train_steps=2400 \
  --model_type="pca" \
  --k_principal_components=3 \
  --training_mode="tune_anomaly_thresholds" \
  --labeled_tune_thresh=True \
  --num_time_anom_thresh=300 \
  --num_feat_anom_thresh=300 \
  --min_time_anom_thresh=1.0 \
  --max_time_anom_thresh=20.0 \
  --min_feat_anom_thresh=20.0 \
  --max_feat_anom_thresh=80.0 \
  --f_score_beta=0.05

## Deploy

### Dense Autoencoder

In [None]:
%%bash
# Create the anomaly_detection_dense model and deploy version v1 from the
# dense model's export directory.
# BUCKET and REGION are exported by the notebook's setup cell; fail fast with
# a clear message if either is missing.
: "${BUCKET:?BUCKET is not set - run the setup cell first}"
: "${REGION:?REGION is not set - run the setup cell first}"
MODEL_NAME="anomaly_detection_dense"
MODEL_VERSION="v1"
EXPORT_ROOT="gs://${BUCKET}/anomaly_detection/trained_model/dense/export/exporter/"
# tail -1 keeps the last entry gsutil lists (presumably the most recent
# export - confirm against the exporter's directory naming).
MODEL_LOCATION=$(gsutil ls "$EXPORT_ROOT" | tail -1)
# Stop here when nothing has been exported, rather than letting
# "versions create" fail on an empty --origin.
if [ -z "$MODEL_LOCATION" ]; then
  echo "No exported model found under $EXPORT_ROOT" >&2
  exit 1
fi
echo "Deploying $MODEL_NAME $MODEL_VERSION from $MODEL_LOCATION ... this will take a few minutes"
# To replace an existing deployment, uncomment the two delete commands below.
#gcloud ml-engine versions delete ${MODEL_VERSION} --model ${MODEL_NAME}
#gcloud ml-engine models delete ${MODEL_NAME}
gcloud ml-engine models create "$MODEL_NAME" --regions "$REGION"
gcloud ml-engine versions create "$MODEL_VERSION" --model "$MODEL_NAME" --origin "$MODEL_LOCATION" --runtime-version "${TFVERSION:-1.13}"

### LSTM Autoencoder

In [None]:
%%bash
# Create the anomaly_detection_lstm model and deploy version v1 from the
# LSTM model's export directory.
# BUCKET and REGION are exported by the notebook's setup cell; fail fast with
# a clear message if either is missing.
: "${BUCKET:?BUCKET is not set - run the setup cell first}"
: "${REGION:?REGION is not set - run the setup cell first}"
MODEL_NAME="anomaly_detection_lstm"
MODEL_VERSION="v1"
EXPORT_ROOT="gs://${BUCKET}/anomaly_detection/trained_model/lstm/export/exporter/"
# tail -1 keeps the last entry gsutil lists (presumably the most recent
# export - confirm against the exporter's directory naming).
MODEL_LOCATION=$(gsutil ls "$EXPORT_ROOT" | tail -1)
# Stop here when nothing has been exported, rather than letting
# "versions create" fail on an empty --origin.
if [ -z "$MODEL_LOCATION" ]; then
  echo "No exported model found under $EXPORT_ROOT" >&2
  exit 1
fi
echo "Deploying $MODEL_NAME $MODEL_VERSION from $MODEL_LOCATION ... this will take a few minutes"
# To replace an existing deployment, uncomment the two delete commands below.
#gcloud ml-engine versions delete ${MODEL_VERSION} --model ${MODEL_NAME}
#gcloud ml-engine models delete ${MODEL_NAME}
gcloud ml-engine models create "$MODEL_NAME" --regions "$REGION"
gcloud ml-engine versions create "$MODEL_VERSION" --model "$MODEL_NAME" --origin "$MODEL_LOCATION" --runtime-version "${TFVERSION:-1.13}"

### PCA Autoencoder

In [None]:
%%bash
# Create the anomaly_detection_pca model and deploy version v1 from the
# PCA model's export directory.
# BUCKET and REGION are exported by the notebook's setup cell; fail fast with
# a clear message if either is missing.
: "${BUCKET:?BUCKET is not set - run the setup cell first}"
: "${REGION:?REGION is not set - run the setup cell first}"
MODEL_NAME="anomaly_detection_pca"
MODEL_VERSION="v1"
EXPORT_ROOT="gs://${BUCKET}/anomaly_detection/trained_model/pca/export/exporter/"
# tail -1 keeps the last entry gsutil lists (presumably the most recent
# export - confirm against the exporter's directory naming).
MODEL_LOCATION=$(gsutil ls "$EXPORT_ROOT" | tail -1)
# Stop here when nothing has been exported, rather than letting
# "versions create" fail on an empty --origin.
if [ -z "$MODEL_LOCATION" ]; then
  echo "No exported model found under $EXPORT_ROOT" >&2
  exit 1
fi
echo "Deploying $MODEL_NAME $MODEL_VERSION from $MODEL_LOCATION ... this will take a few minutes"
# To replace an existing deployment, uncomment the two delete commands below.
#gcloud ml-engine versions delete ${MODEL_VERSION} --model ${MODEL_NAME}
#gcloud ml-engine models delete ${MODEL_NAME}
gcloud ml-engine models create "$MODEL_NAME" --regions "$REGION"
gcloud ml-engine versions create "$MODEL_VERSION" --model "$MODEL_NAME" --origin "$MODEL_LOCATION" --runtime-version "${TFVERSION:-1.13}"

## Prediction

In [14]:
# Names of the five unlabeled feature columns: tag_0 through tag_4.
UNLABELED_CSV_COLUMNS = ["tag_" + str(tag_index) for tag_index in range(5)]

In [15]:
import numpy as np

# Read the labeled mixed test sequences as strings, one comma-separated
# column per field, then report the resulting array shape.
labeled_test_mixed_sequences_array = np.loadtxt(
    fname="data/labeled_test_mixed_seq.csv",
    dtype=str,
    delimiter=",",
)
print(
    "labeled_test_mixed_sequences_array.shape = %s"
    % (labeled_test_mixed_sequences_array.shape,)
)

labeled_test_mixed_sequences_array.shape = (12800, 6)


In [16]:
# Number of leading test rows used to build the online prediction request.
number_of_prediction_instances = 10

# Print the last column of those rows, which this cell reports as the labels.
print(
    "labels = %s"
    % (labeled_test_mixed_sequences_array[:number_of_prediction_instances, -1],)
)

labels = ['1' '1' '1' '0' '0' '0' '1' '1' '1' '1']


### GCloud ML-Engine prediction from deployed model

In [17]:
# First `number_of_prediction_instances` rows of the test array as nested
# Python lists. NOTE(review): no other cell visible here reads this variable.
test_data_normal_string_list = (
    labeled_test_mixed_sequences_array[:number_of_prediction_instances].tolist()
)

In [18]:
# Format dataframe to instances list to get sent to ML-Engine
instances = [{UNLABELED_CSV_COLUMNS[i]: example[i]
              for i in range(len(UNLABELED_CSV_COLUMNS))} 
             for example in labeled_test_mixed_sequences_array.tolist()[0:number_of_prediction_instances]]

In [19]:
# Display the instance dictionaries that make up the online prediction request.
instances

[{'tag_0': '0.69531315;1.13063381;1.46212831;0.72515986;-0.72285522;-0.48608379;0.66008214;1.39027465;0.84784508;0.15064029;-0.86813038;-0.19376341;1.33326618;1.22342092;0.73982218;-0.2687485;0.09466024;0.62380527;1.9415848;1.20261854;0.2626776;10.47418518;-0.73694674;8.68714408;-17.87462629;-8.19666606;-6.97080639;16.66439155;9.85067447;12.07285213',
  'tag_1': '0.46246721;1.98239347;-0.1224569;-0.38657805;1.81182516;1.03408165;-1.48166642;1.27670001;1.91766873;-0.58962765;-0.08759287;1.97567123;-0.44497447;-1.07307304;1.57404985;1.01763811;-0.83888524;1.25820476;2.03154979;-1.02087098;-0.16190532;-22.80037943;4.01040199;-10.56761472;21.24277391;-28.76669081;-9.91877985;8.1263494;17.63656283;6.75155559',
  'tag_2': '0.83586452;1.42071377;0.27984876;0.14046254;1.84246963;0.843375;-1.07942606;0.4688217;1.26009013;-0.37743756;0.0258541;1.16555777;0.84946758;-0.72737033;0.74098267;1.80826962;0.06362211;-0.43868715;1.2166949;0.28497154;-0.26348582;-13.79850614;11.03782568;-6.36478403;-13.4

### Dense Autoencoder

In [20]:
# Request online predictions for `instances` from version v1 of the deployed
# anomaly_detection_dense model and print the raw response.
from googleapiclient import discovery
from oauth2client.client import GoogleCredentials
import json

# Request body and the fully qualified name of the model version to query.
request_data = {"instances": instances}
parent = "projects/{}/models/{}/versions/{}".format(
    PROJECT, "anomaly_detection_dense", "v1"
)

# Build the "ml" v1 API client with application default credentials.
credentials = GoogleCredentials.get_application_default()
api = discovery.build("ml", "v1", credentials=credentials)

response = api.projects().predict(
    body=request_data,
    name=parent,
).execute()
print("response = {}".format(response))

response = {'predictions': [{'X_feat_abs_recon_err': [[0.69531315, 0.46246721, 0.83586452, 0.74390452, 0.301915072], [1.13063381, 1.98239347, 1.42071377, 1.92560347, 2.31332898], [1.46212831, 0.1224569, 0.27984876, 0.22207084, 0.545217009], [0.72515986, 0.38657805, 0.14046254, 0.95415575, 0.539524824], [2.3481991343731186, 0.1864812456268814, 0.21712571562688154, 0.5692517843731186, 0.17331221562688137], [0.48608379, 1.03408165, 0.843375, 2.57393237, 0.000359134973], [0.66008214, 1.48166642, 1.07942606, 0.18518758, 0.680317702], [1.39027465, 1.27670001, 0.4688217, 1.02718177, 1.86590218], [0.84784508, 1.91766873, 1.26009013, 1.57433792, 1.28621946], [0.15064029, 0.58962765, 0.37743756, 2.0535059, 1.32070497], [0.86813038, 0.08759287, 0.0258541, 0.47719256, 0.815005512], [2.8716783672970645, 0.7022437272970647, 1.5123571872970647, 3.5248392472970647, 0.9968171172970646], [1.33326618, 0.44497447, 0.84946758, 1.92646145, 0.631576853], [1.22342092, 1.07307304, 0.72737033, 1.70267046, 0.040

### LSTM Autoencoder

In [21]:
# Request online predictions for `instances` from version v1 of the deployed
# anomaly_detection_lstm model and print the raw response.
from googleapiclient import discovery
from oauth2client.client import GoogleCredentials
import json

# Request body and the fully qualified name of the model version to query.
request_data = {"instances": instances}
parent = "projects/{}/models/{}/versions/{}".format(
    PROJECT, "anomaly_detection_lstm", "v1"
)

# Build the "ml" v1 API client with application default credentials.
credentials = GoogleCredentials.get_application_default()
api = discovery.build("ml", "v1", credentials=credentials)

response = api.projects().predict(
    body=request_data,
    name=parent,
).execute()
print("response = {}".format(response))

response = {'predictions': [{'X_feat_abs_recon_err': [[0.16616036403680623, 0.058959444183014686, 0.2985932999340962, 0.21003013500334522, 0.184673088027292], [0.6014810240368061, 1.4609668158169855, 0.8834425499340963, 1.3917290850033452, 1.826740819972708], [0.9329755240368062, 0.6438835541830147, 0.2574224600659038, 0.31180354499665475, 1.031805169027292], [0.19600707403680617, 0.9080047041830147, 0.39680868006590375, 1.4880301349966547, 1.026112984027292], [1.252008005963194, 1.2903985058169853, 1.3051984099340963, 0.5222177450033452, 1.3120679699727078], [1.0152365759631938, 0.5126549958169854, 0.3061037799340962, 2.0400579850033456, 0.486947295000292], [0.13092935403680617, 2.0030930741830146, 1.6166972800659039, 0.7190619649966548, 1.166905862027292], [0.8611218640368062, 0.7552733558169854, 0.06844952006590377, 1.5610561549966548, 1.3793140199727079], [0.3186922940368062, 1.3962420758169851, 0.7228189099340963, 1.0404635350033453, 0.7996312999727081], [0.3785124959631938, 1.111

### PCA Autoencoder

In [22]:
# Request online predictions for `instances` from version v1 of the deployed
# anomaly_detection_pca model and print the raw response.
from googleapiclient import discovery
from oauth2client.client import GoogleCredentials
import json

# Request body and the fully qualified name of the model version to query.
request_data = {"instances": instances}
parent = "projects/{}/models/{}/versions/{}".format(
    PROJECT, "anomaly_detection_pca", "v1"
)

# Build the "ml" v1 API client with application default credentials.
credentials = GoogleCredentials.get_application_default()
api = discovery.build("ml", "v1", credentials=credentials)

response = api.projects().predict(
    body=request_data,
    name=parent,
).execute()
print("response = {}".format(response))

response = {'predictions': [{'X_feat_abs_recon_err': [[0.19490119690290958, 0.0368342804848188, 0.33572427496585394, 0.2414722933474028, 0.20091689404983057], [0.9052575055950052, 0.7639029706465598, 2.562809087836545, 0.17127307173152262, 0.09349729466177237], [1.4645049962584662, 0.3414034844009865, 3.6169931777696434, 0.7504305490834053, 2.8761596795440805], [1.443058894629989, 0.5005681667860475, 2.4627171462944384, 0.9145501858153062, 1.4193612962891813], [2.104227205374042, 0.8138247659460361, 5.942700871771371, 1.1456673530093142, 4.568524269766747], [1.6899390664281113, 0.3228649805144591, 2.728895469919194, 1.6338835134386207, 3.117654535555128], [1.2005714070362983, 1.4412347672173662, 4.438785495355476, 1.2848457915729725, 3.906919266925668], [1.0881343353649022, 0.3299784758151567, 2.0282934353797777, 2.507356286635048, 2.6810180104526897], [0.6045858304439521, 1.0778010747576667, 0.18569187744468574, 1.0339918419421743, 1.6511927212126905], [0.21931364649001983, 0.96191444