## Train, Evaluate and Deploy a Credit Card Fraud Detection Model to Cloud ML

Import libraries

In [5]:
# import required libraries
from google.datalab.ml import TensorBoard

import os

  from ._conv import register_converters as _register_converters


Set Environment Variables

In [6]:
# set google cloud project, bucket and region information
PROJECT = 'qwiklabs-gcp-33cc08e2c9cb0695' # REPLACE WITH YOUR PROJECT ID
BUCKET = 'qwiklabs-gcp-33cc08e2c9cb0695-projects' # REPLACE WITH YOUR BUCKET NAME
REGION = 'europe-west1' # REPLACE WITH YOUR BUCKET REGION e.g. us-central1

In [2]:
# Python variables
# Model name: used for the deployed Cloud ML model, as the path prefix inside
# the bucket and as the local package directory in the bash cells
MODEL_NAME = 'credit_card_fraud_detection'
# Version label used when creating and deleting the deployed model version
MODEL_VERSION = 'v1'
# Name of the directory (local and inside the bucket) that receives the training output
OUTPUT_DIR = 'trained_model'

In [7]:
# Export the notebook settings as environment variables so that the
# %%bash cells can read them.
os.environ.update({
    'PROJECT': PROJECT,
    'BUCKET': BUCKET,
    'REGION': REGION,
    'MODEL_NAME': MODEL_NAME,
    'MODEL_VERSION': MODEL_VERSION,
    'OUTPUT_DIR': OUTPUT_DIR,
    'TFVERSION': '1.8',  # Tensorflow version
})

Configure gcloud

In [8]:
%%bash
# Point the gcloud CLI at the project and compute region exported above
gcloud config set project $PROJECT
gcloud config set compute/region $REGION

Updated property [core/project].
Updated property [compute/region].


Create google cloud storage bucket

In [None]:
%%bash
# The bucket needs to exist for the gsutil commands in later cells to work;
# it is created inside the project given by ${PROJECT}
gsutil mb -p ${PROJECT} gs://${BUCKET}

### Explore the code

In [5]:
%%bash
# List every file and directory below the model package directory.
# NOTE(review): the recorded output shows a hyphenated directory
# (credit-card-fraud-detection) while MODEL_NAME is defined with underscores;
# confirm that the directory on disk is really named ${MODEL_NAME}.
find ${MODEL_NAME}

credit-card-fraud-detection
credit-card-fraud-detection/images
credit-card-fraud-detection/images/accuracy.PNG
credit-card-fraud-detection/images/dnn.PNG
credit-card-fraud-detection/images/auc_precision.PNG
credit-card-fraud-detection/images/auc.PNG
credit-card-fraud-detection/images/recall.png
credit-card-fraud-detection/images/average_loss.PNG
credit-card-fraud-detection/images/loss.PNG
credit-card-fraud-detection/images/graph.png
credit-card-fraud-detection/images/precision.PNG
credit-card-fraud-detection/.ipynb_checkpoints
credit-card-fraud-detection/setup.py
credit-card-fraud-detection/trainer
credit-card-fraud-detection/trainer/__init__.py
credit-card-fraud-detection/trainer/task.py
credit-card-fraud-detection/trainer/__pycache__
credit-card-fraud-detection/trainer/__pycache__/__init__.cpython-35.pyc
credit-card-fraud-detection/trainer/__pycache__/task.cpython-35.pyc
credit-card-fraud-detection/trainer/model.py
credit-card-fraud-detection/README.md
credit-card-fraud-detection/req

In [6]:
%%bash
# Print the source of the trainer package's model module
cat ${MODEL_NAME}/trainer/model.py

#!/usr/bin/env python

# Copyright 2018 Atos. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""
Credit Card Fraud Detection using DNNClassifier estimator
"""

# import required libraries
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import tensorflow as tf

# set global configuration
tf.logging.set_verbosity(tf.logging.INFO)

# set global variables
CSV_COLUMNS = ["Time","V1","V2","V3","V4","V5","V6","V7","V9","

In [8]:
%%bash
# Print the source of the trainer package's command-line entry point
cat ${MODEL_NAME}/trainer/task.py

#!/usr/bin/env python

# Copyright 2018 Atos. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""
Task implementation code to run on the Cloud ML Service.
"""

import traceback
import argparse
import json
import os

import model

import tensorflow as tf

if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    # Input Arguments
    parser.add_argument(
        '--train_data_paths',
        help = 'GCS or local path to training data',
        required = True
    )

### Find absolute paths to your data

In [7]:
%%bash
# Show the directory that the relative data and output paths are resolved against
echo "Working Directory: ${PWD}"

Working Directory: /content/datalab/CreditCardFraud


In [8]:
%%bash
# Show the first line of the preprocessed training file
echo "Head of creditcard_train.csv"
head -1 $PWD/data/preprocess/creditcard_train.csv

Head of creditcard_train.csv
Time,V1,V2,V3,V4,V5,V6,V7,V9,V10,V11,V12,V14,V16,V17,V18,V19,V21,Amount,Class


In [9]:
%%bash
# Show the first line of the preprocessed test file
echo "Head of creditcard_test.csv"
head -1 $PWD/data/preprocess/creditcard_test.csv

Head of creditcard_test.csv
Time,V1,V2,V3,V4,V5,V6,V7,V9,V10,V11,V12,V14,V16,V17,V18,V19,V21,Amount,Class


### Running the Python module from the command-line

#### Clean model training dir/output dir

In [38]:
%%bash
# Remove the local training output so that every training run starts fresh.
# This needs to be done before tensorboard is started.
# ${OUTPUT_DIR:?...} aborts the cell when OUTPUT_DIR is unset or empty, so the
# path can never collapse to "$PWD/" and delete the whole working directory.
rm -rf "${PWD}/${OUTPUT_DIR:?OUTPUT_DIR is not set}"

#### Monitor using Tensorboard

In [None]:
# Start TensorBoard on the local training output directory. OUTPUT_DIR is the
# directory name that the training cells pass as --output_dir; the name
# TRAINING_DIR is not defined in this notebook and raised a NameError.
TensorBoard().start('./' + OUTPUT_DIR)

In [39]:
%%bash
# Train and evaluate locally by running the trainer package directly with python.
# Setup python so it sees the task module which controls the model.py
export PYTHONPATH=${PYTHONPATH}:${PWD}/${MODEL_NAME}
# Currently set for python 2.  To run with python 3 
#    1.  Replace 'python' with 'python3' in the following command
#    2.  Edit trainer/task.py to reflect proper module import method 
python -m trainer.task \
   --train_data_paths="${PWD}/data/preprocess/creditcard_train*" \
   --eval_data_paths="${PWD}/data/preprocess/creditcard_test*" \
   --output_dir=${PWD}/${OUTPUT_DIR} \
   --train_steps=10000 --job-dir=./tmp

  from ._conv import register_converters as _register_converters
INFO:tensorflow:Using default config.
INFO:tensorflow:Using config: {'_save_checkpoints_secs': 600, '_session_config': None, '_keep_checkpoint_max': 5, '_task_type': 'worker', '_train_distribute': None, '_is_chief': True, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x7fd3a1aa7cd0>, '_evaluation_master': '', '_save_checkpoints_steps': None, '_keep_checkpoint_every_n_hours': 10000, '_service': None, '_num_ps_replicas': 0, '_tf_random_seed': None, '_master': '', '_num_worker_replicas': 1, '_task_id': 0, '_log_step_count_steps': 100, '_model_dir': '/content/datalab/CreditCardFraud/trained_model/', '_global_id_in_cluster': 0, '_save_summary_steps': 100}
INFO:tensorflow:Running training and evaluation locally (non-distributed).
INFO:tensorflow:Start train and evaluate loop. The evaluate will happen after 60 secs (eval_spec.throttle_secs) or training is finished.
INFO:tensorflow:Calling model_fn

In [40]:
%%bash
# List the exported model directories written below the local output directory
ls $PWD/${OUTPUT_DIR}/export/exporter/

1542288505
1542288577
1542288649
1542288721
1542288775


In [36]:
%%writefile ./test_fraud.json
{"Time":472,"V1":-3.043541,"V2":-3.157307,"V3":1.088463,"V4":2.288644,"V5":1.359805,"V6":-1.064823,"V7":0.325574,"V9":-0.270953,"V10":-0.838587,"V11":-0.414575,"V12":-0.503141,"V14":-1.692029,"V16":0.66678,"V17":0.599717,"V18":1.725321,"V19":0.283345,"V21":0.661696,"Amount":529}

Writing ./test_fraud.json


In [41]:
%%bash
# This model dir is the model exported after training and is used for prediction.
# The last entry of the export listing is picked.
model_dir=$(ls ${PWD}/${OUTPUT_DIR}/export/exporter | tail -1)
# predict using the trained model
gcloud ml-engine local predict  \
    --model-dir=${PWD}/${OUTPUT_DIR}/export/exporter/${model_dir} \
    --json-instances=./test_fraud.json

CLASS_IDS  CLASSES  LOGISTIC  LOGITS                PROBABILITIES
[0]        [u'0']   [0.0]     [-737.0037231445312]  [1.0, 0.0]


  from ._conv import register_converters as _register_converters



#### Stop TensorBoard

In [45]:
# Stop every TensorBoard process that TensorBoard.list() reports.
running = TensorBoard.list()
pids = [] if running.empty else running['pid']
for pid in pids:
    TensorBoard().stop(pid)
    print('Stopped TensorBoard with pid {}'.format(pid))

Stopped TensorBoard with pid 10030


#### Clean model training dir/output dir

In [None]:
%%bash
# Remove the local training output so that the next training run starts fresh.
# This needs to be done before tensorboard is started (again).
# ${OUTPUT_DIR:?...} aborts the cell when OUTPUT_DIR is unset or empty, so the
# path can never collapse to "$PWD/" and delete the whole working directory.
rm -rf "${PWD}/${OUTPUT_DIR:?OUTPUT_DIR is not set}"

### Running locally using gcloud

In [None]:
%%bash
# Use Cloud Machine Learning Engine to train the model in local file system.
# The arguments after the bare "--" are the trainer's own arguments
# (data paths, number of steps and output directory).
gcloud ml-engine local train \
   --module-name=trainer.task \
   --package-path=${PWD}/${MODEL_NAME}/trainer \
   -- \
   --train_data_paths="${PWD}/data/preprocess/creditcard_train*" \
   --eval_data_paths="${PWD}/data/preprocess/creditcard_test*" \
   --train_steps=1000 \
   --output_dir=${PWD}/${OUTPUT_DIR} 

### Submit training job using gcloud

#### Copy training data to google cloud storage

In [19]:
%%bash
# Remove previously uploaded data files from the model's data folder in the
# Cloud Storage bucket (the upload itself happens in the next cell)
echo $BUCKET
gsutil -m rm -rf gs://${BUCKET}/${MODEL_NAME}/data/

qwiklabs-gcp-33cc08e2c9cb0695-projects


Removing gs://qwiklabs-gcp-33cc08e2c9cb0695-projects/credit_card_fraud_detection/data/creditcard_header_test.csv#1542290265744706...
Removing gs://qwiklabs-gcp-33cc08e2c9cb0695-projects/credit_card_fraud_detection/data/creditcard_header_train.csv#1542290267318670...
Removing gs://qwiklabs-gcp-33cc08e2c9cb0695-projects/credit_card_fraud_detection/data/creditcard_test.csv#1542290265983312...
Removing gs://qwiklabs-gcp-33cc08e2c9cb0695-projects/credit_card_fraud_detection/data/creditcard_train.csv#1542290267248669...
/ [1/4 objects]  25% Done                                                       / [2/4 objects]  50% Done                                                       / [3/4 objects]  75% Done                                                       / [4/4 objects] 100% Done                                                       
Operation completed over 4 objects.                                              


In [20]:
%%bash
# Copy the preprocessed CSV files to the model's data folder in the Cloud Storage bucket
echo $BUCKET
gsutil -m cp ${PWD}/data/preprocess/*.csv gs://${BUCKET}/${MODEL_NAME}/data/

qwiklabs-gcp-33cc08e2c9cb0695-projects


Copying file:///content/datalab/CreditCardFraud/data/preprocess/creditcard_header_test.csv [Content-Type=text/csv]...
Copying file:///content/datalab/CreditCardFraud/data/preprocess/creditcard_header_train.csv [Content-Type=text/csv]...
/ [0/4 files][    0.0 B/205.0 MiB]   0% Done                                    / [0/4 files][    0.0 B/205.0 MiB]   0% Done                                    Copying file:///content/datalab/CreditCardFraud/data/preprocess/creditcard_test.csv [Content-Type=text/csv]...
Copying file:///content/datalab/CreditCardFraud/data/preprocess/creditcard_train.csv [Content-Type=text/csv]...
/ [0/4 files][    0.0 B/205.0 MiB]   0% Done                                    / [0/4 files][    0.0 B/205.0 MiB]   0% Done                                    -- [0/4 files][ 84.4 MiB/205.0 MiB]  41% Done                                    \\ [1/4 files][142.6 MiB/205.0 MiB]  69% Done                                    \ [2/4 files][146.7 MiB/205.0 MiB]  71% Done      

In [16]:
%%bash
# Preview the job name format: model name followed by the current UTC timestamp
JOBNAME=${MODEL_NAME}_$(date -u +%y%m%d_%H%M%S)
echo $JOBNAME

credit_card_fraud_detection_181115_135615


In [21]:
%%bash
# Submit the trainer package as a Cloud ML Engine training job.
# OUTDIR is used both as the job directory and as the trainer's output directory.
OUTDIR=gs://${BUCKET}/${MODEL_NAME}/${OUTPUT_DIR}
# The UTC timestamp gives every submission a distinct job name
JOBNAME=${MODEL_NAME}_$(date -u +%y%m%d_%H%M%S)
echo $OUTDIR $REGION $JOBNAME
# Clear the Cloud Storage Bucket used for the training job
gsutil -m rm -rf $OUTDIR
# The arguments after the bare "--" are the trainer's own arguments
gcloud ml-engine jobs submit training $JOBNAME \
   --region=$REGION \
   --module-name=trainer.task \
   --package-path=${PWD}/${MODEL_NAME}/trainer \
   --job-dir=$OUTDIR \
   --staging-bucket=gs://$BUCKET \
   --scale-tier=BASIC \
   --runtime-version=$TFVERSION \
   -- \
   --train_data_paths="gs://${BUCKET}/${MODEL_NAME}/data/creditcard_train*" \
   --eval_data_paths="gs://${BUCKET}/${MODEL_NAME}/data/creditcard_test*"  \
   --output_dir=$OUTDIR \
   --train_steps=10000

gs://qwiklabs-gcp-33cc08e2c9cb0695-projects/credit_card_fraud_detection/trained_model europe-west1 credit_card_fraud_detection_181115_140341
jobId: credit_card_fraud_detection_181115_140341
state: QUEUED


Removing gs://qwiklabs-gcp-33cc08e2c9cb0695-projects/credit_card_fraud_detection/trained_model/#1542290302514641...
/ [1/1 objects] 100% Done                                                       
Operation completed over 1 objects.                                              
Job [credit_card_fraud_detection_181115_140341] submitted successfully.
Your job is still active. You may view the status of your job with the command

  $ gcloud ml-engine jobs describe credit_card_fraud_detection_181115_140341

or continue streaming the logs with the command

  $ gcloud ml-engine jobs stream-logs credit_card_fraud_detection_181115_140341


### Deploy Model

#### Explore trained model

In [22]:
%%bash
# List the exported model directories that the training job wrote to the bucket
gsutil ls gs://${BUCKET}/${MODEL_NAME}/${OUTPUT_DIR}/export/exporter

gs://qwiklabs-gcp-33cc08e2c9cb0695-projects/credit_card_fraud_detection/trained_model/export/exporter/
gs://qwiklabs-gcp-33cc08e2c9cb0695-projects/credit_card_fraud_detection/trained_model/export/exporter/1542290769/
gs://qwiklabs-gcp-33cc08e2c9cb0695-projects/credit_card_fraud_detection/trained_model/export/exporter/1542291363/
gs://qwiklabs-gcp-33cc08e2c9cb0695-projects/credit_card_fraud_detection/trained_model/export/exporter/1542291963/
gs://qwiklabs-gcp-33cc08e2c9cb0695-projects/credit_card_fraud_detection/trained_model/export/exporter/1542292363/


#### Deploy model : step 1 - remove version info 
Before an existing cloud model can be removed, all of its versions must be deleted first.  If no model or version has been deployed yet, this command will generate an error, but that is ok.

In [None]:
%%bash
# Last entry of the export listing; it is only echoed here for information
MODEL_LOCATION=$(gsutil ls gs://${BUCKET}/${MODEL_NAME}/${OUTPUT_DIR}/export/exporter | tail -1)

echo "MODEL_LOCATION = ${MODEL_LOCATION}"

# Delete the deployed version so that the model itself can be deleted afterwards
gcloud ml-engine versions delete ${MODEL_VERSION} --model ${MODEL_NAME}

#### Deploy model: step 2 - remove existing model
Now that the version info is removed from an existing model, the actual model can be removed.  If an existing model is not deployed, this command will generate an error but that is ok.  It just means the model with the given name is not deployed.

%%bash
# Delete the existing model (its versions must already have been deleted)
gcloud ml-engine models delete ${MODEL_NAME}

#### Deploy model: step 3 - create new model

In [23]:
%%bash
# Create the (still version-less) model resource in the configured region
gcloud ml-engine models create ${MODEL_NAME} --regions $REGION

Created ml engine model [projects/qwiklabs-gcp-33cc08e2c9cb0695/models/credit_card_fraud_detection].


#### Deploy model: step 4 - add version info to the new model

In [24]:
%%bash
# Use the last entry of the export listing as the origin of the new version
MODEL_LOCATION=$(gsutil ls gs://${BUCKET}/${MODEL_NAME}/${OUTPUT_DIR}/export/exporter | tail -1)

echo "MODEL_LOCATION = ${MODEL_LOCATION}"

# Create the version from that export, with the same runtime version as the training job
gcloud ml-engine versions create ${MODEL_VERSION} --model ${MODEL_NAME} --origin ${MODEL_LOCATION} --runtime-version $TFVERSION

MODEL_LOCATION = gs://qwiklabs-gcp-33cc08e2c9cb0695-projects/credit_card_fraud_detection/trained_model/export/exporter/1542292363/


Creating version (this might take a few minutes)......
...................................................................................................done.


### Prediction

#### Using gcloud cli

In [25]:
%%bash
# Send the instance stored in ./test_fraud.json to the deployed model version
gcloud ml-engine predict --model=${MODEL_NAME} --version=${MODEL_VERSION} --json-instances=./test_fraud.json

CLASS_IDS  CLASSES  LOGISTIC  LOGITS             PROBABILITIES
[0]        [u'0']   [0.0]     [-1186.728515625]  [1.0, 0.0]


#### Using google api client

In [26]:
from googleapiclient import discovery
from oauth2client.client import GoogleCredentials
import json

# Build a client for the 'ml' v1 API using the application default credentials.
credentials = GoogleCredentials.get_application_default()
api = discovery.build('ml', 'v1', credentials=credentials,
            discoveryServiceUrl='https://storage.googleapis.com/cloud-ml/discovery/ml_v1_discovery.json')

# A single instance carrying the same feature values as ./test_fraud.json.
request_data = {'instances':
  [
    {
      'Time': 472.0,
      'V1': -3.043541,
      'V2': -3.157307,
      'V3': 1.088463,
      'V4': 2.288644,
      'V5': 1.359805,
      'V6': -1.064823,
      'V7': 0.325574,
      'V9': -0.270953,
      'V10': -0.838587,
      'V11': -0.414575,
      'V12': -0.503141,
      'V14': -1.692029,
      'V16': 0.666780,
      'V17': 0.599717,
      'V18': 1.725321,
      'V19': 0.283345,
      'V21': 0.661696,
      'Amount':529.00
    }
  ]
}

# Resource name of the deployed model version that should serve the request.
parent = 'projects/%s/models/%s/versions/%s' % (PROJECT, MODEL_NAME, MODEL_VERSION)
response = api.projects().predict(body=request_data, name=parent).execute()
# print(...) with one argument gives the same output on Python 2 and Python 3;
# the bare print statement is a SyntaxError on Python 3.
print("response={0}".format(response))

response={u'predictions': [{u'probabilities': [1.0, 0.0], u'class_ids': [0], u'classes': [u'0'], u'logits': [-1186.728515625], u'logistic': [0.0]}]}


### Cleanup Cloud ML

#### Delete model version info

In [None]:
%%bash
# Delete the deployed version first; the model can only be deleted afterwards
gcloud ml-engine versions delete ${MODEL_VERSION} --model ${MODEL_NAME}

#### Delete model

In [None]:
%%bash
# Delete the model resource itself
gcloud ml-engine models delete ${MODEL_NAME}

<pre>
# Copyright 2018 Atos. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
</pre>