# Sample Cloud AI Platform Notebook: Predicting Job Elapsed Time
Predict the elapsed time (`ELAPSEDTIME`) of a compute job from the features recorded for that job

# Import

In [None]:
import os
import time
from datetime import datetime

import pandas as pd
import numpy as np

from tqdm import tqdm

import tensorflow as tf
from tensorflow import feature_column
import tensorflow_io as tfio

# Load and plot some data


Let's use the Google Cloud data warehouse [BigQuery](https://cloud.google.com/bigquery/docs/introduction) to query the data.  
The BigQuery client library provides a cell magic ```%%bigquery``` which runs a SQL query and returns the results as a Pandas DataFrame.  
Use the cell magic to query a sample of data and save the results in the ```train_df``` DataFrame:

In [None]:
%%bigquery train_df
SELECT 
*
FROM
  `challenge.training_data`

Show the first few rows of the DataFrame:

In [None]:
train_df.head()

Unnamed: 0,JOBID,DAY,HOUR,VERSION,PERFORMANCE,PRECISION,MPLINK,NTNU,MPLINK_NTNU,MBS,...,NBNODES,NBELEM1D,NBELEM2D,NBELEM3D,CLUSTER,NBSERVERS,NBCORE,DATACHECK_TIME,ELAPSEDTIME,TZC_FINAL
0,972300,03/03/2020,17:50:30,2018.0.1,POLE,1,YES,NO,NO,NO,...,4507694,14570,4037272,1570802,HPC3,8.0,288,446.0,15000,0.0114
1,975299,03/04/2020,09:38:09,2018.0.1,POLE,1,YES,NO,NO,NO,...,4507694,14570,4037272,1570802,HPC3,8.0,288,446.0,15600,0.0114
2,993587,03/06/2020,10:53:17,2018.0.1,POLE,1,YES,NO,NO,NO,...,4507694,14570,4037272,1570802,HPC3,8.0,288,446.0,14600,0.0115
3,972300,03/03/2020,17:50:30,2018.0.1,POLE,1,YES,NO,NO,NO,...,4507694,14570,4037272,1570802,HPC3,8.0,288,446.0,15000,0.0114
4,975299,03/04/2020,09:38:09,2018.0.1,POLE,1,YES,NO,NO,NO,...,4507694,14570,4037272,1570802,HPC3,8.0,288,446.0,15600,0.0114


Show the DataFrame details:

In [None]:
# Leave out JOBID, DAY, HOUR and TZC_FINAL: the training query used later in
# this notebook does not select them as model inputs either.
excluded_columns = ["JOBID", "DAY", "HOUR", "TZC_FINAL"]
df = train_df.drop(columns=excluded_columns)
df.head()

Unnamed: 0,VERSION,PERFORMANCE,PRECISION,MPLINK,NTNU,MPLINK_NTNU,MBS,RUNEND,TIMESTEP,NBNODES,NBELEM1D,NBELEM2D,NBELEM3D,CLUSTER,NBSERVERS,NBCORE,DATACHECK_TIME,ELAPSEDTIME
0,2018.0.1,POLE,1,YES,NO,NO,NO,110.01,0.0005,4507694,14570,4037272,1570802,HPC3,8.0,288,446.0,15000
1,2018.0.1,POLE,1,YES,NO,NO,NO,110.01,0.0005,4507694,14570,4037272,1570802,HPC3,8.0,288,446.0,15600
2,2018.0.1,POLE,1,YES,NO,NO,NO,110.01,0.0005,4507694,14570,4037272,1570802,HPC3,8.0,288,446.0,14600
3,2018.0.1,POLE,1,YES,NO,NO,NO,110.01,0.0005,4507694,14570,4037272,1570802,HPC3,8.0,288,446.0,15000
4,2018.0.1,POLE,1,YES,NO,NO,NO,110.01,0.0005,4507694,14570,4037272,1570802,HPC3,8.0,288,446.0,15600


In [None]:
print(df.columns)

Index(['VERSION', 'PERFORMANCE', 'PRECISION', 'MPLINK', 'NTNU', 'MPLINK_NTNU',
       'MBS', 'RUNEND', 'TIMESTEP', 'NBNODES', 'NBELEM1D', 'NBELEM2D',
       'NBELEM3D', 'CLUSTER', 'NBSERVERS', 'NBCORE', 'DATACHECK_TIME',
       'ELAPSEDTIME'],
      dtype='object')


Split the columns into categorical and numerical features:

In [None]:
# For a frame that mixes numeric and non-numeric columns, `describe()` reports
# on the numeric ones only, so its column index is the numerical feature list.
numerical = df.describe().columns.tolist()
# Everything that is not numerical is treated as categorical (order preserved).
categorical = list(filter(lambda name: name not in numerical, df.columns))

In [None]:
print(categorical)
print(numerical)

['VERSION', 'PERFORMANCE', 'MPLINK', 'NTNU', 'MPLINK_NTNU', 'MBS', 'CLUSTER']
['PRECISION', 'RUNEND', 'TIMESTEP', 'NBNODES', 'NBELEM1D', 'NBELEM2D', 'NBELEM3D', 'NBSERVERS', 'NBCORE', 'DATACHECK_TIME', 'ELAPSEDTIME']


Show the unique values of each categorical feature:

In [1]:
# Summary statistics for the categorical columns. A bare expression is only
# rendered when it is the last statement of a cell, so the table has to be
# displayed explicitly; otherwise its result is silently discarded.
display(df[categorical].describe())

# Distinct values observed for every categorical column.
for column_name in categorical:
    print({column_name: list(df[column_name].unique())})

NameError: ignored

# Local training

First, let's try to train the model locally (from within the notebook).

## Create a Google Cloud Storage bucket

We need a way to centrally store and share data across services.  
Let's use [Google Cloud Storage](https://cloud.google.com/storage) which is the blob storage service from Google Cloud.

```gsutil``` is a command line tool for Google Cloud Storage.  
In Google Cloud Storage, URI are in the format ```gs://bucket/folder/file``` .  
Use ```gsutil mb gs://YourBucketName``` to create a Google Cloud Storage bucket.

Your bucket name must be **globally** unique and must contain only lowercase letters, numbers, dashes, underscores, and dots.  
**!!! Change the bucket name below with your own!!!**

In [None]:
# Prepare Google Cloud Storage directory to save logs and model
# NOTE: `gsutil mb` reports "409 Bucket ... already exists" when the bucket is
# already there (e.g. created by a previous run of this cell); if it is your own
# bucket, the following cells simply keep using it.
BUCKET_NAME = 'ml-competition-001' # Create your own unique bucket name
!gsutil mb -l EU gs://{BUCKET_NAME}

Creating gs://ml-competition-001/...
ServiceException: 409 Bucket ml-competition-001 already exists.


Use ```gsutil ls gs://YourBucketName``` to list your bucket and make sure it has been correctly created.  
Expect no output if the bucket is correctly created.

In [None]:
!gsutil ls gs://{BUCKET_NAME}

gs://ml-competition-001/latest_model/
gs://ml-competition-001/training_job_2021_01_29_170545/
gs://ml-competition-001/training_job_2021_01_29_172946/
gs://ml-competition-001/training_job_2021_01_31_143525/
gs://ml-competition-001/training_job_2021_01_31_144654/
gs://ml-competition-001/training_job_2021_01_31_145423/
gs://ml-competition-001/training_job_2021_01_31_150103/


## Create your trainer package structure

Now let's create your trainer package structure.

In [None]:
# Create the Trainer package structure
!mkdir ./trainer
!touch ./trainer/__init__.py

mkdir: cannot create directory ‘./trainer’: File exists


**!!! Change the bucket name below with your own!!!**

In [None]:
%%writefile ./trainer/task.py

# Imports
import os
import pandas as pd
import tensorflow as tf
from google.cloud import bigquery
from sklearn.model_selection import train_test_split

# Declare constants
BUCKET_NAME = 'ml-competition-001'  # change to YOUR bucket name
JOB_DIR = f'gs://{BUCKET_NAME}/latest_model' 
CATEGORICAL_TYPES = {'VERSION': pd.api.types.CategoricalDtype(['2018.0.1', '2016.05', '2016.06', '2019.0.2', '2012.7', '2019.0', '2018.0', '2016.01', '2019.0.1', '2019']),
                     'PERFORMANCE': pd.api.types.CategoricalDtype(['POLE', 'PEDESTRIAN', 'UNKNOWN', 'ECE', 'FRONT', 'RCAR', 'COCKPIT', 'OVERSLAM', 'REAR', 'SIDE', 'WHEEL']),
                    'MPLINK': pd.api.types.CategoricalDtype(['YES', 'NO']),
                    'NTNU': pd.api.types.CategoricalDtype(['NO', 'YES']),
                    'MPLINK_NTNU': pd.api.types.CategoricalDtype(['NO', 'YES']),
                    'MBS': pd.api.types.CategoricalDtype(['NO', 'USED']),
                    'CLUSTER':pd.api.types.CategoricalDtype(['HPC3', 'HPC1', 'HPC2'])}
                                                              
TARGET_COLUMN = 'ELAPSEDTIME'
QUERY = '''SELECT 
       VERSION, PERFORMANCE, PRECISION, MPLINK, NTNU, MPLINK_NTNU,
       MBS, RUNEND, TIMESTEP, NBNODES, NBELEM1D, NBELEM2D,
       NBELEM3D, CLUSTER, NBSERVERS, NBCORE, DATACHECK_TIME,
       ELAPSEDTIME
FROM
  `demoairenault.challenge.training_data`'''

BATCH_SIZE = 128
NUM_EPOCHS = 5
LEARNING_RATE = 0.001

# Read the data from BigQuery
client = bigquery.Client(location='EU') 
query_job = client.query(QUERY)
data_df = query_job.to_dataframe()  # you can read from other sources to pandas DataFrame
print(f'First rows for the raw dataset: \n{data_df.head()}')

# Convert integer valued (numeric) columns to floating point
numeric_columns = data_df.select_dtypes(['int64']).columns
data_df[numeric_columns] = data_df[numeric_columns].astype('float32')

# Convert categorical columns to numeric
cat_columns = data_df.select_dtypes(['object']).columns
data_df[cat_columns] = data_df[cat_columns].astype('category')
data_df[cat_columns] = data_df[cat_columns].apply(lambda x: x.astype(
        CATEGORICAL_TYPES[x.name]))
data_df[cat_columns] = data_df[cat_columns].apply(lambda x: x.cat.codes)
print(f'First rows for the transformed dataset: \n{data_df.head()}')

# Train/Val split
train_df, val_df = train_test_split(data_df, train_size=0.8)
train_target = train_df.pop(TARGET_COLUMN)
val_target = val_df.pop(TARGET_COLUMN)
num_train_examples = len(train_df)
num_val_examples = len(val_df)

# *Possible improvements*: add standartization for numeric values to range [-1; 1], categories to one-hot encoded

# Creata tensorflow dataset object
dataset_train = (tf.data.Dataset
                 .from_tensor_slices((train_df.to_dict('list'), train_target))
                 .shuffle(buffer_size=BATCH_SIZE*4)
                 .repeat()
                 .batch(BATCH_SIZE))
                 
dataset_val = (tf.data.Dataset
                 .from_tensor_slices((val_df.to_dict('list'), val_target))
                 .repeat()
                 .batch(BATCH_SIZE))  # No shuffle

print(f'One batch of the train data:\n {next(iter(dataset_train))}')

# Prepare named inputs for our model
inputs = {key: tf.keras.layers.Input(shape=(), name=key) for key in train_df.keys()}
x = tf.stack(list(inputs.values()), axis=-1)

# Define model's architecture
x = tf.keras.layers.Dense(100, activation='relu')(x)
x = tf.keras.layers.Dense(50, activation='relu')(x)
x = tf.keras.layers.Dense(10, activation='relu')(x)
output = tf.keras.layers.Dense(1, activation='linear')(x)

# Build the model and compile it
model_func = tf.keras.Model(inputs=inputs, outputs=output)
model_func.compile(optimizer=tf.keras.optimizers.Adam(LEARNING_RATE),
                   loss='mean_absolute_error')

# Train the model
history = model_func.fit(dataset_train, 
                          epochs=NUM_EPOCHS, 
                          steps_per_epoch=int(num_train_examples/BATCH_SIZE), 
                          validation_data=dataset_val, 
                          validation_steps=int(num_train_examples/BATCH_SIZE), 
                          verbose=1)

model_func.save(f'{JOB_DIR}/export/')

Overwriting ./trainer/task.py


In [None]:
%%writefile ./setup.py
from setuptools import find_packages
from setuptools import setup

REQUIRED_PACKAGES = ['scikit-learn', 'pandas']

setup(
    name='trainer',
    version='0.1',
    install_requires=REQUIRED_PACKAGES,
    packages=find_packages(),
    include_package_data=True,
    description='My super training application package.'
)

Overwriting ./setup.py


## Run a local training task
This is a good test before you try to train the model in the cloud.

Many commands we are going to use accept a parameter for setting a region.  
A region is a group of Google Cloud data centers used to run computing tasks.  
To reduce latency, let's set a variable with a close by data center:

In [None]:
REGION = 'europe-west1'

For interacting with Cloud AI Platform we are going to use the [gcloud](https://cloud.google.com/sdk/gcloud) command line tool.  
Gcloud also provides [properties](https://cloud.google.com/sdk/docs/properties) used by other services.  
Let's set the __ml_engine/local_python__ property so AI Platform knows which Python version to use for local training:

In [None]:
# Explicitly tell `gcloud ai-platform local train` to use Python 3 
!gcloud config set ml_engine/local_python $(which python3)

Updated property [ml_engine/local_python].


For submitting a local training job to AI Platform you need to provide:
* A directory to store the model and logs: here we are going to use our Google Cloud Storage bucket we created earlier
* The path to your trainer package
* The name of your trainer module

In [None]:
# Build a job name that embeds the current timestamp, and the matching
# Cloud Storage directory inside the bucket.
JOB_NAME = datetime.now().strftime('training_job_%Y_%m_%d_%H%M%S')
JOB_DIR = 'gs://{}/{}'.format(BUCKET_NAME, JOB_NAME)
print(JOB_NAME)
print(JOB_DIR)

training_job_2021_01_31_154624
gs://ml-competition-001/training_job_2021_01_31_154624


Let's run our local training job with the gcloud command ```gcloud ai-platform local train```.

In [None]:
# Run the local training job.
# NOTE: trainer/task.py, as written by the %%writefile cell of this notebook,
# does not read --job-dir; it exports the model to its own hard-coded
# gs://<BUCKET_NAME>/latest_model/export location.
! gcloud ai-platform local train \
  --job-dir $JOB_DIR \
  --package-path ./trainer \
  --module-name trainer.task 

First rows for the raw dataset: 
    VERSION PERFORMANCE  PRECISION  ... NBCORE DATACHECK_TIME ELAPSEDTIME
0  2018.0.1        POLE          1  ...    288          446.0       15000
1  2018.0.1        POLE          1  ...    288          446.0       15600
2  2018.0.1        POLE          1  ...    288          446.0       14600
3  2018.0.1        POLE          1  ...    288          446.0       15000
4  2018.0.1        POLE          1  ...    288          446.0       15600

[5 rows x 18 columns]
First rows for the transformed dataset: 
   VERSION  PERFORMANCE  PRECISION  ...  NBCORE  DATACHECK_TIME  ELAPSEDTIME
0        0            0        1.0  ...   288.0           446.0      15000.0
1        0            0        1.0  ...   288.0           446.0      15600.0
2        0            0        1.0  ...   288.0           446.0      14600.0
3        0            0        1.0  ...   288.0           446.0      15000.0
4        0            0        1.0  ...   288.0           446.0      15600

Your model has been saved to your Google Cloud Storage bucket.

In [None]:
!gsutil ls gs://{BUCKET_NAME}

gs://ml-competition-001/latest_model/
gs://ml-competition-001/training_job_2021_01_29_170545/
gs://ml-competition-001/training_job_2021_01_29_172946/
gs://ml-competition-001/training_job_2021_01_31_143525/
gs://ml-competition-001/training_job_2021_01_31_144654/
gs://ml-competition-001/training_job_2021_01_31_145423/
gs://ml-competition-001/training_job_2021_01_31_150103/


# AI Platform training

Train your model in the cloud.  
This can help when you need more compute power, want to run your training for long periods of time, or want to try several trainings in parallel with a hyperparameter search.

When training through AI Platform you need a few more parameters:
* __Region__: the region used by AI Platform for training
* __Runtime version__: the AI Platform version you want to use
* __Python version__: the Python version used by your package
* __Scale tier__: define which compute power will be used (GPU, TPU, number of machines, ...), more details in [this documentation](https://cloud.google.com/ai-platform/training/docs/machine-types)

Let's submit a training job with ```gcloud ai-platform jobs submit training``` with a basic configuration (only 1 machine, no GPU, no TPU):

In [None]:
# Submit the training job
! gcloud ai-platform jobs submit training $JOB_NAME \
  --job-dir $JOB_DIR \
  --package-path ./trainer \
  --module-name trainer.task \
  --region $REGION \
  --runtime-version=2.1 \
  --python-version=3.7 \
  --scale-tier basic 

Job [training_job_2021_01_31_154624] submitted successfully.
Your job is still active. You may view the status of your job with the command

  $ gcloud ai-platform jobs describe training_job_2021_01_31_154624

or continue streaming the logs with the command

  $ gcloud ai-platform jobs stream-logs training_job_2021_01_31_154624
jobId: training_job_2021_01_31_154624
state: QUEUED


The training job is a long running operation.  
You can use ```gcloud ai-platform jobs describe``` to get the status of the job:

In [None]:
! gcloud ai-platform jobs describe $JOB_NAME

createTime: '2021-01-31T15:47:53Z'
etag: 9waYwEI1I2c=
jobId: training_job_2021_01_31_154624
state: PREPARING
trainingInput:
  jobDir: gs://ml-competition-001/training_job_2021_01_31_154624
  packageUris:
  - gs://ml-competition-001/training_job_2021_01_31_154624/packages/195db6434c3413e086185d9d1aa0a1233a52b66988eedcd85c652d2fcc087a4e/trainer-0.1.tar.gz
  pythonModule: trainer.task
  pythonVersion: '3.7'
  region: europe-west1
  runtimeVersion: '2.1'
trainingOutput: {}

View job in the Cloud Console at:
https://console.cloud.google.com/mlengine/jobs/training_job_2021_01_31_154624?project=demoairenault

View logs at:
https://console.cloud.google.com/logs?resource=ml_job%2Fjob_id%2Ftraining_job_2021_01_31_154624&project=demoairenault


Your model has been saved to your Google Cloud Storage bucket.

In [None]:
!gsutil ls gs://{BUCKET_NAME}

gs://ml-competition-001/latest_model/
gs://ml-competition-001/training_job_2021_01_29_170545/
gs://ml-competition-001/training_job_2021_01_29_172946/
gs://ml-competition-001/training_job_2021_01_31_143525/
gs://ml-competition-001/training_job_2021_01_31_144654/
gs://ml-competition-001/training_job_2021_01_31_145423/
gs://ml-competition-001/training_job_2021_01_31_150103/
gs://ml-competition-001/training_job_2021_01_31_154624/


# AI Platform deployment

Now that you have trained your model, it's time to make it available for serving predictions.  
Google [AI Platform Prediction](https://cloud.google.com/ai-platform/prediction/docs) lets you do just that very easily.

Let's use ```gsutil ls``` to list your model's file from the Cloud Storage Bucket:

In [None]:
LATEST_MODEL_DIR = f'gs://{BUCKET_NAME}/latest_model/export' 
!gsutil ls -lh $LATEST_MODEL_DIR

       0 B  2021-01-29T17:18:29Z  gs://ml-competition-001/latest_model/export/
165.64 KiB  2021-01-31T15:47:42Z  gs://ml-competition-001/latest_model/export/saved_model.pb
                                 gs://ml-competition-001/latest_model/export/assets/
                                 gs://ml-competition-001/latest_model/export/variables/
TOTAL: 2 objects, 169614 bytes (165.64 KiB)


Set a **name** and a **version** for this model:

In [None]:
MODEL_NAME = 'kers' # Choose your own model name
MODEL_VERSION = 'v1' # Make sure to increase version when deploying a new version of the same model

Let's use ```gcloud ai-platform models create``` to create a new model:

In [None]:
# create a model object at AI Platform first
! gcloud ai-platform models create $MODEL_NAME --region $REGION

Using endpoint [https://europe-west1-ml.googleapis.com/]
Created ml engine model [projects/demoairenault/models/kers].


Now that we have the model available in AI Platform, let's create the first version of this model.  
We need to point AI Platform to our model in Google Cloud Storage.

In [None]:
# Create model version based on that SavedModel directory
! gcloud ai-platform versions create $MODEL_VERSION --region $REGION --model $MODEL_NAME \
  --runtime-version 2.1 \
  --python-version 3.7 \
  --framework tensorflow \
  --origin $LATEST_MODEL_DIR 
  

Using endpoint [https://europe-west1-ml.googleapis.com/]
Creating version (this might take a few minutes)......done.                    


# Prediction with AI Platform (from a csv file)
At this point we have trained a model and made the model available for serving predictions thanks to AI Platform.  
Let's get some predictions from this model.

## Get the test data
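Let's grab some data to generate predictions on! Note that the query below samples the same `training_data` table the model was trained on (the rows where `MOD(JOBID, 4) = 2`), so the results illustrate the prediction workflow rather than the accuracy on unseen data.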

In [None]:
%%bigquery data_df
SELECT 
       JOBID, VERSION, PERFORMANCE, PRECISION, MPLINK, NTNU, MPLINK_NTNU,
       MBS, RUNEND, TIMESTEP, NBNODES, NBELEM1D, NBELEM2D,
       NBELEM3D, CLUSTER, NBSERVERS, NBCORE, DATACHECK_TIME,
       ELAPSEDTIME
FROM
  `demoairenault.challenge.training_data`
WHERE MOD(JOBID,4) = 2

In [None]:
data_df.head()

Unnamed: 0,JOBID,VERSION,PERFORMANCE,PRECISION,MPLINK,NTNU,MPLINK_NTNU,MBS,RUNEND,TIMESTEP,NBNODES,NBELEM1D,NBELEM2D,NBELEM3D,CLUSTER,NBSERVERS,NBCORE,DATACHECK_TIME,ELAPSEDTIME
0,974258,2018.0.1,PEDESTRIAN,1,NO,NO,NO,NO,40.68,0.0005,1647532,3760,1673587,103822,HPC3,1.0,36,74.7,4510
1,974258,2018.0.1,PEDESTRIAN,1,NO,NO,NO,NO,40.68,0.0005,1647532,3760,1673587,103822,HPC3,1.0,36,74.7,4510
2,598282,2016.05,UNKNOWN,1,NO,NO,NO,NO,30000.0,0.0005,504,0,410,0,HPC1,2.0,48,0.48,7340
3,624046,2016.05,UNKNOWN,1,NO,YES,NO,NO,20.0,0.0005,133650,44,127307,0,HPC1,2.0,48,5.86,1870
4,625322,2016.05,UNKNOWN,1,NO,YES,NO,NO,20.0,0.0005,129912,44,128857,0,HPC1,2.0,48,5.76,1930


In [None]:
# Need to avoid "serving skew"! 
# Preprocess test data the same way as we did for training
CATEGORICAL_TYPES = {'VERSION': pd.api.types.CategoricalDtype(['2018.0.1', '2016.05', '2016.06', '2019.0.2', '2012.7', '2019.0', '2018.0', '2016.01', '2019.0.1', '2019']),
                     'PERFORMANCE': pd.api.types.CategoricalDtype(['POLE', 'PEDESTRIAN', 'UNKNOWN', 'ECE', 'FRONT', 'RCAR', 'COCKPIT', 'OVERSLAM', 'REAR', 'SIDE', 'WHEEL']),
                    'MPLINK': pd.api.types.CategoricalDtype(['YES', 'NO']),
                    'NTNU': pd.api.types.CategoricalDtype(['NO', 'YES']),
                    'MPLINK_NTNU': pd.api.types.CategoricalDtype(['NO', 'YES']),
                    'MBS': pd.api.types.CategoricalDtype(['NO', 'USED']),
                    'CLUSTER':pd.api.types.CategoricalDtype(['HPC3', 'HPC1', 'HPC2'])}
                                                              
TARGET_COLUMN = 'ELAPSEDTIME'


BATCH_SIZE = 128
NUM_EPOCHS = 5
LEARNING_RATE = 0.001

# Read the data from BigQuery
# you can read from other sources to pandas DataFrame
print(f'First rows for the raw dataset: \n{data_df.head()}')

# Convert integer valued (numeric) columns to floating point
numeric_columns = data_df.select_dtypes(['int64']).columns
data_df[numeric_columns] = data_df[numeric_columns].astype('float32')

# Convert categorical columns to numeric
cat_columns = data_df.select_dtypes(['object']).columns
data_df[cat_columns] = data_df[cat_columns].astype('category')
data_df[cat_columns] = data_df[cat_columns].apply(lambda x: x.astype(
        CATEGORICAL_TYPES[x.name]))
data_df[cat_columns] = data_df[cat_columns].apply(lambda x: x.cat.codes)
print(f'First rows for the transformed dataset: \n{data_df.head()}')


First rows for the raw dataset: 
    JOBID   VERSION PERFORMANCE  PRECISION MPLINK NTNU MPLINK_NTNU MBS  \
0  974258  2018.0.1  PEDESTRIAN          1     NO   NO          NO  NO   
1  974258  2018.0.1  PEDESTRIAN          1     NO   NO          NO  NO   
2  598282   2016.05     UNKNOWN          1     NO   NO          NO  NO   
3  624046   2016.05     UNKNOWN          1     NO  YES          NO  NO   
4  625322   2016.05     UNKNOWN          1     NO  YES          NO  NO   

     RUNEND  TIMESTEP  NBNODES  NBELEM1D  NBELEM2D  NBELEM3D CLUSTER  \
0     40.68    0.0005  1647532      3760   1673587    103822    HPC3   
1     40.68    0.0005  1647532      3760   1673587    103822    HPC3   
2  30000.00    0.0005      504         0       410         0    HPC1   
3     20.00    0.0005   133650        44    127307         0    HPC1   
4     20.00    0.0005   129912        44    128857         0    HPC1   

   NBSERVERS  NBCORE  DATACHECK_TIME  ELAPSEDTIME  
0        1.0      36           74.70 

### Prepare prediction input file
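The `--json-request` flag of `gcloud ai-platform predict` takes a single JSON
document of the form `{"instances": [...]}`. This particular Keras model has one
named input per feature, so each instance is a JSON object that maps every
feature name to its numeric (already encoded) value.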

AI Platform requires a different format when you make online prediction requests to the REST API without using the `gcloud` tool. The way you structure
your model may also change how you must format data for prediction. Learn more
about [formatting data for online
prediction](https://cloud.google.com/ml-engine/docs/tensorflow/prediction-overview#prediction_input_data).

Test first on a few samples:

In [None]:
# Export the prediction input to a JSON file in the format accepted by AI Platform
import json

prediction_dict_sample = test_df.drop('JOBID', axis=1).drop('ELAPSEDTIME',axis=1)[:5].to_dict('records')

with open('prediction_input.json', 'w') as json_file:
    json.dump({'instances': prediction_dict_sample}, json_file, indent=' ')

! cat prediction_input.json

{
 "instances": [
  {
   "VERSION": 0,
   "PERFORMANCE": 1,
   "PRECISION": 1.0,
   "MPLINK": 1,
   "NTNU": 0,
   "MPLINK_NTNU": 0,
   "MBS": 0,
   "RUNEND": 40.68,
   "TIMESTEP": 0.0005,
   "NBNODES": 1647532.0,
   "NBELEM1D": 3760.0,
   "NBELEM2D": 1673587.0,
   "NBELEM3D": 103822.0,
   "CLUSTER": 0,
   "NBSERVERS": 1.0,
   "NBCORE": 36.0,
   "DATACHECK_TIME": 74.7
  },
  {
   "VERSION": 0,
   "PERFORMANCE": 1,
   "PRECISION": 1.0,
   "MPLINK": 1,
   "NTNU": 0,
   "MPLINK_NTNU": 0,
   "MBS": 0,
   "RUNEND": 40.68,
   "TIMESTEP": 0.0005,
   "NBNODES": 1647532.0,
   "NBELEM1D": 3760.0,
   "NBELEM2D": 1673587.0,
   "NBELEM3D": 103822.0,
   "CLUSTER": 0,
   "NBSERVERS": 1.0,
   "NBCORE": 36.0,
   "DATACHECK_TIME": 74.7
  },
  {
   "VERSION": 1,
   "PERFORMANCE": 2,
   "PRECISION": 1.0,
   "MPLINK": 1,
   "NTNU": 0,
   "MPLINK_NTNU": 0,
   "MBS": 0,
   "RUNEND": 30000.0,
   "TIMESTEP": 0.0005,
   "NBNODES": 504.0,
   "NBELEM1D": 0.0,
   "NBELEM2D": 410.0,
   "NBELEM3D": 0.0,
   "CLUSTER":

### Test predictions on few samples

Use ```gcloud ai-platform predict``` to generate predictions from your model:

In [None]:
! gcloud ai-platform predict \
  --region $REGION \
  --model $MODEL_NAME \
  --version $MODEL_VERSION \
  --json-request prediction_input.json

Using endpoint [https://europe-west1-ml.googleapis.com/]
[[7710.03711], [7710.03711], [2029.61194], [1901.55], [1439.16016]]


### Online predictions on the whole dataset

Let's now get predictions for the whole dataset.

In [None]:
# Helper copied from the AI Platform console
import googleapiclient.discovery
from google.api_core.client_options import ClientOptions

def predict_json(project, model, instances, version=None, region='europe-west1'):
    """Send json data to a deployed model for prediction.

    Args:
        project (str): project where the Cloud ML Engine Model is deployed.
        model (str): model name.
        instances ([Mapping[str: Any]]): Keys should be the names of Tensors
            your deployed model expects as inputs. Values should be datatypes
            convertible to Tensors, or (potentially nested) lists of datatypes
            convertible to tensors.
        version: str, version of the model to target. When None, the request
            is addressed to the model without a version suffix.
        region: str, region whose AI Platform endpoint serves the model.
            Defaults to 'europe-west1', the region used throughout this
            notebook. Pass None or '' to use the global endpoint.
    Returns:
        Mapping[str: any]: dictionary of prediction results defined by the
            model.
    Raises:
        RuntimeError: if the service response contains an 'error' entry.
    """
    # The model must be queried through the endpoint it was deployed on.
    if region:
        endpoint = 'https://{}-ml.googleapis.com'.format(region)
    else:
        endpoint = 'https://ml.googleapis.com'
    client_options = ClientOptions(api_endpoint=endpoint)
    service = googleapiclient.discovery.build('ml', 'v1', client_options=client_options)

    name = 'projects/{}/models/{}'.format(project, model)
    if version is not None:
        name += '/versions/{}'.format(version)

    response = service.projects().predict(
        name=name,
        body={'instances': instances}
    ).execute()

    if 'error' in response:
        raise RuntimeError(response['error'])

    return response['predictions']

We need to provide our Project ID to the online predictions service.  
Project ID is a unique identifier for the Google Cloud environment you are currently using.  
Let's use ```gcloud config get-value project``` to get this property from gcloud:

In [None]:
PROJECT_ID = !gcloud config get-value project

In [None]:
# The `!gcloud ...` capture is a list-like object of output lines; collapse it
# into a plain string. The guard keeps this cell re-runnable: on a second
# execution PROJECT_ID is already a str, which has no `get_nlstr`.
if not isinstance(PROJECT_ID, str):
    PROJECT_ID = PROJECT_ID.get_nlstr()

In [None]:
from functools import partial

In [None]:
!echo $MODEL_NAME

kers


In [None]:
# Bind the deployment coordinates once so callers only have to pass `instances`.
# MODEL_NAME / MODEL_VERSION are the values the model was deployed under, so
# predictions keep targeting the right model when those are changed.
get_predictions = partial(
    predict_json,
    project=PROJECT_ID,
    model=MODEL_NAME,
    version=MODEL_VERSION,
)

In [None]:
BATCH_SIZE = 1024

In [None]:
num_batches = len(test_df)//BATCH_SIZE; num_batches

43

In [None]:
test_df

Unnamed: 0,VERSION,PERFORMANCE,PRECISION,MPLINK,NTNU,MPLINK_NTNU,MBS,RUNEND,TIMESTEP,NBNODES,NBELEM1D,NBELEM2D,NBELEM3D,CLUSTER,NBSERVERS,NBCORE,DATACHECK_TIME,ELAPSEDTIME
0,0,1,1.0,1,0,0,0,40.68,0.0005,1647532.0,3760.0,1673587.0,103822.0,0,1.0,36.0,74.70,4510.0
1,0,1,1.0,1,0,0,0,40.68,0.0005,1647532.0,3760.0,1673587.0,103822.0,0,1.0,36.0,74.70,4510.0
2,1,2,1.0,1,0,0,0,30000.00,0.0005,504.0,0.0,410.0,0.0,1,2.0,48.0,0.48,7340.0
3,1,2,1.0,1,1,0,0,20.00,0.0005,133650.0,44.0,127307.0,0.0,1,2.0,48.0,5.86,1870.0
4,1,2,1.0,1,1,0,0,20.00,0.0005,129912.0,44.0,128857.0,0.0,1,2.0,48.0,5.76,1930.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
44599,0,4,1.0,1,0,1,1,160.01,0.0005,7819462.0,24134.0,7760306.0,837887.0,0,6.0,216.0,851.00,39800.0
44600,0,4,1.0,1,0,1,0,120.01,0.0005,8107110.0,31068.0,7255714.0,2287615.0,0,6.0,216.0,782.00,29800.0
44601,0,4,1.0,1,0,1,0,120.01,0.0005,8107110.0,31068.0,7255714.0,2287615.0,0,6.0,216.0,782.00,30100.0
44602,0,4,1.0,1,0,1,0,120.01,0.0005,8107110.0,31068.0,7255714.0,2287615.0,0,6.0,216.0,782.00,29800.0


In [None]:
prediction_scores = []

# Send only the model inputs: drop the target and, if it is still present, the
# job identifier. A dedicated name avoids overwriting the exploration `df`.
features_df = test_df.drop(columns=['JOBID', 'ELAPSEDTIME'], errors='ignore')

# Iterate over batch start offsets: every batch is non-empty (no empty request
# when the row count is an exact multiple of BATCH_SIZE) and tqdm derives the
# correct total from the range itself.
batch_starts = range(0, len(features_df), BATCH_SIZE)
for start in tqdm(batch_starts, position=0):
    batch_df = features_df.iloc[start:start + BATCH_SIZE]
    pred = get_predictions(instances=batch_df.to_dict('records'))
    prediction_scores.extend(pred)

44it [00:08,  5.12it/s]                        


In [None]:
prediction_scores[:5]

[[7710.03711], [7710.03711], [2029.61194], [1901.55], [1439.16016]]

# Clean up

Delete all versions and all models:

In [None]:
import time
import googleapiclient.discovery

service = googleapiclient.discovery.build('ml', 'v1')

project = !gcloud config get-value project
project = project.get_nlstr()

def get_models(project):
    """List the AI Platform models of a project.

    Args:
        project (str): project id; queried as 'projects/<project>'.
    Returns:
        list: the model resources found under the "models" key of the
            response, or an empty list when the response has no such key
            (e.g. nothing to list).
    """
    # NOTE(review): only the first page of results is read; confirm this is
    # enough for projects with many models.
    response = service.projects().models().list(
        parent='projects/{}'.format(project)
    ).execute()

    return response.get("models", [])

def get_versions(model):
    """List the versions of a model.

    Args:
        model (str): full resource name of the model, used as `parent`.
    Returns:
        list: the version resources found under the "versions" key of the
            response, or an empty list when the response has no such key
            (e.g. a model without any version).
    """
    response = service.projects().models().versions().list(
        parent=model
    ).execute()

    return response.get("versions", [])

def delete_version(version):
    """Request the deletion of one model version.

    Args:
        version (Mapping): version resource; only its "name" is used.
    Returns:
        The "name" field of the response, which the caller later passes to
        `is_version_deleted` to poll for completion.
    """
    print("Deleting version: ", version["name"])

    response = service.projects().models().versions().delete(
        name=version["name"]
    ).execute()

    # Best effort: report an error carried by the response, but keep going.
    # (This used to print an undefined name and crash with NameError.)
    if "error" in response:
        print(response["error"])

    return response["name"]

def delete_model(model):
    """Request the deletion of one model.

    Args:
        model (Mapping): model resource; only its "name" is used.
    Returns:
        None. An error carried by the response is printed, not raised.
    """
    print("Deleting model: ", model["name"])

    response = service.projects().models().delete(
        name=model["name"]
    ).execute()

    # Best effort: report the error and continue with the remaining models.
    # (This used to print an undefined name and crash with NameError.)
    if "error" in response:
        print(response["error"])

def is_version_deleted(operation):
    """Tell whether a deletion operation has finished.

    Args:
        operation (str): operation name, as returned by `delete_version`.
    Returns:
        bool: True when the operation resource reports a truthy "done" field;
            False when the field is missing or false.
    """
    print("Checking status for operation: ", operation)

    response = service.projects().operations().get(
        name=operation
    ).execute()

    print(response)
    # Look at the VALUE of "done": an explicit `"done": false` must not be
    # mistaken for completion just because the key is present.
    return bool(response.get("done", False))

def _wait_for_deletions(operations, poll_seconds=5):
    """Poll until every deletion operation in `operations` is reported finished."""
    pending = list(operations)
    while pending:
        # Try again after a short pause
        print("Waiting {}s".format(poll_seconds))
        time.sleep(poll_seconds)
        # Build a new list instead of removing from the one being iterated:
        # removing during iteration makes the loop skip the following item.
        still_pending = []
        for operation in pending:
            if is_version_deleted(operation):
                print("Deletion completed: ", operation)
            else:
                still_pending.append(operation)
        pending = still_pending


models = get_models(project)

default_version_deletions = []
for model in models:
    print('Model: ', model["name"])
    versions = get_versions(model["name"])

    # Split the versions up front rather than mutating `versions` while
    # looping over it (which skipped every other non-default version).
    default_versions = [v for v in versions if v.get("isDefault")]
    other_versions = [v for v in versions if not v.get("isDefault")]

    # Non-default versions go first and must be fully deleted ...
    _wait_for_deletions([delete_version(v) for v in other_versions])

    # ... before the default version is removed. A model without any version
    # simply contributes nothing here (no IndexError).
    for version in default_versions:
        default_version_deletions.append(delete_version(version))

_wait_for_deletions(default_version_deletions)

# All versions deleted, now delete the models
for model in models:
    delete_model(model)

Delete the bucket:

In [None]:
# Delete your bucket
!gsutil rm -r gs://{BUCKET_NAME}