# Developing, Training, and Deploying a TensorFlow model on Google Cloud Platform

**Import Libraries**

In [12]:
import pandas as pd
from io import BytesIO
import datetime
from datetime import timedelta

import numpy as np
import pandas as pd
import tensorflow as tf

import datalab.storage as storage

## 1. Setting up Cloud Environment on your GCP Project 

**Change to your bucket and project name, to set up environment in your project, store files in your bucket and to run the model on cloud ml engine**

In [13]:
#Make sure you put the correct values here !!!
BUCKET='nyc_servicerequest'
PROJECT='summerai'
REGION='us-west1'

In [14]:
import os
os.environ['BUCKET'] = BUCKET
os.environ['PROJECT'] = PROJECT
os.environ['REGION'] = REGION

## 2. Data Preprocessing

**Select the data file, date and the target variables that you want to use for the revenue forecast**

In [31]:
! gsutil cp gs://nyc_servicerequest/processedInput/eval2.csv eval2.csv

Copying gs://nyc_servicerequest/processedInput/eval2.csv...
| [1 files][183.0 MiB/183.0 MiB]                                                
Operation completed over 1 objects/183.0 MiB.                                    


In [32]:
pd.read_csv('eval2.csv')[['day_period', 'day_of_week', 'zip_encode',
       'location_encode', 'community_encode', 'agency_encode',
       'complaint_encode']].to_csv('eval2.csv')

In [33]:
!gsutil cp 'eval2.csv' gs://nyc_servicerequest/processedInput/

Copying file://eval2.csv [Content-Type=text/csv]...
==> NOTE: You are uploading one or more large file(s), which would run
significantly faster if you enable parallel composite uploads. This
feature can be enabled by editing the
"parallel_composite_upload_threshold" value in your .boto
configuration file. However, note that if you do this large files will
be uploaded as `composite objects
<https://cloud.google.com/storage/docs/composite-objects>`_,which
means that any user who downloads such objects will need to have a
compiled crcmod installed (see "gsutil help crcmod"). This is because
without a compiled crcmod, computing checksums on composite objects is
so slow that gsutil disables downloads of composite objects.

|
Operation completed over 1 objects/166.2 MiB.                                    


## Train Locally

In [12]:
!gsutil cp gs://nyc_servicerequest/encodedInput/train0.csv trainx.csv

Copying gs://nyc_servicerequest/encodedInput/train0.csv...
- [1 files][ 45.9 MiB/ 45.9 MiB]                                                
Operation completed over 1 objects/45.9 MiB.                                     


In [13]:
pd.read_csv('trainx.csv').head()

Unnamed: 0,2,1,1.1,1.2,3,2.1,0,0.1,1.3,0.2,0.3,0.4,1.4,1.273
0,5,1,3,1,3,2,0,0,1,0,0,0,1,85.838
1,6,1,3,1,3,2,1,0,0,0,1,0,0,59.059
2,9,1,3,1,3,2,1,0,0,0,0,0,1,79.434
3,10,1,0,1,2,2,0,0,1,0,0,0,1,26.4
4,12,1,3,1,2,2,0,0,0,1,1,0,0,24.0


In [15]:
import TensorflowTrainer

In [38]:
%%bash
rm -rf nyc_rides_model
python -m TensorflowTrainer.task \
  --train_data_paths=gs://${BUCKET}/encodedInput/train0.csv \
  --eval_data_paths=gs://${BUCKET}/encodedInput/eval0.csv \
  --output_dir=nyc_rides_model \
  --job-dir=./tmp \
  --train_steps=1

W0728 07:43:24.843147 139966359180736 deprecation_wrapper.py:119] From TensorflowTrainer/model_2.py:12: The name tf.logging.set_verbosity is deprecated. Please use tf.compat.v1.logging.set_verbosity instead.

W0728 07:43:24.843430 139966359180736 deprecation_wrapper.py:119] From TensorflowTrainer/model_2.py:12: The name tf.logging.INFO is deprecated. Please use tf.compat.v1.logging.INFO instead.

I0728 07:43:24.845433 139966359180736 estimator.py:1790] Using default config.
I0728 07:43:24.846021 139966359180736 estimator.py:209] Using config: {'_save_checkpoints_secs': 600, '_num_ps_replicas': 0, '_keep_checkpoint_max': 5, '_task_type': 'worker', '_global_id_in_cluster': 0, '_is_chief': True, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x7f4c23fc3a50>, '_model_dir': 'nyc_rides_model/', '_protocol': None, '_save_checkpoints_steps': None, '_keep_checkpoint_every_n_hours': 10000, '_service': None, '_session_config': allow_soft_placement: true
graph_option

## 3. Train in Cloud ML 

In [56]:
%%bash
gsutil rm -r gs://${BUCKET}/staging/
JOBNAME=service_requests_$(date -u +%y%m%d_%H%M%S)
gcloud ai-platform jobs submit training $JOBNAME \
  --job-dir=gs://${BUCKET}/staging/ \
  --package-path=${PWD}/TensorflowTrainer \
  --module-name=TensorflowTrainer.task \
  --region=us-west1 \
  --runtime-version=1.14 \
  --python-version=2.7 \
  --scale-tier=BASIC \
  -- \
  --train_data_paths=gs://${BUCKET}/encodedInput/train* \
  --eval_data_paths=gs://${BUCKET}/encodedInput/eval*  \
  --output_dir=gs://${BUCKET}/staging/ 

jobId: service_requests_190729_050747
state: QUEUED


Removing gs://nyc_servicerequest/temp/#1563676735737458...
Removing gs://nyc_servicerequest/temp/packages/06bc60d77e393414e5754e92531164d0124047c57813c79f3bb8c07ed3080928/DecisionTreeTrainer-0.0.0.tar.gz#1564268238153810...
Removing gs://nyc_servicerequest/temp/packages/2599a301f6eb2e1013dd795d93ecfa8bdb35bac5c031f04737c55a0bac505900/DecisionTreeTrainer-0.0.0.tar.gz#1564268711418615...
Removing gs://nyc_servicerequest/temp/packages/43854edb9b9a33aca12a997c529163260bfd23491f809241582c29086d5d639c/DecisionTreeTrainer-0.0.0.tar.gz#1564266345837463...
/ [4 objects]                                                                   
==> NOTE: You are performing a sequence of gsutil operations that may
run significantly faster if you instead use gsutil -m rm ... Please
see the -m section under "gsutil help options" for further information
about when gsutil -m can be advantageous.

Removing gs://nyc_servicerequest/temp/packages/5e1a64d2fb42118743058895e6b6246780e4c4a4273e3686284a5c815f394bdb/D

**Run below lines to see the Hyperparameter Tuning**

In [57]:
#%%bash
#gcloud ai-platform jobs describe service_requests_190729_043852

**Some 5000 Train steps and 2 hours later  .... .... ... .. .. . Time to deploy !**

## 4. Create and Delploy the trained job on model 

**Run it only after your Job has completed running**

In [63]:
%%bash
REGION='us-west1'
MODEL_NAME='tensorflow_linear_model_1'
MODEL_VERSION='v1'
gcloud ml-engine models create $MODEL_NAME
MODEL_LOCATION=$(gsutil ls gs://${BUCKET}/staging/export/exporter/ | tail -1)

gcloud ai-platform versions create ${MODEL_VERSION} \
--model ${MODEL_NAME} \
--origin ${MODEL_LOCATION} \
--runtime-version=1.14 \
--python-version=2.7


Created ml engine model [projects/summerai/models/tensorflow_linear_model_2].
Creating version (this might take a few minutes)......
.............................................................................................................................................................................................................................................................................done.


**THE MODEL IS DEPLOYED .... YAYYY !!!**

In [64]:
%%bash
gcloud ai-platform versions describe v1 \
--model='tensorflow_linear_model_1'

createTime: '2019-07-29T05:14:32Z'
deploymentUri: gs://nyc_servicerequest/temp/export/exporter/1564377077/
etag: 6F8p8kLp3Sc=
framework: TENSORFLOW
isDefault: true
machineType: mls1-c1-m2
name: projects/summerai/models/tensorflow_linear_model_2/versions/v1
pythonVersion: '2.7'
runtimeVersion: '1.14'
state: READY


## Cloud ML Prediction

*Make online Predictions on Evaluation dataset.*

In [45]:
test_features = pd.read_csv('localsave2/x_all_eval.csv', header=None).iloc[1:, :]#.as_matrix().tolist()
test_labels = pd.read_csv('localsave2/y_all_eval.csv', header=None).iloc[1:, :]#.as_matrix().tolist()

In [49]:
test_features[0] = test_features[0].apply(int)

In [52]:
test_features = test_features.as_matrix().tolist()

  """Entry point for launching an IPython kernel.


In [93]:
len(test_features), len(test_labels)

(2380440, 2380440)

[[0, 0, 0, 1, 1, 2, 0, 0, 1, 0, 1, 0, 0],
 [1, 0, 3, 1, 0, 2, 0, 1, 0, 0, 0, 1, 0],
 [2, 0, 3, 1, 0, 2, 1, 0, 0, 0, 0, 0, 1],
 [3, 0, 3, 1, 0, 2, 0, 0, 0, 1, 0, 0, 1],
 [4, 0, 3, 1, 0, 2, 0, 1, 0, 0, 0, 0, 1]]

In [88]:
from googleapiclient import discovery
from oauth2client.client import GoogleCredentials
import json

PROJECT_ID = 'summerai'
VERSION_NAME = 'v1'
MODEL_NAME = 'tensorflow_linear_model_2'

credentials = GoogleCredentials.get_application_default()
api = discovery.build('ml', 'v1', credentials=credentials, cache_discovery=False)

test = test_features[:int(len(test_features)/500)]
request_data = {"instances": test}

 
parent = 'projects/%s/models/%s/versions/%s' % (PROJECT_ID, MODEL_NAME, VERSION_NAME)
responses = api.projects().predict(body = request_data, name = parent).execute()
if 'error' in responses:
    print(responses['error'])
else:
    # Print the first 10 responses
    for i, response in enumerate(responses['predictions'][:10]):
        print('Prediction: {}\t\tActual: {}'.format(response, test_labels[i][0]))


# Due to the size of the data, it needs to be split in 2
#data = test_features[:int(len(test_features)/500)]
#second_half = test_features[int(len(test_features)/2):]




Prediction failed: unknown error.


In [89]:
request_data

{'instances': [['evening',
   'Mon-Tue',
   'zip_bin4',
   'location_bin1',
   'community_bin2',
   'agency_bin6',
   'complaint_bin3'],
  ['afternoon',
   'Fri-Sat-Sun',
   'zip_bin4',
   'location_bin1',
   'community_bin2',
   'agency_bin6',
   'complaint_bin3'],
  ['morning',
   'Mon-Tue',
   'zip_bin4',
   'location_bin4',
   'community_bin2',
   'agency_bin5',
   'complaint_bin3'],
  ['night',
   'Wed-Thu',
   'zip_bin4',
   'location_bin4',
   'community_bin2',
   'agency_bin5',
   'complaint_bin3'],
  ['morning',
   'Mon-Tue',
   'zip_bin4',
   'location_bin4',
   'community_bin2',
   'agency_bin5',
   'complaint_bin3']]}

In [20]:
def error_anaylsis(y_pred, y_actual):
    # mean squared error
    m = len(y_actual)
    
    mse = np.sum((y_pred - y_actual)**2)

    # root mean squared error
    # m is the number of training examples
    rmse = np.sqrt(mse/m)
    
    # sum of square of residuals
    ssr = np.sum((y_pred - y_actual)**2)

    #  total sum of squares
    sst = np.sum((y_actual - np.mean(y_actual))**2)

    # R2 score
    r2_score = 1 - (ssr/sst)
    
    return mse, rmse, ssr, sst, r2_score

In [None]:
mse, rmse, ssr, sst, r2_score = error_anaylsis(responses['predictions'], np.concatenate(test_labels[:int(len(test_features)/100)]))


In [None]:
print("RMSE for the Linear Regression model on the whole dataset ", rmse)