In [3]:
# Notebook-wide configuration. Edit the three values tagged CHANGE THIS so they
# match your own project, bucket and training file.
PROJECT_NUM = '1064712527705'                 # CHANGE THIS
STORAGE_BUCKET = 'papi-bucket'                # CHANGE THIS
TRAINING_DATA_FILE = 'sample/train.csv'       # CHANGE THIS

# Model type names and the ids used for the two taxi-fare models.
CLASSIFICATION_MODEL_TYPE = 'CLASSIFICATION'
REGRESSION_MODEL_TYPE = 'REGRESSION'
TAXIFARE_CLASSIFICATION_MODEL_ID = 'taxifare_classification'
TAXIFARE_REGRESSION_MODEL_ID = 'taxifare_regression'

# Bucket-qualified location of the training file ("<bucket>/<file>").
TRAINING_DATA_FILE_LOCATION = '/'.join([STORAGE_BUCKET, TRAINING_DATA_FILE])

In [4]:
# Sample calls to predict from hosted models in Predictions API.
from googleapiclient.discovery import build
from oauth2client.client import GoogleCredentials
import time

CREDENTIALS = GoogleCredentials.get_application_default()
PREDICTION_SERVICE = build('prediction', 'v1.6', credentials=CREDENTIALS)
HOSTED_MODELS_PROJECT_NUM = '414649711441'
HOSTED_MODEL_NAME = 'sample.sentiment'

body = {'input': {'csvInstance': ['hello how are you today']}}
result = PREDICTION_SERVICE.hostedmodels().predict( project=HOSTED_MODELS_PROJECT_NUM, hostedModelName=HOSTED_MODEL_NAME, body=body ).execute()

print 'Basic predict call to hosted model:', result

Basic predict call to hosted model: {u'kind': u'prediction#output', u'outputLabel': u'positive', u'id': u'sample.sentiment', u'selfLink': u'https://www.googleapis.com/prediction/v1.6/projects/414649711441/hostedmodels/sample.sentiment/predict', u'outputMulti': [{u'score': u'0.784671', u'label': u'positive'}, {u'score': u'0.186649', u'label': u'negative'}, {u'score': u'0.028680', u'label': u'neutral'}]}


In [6]:
# Delete trained models in Predictions API if necessary to showcase recreated them.
result = PREDICTION_SERVICE.trainedmodels().delete( project=PROJECT_NUM, id=TAXIFARE_CLASSIFICATION_MODEL_ID ).execute()
result = PREDICTION_SERVICE.trainedmodels().delete( project=PROJECT_NUM, id=TAXIFARE_REGRESSION_MODEL_ID ).execute()

print 'Call to delete the model in case it is still around from a previous run:', result

Call to delete the model in case it is still around from a previous run: 


In [7]:
# Insert (analyze, train, deploy) new classification model in Predictions API.
body = {
  'id': TAXIFARE_CLASSIFICATION_MODEL_ID,
  'storageDataLocation': TRAINING_DATA_FILE_LOCATION,
  'modelType': CLASSIFICATION_MODEL_TYPE,
}
result = PREDICTION_SERVICE.trainedmodels().insert( project=PROJECT_NUM, body=body ).execute()

print 'Call to insert a new classification model based on training data:', result

Call to insert a new classification model based on training data: {u'kind': u'prediction#training', u'id': u'taxifare_classification', u'selfLink': u'https://www.googleapis.com/prediction/v1.6/projects/1064712527705/trainedmodels/taxifare_classification', u'storageDataLocation': u'papi-bucket/sample/train.csv'}


In [8]:
# Insert (analyze, train, deploy) new regression model in Predictions API.
body = {
  'id': TAXIFARE_REGRESSION_MODEL_ID,
  'storageDataLocation': TRAINING_DATA_FILE_LOCATION,
  'modelType': REGRESSION_MODEL_TYPE,
}
result = PREDICTION_SERVICE.trainedmodels().insert( project=PROJECT_NUM, body=body ).execute()

print 'Call to insert a new regression model based on training data:', result

Call to insert a new regression model based on training data: {u'kind': u'prediction#training', u'id': u'taxifare_regression', u'selfLink': u'https://www.googleapis.com/prediction/v1.6/projects/1064712527705/trainedmodels/taxifare_regression', u'storageDataLocation': u'papi-bucket/sample/train.csv'}


In [None]:
# Get newly created classification model in Predictions API.
result = PREDICTION_SERVICE.trainedmodels().get( project=PROJECT_NUM, id=TAXIFARE_CLASSIFICATION_MODEL_ID ).execute()
initial_delay = 1
while (result['trainingStatus'] != 'DONE'):
  print 'Status is: ', result['trainingStatus'], '\tCurrent delay: ', initial_delay
  time.sleep(initial_delay)
  if (initial_delay < 30):
    initial_delay = initial_delay * 2
  result = PREDICTION_SERVICE.trainedmodels().get( project=PROJECT_NUM, id=TAXIFARE_CLASSIFICATION_MODEL_ID ).execute()

print 'Call to get the new classification model that was just created: ', result

Status is:  RUNNING 	Current delay:  1
Status is:  RUNNING 	Current delay:  2
Status is:  RUNNING 	Current delay:  4
Status is:  RUNNING 	Current delay:  8
Status is:  RUNNING 	Current delay:  16
Status is:  RUNNING 	Current delay:  32
Status is:  RUNNING 	Current delay:  32
Status is:  RUNNING 	Current delay:  32


In [5]:
# Get newly created regression model in Predictions API.
result = PREDICTION_SERVICE.trainedmodels().get( project=PROJECT_NUM, id=TAXIFARE_REGRESSION_MODEL_ID ).execute()
initial_delay = 1
while (result['trainingStatus'] != 'DONE'):
  print 'Status is: ', result['trainingStatus'], '\tCurrent delay: ', initial_delay
  time.sleep(initial_delay)
  if (initial_delay < 30):
    initial_delay = initial_delay * 2
  result = PREDICTION_SERVICE.trainedmodels().get( project=PROJECT_NUM, id=TAXIFARE_REGRESSION_MODEL_ID ).execute()

print 'Call to get the new regression model that was just created: ', result

Call to get the new regression model that was just created:  {u'kind': u'prediction#training', u'created': u'2017-03-23T12:56:19.738Z', u'trainingStatus': u'DONE', u'modelInfo': {u'numberInstances': u'22401', u'meanSquaredError': u'126.48', u'modelType': u'regression'}, u'trainingComplete': u'2017-03-23T13:00:44.360Z', u'id': u'taxifare_regression', u'selfLink': u'https://www.googleapis.com/prediction/v1.6/projects/1064712527705/trainedmodels/taxifare_regression'}


In [None]:
# Measure error in newly created models in Predictions API.
# Each validation sample is sent to both models, and the squared difference
# between the expected and predicted fare is accumulated to report total
# squared error, mean squared error and RMSE per model.
import math

# This is a sample of a larger set of validation data. Each entry is
# (expected result, csv input for a prediction).
# A list of pairs is used instead of a dict keyed by the expected result:
# several samples share the same expected fare ('9.3' and '6.9' each occur
# twice), and duplicate dict keys would silently drop all but the last of them.
validation_data = [
  ('12.1', 'Sun,0,-73.984685,40.769262,-73.991065,40.728145,5.0,2009-05-31 00:48:00.000000-73.984740.769340.7281-73.9911'),
  ('15.3', 'Sun,0,-74.006927,40.739993,-73.950025,40.773403,1.0,2009-05-31 00:48:00.000000-74.006940.7440.7734-73.95'),
  ('9.3', 'Sun,0,-73.977345,40.779387,-73.97615,40.778867,1.0,2009-05-31 00:48:00.000000-73.977340.779440.7789-73.9762'),
  ('12.5', 'Sun,0,-73.97136,40.794413,-73.99623,40.74524,1.0,2009-05-31 00:48:00.000000-73.971440.794440.7452-73.9962'),
  ('6.1', 'Sun,0,-73.997642,40.763853,-73.99485,40.750282,1.0,2009-05-31 00:48:00.000000-73.997640.763940.7503-73.9948'),
  ('14.9', 'Sun,0,-74.004538,40.742202,-73.955823,40.773485,1.0,2009-05-31 00:48:00.000000-74.004540.742240.7735-73.9558'),
  ('11.7', 'Sun,0,-74.000589,40.73731,-73.985902,40.692725,1.0,2012-06-10 00:46:17.000000-74.000640.737340.6927-73.9859'),
  ('6.5', 'Sun,0,-73.995432,40.72114,-73.992403,40.719745,1.0,2009-05-31 00:48:00.000000-73.995440.721140.7197-73.9924'),
  ('5.3', 'Sun,0,-73.945033,40.779203,-73.952037,40.766802,1.0,2009-05-31 00:48:00.000000-73.94540.779240.7668-73.952'),
  ('6.9', 'Sun,0,-73.968592,40.693262,-73.99231,40.694317,1.0,2009-05-31 00:48:00.000000-73.968640.693340.6943-73.9923'),
  ('7.3', 'Sun,0,-74.004307,40.722348,-73.981768,40.73257,1.0,2009-05-31 00:48:00.000000-74.004340.722340.7326-73.9818'),
  ('3.5', 'Sun,2,-73.995862,40.764342,-73.994105,40.761385,5.0,2014-04-27 02:57:00.000000-73.995940.764340.7614-73.9941'),
  ('6.9', 'Sun,2,-73.979281,40.759491,-74.00226,40.751751,1.0,2010-01-03 02:03:40.000000-73.979340.759540.7518-74.0023'),
  ('10.1', 'Sun,2,-73.99367,40.721015,-73.975817,40.681697,2.0,2009-12-06 02:12:00.000000-73.993740.72140.6817-73.9758'),
  ('5.7', 'Sun,2,-74.002428,40.730257,-74.009255,40.738795,1.0,2009-12-06 02:12:00.000000-74.002440.730340.7388-74.0093'),
  ('9.3', 'Sun,2,-74.006145,40.743685,-73.979365,40.761348,2.0,2009-12-06 02:12:00.000000-74.006140.743740.7613-73.9794'),
  ('14.0', 'Sun,2,-73.939992,40.75127,-73.990035,40.741842,1.0,2014-04-27 02:57:00.000000-73.9440.751340.7418-73.99'),
  ('9.5', 'Sun,2,-74.004012,40.732882,-73.990517,40.761255,3.0,2014-04-27 02:57:00.000000-74.00440.732940.7613-73.9905'),
  ('12.0', 'Sun,2,-73.985962,40.722237,-73.960253,40.762042,5.0,2014-04-27 02:57:00.000000-73.98640.722240.762-73.9603'),
]
classification_total_squared_error = 0.0
regression_total_squared_error = 0.0
for expected_label, csv_row in validation_data:
  # The prediction request takes the row as a list of individual field values.
  body = {
    'input': {
      'csvInstance': csv_row.split(',')
    }
  }
  classification_result = PREDICTION_SERVICE.trainedmodels().predict( project=PROJECT_NUM, id=TAXIFARE_CLASSIFICATION_MODEL_ID, body=body ).execute()
  regression_result = PREDICTION_SERVICE.trainedmodels().predict( project=PROJECT_NUM, id=TAXIFARE_REGRESSION_MODEL_ID, body=body ).execute()

  expected_result = float(expected_label)
  # The classification response is read from 'outputLabel' and the regression
  # response from 'outputValue'; both are converted to float before comparing.
  classification_error = expected_result - float(classification_result['outputLabel'])
  classification_total_squared_error += classification_error ** 2
  regression_error = expected_result - float(regression_result['outputValue'])
  regression_total_squared_error += regression_error ** 2

# Every validation sample is now counted, including those with repeated fares.
count = len(validation_data)
classification_mean_squared_error = classification_total_squared_error / count
classification_rmse = math.sqrt(classification_mean_squared_error)
regression_mean_squared_error = regression_total_squared_error / count
regression_rmse = math.sqrt(regression_mean_squared_error)
print 'Total Classification Squared Error: ', classification_total_squared_error, ' Mean Squared Error: ', classification_mean_squared_error, ' RMSE: ', classification_rmse
print 'Total Regression Squared Error: ', regression_total_squared_error, ' Mean Squared Error: ', regression_mean_squared_error, ' RMSE: ', regression_rmse

In [101]:
# Other sample calls in Predictions API (list, analyze).
result = PREDICTION_SERVICE.trainedmodels().list( project=PROJECT_NUM ).execute()
print 'Call to list all the models: ', result

classification_result = PREDICTION_SERVICE.trainedmodels().analyze( project=PROJECT_NUM, id=TAXIFARE_CLASSIFICATION_MODEL_ID ).execute()
regression_result = PREDICTION_SERVICE.trainedmodels().analyze( project=PROJECT_NUM, id=TAXIFARE_REGRESSION_MODEL_ID ).execute()

print 'Call to analyze the classification model: ', classification_result
print 'Call to analyze the regression model: ', regression_result

Call to list all the models:  {u'items': [{u'kind': u'prediction#training', u'id': u'taxifare_classification'}, {u'kind': u'prediction#training', u'id': u'taxifare_regression'}], u'kind': u'prediction#list', u'selfLink': u'https://www.googleapis.com/prediction/v1.6/projects/1064712527705/trainedmodels/list'}
Call to analyze the regression model:  {u'dataDescription': {u'outputFeature': {u'numeric': {u'count': u'22401', u'variance': u'92.06', u'mean': u'11.35'}}, u'features': [{u'index': u'0', u'categorical': {u'count': u'22401', u'values': [{u'count': u'3077', u'value': u'Fri'}, {u'count': u'3717', u'value': u'Mon'}, {u'count': u'2785', u'value': u'Sat'}, {u'count': u'3734', u'value': u'Sun'}, {u'count': u'3259', u'value': u'Thu'}, {u'count': u'2536', u'value': u'Tue'}, {u'count': u'3293', u'value': u'Wed'}]}}, {u'index': u'1', u'numeric': {u'count': u'22401', u'variance': u'37.27', u'mean': u'13.45'}}, {u'index': u'2', u'numeric': {u'count': u'22401', u'variance': u'0.00', u'mean': u'