## Online Prediction with scikit-learn on Google Cloud Machine Learning Engine

#### Load Dependencies

In [None]:
import pandas as pd
from sklearn.ensemble import RandomForestRegressor
from sklearn.feature_extraction import DictVectorizer
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.externals import joblib

from google.cloud import bigquery

# Instantiate a BigQuery client with no explicit arguments (project and
# credentials are presumably picked up from the environment -- confirm).
# NOTE(review): `client` is not referenced again in the visible cells.
client = bigquery.Client() 
# (Re)load the google.cloud.bigquery IPython extension; presumably this is what
# registers the %%bigquery cell magic used further down -- confirm.
%reload_ext google.cloud.bigquery

def print_predictions(predictions, actuals=None, limit=5):
    """Print the first few predictions side by side with the true labels.

    Args:
        predictions: Sliceable sequence of numeric predictions (a list or a
            NumPy array, for example).
        actuals: Iterable of true values, positionally aligned with
            ``predictions``. When omitted, the notebook-level ``y_test`` is
            used, which keeps existing one-argument calls working.
        limit: Maximum number of rows to print.

    Rows are paired by position; output stops at the shorter of the two
    inputs instead of raising when fewer actual values are available.
    """
    if actuals is None:
        # Looked up at call time, so this helper may be defined before the
        # train/test split has created y_test.
        actuals = y_test
    # Iterating a pandas Series yields its values in positional order, so this
    # matches positional (.iloc) access even when the index has been shuffled.
    for prediction, actual in zip(predictions[:limit], actuals):
        print('Prediction: %.2f\tActual: %.2f' % (prediction, actual))
        
# Show at most 6 rows when a DataFrame is displayed. The full option name is
# spelled out because set_option resolves its argument by pattern match and
# raises OptionError when an abbreviation such as "max_r" matches more than one
# registered option.
pd.set_option("display.max_rows", 6)

#### Load results from BigQuery into a pandas DataFrame

In [None]:
%%bigquery df
-- Pull up to 1000 rows of the baby-weights table; the cell magic stores the
-- result in the variable `df`.
-- NOTE(review): there is no ORDER BY, so which rows LIMIT returns is
-- presumably not stable from run to run -- confirm if reproducibility matters.
SELECT * FROM 
`sgreenberg-project2.misc_ml.baby_weights`
LIMIT 1000

#### Separate labels from features

In [None]:
# Remove the label column from the feature frame and keep it as the target.
y = df.pop('weight_pounds')

#### Ensure categoricals are strings

In [None]:
# Columns that must reach the vectorizer as strings rather than as
# numbers/booleans.
categorical_columns = ('is_male', 'month', 'state')
for column in categorical_columns:
    df[column] = df[column].map(str)

# One plain dict per row: {column name: value}.
x = df.to_dict(orient='records')

#### Split data into training and testing

In [None]:
# Hold out 20% of the rows for evaluation.
(x_train, x_test,
 y_train, y_test) = train_test_split(x, y, test_size=0.2)

#### Set up the pipeline that will be used for both training and prediction

In [None]:
# Step 1: turn each feature dict into a dense numeric row.
vectorizer = DictVectorizer(sparse=False)
# Step 2: random forest regressor with trees capped at depth 5.
forest = RandomForestRegressor(max_depth=5)

# Chaining both steps means the same object handles raw dicts at training
# time and at prediction time.
pipeline = Pipeline(steps=[
    ("preprocesser", vectorizer),
    ("estimator", forest),
])

#### Train

In [None]:
# Fit every pipeline step (vectorizer, then estimator) on the training split.
pipeline.fit(x_train, y_train)

####  Make predictions (on the local machine)

In [None]:
# Score the held-out rows with the in-memory pipeline and show a sample.
local_predictions = pipeline.predict(x_test)
print_predictions(local_predictions)

#### Export the model

In [None]:
# Serialize the whole fitted pipeline (vectorizer + estimator) to
# ./model.joblib in the working directory.
# NOTE(review): the version created later points --origin at the bucket root,
# so the service presumably looks for this exact file name -- confirm before
# renaming.
joblib.dump(pipeline, './model.joblib')

#### Copy the model to Cloud Storage

In [None]:
# Export the bucket name as an environment variable so the shell commands in
# this and later cells can expand it as $BUCKET_ID.
%env BUCKET_ID sgreenberg-sklearn-cmle
# Upload the serialized pipeline to the root of that bucket.
! gsutil cp ./model.joblib gs://$BUCKET_ID/model.joblib

#### Create a model and model version

In [None]:
# Model name, exported to the environment; read by the gcloud commands below
# (as $MODEL_NAME) and by the prediction cell (via os.environ).
%env MODEL_NAME baby_weight4

In [None]:
# Create the model resource $MODEL_NAME in us-central1; versions are attached
# to it in the next cell.
! gcloud ml-engine models create $MODEL_NAME \
  --regions us-central1

In [None]:
# Version label for this deployment, exported so the command below can expand
# it as $VERSION_NAME.
%env VERSION_NAME v3
# Create that version of $MODEL_NAME from the artifacts under gs://$BUCKET_ID,
# declaring a scikit-learn model on runtime version 1.8.
# NOTE(review): --async presumably returns before the deployment has finished,
# so the version may not be servable immediately -- confirm before predicting.
! gcloud ml-engine versions create $VERSION_NAME \
  --async --model $MODEL_NAME \
  --framework scikit-learn --runtime-version 1.8 \
  --origin gs://$BUCKET_ID

#### Make predictions (on the cloud)

In [None]:
%env VERSION_NAME v2
import googleapiclient.discovery
import os

MODEL_NAME = os.environ['MODEL_NAME']
VERSION_NAME = os.environ['VERSION_NAME']

service = googleapiclient.discovery.build('ml', 'v1')
name = 'projects/sgreenberg-project2/models/%s' % MODEL_NAME
name += '/versions/%s' % VERSION_NAME

responses = service.projects().predict(name=name,
    body={'instances': x_test}).execute()

print_predictions(responses['predictions'])