## 2. Set up environment

In [7]:
import pandas as pd
import xgboost as xgb
import numpy as np

from sklearn.model_selection import train_test_split
from sklearn.utils import shuffle
from google.cloud import bigquery

## 3. Exploring the BigQuery dataset

In [8]:
query="""
SELECT
  weight_pounds,
  is_male,
  mother_age,
  plurality,
  gestation_weeks
FROM
  publicdata.samples.natality
WHERE year > 2000
LIMIT 10000
"""
df = bigquery.Client().query(query).to_dataframe()
df.head()

Unnamed: 0,weight_pounds,is_male,mother_age,plurality,gestation_weeks
0,7.936641,False,30,1,38.0
1,8.624484,True,34,1,39.0
2,6.937947,True,23,1,39.0
3,6.935743,True,18,1,39.0
4,8.318041,False,23,1,40.0


In [9]:
df.describe()

Unnamed: 0,weight_pounds,mother_age,plurality,gestation_weeks
count,9995.0,10000.0,10000.0,9885.0
mean,7.263349,27.181,1.0353,38.705311
std,1.315565,6.1537,0.196616,2.56775
min,0.507063,12.0,1.0,18.0
25%,6.624891,22.0,1.0,38.0
50%,7.374463,27.0,1.0,39.0
75%,8.062305,32.0,1.0,40.0
max,12.625874,48.0,4.0,47.0


In [10]:
df['is_male'].value_counts()

True     5198
False    4802
Name: is_male, dtype: int64

## 4. Prepare the data for training

### Step 1: Extract the label column

#### First drop rows with null values from the dataset and shuffle the data:

In [11]:
df = df.dropna()
df = shuffle(df, random_state=2)

In [12]:
labels = df['weight_pounds']
data = df.drop(columns=['weight_pounds'])

### Step 2: Convert categorical features to integers

In [13]:
data['is_male'] = data['is_male'].astype(int)

### Step 3: Split data into train and test sets

In [14]:
x,y = data,labels
x_train,x_test,y_train,y_test = train_test_split(x,y)

# 6. Build, train, and evaluate an XGBoost model

### Step 1: Define and train the XGBoost model

In [15]:
model = xgb.XGBRegressor(
    objective='reg:linear'
)

In [16]:
model.fit(x_train, y_train)



XGBRegressor(base_score=0.5, booster='gbtree', colsample_bylevel=1,
             colsample_bynode=1, colsample_bytree=1, gamma=0,
             importance_type='gain', learning_rate=0.1, max_delta_step=0,
             max_depth=3, min_child_weight=1, missing=None, n_estimators=100,
             n_jobs=1, nthread=None, objective='reg:linear', random_state=0,
             reg_alpha=0, reg_lambda=1, scale_pos_weight=1, seed=None,
             silent=None, subsample=1, verbosity=1)

### Step 2: Evaluate your model on test data

In [17]:
y_pred = model.predict(x_test)

In [18]:
for i in range(20):
    print('Predicted weight: ', y_pred[i])
    print('Actual weight: ', y_test.iloc[i])
    print()

Predicted weight:  7.7515044
Actual weight:  7.3744626639

Predicted weight:  7.8159895
Actual weight:  6.87621795178

Predicted weight:  7.5045266
Actual weight:  7.99837086536

Predicted weight:  7.3416543
Actual weight:  6.4374980503999994

Predicted weight:  7.3158646
Actual weight:  7.936641432

Predicted weight:  7.7745075
Actual weight:  6.2809698443799995

Predicted weight:  7.0386376
Actual weight:  6.3118345610599995

Predicted weight:  7.2114053
Actual weight:  7.59933417114

Predicted weight:  7.614057
Actual weight:  5.74965579296

Predicted weight:  7.187857
Actual weight:  6.0009827716399995

Predicted weight:  7.5045266
Actual weight:  7.62578964258

Predicted weight:  7.549129
Actual weight:  7.25100379718

Predicted weight:  7.982443
Actual weight:  6.8563763481999995

Predicted weight:  6.799781
Actual weight:  8.56275425608

Predicted weight:  7.86909
Actual weight:  7.87491199864

Predicted weight:  7.8584547
Actual weight:  6.3118345610599995

Predicted weight:  7

### Step 3: Save your model

In [19]:
model.save_model('model.bst')

# 7. Deploy model to Cloud AI Platform

### Step 1: Create a Cloud Storage bucket for our model

In [21]:
# Update these to your own GCP project, model, and version names
GCP_PROJECT = 'ai-platform-demo-pp'
MODEL_BUCKET = 'gs://ai-platform-demo-pp_bkt'
VERSION_NAME = 'v1'
MODEL_NAME = 'baby_weight'

In [22]:
!gsutil mb $MODEL_BUCKET

Creating gs://ai-platform-demo-pp_bkt/...


### Step 2: Copy the model file to Cloud Storage

In [23]:
!gsutil cp ./model.bst $MODEL_BUCKET

Copying file://./model.bst [Content-Type=application/octet-stream]...
/ [1 files][ 64.6 KiB/ 64.6 KiB]                                                
Operation completed over 1 objects/64.6 KiB.                                     


### Step 3: Create and deploy the model

#### The following ai-platform gcloud command will create a new model in your project. We'll call this one xgb_mortgage:

In [24]:
!gcloud ai-platform models create $MODEL_NAME

Created ml engine model [projects/ai-platform-demo-pp/models/baby_weight].


#### Now it's time to deploy the model. We can do that with this gcloud command:

In [25]:
!gcloud ai-platform versions create $VERSION_NAME \
--model=$MODEL_NAME \
--framework='XGBOOST' \
--runtime-version=1.15 \
--origin=$MODEL_BUCKET \
--python-version=3.7 \
--project=$GCP_PROJECT

Creating version (this might take a few minutes)......done.                    


### Step 4: Test the deployed model

In [26]:
%%writefile predictions.json
[0.0, 33.0, 1.0, 27.0]
[1.0, 26.0, 1.0, 40.0]

Writing predictions.json


In [33]:
GCP_PROJECT = 'ai-platform-demo-pp'
OUTPUT_BKT = 'gs://ai-platform-demo-pp-output-bkt'
!gsutil mb $OUTPUT_BKT

Creating gs://ai-platform-demo-pp-output-bkt/...
ServiceException: 409 Bucket ai-platform-demo-pp-output-bkt already exists.


In [29]:
!gsutil cp ./predictions.json $OUTPUT_BKT

Copying file://./predictions.json [Content-Type=application/json]...
/ [1 files][   46.0 B/   46.0 B]                                                
Operation completed over 1 objects/46.0 B.                                       


#### Test your model by saving the output of the following gcloud command to a variable and printing it:

In [27]:
prediction = !gcloud ai-platform predict --model=$MODEL_NAME --json-instances=predictions.json --version=$VERSION_NAME
print(prediction.s)

[3.166841506958008, 7.858454704284668]


#### You should see your model's prediction in the output. The actual baby weight for these two examples is 1.9 and 8.1 pounds respectively.

In [None]:
!gsutil cp ./predictions.json $MODEL_BUCKET