# Import an XGBoost model to SageMaker

In [None]:
%%sh
# Fetch the dataset archive (-N: skip the download unless the remote copy is newer),
# then unpack it, overwriting any previously extracted files (-o).
archive=bank-additional.zip
wget -N https://sagemaker-sample-data-us-west-2.s3-us-west-2.amazonaws.com/autopilot/direct_marketing/$archive
unzip -o $archive

In [None]:
!pip install -q xgboost==1.3.1

# Check which XGBoost versions are supported by SageMaker: the version installed here should match the `framework_version` used when deploying the model

### Train a model locally

In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split

# Load the raw dataset extracted from bank-additional.zip
data = pd.read_csv('./bank-additional/bank-additional-full.csv')

# One-hot encode the categorical columns. The dtype is pinned to uint8 so the
# indicator columns are 0/1 integers on every pandas version: pandas 2.x
# defaults to bool, which would serialize as "True"/"False" when samples are
# later written out as CSV.
data = pd.get_dummies(data, dtype=np.uint8)

# The label was encoded into complementary y_yes / y_no columns: keep y_yes as
# the single binary label and move it to the first column
data = data.drop(['y_no'], axis=1)
data = pd.concat([data['y_yes'], data.drop(['y_yes'], axis=1)], axis=1)

# Shuffle and split into training and validation (95%/5%).
# Both steps are seeded so that re-running the cell yields the same split.
data = data.sample(frac=1, random_state=123)
train_data, val_data = train_test_split(data, test_size=0.05, random_state=123)

In [None]:
# Training split: features (every column except the label) and the label itself
x_train, y_train = train_data.drop(columns=['y_yes']), train_data['y_yes']

# Validation split, same layout
x_val, y_val = val_data.drop(columns=['y_yes']), val_data['y_yes']

In [None]:
import xgboost as xgb

print('XGBoost', xgb.__version__)

# Binary classifier evaluated on AUC, with trees limited to depth 5
cls = xgb.XGBClassifier(max_depth=5, objective='binary:logistic', eval_metric='auc')

# Score the validation split after each boosting round and stop early once
# the metric has gone 10 rounds without improving
cls.fit(
    x_train,
    y_train,
    early_stopping_rounds=10,
    eval_set=[(x_val, y_val)],
)

In [None]:
# Shared name reused below as the local export directory, the S3 key prefix
# and part of the endpoint name
prefix = 'export-xgboost'

In [None]:
%%sh -s $prefix
# Create the local export directory; the notebook's prefix value is passed in as $1
mkdir -p $1

In [None]:
# Write the trained model into the export directory under the name 'xgboost-model'
cls.save_model(f'{prefix}/xgboost-model')
# See https://xgboost.readthedocs.io/en/latest/tutorials/saving_model.html

### Package model for SageMaker

In [None]:
import sagemaker

# Create a SageMaker session and look up its default S3 bucket
sess = sagemaker.Session()
bucket = sess.default_bucket()

print(bucket)

In [None]:
%%sh -s $prefix
# Package the saved model as model-xgb.tar.gz inside the export directory ($1).
# Abort if the directory cannot be entered, so tar never runs from the wrong place.
cd "$1" || exit 1
tar cvfz model-xgb.tar.gz xgboost-model

In [None]:
# Upload the model archive to S3 under the same key prefix; the returned
# location is later passed to the SageMaker model as model_data
model_path = sess.upload_data(
    key_prefix=prefix,
    path=f'{prefix}/model-xgb.tar.gz',
)

print(model_path)

### Deploy model on SageMaker

In [None]:
from sagemaker.xgboost.model import XGBoostModel

# Wrap the uploaded artifact in a SageMaker XGBoost model. framework_version
# '1.3-1' is in line with the xgboost==1.3.1 package installed for local training.
xgb_model = XGBoostModel(
    role=sagemaker.get_execution_role(),
    framework_version='1.3-1',
    entry_point='xgb-script.py',
    model_data=model_path,
)

In [None]:
from time import strftime,gmtime

# Timestamped (UTC) endpoint name, so repeated deployments get distinct names
xgb_endpoint_name = f'xgb-{prefix}-{strftime("%Y-%m-%d-%H-%M-%S", gmtime())}'

# Deploy the model to a real-time endpoint backed by a single ml.t2.medium instance
xgb_predictor = xgb_model.deploy(
    instance_type='ml.t2.medium',
    initial_instance_count=1,
    endpoint_name=xgb_endpoint_name,
)

print(xgb_endpoint_name)

### Predict with model

In [None]:
# Take the first 10 validation samples (already one-hot encoded) and drop the label
payload = val_data.iloc[:10].drop(columns=['y_yes'])
# Serialize as CSV without header or index, and strip the trailing newline
payload = payload.to_csv(index=False, header=False).rstrip('\n')

print(payload)

In [None]:
# Exchange CSV with the endpoint: requests are serialized to CSV and
# responses are deserialized from CSV
xgb_predictor.serializer = sagemaker.serializers.CSVSerializer()
xgb_predictor.deserializer = sagemaker.deserializers.CSVDeserializer()

# Send the CSV payload to the endpoint
response = xgb_predictor.predict(payload)

print(response)

In [None]:
# Tear down the endpoint now that the predictions have been made
xgb_predictor.delete_endpoint()