In [None]:
%%sh
# Fetch the direct-marketing archive (wget only re-downloads when the remote
# copy is newer than the local one) and unpack it, overwriting any files
# left over from a previous extraction.
DATA_URL=https://sagemaker-sample-data-us-west-2.s3-us-west-2.amazonaws.com/autopilot/direct_marketing/bank-additional.zip
wget --timestamping "$DATA_URL"
unzip -o bank-additional.zip

In [None]:
import sagemaker

# Show which SageMaker SDK version this notebook runs against
print(sagemaker.__version__)

# Session object, its default S3 bucket, and the key prefix under which
# this notebook stores its data and output
sess = sagemaker.Session()
bucket = sess.default_bucket()
prefix = "xgboost-direct-marketing"

In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split

# Load the full dataset extracted from the downloaded archive
data = pd.read_csv('./bank-additional/bank-additional-full.csv')

# One-hot encode: every categorical column is expanded into indicator
# columns, so the label ends up as the pair 'y_no' / 'y_yes'.
# NOTE(review): with pandas >= 2.0 get_dummies produces bool columns, so the
# CSV files below will contain True/False instead of 1/0 -- confirm that the
# training script accepts that, or pass an explicit dtype here.
data = pd.get_dummies(data)

# Move labels to first column, which is what XGBoost expects.
# 'y_no' is redundant with 'y_yes', so it is dropped.
data = data.drop(['y_no'], axis=1)
data = pd.concat([data['y_yes'], data.drop(['y_yes'], axis=1)], axis=1)

# Shuffle and split into training and validation (95%/5%).
# Both steps are seeded: train_test_split reshuffles by default, so without
# its own random_state the split would differ on every run.
data = data.sample(frac=1, random_state=123)
train_data, val_data = train_test_split(data, test_size=0.05, random_state=123)

# Save to CSV files
train_data.to_csv('training.csv', index=False, header=True) # Need to keep column names
val_data.to_csv('validation.csv', index=False, header=True)

In [None]:
# Upload both CSV files to S3, each under its own sub-prefix; the returned
# values are kept for the training step.
training = sess.upload_data(
    path="training.csv",
    key_prefix=prefix + "/training",
)
validation = sess.upload_data(
    path="validation.csv",
    key_prefix=prefix + "/validation",
)

# S3 location handed to the estimator as its output path
output = "s3://{}/{}/output/".format(bucket, prefix)

In [None]:
# Show the S3 locations computed above, one per line
for s3_location in (training, validation, output):
    print(s3_location)

### Train on SageMaker

In [None]:
from sagemaker.xgboost import XGBoost

# Execution role handed to the estimator below
role = sagemaker.get_execution_role()

# Estimator built around the xgb-dm.py entry point: a single ml.m5.large
# instance, XGBoost framework 1.3-1 on Python 3, artifacts written to `output`.
xgb_estimator = XGBoost(
    entry_point="xgb-dm.py",
    role=role,
    instance_count=1,
    instance_type="ml.m5.large",
    framework_version="1.3-1",
    py_version="py3",
    output_path=output,
    hyperparameters={
        "max-depth": 5,
        "eval-metric": "auc",
    },
)

In [None]:
# Start training, passing the uploaded training and validation locations
# under the 'training' and 'validation' keys
xgb_estimator.fit(
    {
        "training": training,
        "validation": validation,
    }
)

### Retrieve model artifact

In [None]:
%%sh -s $xgb_estimator.model_data
# $1 is the S3 URI of the trained model artifact, passed in from Python.
# Stop at the first failing command: without this, a failed download or a
# failed `cd` would still run `tar` -- in the notebook's working directory.
set -e
echo "$1"
# Quote the URI so it is always passed to the CLI as a single argument
aws s3 cp "$1" export-xgboost/
cd export-xgboost
tar xvfz model.tar.gz

In [None]:
!pip -q install xgboost==1.3.1

In [None]:
import xgboost as xgb

# Create an empty booster, then load the model file extracted from the
# downloaded artifact into it
model = xgb.Booster()
model.load_model("export-xgboost/xgb.model")

In [None]:
# Write a dump of the loaded model to a text file next to the artifact
model.dump_model("export-xgboost/model.txt")

In [None]:
!head export-xgboost/model.txt