### Data Preparation

In [1]:
import os

import pandas as pd
from sklearn.datasets import load_iris

In [2]:
# Load the iris dataset once and reuse the result for both the predictors
# and the target instead of calling the loader twice.
iris_bunch = load_iris(as_frame=True)
iris = iris_bunch["data"]
target = iris_bunch["target"]
# The target is placed in the first column: the training log further down
# shows the CSV being read with label_column=0.
data = pd.concat([target, iris], axis=1)

In [3]:
filepath_predictors = "../data/predictors.csv"
# Make sure the destination directory exists so this cell also works on a
# fresh checkout where ../data has not been created yet.
os.makedirs(os.path.dirname(filepath_predictors), exist_ok=True)
# Predictors only, written without header or index; this file is read back
# later in the notebook and sent as the inference request body.
iris.to_csv(filepath_predictors, header=False, index=False)

In [4]:
filepath_data = "../data/data.csv"
# Make sure the destination directory exists so this cell also works on a
# fresh checkout where ../data has not been created yet.
os.makedirs(os.path.dirname(filepath_data), exist_ok=True)
# Target + predictors, written without header or index; this file is
# uploaded to S3 further down and used as the training input.
data.to_csv(filepath_data, header=False, index=False)

### SageMaker Training

In [5]:
import sagemaker
from sagemaker.image_uris import retrieve
from sagemaker.inputs import TrainingInput
from sagemaker.estimator import Estimator

In [6]:
# SageMaker session reused below for the region lookup, the default bucket,
# the data upload, and the estimator.
session = sagemaker.Session()

In [7]:
# Region reported by the session; passed to the image lookup in the next cell.
region_name = session.boto_region_name
region_name

'us-east-1'

In [8]:
# Look up the image URI of the XGBoost training container for this region.
# NOTE(review): "latest" is a floating tag; consider pinning an explicit
# version so re-runs use the same container - confirm which one is intended.
container_image = retrieve(
    framework="xgboost",
    region=region_name,
    version="latest",
)
container_image

'811284229777.dkr.ecr.us-east-1.amazonaws.com/xgboost:latest'

In [9]:
# Default bucket of the session; used below as the upload destination and
# in the estimator's output path.
bucket = session.default_bucket()
bucket

'sagemaker-us-east-1-885248014373'

In [10]:
# Upload the training CSV to the session bucket under the "training" prefix
# and keep the returned S3 URI for the training input definition.
uploaded_data = session.upload_data(
    path=filepath_data,
    bucket=bucket,
    key_prefix="training",
)
uploaded_data

's3://sagemaker-us-east-1-885248014373/training/data.csv'

In [11]:
# Describe the uploaded file as a CSV training input.
input_data = TrainingInput(
    content_type="text/csv",
    s3_data=uploaded_data,
)

In [12]:
# IAM role ARN handed to the estimator below.
# NOTE(review): hard-coded and account-specific; when running inside
# SageMaker, sagemaker.get_execution_role() may be preferable - confirm.
role = "arn:aws:iam::885248014373:role/service-role/AmazonSageMaker-ExecutionRole-20210305T230941"

In [13]:
# Estimator for the XGBoost container: one ml.m5.large instance, with
# outputs written under the "output" prefix of the session bucket.
xgboost = Estimator(
    image_uri=container_image,
    role=role,
    sagemaker_session=session,
    instance_count=1,
    instance_type="ml.m5.large",
    output_path=f"s3://{bucket}/output",
)

In [14]:
# The iris target has three classes, so request a multiclass objective
# explicitly; without it the container falls back to its default
# (regression) objective and treats the class ids as continuous values.
xgboost.set_hyperparameters(
    num_round=5,
    max_depth=5,
    objective="multi:softmax",
    num_class=3,
)

In [15]:
# Start the training job with the CSV as the "train" channel; the job's
# log output is streamed into the cell output below.
xgboost.fit(inputs={"train": input_data})

2021-06-29 17:14:59 Starting - Starting the training job...
2021-06-29 17:15:30 Starting - Launching requested ML instancesProfilerReport-1624986899: InProgress
......
2021-06-29 17:16:30 Starting - Preparing the instances for training.........
2021-06-29 17:18:18 Downloading - Downloading input data
2021-06-29 17:18:18 Training - Downloading the training image..[34mArguments: train[0m
[34m[2021-06-29:17:18:35:INFO] Running standalone xgboost training.[0m
[34m[2021-06-29:17:18:35:INFO] Path /opt/ml/input/data/validation does not exist![0m
[34m[2021-06-29:17:18:35:INFO] File size need to be processed in the node: 0.0mb. Available memory size in the node: 162.54mb[0m
[34m[2021-06-29:17:18:35:INFO] Determined delimiter of CSV input is ','[0m
[34m[17:18:35] S3DistributionType set as FullyReplicated[0m
[34m[17:18:35] 150x4 matrix with 600 entries loaded from /opt/ml/input/data/train?format=csv&label_column=0&delimiter=,[0m
[34m[17:18:35] src/tree/updater_prune.cc:74: tree pru

In [16]:
# Show the name of the estimator's latest training job.
xgboost.latest_training_job.job_name

'xgboost-2021-06-29-17-14-59-135'

In [17]:
# Show the S3 location of the model artifact (model.tar.gz, see output).
xgboost.model_data

's3://sagemaker-us-east-1-885248014373/output/xgboost-2021-06-29-17-14-59-135/output/model.tar.gz'

### Prediction

In [18]:
import boto3

In [19]:
# SageMaker runtime client, used below to invoke an endpoint.
runtime = boto3.client("sagemaker-runtime")

In [21]:
# Read the whole predictors CSV into one string; it is sent as the request
# body when the endpoint is invoked below.
with open(filepath_predictors) as predictors_file:
    dados = predictors_file.read()

In [None]:
# Placeholder: set this to the name of the endpoint to invoke before running
# the following cells.
# NOTE(review): no cell visible here deploys the trained model to an
# endpoint - confirm where the endpoint is created.
EndpointName = ""

In [None]:
# Send the header-less predictors CSV to the endpoint. The content type is
# declared explicitly so the serving container parses the body as CSV
# rather than depending on a default.
response = runtime.invoke_endpoint(
    EndpointName=EndpointName,
    ContentType="text/csv",
    Body=dados,
)

In [None]:
# Read the response body and decode it as UTF-8 text for display.
response["Body"].read().decode("utf-8")