In [1]:
import sagemaker
from sagemaker import get_execution_role

# Key prefix under which this notebook's uploads are placed in S3.
prefix = "Scikit-iris"

# SDK session object; used further down for the data upload and the estimator.
sagemaker_session = sagemaker.Session()

# SageMaker-compatible role used by this notebook instance; it is handed to
# the estimator when the training job is configured.
role = get_execution_role()

## Upload the data for training

In [2]:
import boto3
import numpy as np
import pandas as pd
import os
import sagemaker

# Download the raw Iris file from the SageMaker sample bucket, replace the
# species name with a numeric class id, move that label into the first column,
# and overwrite the local CSV with the reformatted rows.
os.makedirs("./data", exist_ok=True)

s3_client = boto3.client("s3")
# The bucket name is a plain literal (the previous f-string had no placeholders).
s3_client.download_file(
    "sagemaker-sample-files", "datasets/tabular/iris/iris.data", "./data/iris.csv"
)

df_iris = pd.read_csv("./data/iris.csv", header=None)

# Series.map produces NaN for any value that is not a key of the mapping.
# Stop here with a clear message instead of writing "nan" labels into the
# training file.
label_ids = df_iris[4].map({"Iris-setosa": 0, "Iris-versicolor": 1, "Iris-virginica": 2})
if label_ids.isna().any():
    unknown_labels = sorted(df_iris.loc[label_ids.isna(), 4].astype(str).unique())
    raise ValueError(f"Unrecognised species labels in ./data/iris.csv: {unknown_labels}")
df_iris[4] = label_ids

# Column order becomes: label, then the four feature columns.
iris = df_iris[[4, 0, 1, 2, 3]].to_numpy()

# The reformatted rows deliberately overwrite the raw download in place.
# NOTE(review): the whole "data" directory is uploaded later in the notebook,
# so leaving a second file here would presumably end up in the training input
# as well - confirm before changing the output path.
# With a multi-format `fmt` string numpy ignores `delimiter`; the ", "
# separators written to the file come from `fmt` itself.
np.savetxt("./data/iris.csv", iris, delimiter=",", fmt="%1.1f, %1.3f, %1.3f, %1.3f, %1.3f")

In [3]:
# Local directory that is pushed to S3.
WORK_DIRECTORY = "data"

# Uploaded objects are keyed under "<prefix>/<WORK_DIRECTORY>". The value
# returned by upload_data is passed to fit() as the "train" channel later on.
s3_key_prefix = f"{prefix}/{WORK_DIRECTORY}"
train_input = sagemaker_session.upload_data(WORK_DIRECTORY, key_prefix=s3_key_prefix)

### Create SageMaker Scikit Estimator

In [6]:
from sagemaker.sklearn.estimator import SKLearn

FRAMEWORK_VERSION = "0.23-1"
# NOTE(review): the file name is spelled "sicikit" - confirm it matches the
# training script that actually sits next to this notebook.
script_path = "sicikit_learn_iris.py"

# Gather the estimator settings in one place, then build the estimator.
estimator_settings = dict(
    entry_point=script_path,
    framework_version=FRAMEWORK_VERSION,
    instance_type="ml.m4.xlarge",
    role=role,
    sagemaker_session=sagemaker_session,
    hyperparameters={"max_leaf_nodes": 30},
)
sklearn = SKLearn(**estimator_settings)

### Train SKLearn Estimator on Iris data

In [7]:
# Start the training job, supplying the uploaded data as the "train" channel.
training_channels = {"train": train_input}
sklearn.fit(training_channels)

2022-04-13 13:12:29 Starting - Starting the training job...
2022-04-13 13:12:56 Starting - Preparing the instances for trainingProfilerReport-1649855549: InProgress
............
2022-04-13 13:14:50 Downloading - Downloading input data...
2022-04-13 13:15:30 Training - Downloading the training image......
2022-04-13 13:16:30 Uploading - Uploading generated training model
2022-04-13 13:16:16,678 sagemaker-containers INFO     Imported framework sagemaker_sklearn_container.training
2022-04-13 13:16:16,682 sagemaker-training-toolkit INFO     No GPUs detected (normal if no gpus installed)
2022-04-13 13:16:16,694 sagemaker_sklearn_container.training INFO     Invoking user training script.
2022-04-13 13:16:17,169 sagemaker-training-toolkit INFO     No GPUs detected (normal if no gpus installed)
2022-04-13 13:16:17,193 sagemaker-training-toolkit INFO     No GPUs detected (normal if no gpus installed)
2022-04-13 13:16:17,208 sagemaker-training-too... [log output truncated]

### Using the trained model to make inference requests
### Deploy the model

Deploying the model to SageMaker hosting only requires a `deploy` call on the fitted estimator. The call takes an initial instance count and an instance type, and returns a predictor that is used for the inference requests below.

In [8]:
# Host the fitted estimator on one ml.m5.xlarge instance and keep the returned
# predictor for the inference calls that follow.
hosting_settings = {"initial_instance_count": 1, "instance_type": "ml.m5.xlarge"}
predictor = sklearn.deploy(**hosting_settings)

-----!

## Choose some data and use it for a prediction

In [9]:
import itertools
import pandas as pd

# Reload the reformatted CSV written earlier (label in column 0, features after).
shape = pd.read_csv("data/iris.csv", header=None)

# Every (block start, offset) pair summed gives row positions
# 40-49, 90-99 and 140-149.
a = list(range(0, 150, 50))
b = list(range(40, 50))
indices = [start + offset for start, offset in itertools.product(a, b)]

# NOTE(review): the [:-1] slice leaves out the final position (149), so 29 rows
# are selected rather than 30 - confirm this is intentional.
selected_positions = indices[:-1]
test_data = shape.iloc[selected_positions]

# Column 0 holds the label; everything after it is a feature.
test_y = test_data.iloc[:, 0]
test_X = test_data.iloc[:, 1:]

In [10]:
# Send the selected feature rows to the endpoint, then show the predictions
# followed by the true labels for a side-by-side comparison.
predicted_labels = predictor.predict(test_X.values)
expected_labels = test_y.values
print(predicted_labels)
print(expected_labels)

[0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 2. 2. 2. 2.
 2. 2. 2. 2. 2.]
[0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 2. 2. 2. 2.
 2. 2. 2. 2. 2.]
