In [1]:
# Import needed libraries
import os
import sys

import numpy as np
import pandas as pd

import boto3
import sagemaker

sagemaker.config INFO - Not applying SDK defaults from location: /etc/xdg/xdg-ubuntu/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /home/lindo/.config/sagemaker/config.yaml


In [2]:
import dotenv
dotenv.load_dotenv(f"{os.getcwd()}/.env")  # take environment variables from .env

AWS_ACCESS_KEY_ID = os.getenv('AWS_ACCESS_KEY_ID')
AWS_SECRET_ACCESS_KEY = os.getenv('AWS_SECRET_ACCESS_KEY')
AWS_SAGEMAKER_EXECUTION_ROLE = os.getenv('AWS_SAGEMAKER_EXECUTION_ROLE')
os.environ['AWS_DEFAULT_REGION'] = "ap-southeast-1"

In [3]:
# s3_client_b3 = boto3.client('s3', aws_access_key_id=AWS_ACCESS_KEY_ID, aws_secret_access_key=AWS_SECRET_ACCESS_KEY, region_name="ap-southeast-1")
# sagemaker_client_b3 = boto3.client('sagemaker', aws_access_key_id=AWS_ACCESS_KEY_ID, aws_secret_access_key=AWS_SECRET_ACCESS_KEY, region_name="ap-southeast-1")

s3_client_b3 = boto3.client('s3', aws_access_key_id=AWS_ACCESS_KEY_ID, aws_secret_access_key=AWS_SECRET_ACCESS_KEY)
sagemaker_client_b3 = boto3.client('sagemaker', aws_access_key_id=AWS_ACCESS_KEY_ID, aws_secret_access_key=AWS_SECRET_ACCESS_KEY)

# Data Ingestion

In [4]:
s3_client_b3.download_file(Bucket="ins-demo-bucket", Key="demo-sagemaker/case01/iris-data-full.csv", Filename="iris-data-full.csv")

In [5]:
df = pd.read_csv(
    "iris-data-full.csv", header=None, names=["sepal_len", "sepal_wid", "petal_len", "petal_wid", "class"]
)
df.head()

Unnamed: 0,sepal_len,sepal_wid,petal_len,petal_wid,class
0,5.1,3.5,1.4,0.2,Iris-setosa
1,4.9,3.0,1.4,0.2,Iris-setosa
2,4.7,3.2,1.3,0.2,Iris-setosa
3,4.6,3.1,1.5,0.2,Iris-setosa
4,5.0,3.6,1.4,0.2,Iris-setosa


# Data Preparation

In [6]:
# Convert the three classes from strings to integers in {0,1,2}
df["class_cat"] = df["class"].astype("category").cat.codes
categories_map = dict(enumerate(df["class"].astype("category").cat.categories))
print(categories_map)
df.head()

{0: 'Iris-setosa', 1: 'Iris-versicolor', 2: 'Iris-virginica'}


Unnamed: 0,sepal_len,sepal_wid,petal_len,petal_wid,class,class_cat
0,5.1,3.5,1.4,0.2,Iris-setosa,0
1,4.9,3.0,1.4,0.2,Iris-setosa,0
2,4.7,3.2,1.3,0.2,Iris-setosa,0
3,4.6,3.1,1.5,0.2,Iris-setosa,0
4,5.0,3.6,1.4,0.2,Iris-setosa,0


In [7]:
# Split the data into 80-20 train-test split
num_samples = df.shape[0]
split = round(num_samples * 0.8)
train = df.iloc[:split, :]
test = df.iloc[split:, :]
print("{} train, {} test".format(split, num_samples - split))

120 train, 30 test


In [8]:
# Write train and test CSV files
train.to_csv("train.csv", index=False)
test.to_csv("test.csv", index=False)

In [9]:
# Create a sagemaker session to upload data to S3
import sagemaker

sagemaker_session = sagemaker.Session(sagemaker_client=sagemaker_client_b3)

In [10]:
# Upload data to default S3 bucket
prefix = "ins-demo-bucket/demo-sagemaker/case01/"
training_input_path = sagemaker_session.upload_data(path="./train.csv", bucket="ins-demo-bucket", key_prefix='demo-sagemaker/case01/training')

# Train Model

In [11]:
# # Use the current execution role for training. It needs access to S3
# role = sagemaker.get_execution_role()
# print(role)

In [15]:
from sagemaker.sklearn import SKLearn

sk_estimator = SKLearn(
    entry_point="train.py",
    role=AWS_SAGEMAKER_EXECUTION_ROLE,
    instance_count=1,
    instance_type="ml.c5.xlarge",
    py_version="py3",
    framework_version="1.2-1",
    script_mode=True,
    hyperparameters={"estimators": 20},
)

# Train the estimator
sk_estimator.fit({"train": training_input_path})

INFO:sagemaker:Creating training-job with name: sagemaker-scikit-learn-2024-07-24-14-58-42-135


2024-07-24 14:58:45 Starting - Starting the training job...
2024-07-24 14:59:04 Starting - Preparing the instances for training...
2024-07-24 14:59:31 Downloading - Downloading input data...
2024-07-24 14:59:56 Downloading - Downloading the training image...
2024-07-24 15:00:47 Training - Training image download completed. Training in progress.
2024-07-24 15:00:47 Uploading - Uploading generated training model.[34m2024-07-24 15:00:40,601 sagemaker-containers INFO     Imported framework sagemaker_sklearn_container.training[0m
[34m2024-07-24 15:00:40,604 sagemaker-training-toolkit INFO     No GPUs detected (normal if no gpus installed)[0m
[34m2024-07-24 15:00:40,608 sagemaker-training-toolkit INFO     No Neurons detected (normal if no neurons installed)[0m
[34m2024-07-24 15:00:40,627 sagemaker_sklearn_container.training INFO     Invoking user training script.[0m
[34m2024-07-24 15:00:40,843 sagemaker-training-toolkit INFO     No GPUs detected (normal if no gpus installed)[0m
[3

# Deployment

In [16]:
import time

sk_endpoint_name = "sklearn-rf-model" + time.strftime("%Y-%m-%d-%H-%M-%S", time.gmtime())
sk_predictor = sk_estimator.deploy(
    initial_instance_count=1, instance_type="ml.m5.large", endpoint_name=sk_endpoint_name
)

INFO:sagemaker:Creating model with name: sagemaker-scikit-learn-2024-07-24-15-07-49-020
INFO:sagemaker:Creating endpoint-config with name sklearn-rf-model2024-07-24-15-07-49
INFO:sagemaker:Creating endpoint with name sklearn-rf-model2024-07-24-15-07-49


------!

# Endpoint Test

In [17]:
import json

client = sagemaker_session.sagemaker_runtime_client

request_body = {"Input": [[9.0, 3571, 1976, 0.525]]}
data = json.loads(json.dumps(request_body))
payload = json.dumps(data)

response = client.invoke_endpoint(
    EndpointName=sk_endpoint_name, ContentType="application/json", Body=payload
)

result = json.loads(response["Body"].read().decode())["Output"]
print("Predicted class category {} ({})".format(result, categories_map[result]))

Predicted class category 2 (Iris-virginica)


# Cleanup

In [1]:
# sk_predictor.delete_model()
# sk_predictor.delete_endpoint()