In [52]:
%%sh

# Name of algo -> ECR
algorithm_name=sm-byoc-scikit

cd container

chmod +x randomForest-Petrol/train
chmod +x randomForest-Petrol/serve

account=$(aws sts get-caller-identity --query Account --output text)

# Region, defaults to us-west-2
region=$(aws configure get region)
region=${region:-us-west-2}

fullname="${account}.dkr.ecr.${region}.amazonaws.com/${algorithm_name}:latest"

# If the repository doesn't exist in ECR, create it.
aws ecr describe-repositories --repository-names "${algorithm_name}" > /dev/null 2>&1

if [ $? -ne 0 ]
then
    aws ecr create-repository --repository-name "${algorithm_name}" > /dev/null
fi

# Get the login command from ECR and execute it directly
aws ecr get-login-password --region ${region}|docker login --username AWS --password-stdin ${fullname}

# Build the docker image locally with the image name and then push it to ECR
# with the full name.

docker build  -t ${algorithm_name} .
docker tag ${algorithm_name} ${fullname}

docker push ${fullname}

Login Succeeded
Sending build context to Docker daemon  32.26kB
Step 1/11 : FROM ubuntu:18.04
 ---> fbf60236a8e3
Step 2/11 : MAINTAINER Amazon AI <sage-learner@amazon.com>
 ---> Using cache
 ---> d465e2bd9823
Step 3/11 : RUN apt-get -y update && apt-get install -y --no-install-recommends          wget          python3-pip          python3-setuptools          nginx          ca-certificates     && rm -rf /var/lib/apt/lists/*
 ---> Using cache
 ---> 6d544fd54850
Step 4/11 : RUN ln -s /usr/bin/python3 /usr/bin/python
 ---> Using cache
 ---> 27081b22e040
Step 5/11 : RUN ln -s /usr/bin/pip3 /usr/bin/pip
 ---> Using cache
 ---> f420a4bfc442
Step 6/11 : RUN pip --no-cache-dir install numpy==1.16.2 scipy==1.2.1 scikit-learn==0.20.2 pandas flask gunicorn
 ---> Using cache
 ---> d0f49f083d85
Step 7/11 : ENV PYTHONUNBUFFERED=TRUE
 ---> Using cache
 ---> cdeac2024895
Step 8/11 : ENV PYTHONDONTWRITEBYTECODE=TRUE
 ---> Using cache
 ---> fb62a820afcd
Step 9/11 : ENV PATH="/opt/program:${PATH}"
 ---> U

https://docs.docker.com/engine/reference/commandline/login/#credentials-store



In [53]:
# S3 prefix
prefix = "DEMO-scikit-byo-rf"

# Define IAM role
import boto3
import re

import os
import numpy as np
import pandas as pd
from sagemaker import get_execution_role

role = get_execution_role()
print(role)

arn:aws:iam::906815961619:role/service-role/AmazonSageMaker-ExecutionRole-20210103T001835


In [54]:
import sagemaker as sage
from time import gmtime, strftime

sess = sage.Session()

In [55]:
WORK_DIRECTORY = "data" #local directory with data

data_location = sess.upload_data(WORK_DIRECTORY, key_prefix=prefix)

In [56]:
account = sess.boto_session.client("sts").get_caller_identity()["Account"]
region = sess.boto_session.region_name
image = "906815961619.dkr.ecr.us-east-1.amazonaws.com/sm-byoc-scikit:latest".format(account, region)

rf = sage.estimator.Estimator(
    image,
    role,
    1,
    "ml.c4.2xlarge",
    output_path="s3://{}/output".format(sess.default_bucket()),
    sagemaker_session=sess,
)

rf.fit(data_location)

2021-07-18 23:45:49 Starting - Starting the training job...
2021-07-18 23:46:12 Starting - Launching requested ML instancesProfilerReport-1626651949: InProgress
......
2021-07-18 23:47:13 Starting - Preparing the instances for training......
2021-07-18 23:48:13 Downloading - Downloading input data
2021-07-18 23:48:13 Training - Downloading the training image...
2021-07-18 23:48:49 Uploading - Uploading generated training model
2021-07-18 23:48:49 Completed - Training job completed
[34mworking[0m
[34mStarting the training.[0m
[34merror with reading in dataset[0m
[34m2[0m
[34m/opt/ml/input/data/training/.ipynb_checkpoints[0m
[34m<class 'str'>[0m
[34m/opt/ml/input/data/training/petrol_consumption.csv[0m
[34m<class 'str'>[0m
[34m['/opt/ml/input/data/training/.ipynb_checkpoints', '/opt/ml/input/data/training/petrol_consumption.csv'][0m
[34m['/opt/ml/input/data/training/petrol_consumption.csv'][0m
[34m48[0m
[34mIndex(['Petrol_tax', 'Average_income', 'Paved_Highways',
 

In [57]:
from sagemaker.predictor import csv_serializer
rf_pred = rf.deploy(1, "ml.m4.xlarge", serializer=csv_serializer)

---------------!

In [58]:
shape = pd.read_csv("data/petrol_consumption.csv", header=None)
shape.drop(shape.columns[[4]], axis=1, inplace=True)
shape.sample(3)

Unnamed: 0,0,1,2,3
47,7,4296,4083,0.623
24,9,4258,4686,0.517
46,9,4476,3942,0.571


In [62]:
import itertools

a = [10*i for i in range(3)]
#print(a)
b = [10+i for i in range(5)]
#print(b)
indices = [i+j for i,j in itertools.product(a,b)]
#print(indices)

test_data = shape.iloc[indices[:-1]]
test_data

Unnamed: 0,0,1,2,3
10,7.0,4512,8507,0.552
11,8.0,4391,5939,0.53
12,7.5,5126,14186,0.525
13,7.0,4817,6930,0.574
14,7.0,4207,6580,0.545
20,8.5,4341,6010,0.677
21,7.0,4593,7834,0.663
22,8.0,4983,602,0.602
23,9.0,4897,2449,0.511
24,9.0,4258,4686,0.517


In [65]:
df = pd.read_csv("data/petrol_consumption.csv")
res = df.iloc[indices, : ]['Petrol_Consumption']
res

10    580
11    471
12    525
13    508
14    566
20    649
21    540
22    464
23    547
24    460
30    571
31    554
32    577
33    628
34    487
Name: Petrol_Consumption, dtype: int64

In [60]:
print(rf_pred.predict(test_data.values).decode('utf-8'))

The csv_serializer has been renamed in sagemaker>=2.
See: https://sagemaker.readthedocs.io/en/stable/v2.html for details.


497.03333333333336
520.4333333333333
459.23333333333335
521.6
553.9
720.8666666666667
658.6666666666666
519.4333333333333
457.93333333333334
537.9
540.3333333333334
586.7666666666667
580.0
593.3

