# Sesión 3 - Construyendo tus propios algoritmos

## Inicio de sesión

In [1]:
import sagemaker

# Resolve the IAM role used by every SageMaker job in this notebook.
# Inside SageMaker (Studio / notebook instance) the execution role can be
# looked up directly; elsewhere the lookup fails and a fixed ARN is used.
try:
    role = sagemaker.get_execution_role()
except Exception:
    # Catch Exception rather than using a bare except so that
    # KeyboardInterrupt / SystemExit are not silently swallowed.
    # NOTE(review): this fallback ARN is specific to one AWS account;
    # replace it when running the notebook from a different account.
    role = "arn:aws:iam::827345860551:role/SageMakerExecutionRole"

# Session object shared by the rest of the notebook, plus the default
# bucket and region derived from it.
sesion = sagemaker.Session()
bucket = sesion.default_bucket()
region = sesion.boto_session.region_name

Couldn't call 'get_role' to get Role ARN from role name bhernandez to get Role path.


## Subida de datos

In [2]:
import boto3
import pandas as pd
import numpy as np
import sagemaker
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split


# Build a single DataFrame from the Iris dataset: the feature columns
# followed by the numeric target stored under the "Species" column.
data = load_iris()
df = pd.DataFrame(
    np.column_stack((data["data"], data["target"])),
    columns=[*data["feature_names"], "Species"],
)

# Write the complete dataset to disk and upload it to the session bucket.
df.to_csv("iris.csv", index=False)
s3_path = sesion.upload_data(
    path="iris.csv", bucket=bucket, key_prefix="curso_sagemaker/data"
)

# Features are every column except "Species"; the label is kept as a
# one-column DataFrame so it can be concatenated back column-wise below.
X = df.drop(columns="Species")
y = df[["Species"]]

# Train/test split with a fixed seed so the partition is repeatable.
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=28)

# Training partition: features + label side by side, saved and uploaded.
pd.concat([X_train, y_train], axis=1).to_csv("train.csv", index=False)
s3_train_path = sesion.upload_data(
    path="train.csv", bucket=bucket, key_prefix="curso_sagemaker/data"
)

# Test partition: same layout and same S3 prefix as the training file.
pd.concat([X_test, y_test], axis=1).to_csv("test.csv", index=False)
s3_test_path = sesion.upload_data(
    path="test.csv", bucket=bucket, key_prefix="curso_sagemaker/data"
)

# Show the S3 URIs returned by the three uploads.
for uploaded_uri in (s3_path, s3_train_path, s3_test_path):
    print(uploaded_uri)

s3://sagemaker-eu-west-1-827345860551/curso_sagemaker/data/iris.csv
s3://sagemaker-eu-west-1-827345860551/curso_sagemaker/data/train.csv
s3://sagemaker-eu-west-1-827345860551/curso_sagemaker/data/test.csv


## Construcción de la imagen docker

In [7]:
import getpass
import os

import shlex
import subprocess

# Ask for the sudo password interactively so it is never stored in the notebook.
password = getpass.getpass()
command = "sudo -S sh ./build_and_push.sh"

# Run the build script from the src/ directory. The password is handed to
# `sudo -S` through stdin instead of being interpolated into a shell string:
# that avoids shell injection via special characters in the password and
# keeps the password out of the process list.
# The last expression is the script's exit code (0 on success).
subprocess.run(
    shlex.split(command),
    cwd="src",
    input=password + "\n",
    text=True,
).returncode

········


https://docs.docker.com/engine/reference/commandline/login/#credentials-store



Login Succeeded
Sending build context to Docker daemon  11.78kB
Step 1/11 : FROM python:3.8
 ---> 271c1bcd4489
Step 2/11 : COPY ./requirements.txt /home
 ---> Using cache
 ---> 446556bc43e2
Step 3/11 : WORKDIR /home
 ---> Using cache
 ---> 66c188ac7d22
Step 4/11 : RUN  pip install -r requirements.txt
 ---> Using cache
 ---> b679ea30b01f
Step 5/11 : COPY ./ /home
 ---> 76a36f0d31be
Step 6/11 : RUN echo "#!/bin/bash\n/usr/local/bin/python -u /home/processing.py" > /usr/bin/processing
 ---> Running in a9825cfd5706
Removing intermediate container a9825cfd5706
 ---> c56988f86d75
Step 7/11 : RUN echo "#!/bin/bash\n/usr/local/bin/python -u /home/train.py" > /usr/bin/train
 ---> Running in 84f87b4c156f
Removing intermediate container 84f87b4c156f
 ---> 28969795aca8
Step 8/11 : RUN echo "#!/bin/bash\n/usr/local/bin/python -u /home/serve.py" > /usr/bin/serve
 ---> Running in d0f0b027586a
Removing intermediate container d0f0b027586a
 ---> addddb1f5c43
Step 9/11 : RUN chmod +x /usr/bin/processing


0

## Ejecución con Sagemaker

#### Entrenamiento

In [8]:
from sagemaker.estimator import Estimator


# The caller's AWS account id (looked up through STS) and the session region
# identify the ECR registry that hosts the custom training image.
account = sesion.boto_session.client("sts").get_caller_identity()["Account"]
image = f"{account}.dkr.ecr.{region}.amazonaws.com/sagemaker-decision-trees:latest"

# Estimator backed by the custom image; model artifacts are written under
# the "output" prefix of the session bucket.
estimator = Estimator(
    image_uri=image,
    role=role,
    instance_type="ml.c4.2xlarge",
    instance_count=1,
    output_path=f"s3://{bucket}/output",
    sagemaker_session=sesion,
)

# Launch the training job with the two CSV files uploaded earlier, one per
# input channel.
estimator.fit(inputs=dict(train=s3_train_path, test=s3_test_path))

2022-04-01 20:30:15 Starting - Starting the training job...
2022-04-01 20:30:41 Starting - Preparing the instances for trainingProfilerReport-1648845014: InProgress
......
2022-04-01 20:32:01 Downloading - Downloading input data
2022-04-01 20:32:01 Training - Downloading the training image..[34mAccuracy=0.9210526315789473[0m

2022-04-01 20:32:22 Uploading - Uploading generated training model
2022-04-01 20:32:22 Completed - Training job completed
Training seconds: 42
Billable seconds: 42


#### Trabajo de inferencia

In [9]:
from sagemaker.transformer import Transformer

# Destination for the batch-transform results. Built from the session's
# default bucket instead of a hard-coded bucket name so the notebook also
# works from another account or region.
output_path = "s3://{}/curso_sagemaker/data/output".format(bucket)

# Transformer created from the trained estimator; results are requested as
# CSV and assembled line by line.
transformer = estimator.transformer(
    instance_count=1,
    instance_type="ml.m4.xlarge",
    output_path=output_path,
    assemble_with="Line",
    accept="text/csv",
)

# Run the batch job over the uploaded test CSV and block until it finishes.
transformer.transform(
    s3_test_path,
    content_type="text/csv",
)
transformer.wait()

........................
[34m * Serving Flask app 'serve' (lazy loading)
 * Environment: production
   Use a production WSGI server instead.
 * Debug mode: on
 * Running on all addresses (0.0.0.0)
 * Running on http://127.0.0.1:8080
 * Running on http://169.254.255.131:8080 (Press CTRL+C to quit)
 * Restarting with stat[0m
[35m * Serving Flask app 'serve' (lazy loading)
 * Environment: production
   Use a production WSGI server instead.
 * Debug mode: on
 * Running on all addresses (0.0.0.0)
 * Running on http://127.0.0.1:8080
 * Running on http://169.254.255.131:8080 (Press CTRL+C to quit)
 * Restarting with stat[0m
[34m * Debugger is active!
 * Debugger PIN: 113-950-310[0m
[35m * Debugger is active!
 * Debugger PIN: 113-950-310[0m
[34m169.254.255.130 - - [01/Apr/2022 20:37:59] "GET /ping HTTP/1.1" 200 -[0m
[34m169.254.255.130 - - [01/Apr/2022 20:37:59] "#033[33mGET /execution-parameters HTTP/1.1#033[0m" 404 -[0m
[34m169.254.255.130 - - [01/Apr/2022 20:37:59] "POST /invoc

## Sagemaker Pipelines

#### Processing

In [36]:
from sagemaker.processing import Processor
from sagemaker.processing import ProcessingInput, ProcessingOutput
from sagemaker.workflow.steps import ProcessingStep

# ECR URI of the custom image, derived from the caller's account id and the
# session region.
account = sesion.boto_session.client("sts").get_caller_identity()["Account"]
image = "{}.dkr.ecr.{}.amazonaws.com/sagemaker-decision-trees:latest".format(account, region)

# Processor that runs the custom image. The shared session is passed
# explicitly, consistent with the Estimator and Model in this notebook.
# NOTE(review): the image build log in this notebook shows the script being
# written to /usr/bin/processing, while the entrypoint here is 'process' --
# confirm the name matches the executable in the image that is actually used.
processor = Processor(
    image_uri=image,
    instance_type="ml.c4.xlarge",
    instance_count=1,
    entrypoint=['process'],
    role=role,
    sagemaker_session=sesion,
)

# Base S3 location for the processing outputs. Built from the session's
# default bucket instead of a hard-coded bucket name so the notebook also
# works from another account or region.
output_uri = "s3://{}/curso_sagemaker/data/output".format(bucket)

# Pipeline step: the full Iris CSV is mounted as input, and the container's
# train/ and test/ output folders are uploaded to separate S3 prefixes.
step_process = ProcessingStep(
    name="PreprocessData",
    processor=processor,
    inputs=[
        ProcessingInput(
            source=s3_path,
            destination="/opt/ml/processing/input"),
    ],
    outputs=[
        ProcessingOutput(
            output_name="train",
            destination=output_uri+"/train",
            source="/opt/ml/processing/output/train"),
        ProcessingOutput(
            output_name="test",
            destination=output_uri+"/test",
            source="/opt/ml/processing/output/test"),
    ],
)

#### Train

In [37]:
from sagemaker.workflow.steps import TrainingStep
from sagemaker.inputs import TrainingInput

# Training step of the pipeline. It reuses the estimator defined earlier and
# wires each input channel to the S3 URI of the processing-step output that
# carries the same name, so the data dependency is resolved at run time.
step_train = TrainingStep(
    name="TrainSklearnModel",
    estimator=estimator,
    inputs={
        channel: TrainingInput(
            s3_data=step_process.properties.ProcessingOutputConfig.Outputs[channel].S3Output.S3Uri,
            content_type="text/csv",
        )
        for channel in ("train", "test")
    },
)

#### Create model

In [38]:
from sagemaker.model import Model
from sagemaker.inputs import CreateModelInput
from sagemaker.workflow.step_collections import CreateModelStep

# Model definition: the custom image plus the artifacts produced by the
# training step (referenced through the step's properties, so the value is
# resolved when the pipeline runs).
model = Model(
    model_data=step_train.properties.ModelArtifacts.S3ModelArtifacts,
    image_uri=image,
    sagemaker_session=sesion,
    role=role,
)

# Pipeline step that registers the model above in SageMaker.
step_create_model = CreateModelStep(
    name="CreateModel",
    model=model,
    inputs=CreateModelInput(instance_type="ml.m5.large"),
)

#### Transform

In [39]:
from sagemaker.inputs import TransformInput
from sagemaker.workflow.steps import TransformStep
from sagemaker.transformer import Transformer

# Destination for the pipeline's batch-transform results. Built from the
# session's default bucket instead of a hard-coded bucket name so the
# notebook also works from another account or region.
output_data = "s3://{}/curso_sagemaker/output".format(bucket)

# Transformer bound to the model created by the previous pipeline step.
# The shared session is passed explicitly, consistent with the Estimator
# and Model in this notebook.
transformer = Transformer(
    model_name=step_create_model.properties.ModelName,
    instance_type="ml.m5.xlarge",
    accept="text/csv",
    instance_count=1,
    output_path=output_data,
    sagemaker_session=sesion,
)

# Pipeline step that runs the batch transform over the full Iris CSV.
step_transform = TransformStep(
    name="TransformStep",
    transformer=transformer,
    inputs=TransformInput(data=s3_path, content_type="text/csv")
)

#### Execute pipeline

In [40]:
import json
from pprint import pprint
from sagemaker.workflow.pipeline import Pipeline

# Assemble the four steps into a single pipeline.
pipeline = Pipeline(
    name="SklearnIrisPipeline",
    steps=[
        step_process,
        step_train,
        step_create_model,
        step_transform
    ],
)

# Create or update the pipeline definition in SageMaker and keep a parsed
# copy of the definition for inspection.
pipeline.upsert(role_arn=role)
definition = json.loads(pipeline.definition())

# Start the execution outside the try block: if starting fails there is no
# execution to inspect, and the error should surface instead of turning
# into a NameError (or a stale object from an earlier run) in the handler.
execution = pipeline.start()
try:
    execution.wait()
except Exception as error:
    # A failed or timed-out execution is reported but does not abort the
    # cell, so the per-step status below is still shown.
    print("Pipeline execution did not complete successfully: {}".format(error))

# Show the status of every step, whether the execution succeeded or not.
pprint(execution.list_steps())

[{'AttemptCount': 0,
  'EndTime': datetime.datetime(2022, 4, 1, 23, 37, 54, 709000, tzinfo=tzlocal()),
  'Metadata': {'TransformJob': {'Arn': 'arn:aws:sagemaker:eu-west-1:827345860551:transform-job/pipelines-exqisjk7bhsn-transformstep-j6r81xqwd8'}},
  'StartTime': datetime.datetime(2022, 4, 1, 23, 34, 10, 948000, tzinfo=tzlocal()),
  'StepName': 'TransformStep',
  'StepStatus': 'Succeeded'},
 {'AttemptCount': 0,
  'EndTime': datetime.datetime(2022, 4, 1, 23, 34, 10, 409000, tzinfo=tzlocal()),
  'Metadata': {'Model': {'Arn': 'arn:aws:sagemaker:eu-west-1:827345860551:model/pipelines-exqisjk7bhsn-createmodel-mvyxu3a0ne'}},
  'StartTime': datetime.datetime(2022, 4, 1, 23, 34, 9, 726000, tzinfo=tzlocal()),
  'StepName': 'CreateModel',
  'StepStatus': 'Succeeded'},
 {'AttemptCount': 0,
  'EndTime': datetime.datetime(2022, 4, 1, 23, 34, 8, 927000, tzinfo=tzlocal()),
  'Metadata': {'TrainingJob': {'Arn': 'arn:aws:sagemaker:eu-west-1:827345860551:training-job/pipelines-exqisjk7bhsn-trainsklearn