In [1]:
#AWS import
import sagemaker
import boto3

# General-purpose imports used inside the notebook
import numpy as np
import pandas as pd
import os

In [2]:
# SageMaker session, used below to look up the region and the default bucket.
session = sagemaker.Session()

region = session.boto_region_name
print(f"Region AWS : {region}")

# Key prefix under which this notebook's dataset is stored in the bucket.
prefix = "data_script_mode"

# Execution role passed to the processing and training jobs further down.
role = sagemaker.get_execution_role()

bucket = session.default_bucket()
print(bucket)

Region AWS : us-east-1
sagemaker-us-east-1-222978838857


In [3]:
# Load the raw dataset locally (indexed by the "UDI" column) and upload the
# same file to S3 so that the processing job can read it.
local_dataset_path = "predictive_maintenance.csv"
dataset = pd.read_csv(local_dataset_path, index_col="UDI")

# S3 object keys always use "/" as the separator. os.path.join would insert the
# operating system's separator instead (a backslash on Windows), producing a key
# that no longer matches the "<prefix>/data/dataset.csv" URI the processing job
# reads from, so the key is built explicitly.
dataset_key = f"{prefix}/data/dataset.csv"
boto3.Session().resource("s3").Bucket(bucket).Object(dataset_key).upload_file(local_dataset_path)

In [4]:
from sagemaker.processing import ProcessingInput, ProcessingOutput
from sagemaker.sklearn.processing import SKLearnProcessor

# Scikit-learn processor; instance_type "local" runs the job in SageMaker local mode.
processor = SKLearnProcessor(
    role=role,
    framework_version="0.20.0",
    instance_type="local",
    instance_count=1,
)

bucket_path = f"s3://{bucket}"

# The uploaded CSV is made available to the script under /opt/ml/processing/input.
raw_dataset_input = ProcessingInput(
    source=f"{bucket_path}/{prefix}/data/dataset.csv",
    destination="/opt/ml/processing/input",
)

# Container directories collected as the named job outputs; the names are
# what the next cell uses to find the resulting S3 locations.
split_outputs = [
    ProcessingOutput(output_name="train_data", source="/opt/ml/processing/train"),
    ProcessingOutput(output_name="test_data", source="/opt/ml/processing/test"),
]

processor.run(
    code="processing.py",
    inputs=[raw_dataset_input],
    outputs=split_outputs,
    arguments=["--train-test-split-ratio", "0.2"],
)

INFO:sagemaker:Creating processing-job with name sagemaker-scikit-learn-2023-06-14-09-12-17-022
INFO:sagemaker.local.local_session:Starting processing job
INFO:botocore.credentials:Found credentials from IAM Role: BaseNotebookInstanceEc2InstanceRole
INFO:sagemaker.local.image:No AWS credentials found in session but credentials from EC2 Metadata Service are available.
INFO:sagemaker.local.image:docker compose file: 
networks:
  sagemaker-local:
    name: sagemaker-local
services:
  algo-1-2bifl:
    container_name: r2lqfmeve2-algo-1-2bifl
    entrypoint:
    - python3
    - /opt/ml/processing/input/code/processing.py
    - --train-test-split-ratio
    - '0.2'
    environment: []
    image: 683313688378.dkr.ecr.us-east-1.amazonaws.com/sagemaker-scikit-learn:0.20.0-cpu-py3
    networks:
      sagemaker-local:
        aliases:
        - algo-1-2bifl
    stdin_open: true
    tty: true
    volumes:
    - /tmp/tmpfpj07679/algo-1-2bifl/config:/opt/ml/config
    - /tmp/tmpfpj07679/algo-1-2bifl/

Login Succeeded


INFO:sagemaker.local.image:image pulled: 683313688378.dkr.ecr.us-east-1.amazonaws.com/sagemaker-scikit-learn:0.20.0-cpu-py3


Creating r2lqfmeve2-algo-1-2bifl ... 
Creating r2lqfmeve2-algo-1-2bifl ... done
Attaching to r2lqfmeve2-algo-1-2bifl
[36mr2lqfmeve2-algo-1-2bifl |[0m   import imp
[36mr2lqfmeve2-algo-1-2bifl |[0m   LARGE_SPARSE_SUPPORTED = LooseVersion(scipy_version) >= '0.14.0'
[36mr2lqfmeve2-algo-1-2bifl |[0m Received arguments Namespace(train_test_split_ratio=0.2)
[36mr2lqfmeve2-algo-1-2bifl |[0m Reading input data from /opt/ml/processing/input/dataset.csv
[36mr2lqfmeve2-algo-1-2bifl |[0m Splitting data into train and test sets with ratio 0.2
[36mr2lqfmeve2-algo-1-2bifl |[0m       Type  ...  Target
[36mr2lqfmeve2-algo-1-2bifl |[0m UDI         ...        
[36mr2lqfmeve2-algo-1-2bifl |[0m 7390     0  ...       0
[36mr2lqfmeve2-algo-1-2bifl |[0m 9276     0  ...       0
[36mr2lqfmeve2-algo-1-2bifl |[0m 2996     0  ...       0
[36mr2lqfmeve2-algo-1-2bifl |[0m 5317     1  ...       0
[36mr2lqfmeve2-algo-1-2bifl |[0m 357      1  ...       0
[36mr2lqfmeve2-algo-1-2bifl |[0m ...    

In [5]:
# Describe the most recent processing job to find where its outputs were written.
preprocessing_job_description = processor.jobs[-1].describe()
output_config = preprocessing_job_description["ProcessingOutputConfig"]

# Map every output name to its S3 URI. Looking the names up in this mapping
# raises a KeyError immediately if an expected output is missing, instead of
# leaving the variables undefined (or silently keeping values from an earlier
# run of this cell in the same kernel).
output_uris = {
    job_output["OutputName"]: job_output["S3Output"]["S3Uri"]
    for job_output in output_config["Outputs"]
}
preprocessed_training_data = output_uris["train_data"]
preprocessed_test_data = output_uris["test_data"]

# Observe the processed data: only the first 10 rows are read.
training_features = pd.read_csv(
    f"{preprocessed_training_data}/dataset_train.csv", index_col="UDI", nrows=10
)
# The printed shape still counts the "Target" column; it is dropped only for the display below.
print("Training features shape: {}".format(training_features.shape))
training_features.drop(["Target"], axis=1)

INFO:botocore.credentials:Found credentials from IAM Role: BaseNotebookInstanceEc2InstanceRole


Training features shape: (10, 7)


Unnamed: 0_level_0,Type,Air temperature [K],Process temperature [K],Rotational speed [rpm],Torque [Nm],Tool wear [min]
UDI,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
7390,0,299.8,310.6,1707,32.5,124
9276,0,298.2,308.7,1605,29.4,47
2996,0,300.5,309.8,1550,37.4,148
5317,1,303.6,312.8,1689,33.1,16
357,1,297.6,308.4,1398,48.9,63
50,1,298.9,309.2,1412,44.1,140
7932,0,300.7,311.7,1499,38.4,205
1524,0,298.3,309.0,1519,36.9,41
7553,0,300.3,311.3,1525,40.7,105
497,1,297.5,309.2,1530,40.0,206


## Training

In [6]:
from sagemaker.sklearn.estimator import SKLearn

# Scikit-learn estimator driven by train.py; instance_type "local" keeps the
# training job in SageMaker local mode, like the processing step.
estimator = SKLearn(
    entry_point="train.py",
    role=role,
    instance_type="local",
    instance_count=1,
    framework_version="0.20.0",
)

# Channel name -> S3 location of the data produced by the processing job.
training_channels = {
    "train": preprocessed_training_data,
    "test": preprocessed_test_data,
}
estimator.fit(training_channels)

INFO:botocore.credentials:Found credentials from IAM Role: BaseNotebookInstanceEc2InstanceRole
INFO:sagemaker:Creating training-job with name: sagemaker-scikit-learn-2023-06-14-09-28-19-927
INFO:sagemaker.local.local_session:Starting training job
INFO:botocore.credentials:Found credentials from IAM Role: BaseNotebookInstanceEc2InstanceRole
INFO:sagemaker.local.image:No AWS credentials found in session but credentials from EC2 Metadata Service are available.
INFO:sagemaker.local.image:docker compose file: 
networks:
  sagemaker-local:
    name: sagemaker-local
services:
  algo-1-b09s5:
    command: train
    container_name: ldla6mkznv-algo-1-b09s5
    environment:
    - '[Masked]'
    - '[Masked]'
    image: 683313688378.dkr.ecr.us-east-1.amazonaws.com/sagemaker-scikit-learn:0.20.0-cpu-py3
    networks:
      sagemaker-local:
        aliases:
        - algo-1-b09s5
    stdin_open: true
    tty: true
    volumes:
    - /tmp/tmpjvpdbx3l/algo-1-b09s5/input:/opt/ml/input
    - /tmp/tmpjvpdb

Creating ldla6mkznv-algo-1-b09s5 ... 
Creating ldla6mkznv-algo-1-b09s5 ... done
Attaching to ldla6mkznv-algo-1-b09s5
[36mldla6mkznv-algo-1-b09s5 |[0m 2023-06-14 09:28:23,489 sagemaker-containers INFO     Imported framework sagemaker_sklearn_container.training
[36mldla6mkznv-algo-1-b09s5 |[0m 2023-06-14 09:28:23,493 sagemaker-training-toolkit INFO     No GPUs detected (normal if no gpus installed)
[36mldla6mkznv-algo-1-b09s5 |[0m 2023-06-14 09:28:23,502 sagemaker-training-toolkit INFO     instance_groups entry not present in resource_config
[36mldla6mkznv-algo-1-b09s5 |[0m 2023-06-14 09:28:23,502 sagemaker_sklearn_container.training INFO     Invoking user training script.
[36mldla6mkznv-algo-1-b09s5 |[0m 2023-06-14 09:28:23,755 sagemaker-training-toolkit INFO     No GPUs detected (normal if no gpus installed)
[36mldla6mkznv-algo-1-b09s5 |[0m 2023-06-14 09:28:23,764 sagemaker-training-toolkit INFO     instance_groups entry not present in resource_config
[36mldla6mkznv-algo-1

[36mldla6mkznv-algo-1-b09s5 |[0m   import imp
[36mldla6mkznv-algo-1-b09s5 |[0m   LARGE_SPARSE_SUPPORTED = LooseVersion(scipy_version) >= '0.14.0'
[36mldla6mkznv-algo-1-b09s5 |[0m   return self.partial_fit(X, y)
[36mldla6mkznv-algo-1-b09s5 |[0m   return self.fit(X, **fit_params).transform(X)
[36mldla6mkznv-algo-1-b09s5 |[0m   X_test = sc.transform(X_test)
[36mldla6mkznv-algo-1-b09s5 |[0m accuracy on test is : 0.9625
[36mldla6mkznv-algo-1-b09s5 |[0m   pickler.file_handle.write(chunk.tostring('C'))
[36mldla6mkznv-algo-1-b09s5 |[0m   pickler.file_handle.write(chunk.tostring('C'))
[36mldla6mkznv-algo-1-b09s5 |[0m   pickler.file_handle.write(chunk.tostring('C'))
[36mldla6mkznv-algo-1-b09s5 |[0m   pickler.file_handle.write(chunk.tostring('C'))
[36mldla6mkznv-algo-1-b09s5 |[0m   pickler.file_handle.write(chunk.tostring('C'))
[36mldla6mkznv-algo-1-b09s5 |[0m   pickler.file_handle.write(chunk.tostring('C'))
[36mldla6mkznv-algo-1-b09s5 |[0m   pickler.file_handle.write(ch

INFO:root:creating /tmp/tmpjvpdbx3l/artifacts/output/data
INFO:root:copying /tmp/tmpjvpdbx3l/algo-1-b09s5/output/success -> /tmp/tmpjvpdbx3l/artifacts/output
INFO:root:copying /tmp/tmpjvpdbx3l/model/model.joblib -> /tmp/tmpjvpdbx3l/artifacts/model


===== Job Complete =====


In [134]:
# Describe the most recent training job and read the S3 location of the
# model artifact it produced.
training_job_description = estimator.jobs[-1].describe()
model_data_s3_uri = training_job_description["ModelArtifacts"]["S3ModelArtifacts"]

# Last expression of the cell, so the URI is displayed as the cell output.
model_data_s3_uri

's3://sagemaker-us-east-1-222978838857/sagemaker-scikit-learn-2023-06-13-14-50-37-435/model.tar.gz'

## Deploying the model

In [135]:
# NOTE(review): CSVSerializer is imported here but not used in this cell — confirm whether it is still needed.
from sagemaker.serializers import CSVSerializer

# Serve the trained model from a single endpoint in SageMaker local mode
# (instance_type "local"); the returned predictor is reused by the cells below.
Predictor = estimator.deploy(instance_type="local", initial_instance_count=1)

INFO:sagemaker:Creating model with name: sagemaker-scikit-learn-2023-06-13-14-50-50-532
INFO:sagemaker:Creating endpoint-config with name sagemaker-scikit-learn-2023-06-13-14-50-50-532
INFO:sagemaker:Creating endpoint with name sagemaker-scikit-learn-2023-06-13-14-50-50-532
INFO:sagemaker.local.image:serving
INFO:sagemaker.local.image:creating hosting dir in /tmp/tmpnz8btjxj
INFO:botocore.credentials:Found credentials from IAM Role: BaseNotebookInstanceEc2InstanceRole
INFO:sagemaker.local.image:No AWS credentials found in session but credentials from EC2 Metadata Service are available.
INFO:sagemaker.local.image:docker compose file: 
networks:
  sagemaker-local:
    name: sagemaker-local
services:
  algo-1-qmx3v:
    command: serve
    container_name: ft680fky0l-algo-1-qmx3v
    environment:
    - '[Masked]'
    - '[Masked]'
    - '[Masked]'
    - '[Masked]'
    image: 683313688378.dkr.ecr.us-east-1.amazonaws.com/sagemaker-scikit-learn:0.20.0-cpu-py3
    networks:
      sagemaker-local

Attaching to ft680fky0l-algo-1-qmx3v
[36mft680fky0l-algo-1-qmx3v |[0m Processing /opt/ml/code
[36mft680fky0l-algo-1-qmx3v |[0m   Preparing metadata (setup.py) ... [?25ldone
[36mft680fky0l-algo-1-qmx3v |[0m [?25hBuilding wheels for collected packages: train
[36mft680fky0l-algo-1-qmx3v |[0m   Building wheel for train (setup.py) ... [?25ldone
[36mft680fky0l-algo-1-qmx3v |[0m [?25h  Created wheel for train: filename=train-1.0.0-py2.py3-none-any.whl size=4179 sha256=aa80424700c298ab7f79682f24137c0fea8a09e1b0e25cc863739872d0a99dc6
[36mft680fky0l-algo-1-qmx3v |[0m   Stored in directory: /tmp/pip-ephem-wheel-cache-ddncl6he/wheels/3e/0f/51/2f1df833dd0412c1bc2f5ee56baac195b5be563353d111dca6
[36mft680fky0l-algo-1-qmx3v |[0m Successfully built train
[36mft680fky0l-algo-1-qmx3v |[0m Installing collected packages: train
[36mft680fky0l-algo-1-qmx3v |[0m Successfully installed train-1.0.0
[36mft680fky0l-algo-1-qmx3v |[0m   import imp
[36mft680fky0l-algo-1-qmx3v |[0m   LARGE_S

INFO:sagemaker.local.entities:Checking if serving container is up, attempt: 10


[36mft680fky0l-algo-1-qmx3v |[0m 2023-06-13 14:50:56,474 INFO - sagemaker-containers - No GPUs detected (normal if no gpus installed)
[36mft680fky0l-algo-1-qmx3v |[0m   import imp
[36mft680fky0l-algo-1-qmx3v |[0m   LARGE_SPARSE_SUPPORTED = LooseVersion(scipy_version) >= '0.14.0'
[36mft680fky0l-algo-1-qmx3v |[0m 172.18.0.1 - - [13/Jun/2023:14:50:57 +0000] "GET /ping HTTP/1.1" 200 0 "-" "python-urllib3/1.26.8"
!

In [136]:
# Display the name of the endpoint created by the deploy step.
Predictor.endpoint_name

'sagemaker-scikit-learn-2023-06-13-14-50-50-532'

In [137]:
# Position (0-based row number) of the test example sent to the endpoint.
# Keeping it in one place guarantees that the label printed below and the
# features submitted for prediction come from the same row.
sample_position = 216

test_set = pd.read_csv(preprocessed_test_data + "/dataset_test.csv", index_col="UDI")

# Ground-truth label of the selected row, printed for comparison with the prediction.
print(test_set["Target"].iloc[sample_position])

# The endpoint receives the feature columns only, so the label column is removed.
testing_features = test_set.drop(["Target"], axis=1)

Predictor.predict([testing_features.iloc[sample_position].tolist()])

1
[36mft680fky0l-algo-1-qmx3v |[0m 2023-06-13 14:59:13,167 INFO - sagemaker-containers - No GPUs detected (normal if no gpus installed)
[36mft680fky0l-algo-1-qmx3v |[0m   import imp
[36mft680fky0l-algo-1-qmx3v |[0m   LARGE_SPARSE_SUPPORTED = LooseVersion(scipy_version) >= '0.14.0'
[36mft680fky0l-algo-1-qmx3v |[0m 172.18.0.1 - - [13/Jun/2023:14:59:14 +0000] "POST /invocations HTTP/1.1" 200 136 "-" "python-urllib3/1.26.8"


array([0])

In [138]:
# Clean up what the deploy step created: the model first, then the endpoint.
# The log output of this cell shows the endpoint configuration being deleted as well.
Predictor.delete_model()
Predictor.delete_endpoint()

INFO:sagemaker:Deleting model with name: sagemaker-scikit-learn-2023-06-13-14-50-50-532
INFO:sagemaker:Deleting endpoint configuration with name: sagemaker-scikit-learn-2023-06-13-14-50-50-532
INFO:sagemaker:Deleting endpoint with name: sagemaker-scikit-learn-2023-06-13-14-50-50-532


Gracefully stopping... (press Ctrl+C again to force)
