In [None]:
import logging

# Raise the threshold on the SageMaker SDK loggers named below to WARNING so
# their lower-severity messages do not appear in the cell output.
for noisy_logger in ("sagemaker.config", "sagemaker.experiments.run"):
    logging.getLogger(noisy_logger).setLevel(logging.WARNING)

In [4]:
import sagemaker
import pandas as pd
import numpy as np
import boto3
from datetime import datetime
from sagemaker.sklearn.processing import SKLearnProcessor
from sagemaker.processing import ProcessingInput, ProcessingOutput
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import os
import time
from sagemaker.experiments.run import Run
from sagemaker.feature_store.feature_group import FeatureGroup

  from IPython.core.display import display, HTML


In [5]:
# Shared AWS handles used by the rest of the notebook.
sagemaker_session = sagemaker.Session()
bucket = sagemaker_session.default_bucket()      # session's default S3 bucket
role_arn = sagemaker.get_execution_role()        # IAM role handed to SageMaker jobs
region = sagemaker_session.boto_region_name

# Pin both low-level clients to the session's region so they cannot drift to
# whatever region the ambient boto3 configuration happens to default to.
s3_client = boto3.client('s3', region_name=region)
sagemaker_client = boto3.client('sagemaker', region_name=region)

INFO:sagemaker:Created S3 bucket: sagemaker-us-east-1-905418230844


In [7]:
!aws s3 cp customer_data.csv s3://sagemaker-us-east-1-891377032677/CustomerRiskRating/v1/

upload: ./customer_data.csv to s3://sagemaker-studio-6b1c7e70/CustomerRiskRating/v1/customer_data.csv


Kick-start a Processing job

In [None]:
# Run preprocessing.py as a SageMaker Processing job inside a
# "CustomerRiskRating" experiment run.
_datetime = datetime.now().strftime('%Y-%m-%d-%H-%M-%S')

_prefix = "pre-processing"

# The experiment run and the processing job share one timestamped name so the
# two can be matched up afterwards.
job_name = f"{_prefix}-{_datetime}"

with Run(
    experiment_name="CustomerRiskRating",
    run_name=job_name,
    sagemaker_session=sagemaker_session,
) as run:

    sklearn_processor = SKLearnProcessor(
        framework_version='1.0-1',
        role=role_arn,
        instance_type="ml.t3.medium",
        instance_count=1,
        # Use the same session as the Run above instead of letting the
        # processor create its own implicit one.
        sagemaker_session=sagemaker_session,
    )

    sklearn_processor.run(
        code='preprocessing.py',
        job_name=job_name,
        inputs=[
            ProcessingInput(
                # Local file, made available to the job under the container
                # path given as `destination`.
                source="customer_data.csv",
                destination='/opt/ml/processing/input',
            )
        ],
        outputs=[
            ProcessingOutput(
                # Write results to the session's default bucket rather than a
                # hard-coded, account-specific bucket name.
                destination=f"s3://{bucket}/customerriskrating/processingjob",
                source='/opt/ml/processing/output',
            )
        ],
        # Forwarded to preprocessing.py as command-line arguments; how they
        # are interpreted is defined by that script (not visible here).
        arguments=[
            "--train_size", str(0.7),
            "--val_size", str(0.2),
            "--test_size", str(0.1),
            "--random_state", str(10),
            "--target_col", "LABEL",
        ],
    )

    # Description of the most recently launched job on this processor.
    processing_job_description = sklearn_processor.jobs[-1].describe()
                
                
        
        
    
    
    