In [8]:
import os 
import time 
import boto3
import logging 
import sagemaker
import numpy as np
import pandas as pd
from smexperiments.trial import Trial
from smexperiments.experiment import Experiment 


In [9]:
from smexperiments.tracker import Tracker
from sagemaker.analytics import ExperimentAnalytics
from smexperiments.trial_component import TrialComponent

In [None]:
# A single SageMaker session backs the notebook. Its boto3 session supplies
# both the region and the low-level SageMaker client, so every AWS call
# resolves the same credentials and region instead of a second, independent
# boto3.Session().
sess = sagemaker.Session()

# Bucket that holds the project's data prefix.
bucket = 'sagemaker-practice-bucket-nuga'

region = sess.boto_session.region_name
sm = sess.boto_session.client(service_name='sagemaker', region_name=region)

In [26]:
# Role ARN handed to the processor below. It can be overridden per
# environment through SAGEMAKER_EXECUTION_ROLE_ARN; when the variable is not
# set, the ARN this notebook was originally written with is used.
role = os.environ.get(
    'SAGEMAKER_EXECUTION_ROLE_ARN',
    'arn:aws:iam::928389782162:role/service-role/AmazonSageMaker-ExecutionRole-20230807T162115',
)

##### TRACK EXPERIMENTS

In [11]:
# Whole seconds since the epoch; used as a per-run suffix for names.
timestamp = int(time.time())

# Name reused below when the experiment is created.
pipeline_name = f"FITNESS-PROJECT-{timestamp}"

# Echo the generated name as the cell output.
pipeline_name

'FITNESS-PROJECT-1698176833'

In [12]:
# The experiment and its trial are created against the same experiment name
# and the same SageMaker client, so those arguments are shared.
_tracking_kwargs = {
    'experiment_name': pipeline_name,
    'sagemaker_boto_client': sm,
}

experiment = Experiment.create(
    description='Testing sagemaker pipeline',
    **_tracking_kwargs,
)

# The trial name carries the same timestamp suffix as the experiment name.
trial = Trial.create(
    trial_name=f'trial-{timestamp}',
    **_tracking_kwargs,
)

In [13]:
def _experiment_config(display_name):
    """Build the experiment config dict for one stage.

    All stages share the same experiment name and trial name; only the
    trial component's display name differs.
    """
    return {
        'ExperimentName': pipeline_name,
        'TrialName': trial.trial_name,
        'TrialComponentDisplayName': display_name,
    }


# One config per stage, distinguished only by display name.
experiment_config_process = _experiment_config('Process')
experiment_config_train = _experiment_config('Train')
experiment_config_evaluate = _experiment_config('Evaluate')
experiment_config_register_model = _experiment_config('Register Model')

In [20]:
# S3 prefix the raw input data is read from (listed in the next cell).
s3_input_data_uri = f"s3://{bucket}/fitness_project/data/"

In [21]:
!aws s3 ls $s3_input_data_uri

2023-10-24 20:44:05      48039 fitness_class_2212.csv


##### PARAMETERIZE PIPELINE EXECUTION

In [22]:
from sagemaker.workflow.parameters import (
    ParameterInteger,
    ParameterString,
    ParameterFloat
)

In [23]:
# Pipeline parameters and their default values.
# NOTE(review): the processing cell further down rebinds `input_data` to a
# list of ProcessingInput objects, so this parameter object is no longer
# reachable under that name afterwards.
exp_name = ParameterString(name='ExperimentName', default_value=pipeline_name)
input_data = ParameterString(name='InputData', default_value=s3_input_data_uri)

# Compute settings consumed by the processor.
instance_type = ParameterString(name='InstanceType', default_value='ml.t3.medium')
instance_count = ParameterInteger(name='InstanceCount', default_value=1)


##### PROCESSING STEP

In [28]:
from sagemaker.workflow.steps import ProcessingStep
from sagemaker.sklearn.processing import SKLearnProcessor
from sagemaker.processing import ProcessingInput, ProcessingOutput

In [33]:
# Processed splits are written under their own prefix rather than back into
# the raw-data prefix. Otherwise the train/test files would sit next to the
# raw CSV and be picked up as input on the next run.
s3_processed_data_uri = f's3://{bucket}/fitness_project/processed'

# scikit-learn processing container; instance type and count come from the
# pipeline parameters defined earlier.
processor = SKLearnProcessor(
    framework_version='0.23-1',
    role=role,
    instance_type=instance_type,
    instance_count=instance_count,
    env={'AWS_DEFAULT_REGION': region},
)

# NOTE(review): this rebinds `input_data`, which an earlier cell bound to the
# 'InputData' ParameterString. The name is kept because the ProcessingStep
# cell passes `inputs=input_data`; consider renaming one of the two so the
# pipeline parameter stays usable.
input_data = [
    ProcessingInput(
        input_name='input_data',
        source=s3_input_data_uri,
        destination='/opt/ml/processing/input',
    )
]

# Each output needs its own name so it can be looked up individually, and its
# own destination so the train and test splits do not overwrite or mix with
# each other.
output_data = [
    ProcessingOutput(
        output_name='train_data',
        source='/opt/ml/processing/output/train',
        destination=f'{s3_processed_data_uri}/train',
    ),
    ProcessingOutput(
        output_name='test_data',
        source='/opt/ml/processing/output/test',
        destination=f'{s3_processed_data_uri}/test',
    ),
]

INFO:botocore.credentials:Found credentials in environment variables.


sagemaker.config INFO - Not applying SDK defaults from location: /Library/Application Support/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /Users/Labi/Library/Application Support/sagemaker/config.yaml


INFO:sagemaker.image_uris:Defaulting to only available Python version: py3


In [34]:
# Pipeline step that runs preprocess_script.py on the processor, wired to the
# input and output lists built in the previous cell. No job_arguments are
# passed to the script.
processing_step = ProcessingStep(
    name='ProcessingStep',
    processor=processor,
    code='preprocess_script.py',
    inputs=input_data,
    outputs=output_data,
)

# Display the step's repr as the cell output.
processing_step

ProcessingStep(name='ProcessingStep', display_name=None, description=None, step_type=<StepTypeEnum.PROCESSING: 'Processing'>, depends_on=None)

##### TRAINING STEP

In [None]:
# FIXME: this cell is unfinished -- the 'Name' key has no value, so the cell
# raises a SyntaxError when run.
# NOTE(review): presumably intended as training metric definitions, which the
# SageMaker SDK takes as a list of {'Name': ..., 'Regex': ...} dicts -- confirm
# the intended metrics before completing.
metrics_definition = {
    'Name': 
}