# build Sagemaker pipeline

In [1]:
!pip install -U sagemaker -q

## Setup 

In [3]:
import os 
import numpy as np 
import sagemaker, boto3, json 
from sagemaker.session import Session

sagemaker_session = Session()
aws_role =  'arn:aws:iam::527657206104:role/service-role/AmazonSageMaker-ExecutionRole-20230329T093409' #sagemaker.get_execution_role()
aws_region = boto3.Session().region_name
sess = sagemaker.Session()
sess, aws_role

(<sagemaker.session.Session at 0x7eff71374b80>,
 'arn:aws:iam::527657206104:role/service-role/AmazonSageMaker-ExecutionRole-20230329T093409')

## Creating processing step 

In [5]:
'''
Usning PipelineSession 
SageMaker Pipelines tries to find a previous run of your current pipeline step with the same values for certain attributes. 
If found, SageMaker Pipelines propagates the outputs from the previous run rather than recomputing the step
https://docs.aws.amazon.com/sagemaker/latest/dg/pipelines-caching.html
'''
from sagemaker.workflow.pipeline_context import PipelineSession
from sagemaker.processing import ProcessingInput, ProcessingOutput
from sagemaker.tensorflow.processing import TensorFlowProcessor
from sagemaker.workflow.steps import ProcessingStep


In [7]:
s3_manifest_file_path = 's3://computer-vision-bootcamp/new_dataset/supermarket-dataset/manifests/output/output.manifest'
s3_images_path = 's3://computer-vision-bootcamp/new_dataset/'
bucket_name = 'computer-vision-bootcamp'

In [8]:
# first step to define data processor
processor_instance_count = 1
processor_instance_type = 'local'#'ml.m5.large'

processor = TensorFlowProcessor(framework_version = '2.3',
                                role = aws_role,
                                base_job_name = 'supermarket-image-augmantation',
                                py_version = 'py37',
                                instance_count = processor_instance_count,
                                instance_type = processor_instance_type ,
                                sagemaker_session = PipelineSession()
                               )

In [11]:
# define processor step 
processor_args = processor.run(
    code = 'preprocessing.py',
    inputs=[
        ProcessingInput(
            input_name = 'manifest',
            source = s3_manifest_file_path,
            destination = '/opt/ml/processing/input/manifest'
        ),
        ProcessingInput(
            input_name = 'images',
            source = s3_images_path,
            destination = '/opt/ml/processing/input/images'
        )
    ],
    
    outputs=[
        ProcessingOutput(
            output_name='augmented_train_images',
            source='/opt/ml/processing/output',
            destination= f'{bucket_name}/Processing-job-output/'
            #,s3_upload_mode="Continuous"
        )

    ],
    arguments=[
         "--num_augmentations_per_img", str(10),
        "--output_s3_bucket_name", bucket_name        
    ]


)

INFO:sagemaker.processing:Uploaded None to s3://sagemaker-us-east-1-527657206104/supermarket-image-augmantation-2023-05-02-08-18-45-627/source/sourcedir.tar.gz
INFO:sagemaker.processing:runproc.sh uploaded to s3://sagemaker-us-east-1-527657206104/supermarket-image-augmantation-2023-05-02-08-18-45-627/source/runproc.sh
INFO:sagemaker:Creating processing-job with name supermarket-image-augmantation-2023-05-02-08-18-45-627
INFO:sagemaker.local.local_session:Starting processing job

KeyboardInterrupt



In [None]:

data_process_step = ProcessingStep(
    name="supermarket-ImageAugmentation",
    step_args=data_processor_args
)

## Prepare Training Job 

<font color='red'>**You can train using diffrent ways (use built-in algorithm , use pre-trained model , use your own code)**<font/>



### start training job using pretrained model provided by aws 

## Fine-tune the pre-trained model on a custom dataset
***

Here we discuss how a model can be finetuned to a custom dataset with any number of classes.

Transfer learning algorithm removes the object detection head of the pre-trained model and attaches a new randomly initialized head with number of classes same as the custom dataset. The fine-tuning step fine-tunes the last layer parameters while keeping the parameters of the rest of the model frozen, and returns the fine-tuned model. The objective during finetuning is to minimize box prediction error on the input data.

- **Input** – A directory with sub-directory images and a file annotations.json.

- **Output** – A fine-tuned model that can be deployed for inference or can be further trained using incremental training. A file mapping class indexes to class labels is saved along with the models.

The input directory should look like below if the training data contains two images. The names of .png files can be anything.

The s3 path should look like `s3://bucket_name/input_directory/`. Note the trailing `/` is required.

    input_directory
        |--images
            |--abc.png
            |--def.png
        |--annotations.json

The annotations.json file should have information for bounding_boxes and their class labels. It should have a dictionary with keys "images" and "annotations". Value for the "images" key should be a list of entries, one for each image of the form {"file_name": image_name, "height": height, "width": width, "id": image_id}. Value of the 'annotations' key should be a list of entries, one for each bounding box of the form {"image_id": image_id, "bbox": [xmin, ymin, xmax, ymax], "category_id": bbox_label}.

We provide pennfudanped dataset as a default dataset for fine-tuning the model. PennFudanPed comprises images of pedestrians. The dataset has been downloaded from here.

Citation: @ONLINE {pennfudanped, author = "Liming Wang1, Jianbo Shi2, Gang Song2, and I-fan Shen1", title = "Penn-Fudan Database for Pedestrian Detection and Segmentation", year = "2007", url = "https://www.cis.upenn.edu/~jshi/ped_html/" }

***

### Retrieve Training artifacts
***
Here, for the selected model, we retrieve the training docker container, the training algorithm source, the pre-trained base model, and a python dictionary of the training hyper-parameters that the algorithm accepts with their default values. Note that the model_version="*" fetches the latest model. Also, we do need to specify the training_instance_type to fetch train_image_uri.
***

In [None]:
# Model Selection
model_id, model_version = "tensorflow-od1-ssd-resnet101-v1-fpn-1024x1024-coco17-tpu-8", "*"

In [None]:
from sagemaker import image_uris, model_uris, script_uris, hyperparameters


training_instance_type = 'local' #"ml.p3.xlarge"
# you can get train_image_uri manually or using image_uris.retrieve()
train_image_uri = image_uris.retrieve(
    region='us-east-1',
    framework = None,
    model_id = model_id,
    model_version= model_version,
    image_scope='training',
    instance_type=training_instance_type
)

# or you can manually selecet your uri image
# model version can be found in
# https://sagemaker.readthedocs.io/en/stable/doc_utils/pretrainedmodels.html
# available images: https://github.com/aws/deep-learning-containers/blob/master/available_images.md

# train_image_uri = '763104351884.dkr.ecr.us-east-1.amazonaws.com/tensorflow-training:2.11.0-gpu-py39'


# retrive training script 
training_script_uri = script_uris.retrieve(model_id= model_id, model_version=model_version,script_scope='training')

# Retrieve the pre-trained model tarball to further fine-tune
train_model_uri = model_uris.retrieve( model_id=model_id, model_version=model_version, model_scope='training')


print(training_script_uri)
# s3://jumpstart-cache-prod-us-east-1/source-directory-tarballs/tensorflow/transfer_learning/od1/v1.0.1/sourcedir.tar.gz

### Set Training parameters
***
Now that we are done with all the setup that is needed, we are ready to fine-tune our Object Detection model. To begin, let us create a [``sageMaker.estimator.Estimator``](https://sagemaker.readthedocs.io/en/stable/api/training/estimators.html) object. This estimator will launch the training job. 

There are two kinds of parameters that need to be set for training. 

The first one are the parameters for the training job. These include: (i) Training data path. This is S3 folder in which the input data is stored, (ii) Output path: This the s3 folder in which the training output is stored. (iii) Training instance type: This indicates the type of machine on which to run the training. Typically, we use GPU instances for these training. We defined the training instance type above to fetch the correct train_image_uri. 

The second set of parameters are algorithm specific training hyper-parameters.
***

***
For algorithm specific hyper-parameters, we start by fetching python dictionary of the training hyper-parameters that the algorithm accepts with their default values. This can then be overridden to custom values.
***

In [None]:
from sagemaker import hyperparameters 

# hyperparameters = hyperparameters.retrieve_default(model_id = model_id, model_version = model_version)


# # # [Optional] Override default hyperparameters with custom values
# hyperparameters["learning_rate"] = "0.01"
# hyperparameters["batch_size"] = "32"
# hyperparameters["optimizer"] = "adam"
# hyperparameters["train_only_top_layer"] = "True"
# hyperparameters["reinitialize_top_layer"] = "True"
# hyperparameters['epochs'] = 10
# print(hyperparameters)


hyperparameters = {
    "batch_size": "3",
    "reinitialize_top_layer": "True",
    "train_only_top_layer": "False",
    "optimizer": "adam",
    "learning_rate": "0.001",
    "beta_1": "0.9",
    "beta_2": "0.999",
    "momentum": "0.9",
    "epsilon": "1e-07",
    "rho": "0.95",
    "initial_accumulator_value": "0.1",
    "early_stopping": "False",
    "early_stopping_patience": "5",
    "early_stopping_min_delta": "0.0",
    "epochs": "1"
}

### Train with Automatic Model Tuning ([HPO](https://docs.aws.amazon.com/sagemaker/latest/dg/automatic-model-tuning.html)) <a id='AMT'></a>
***
Amazon SageMaker automatic model tuning, also known as hyperparameter tuning, finds the best version of a model by running many training jobs on your dataset using the algorithm and ranges of hyperparameters that you specify. It then chooses the hyperparameter values that result in a model that performs the best, as measured by a metric that you choose. We will use a [HyperparameterTuner](https://sagemaker.readthedocs.io/en/stable/api/training/tuner.html) object to interact with Amazon SageMaker hyperparameter tuning APIs.
***

In [None]:
from sagemaker.tuner import ContinuousParameter

# Define objective metric per framework, based on which the best model will be selected.
amt_metric_definitions = {
    "metrics": [{"Name": "validation:localization_loss", "Regex": "Val_localization=([0-9\\.]+)"}],
    "type": "Minimize",
}

training_metric_definitions = [
    {"Name": "val_localization_loss", "Regex": "Val_localization=([0-9\\.]+)"},
    {"Name": "val_classification_loss", "Regex": "Val_classification=([0-9\\.]+)"},
    {"Name": "train_loss", "Regex": "loss=([0-9\\.]+)."},
]

# You can select from the hyperparameters supported by the model, and configure ranges of values to be searched for training the optimal model.(https://docs.aws.amazon.com/sagemaker/latest/dg/automatic-model-tuning-define-ranges.html)
hyperparameter_ranges = {
    "learning_rate": ContinuousParameter(0.001, 0.5, scaling_type="Logarithmic")
}


## Start Training
***
We start by creating the estimator object with all the required assets and then launch the training job. This can take up to 30 minutes.
***

In [None]:
from sagemaker.estimator import Estimator
from sagemaker.utils import name_from_base
from sagemaker.tuner import HyperparameterTuner
from sagemaker.inputs import TrainingInput
# Increase the total number of training jobs run by AMT, for increased accuracy (and training time).
max_jobs = 6
# Change parallel training jobs run by AMT to reduce total training time, constrained by your account limits.
# if max_jobs=max_parallel_jobs then Bayesian search turns to Random.
max_parallel_jobs = 1
use_amt = True 
training_job_output_path = f's3://computer-vision-bootcamp/model_training/{model_version}'
training_instance_count = 1
training_instance_type = 'local' # 'ml.p3.xlarge' ##if you want to test locally choose local
# Create Estimator to start trainting job 

estimator = Estimator(
    role = aws_role,
    image_uri = train_image_uri,
    source_dir = training_script_uri,
    model_uri = train_model_uri,
    entry_point='transfer_learning.py', # this script is provided by training_script_uri
    instance_count = training_instance_count,
    instance_type = training_instance_type,
    sagemaker_session = PipelineSession(),
    enable_sagemaker_metrics=True,
    hyperparameters=hyperparameters,
    base_job_name = f'supermarket-ssd-{model_id}',
    max_run = 360000,
    volume_size=50,
    output_path=f"{training_job_output_path}",
    metric_definitions=training_metric_definitions
)


# Now we initialized our estimator we need to define our inputs as parameter
training_data_input = TrainingInput(
    s3_data=data_process_step.properties.ProcessingOutputConfig.Outputs["augmented_train_images"].S3Output.S3Uri
)
validation_data_input = TrainingInput(
    s3_data=data_process_step.properties.ProcessingOutputConfig.Outputs["augmented_validation_images"].S3Output.S3Uri
)

print(training_data_input) ## to make sure it is correct i think i need to add /train/


# if you chooce to hyperparameter tunning use_amt = True 
if use_amt:

    hp_tuner = HyperparameterTuner(
        estimator,
        amt_metric_definitions["metrics"][0]["Name"],
        hyperparameter_ranges,
        amt_metric_definitions["metrics"],
        max_jobs=max_jobs,
        max_parallel_jobs=max_parallel_jobs,
        objective_type=amt_metric_definitions["type"],
        base_tuning_job_name=training_job_name
    )

    # Launch a SageMaker Tuning job to search for the best hyperparameters
    training_args = hp_tuner.fit({"training": training_dataset_s3_path},)
else:
    # Launch a SageMaker Training job by passing s3 path of the training data
    training_args = estimator.fit({"training": training_dataset_s3_path}, logs=True)






In [None]:
from sagemaker.workflow.steps import TrainingStep

object_detection_model_training_step = TrainingStep(
    name="super-market-training-job",
    step_args=training_args
)

## Pipeline execution

In [None]:
from sagemaker.workflow.pipeline import Pipeline

In [None]:
eager_object_detection_pipeline = Pipeline(
    name="super-market-shelf-refill",
    parameters=[
        processing_instance_type,
        processing_instance_count,
        training_instance_type,
        training_instance_count
    ],
    steps=[
        data_process_step,
        object_detection_model_training_step
    ]
)

In [None]:
eager_object_detection_pipeline.name,eager_object_detection_pipeline.steps

In [None]:
eager_object_detection_pipeline.parameters

In [None]:
import json

json.loads(eager_object_detection_pipeline.definition())

### Creating, updating and starting a pipeline

Submit the pipeline definition to the Pipeline service. The Pipeline service uses the role that is passed in to create all the jobs defined in the steps.


In [None]:
eager_object_detection_pipeline.upsert(role_arn=role)

In [None]:
execution = eager_object_detection_pipeline.start()

In [None]:
execution.describe()