## Image Classification - SageMaker JumpStart (PyTorch MobileNetV2)

https://github.com/aws/amazon-sagemaker-examples/blob/main/introduction_to_amazon_algorithms/image_classification_tensorflow/Amazon_TensorFlow_Image_Classification.ipynb


https://github.com/aws/amazon-sagemaker-examples/blob/93fc48d21bf88d07853775f11d6ef7db92110549/introduction_to_amazon_algorithms/jumpstart_image_classification/Amazon_JumpStart_Image_Classification.ipynb


https://aws.amazon.com/blogs/machine-learning/transfer-learning-for-tensorflow-image-classification-models-in-amazon-sagemaker/


https://aws.amazon.com/blogs/machine-learning/run-image-classification-with-amazon-sagemaker-jumpstart/

In [1]:
# Load the nb_black IPython extension for this kernel session.
# NOTE(review): presumably the source of the Javascript output object that
# follows every cell in this notebook - confirm.
%load_ext nb_black

<IPython.core.display.Javascript object>

In [2]:
import os
import json
import logging
from datetime import datetime

<IPython.core.display.Javascript object>

In [3]:
import boto3
import sagemaker
from sagemaker import get_execution_role
from sagemaker import hyperparameters
from sagemaker import image_uris, model_uris, script_uris
from sagemaker.estimator import Estimator


<IPython.core.display.Javascript object>

In [4]:
# Attach a stream logger for the "botocore.credentials" logger at WARNING
# level, so lower-severity credential messages stay out of the cell output.
boto3.set_stream_logger(
    name="botocore.credentials",
    level=logging.WARNING,
)

<IPython.core.display.Javascript object>

In [5]:
# Create a SageMaker session with default settings and print the AWS region
# it reports (boto_region_name).
sess = sagemaker.Session()
region = sess.boto_region_name
print(region)

ap-southeast-2


<IPython.core.display.Javascript object>

In [6]:
# IAM role the SageMaker jobs will assume.
# Prefer an explicitly configured ARN from the SGMKR_ROLE_ARN environment
# variable. When it is unset or empty, fall back to get_execution_role()
# instead of silently carrying role_arn=None into the Estimator.
role_arn = os.getenv("SGMKR_ROLE_ARN") or get_execution_role()

<IPython.core.display.Javascript object>

In [7]:
# --- S3 layout: bucket and the key prefix that holds the dataset / outputs ---
bucket_name = "sgmkr-course"
data_path = "sgmkr_clf_subfolders"

# --- Training job: instance type and the prefix for generated job names ---
train_instance_type = "ml.g4dn.xlarge"
job_name_prefix = "flowers-clf-js-tf-"

# --- Training knobs ---
# NOTE(review): neither value is referenced by any other cell visible in this
# notebook; the "epochs" hyperparameter is set separately further down.
nepochs = 10
mini_batch_size = 8

<IPython.core.display.Javascript object>

In [8]:
# JumpStart model to fine-tune and the version selector used for every
# artifact lookup below ("*" is passed through to the SDK as-is).
model_id, model_version = "pytorch-ic-mobilenet-v2", "*"

<IPython.core.display.Javascript object>

In [9]:
# Look up the three JumpStart training artifacts for the selected model:
#   * train_image_uri  - container image for the training job,
#   * train_source_uri - tarball with the training script bundle,
#   * train_model_uri  - pre-trained model artifact to fine-tune.
# Every lookup is pinned to the region reported by the SageMaker session so
# the image and the S3 artifacts all resolve against the same region.
train_image_uri = image_uris.retrieve(
    region=region,
    framework=None,  # left for the SDK to derive from model_id
    model_id=model_id,
    model_version=model_version,
    image_scope="training",
    instance_type=train_instance_type,
)

train_source_uri = script_uris.retrieve(
    region=region,
    model_id=model_id,
    model_version=model_version,
    script_scope="training",
)

# model_uris comes from the sagemaker package, like image_uris / script_uris.
train_model_uri = model_uris.retrieve(
    region=region,
    model_id=model_id,
    model_version=model_version,
    model_scope="training",
)

print(train_image_uri)
print(train_source_uri)
print(train_model_uri)

image_uri is not presented, retrieving image_uri based on instance_type, framework etc.


763104351884.dkr.ecr.ap-southeast-2.amazonaws.com/pytorch-training:1.10.0-gpu-py38
s3://jumpstart-cache-prod-ap-southeast-2/source-directory-tarballs/pytorch/transfer_learning/ic/v2.2.4/sourcedir.tar.gz
s3://jumpstart-cache-prod-ap-southeast-2/pytorch-training/v2.0.0/train-pytorch-ic-mobilenet-v2.tar.gz


<IPython.core.display.Javascript object>

In [10]:
# Start from the JumpStart default hyperparameters for this model, then
# override the ones we want to change.
#
# The lookup goes through `sagemaker.hyperparameters` rather than the bare
# `hyperparameters` name: this cell rebinds `hyperparameters` to the resulting
# dict (the Estimator cell reads it under that name), so calling
# `hyperparameters.retrieve_default` directly would raise AttributeError the
# second time the cell is run.
hyperparameters = sagemaker.hyperparameters.retrieve_default(
    model_id=model_id, model_version=model_version
)

# Values are kept as strings, matching the defaults returned above.
hyperparameters["epochs"] = "5"
print(hyperparameters)

{'train_only_top_layer': 'True', 'epochs': '5', 'learning_rate': '0.001', 'batch_size': '4', 'reinitialize_top_layer': 'Auto'}


<IPython.core.display.Javascript object>

In [11]:
# S3 prefix handed to the Estimator as its output_path.
s3_output_path = f"s3://{bucket_name}/{data_path}/model_output"

<IPython.core.display.Javascript object>

In [12]:
# Configure the fine-tuning job from the JumpStart artifacts resolved earlier.
clf_estimator = Estimator(
    # What to run: container, script bundle, entry script, base model.
    image_uri=train_image_uri,
    source_dir=train_source_uri,
    entry_point="transfer_learning.py",
    model_uri=train_model_uri,
    hyperparameters=hyperparameters,
    # How to run it: IAM role and a single training instance.
    role=role_arn,
    instance_type=train_instance_type,
    instance_count=1,
    max_run=360000,
    # Where the job writes its output.
    output_path=s3_output_path,
)

<IPython.core.display.Javascript object>

In [17]:
# S3 locations of the training and validation image folders, exposed to the
# training job as the "training" and "validation" input channels.
# NOTE(review): the saved output of this cell shows both URIs with a trailing
# "/", which this code does not produce - the output looks stale.
s3_train_imgs = f"s3://{bucket_name}/{data_path}/train_imgs"
s3_valid_imgs = f"s3://{bucket_name}/{data_path}/valid_imgs"
data_channels = dict(training=s3_train_imgs, validation=s3_valid_imgs)
print(data_channels)

{'training': 's3://sgmkr-course/sgmkr_clf_subfolders/train_imgs/', 'validation': 's3://sgmkr-course/sgmkr_clf_subfolders/valid_imgs/'}


<IPython.core.display.Javascript object>

In [18]:
# Build a job name from the configured prefix plus the current local time,
# formatted as YYYY-MM-DD-HH-MM-SS (seconds resolution, no spaces or colons).
timestamp = datetime.now().strftime("%Y-%m-%d-%H-%M-%S")
job_name = job_name_prefix + timestamp
print(job_name)

flowers-clf-js-tf-2023-03-21-17-39-10


<IPython.core.display.Javascript object>

In [19]:
# Launch the training job on both data channels under the generated job name,
# with log streaming into the cell output enabled.
clf_estimator.fit(
    inputs=data_channels,
    job_name=job_name,
    logs=True,
)

Creating training-job with name: flowers-clf-js-tf-2023-03-21-17-39-10


2023-03-21 06:39:12 Starting - Starting the training job...
2023-03-21 06:39:27 Starting - Preparing the instances for training...
2023-03-21 06:40:19 Downloading - Downloading input data...
2023-03-21 06:40:39 Training - Downloading the training image..................
2023-03-21 06:43:55 Training - Training image download completed. Training in progress...[34mbash: cannot set terminal process group (-1): Inappropriate ioctl for device[0m
[34mbash: no job control in this shell[0m
[34m2023-03-21 06:44:07,323 sagemaker-training-toolkit INFO     Imported framework sagemaker_pytorch_container.training[0m
[34m2023-03-21 06:44:07,349 sagemaker_pytorch_container.training INFO     Block until all host DNS lookups succeed.[0m
[34m2023-03-21 06:44:07,354 sagemaker_pytorch_container.training INFO     Invoking user training script.[0m
[34m2023-03-21 06:44:07,574 sagemaker-training-toolkit INFO     Invoking user script[0m
[34mTraining Env:[0m
[34m{
    "additional_framework_paramete


2023-03-21 06:44:26 Uploading - Uploading generated training model[34mtrain Loss: 1.4059 train Acc: 0.3103[0m
[34mval Loss: 1.0486 val Acc: 0.4286[0m
[34mEpoch 1/4[0m
[34mtrain Loss: 1.0888 train Acc: 0.5172[0m
[34mval Loss: 0.9716 val Acc: 0.7143[0m
[34mEpoch 2/4[0m
[34mtrain Loss: 0.9761 train Acc: 0.4138[0m
[34mval Loss: 0.8658 val Acc: 0.8571[0m
[34mEpoch 3/4[0m
[34mtrain Loss: 0.9784 train Acc: 0.5172[0m
[34mval Loss: 0.9652 val Acc: 0.5714[0m
[34mEpoch 4/4[0m
[34mtrain Loss: 1.0012 train Acc: 0.4828[0m
[34mval Loss: 0.9213 val Acc: 0.7143[0m
[34mTraining complete in 0m 8s[0m
[34mBest val Acc: 0.857143[0m
[34mInfo file not found at '_input_model_extracted/__models_info__.json'.[0m
[34m2023-03-21 06:44:20,125 sagemaker-training-toolkit INFO     Reporting training SUCCESS[0m

2023-03-21 06:44:37 Completed - Training job completed
Training seconds: 258
Billable seconds: 258


<IPython.core.display.Javascript object>

In [20]:
# Instance type for inference: used both to resolve the inference image and
# to size the deployed endpoint.
infer_instance_type = "ml.t2.medium"

<IPython.core.display.Javascript object>

In [21]:
# Resolve the inference-side artifacts for the same JumpStart model:
# the serving container image and the inference script bundle.
deploy_image_uri = image_uris.retrieve(
    model_id=model_id,
    model_version=model_version,
    image_scope="inference",
    instance_type=infer_instance_type,
    region=None,
    framework=None,
)

deploy_source_uri = script_uris.retrieve(
    script_scope="inference",
    model_id=model_id,
    model_version=model_version,
)

<IPython.core.display.Javascript object>

In [22]:
# Reuse the training job name for both the SageMaker model and the endpoint.
model_name = endpoint_name = job_name

<IPython.core.display.Javascript object>

In [24]:
# Deploy the fine-tuned model behind a single-instance endpoint, using the
# inference image and script bundle resolved for this model.
clf_predictor = clf_estimator.deploy(
    # Naming
    endpoint_name=endpoint_name,
    model_name=model_name,
    # Serving code
    image_uri=deploy_image_uri,
    source_dir=deploy_source_uri,
    entry_point="inference.py",
    # Capacity
    instance_type=infer_instance_type,
    initial_instance_count=1,
)

Creating model with name: flowers-clf-js-tf-2023-03-21-17-39-10
Creating endpoint-config with name flowers-clf-js-tf-2023-03-21-17-39-10
Creating endpoint with name flowers-clf-js-tf-2023-03-21-17-39-10


---------!

<IPython.core.display.Javascript object>

In [25]:
# Low-level boto3 client used to call invoke_endpoint on the deployed endpoint.
sgmkr_runt = boto3.client(service_name="runtime.sagemaker")

<IPython.core.display.Javascript object>

In [26]:
# Read the test image as raw bytes; they are sent to the endpoint unchanged.
with open("images/rose.jpg", "rb") as image:
    payload = image.read()

# Request arguments for the endpoint call. With this Accept value the recorded
# response carries probabilities, labels and the predicted label.
invoke_args = dict(
    EndpointName=endpoint_name,
    ContentType="application/x-image",
    Accept="application/json;verbose",
    Body=payload,
)
response = sgmkr_runt.invoke_endpoint(**invoke_args)

# The response body is a stream: read it once, decode, then parse the JSON.
raw_body = response["Body"].read()
prediction = json.loads(raw_body.decode())
print(prediction)

{'probabilities': [0.4329131245613098, 0.3074776530265808, 0.2596091628074646], 'labels': ['daisy', 'rose', 'sunflower'], 'predicted_label': 'daisy'}


<IPython.core.display.Javascript object>

In [27]:
# Tear down everything the deploy step created so nothing keeps billing or
# lingers in the account: the SageMaker model, then the endpoint and its
# endpoint configuration.
# delete_model() runs first because the SDK looks the model up through the
# endpoint's configuration, which delete_endpoint() removes.
clf_predictor.delete_model()
clf_predictor.delete_endpoint()

Deleting endpoint configuration with name: flowers-clf-js-tf-2023-03-21-17-39-10
Deleting endpoint with name: flowers-clf-js-tf-2023-03-21-17-39-10


<IPython.core.display.Javascript object>