# SageMaker JumpStart

## Setup

Install or upgrade the libraries required to run this notebook.

In [None]:
!pip install sagemaker --upgrade
!pip install ipywidgets

To train and host models with Amazon SageMaker, you need to set up and authenticate access to AWS services.

In [None]:
import sagemaker, boto3, json
from sagemaker import get_execution_role

# Open a SageMaker session and look up the current region and execution role.
sess = sagemaker.Session()
aws_region = boto3.Session().region_name
aws_role = get_execution_role()

# Print a short summary of the resolved AWS settings.
auth_summary = [
    '-----AWS authentication info------',
    'Role: ' + aws_role,
    'Region: ' + aws_region,
    '----------------------------------',
]
print('\n'.join(auth_summary))

## Select a pre-trained model

In [None]:
import IPython
from ipywidgets import Dropdown

# Download the JumpStart models manifest from the regional JumpStart cache bucket.
boto3.client("s3").download_file(
    f"jumpstart-cache-prod-{aws_region}", "models_manifest.json", "models_manifest.json"
)
with open("models_manifest.json", "rb") as json_file:
    model_list = json.load(json_file)

# Select every image-classification model id (ids containing "-ic-").
# The same id can appear several times in the manifest (presumably once per
# model version), so de-duplicate while keeping the manifest order.
ic_models_all_versions = [
    model["model_id"] for model in model_list if "-ic-" in model["model_id"]
]
ic_models = list(dict.fromkeys(ic_models_all_versions))

# Choose the entry that is pre-selected in the dropdown.
# `model_id` is only assigned by a later cell, so on a fresh kernel it does not
# exist yet. Reuse it when a previous run left a valid value behind; otherwise
# fall back to a default, and finally to the first available model.
DEFAULT_MODEL_ID = "pytorch-ic-mobilenet-v2"  # assumed to be present in the manifest
previous_model_id = globals().get("model_id")
if previous_model_id in ic_models:
    initial_model_id = previous_model_id
elif DEFAULT_MODEL_ID in ic_models:
    initial_model_id = DEFAULT_MODEL_ID
else:
    initial_model_id = ic_models[0] if ic_models else None

# Show a dropdown of model ids for the user to choose from.
dropdown = Dropdown(
    options=ic_models,
    value=initial_model_id,
    description="JumpStart Image Classification Models:",
    style={"description_width": "initial"},
    layout={"width": "max-content"},
)
display(IPython.display.Markdown("## Select a JumpStart pre-trained model from the dropdown below"))
display(dropdown)

In [None]:
from sagemaker import image_uris, model_uris, script_uris, hyperparameters
from sagemaker.model import Model
from sagemaker.predictor import Predictor
from sagemaker.utils import name_from_base

# Model picked in the dropdown; the "*" version specifier fetches the latest model version.
model_id = dropdown.value
model_version = "*"

training_instance_type = "ml.g4dn.2xlarge"
endpoint_name = name_from_base(f"jumpstart-example-{model_id}")

# Docker container image used for training.
train_image_uri = image_uris.retrieve(
    model_id=model_id,
    model_version=model_version,
    image_scope="training",
    instance_type=training_instance_type,
    region=None,
    framework=None,
)

# Training script bundle.
train_source_uri = script_uris.retrieve(
    script_scope="training",
    model_id=model_id,
    model_version=model_version,
)

# Base model artifact to start training from.
base_model_uri = model_uris.retrieve(
    model_scope="training",
    model_id=model_id,
    model_version=model_version,
)

# Summarize what was resolved.
print("----------JumpStart info----------")
print(f"Endpoint name: {endpoint_name}")
print(f"Container image uri: {train_image_uri}")
print(f"Source uri: {train_source_uri}")
print(f"Base model uri: {base_model_uri}")
print("----------------------------------")

In [None]:
# Sample training images hosted in the regional JumpStart cache bucket.
training_data_bucket = f"jumpstart-cache-prod-{aws_region}"
training_data_prefix = "training-datasets/tf_flowers/"
training_dataset_s3_path = "s3://{}/{}".format(training_data_bucket, training_data_prefix)

# Output location for the training job, under the session's default bucket.
output_prefix = "jumpstart-example-ic-training"
output_bucket = sess.default_bucket()
s3_output_location = "s3://{}/{}/output".format(output_bucket, output_prefix)

In [None]:
from sagemaker import hyperparameters

# Fetch the default hyperparameters for fine-tuning the selected model.
# Note: the assignment rebinds `hyperparameters` from the imported module to the
# returned value; the import above restores the module each time this cell runs.
hyperparameters = hyperparameters.retrieve_default(
    model_id=model_id,
    model_version=model_version,
)

# [Optional] Override default hyperparameters with custom values.
hyperparameters.update({"epochs": "5"})
print(hyperparameters)

In [None]:
from sagemaker.tuner import ContinuousParameter

# Whether to use AMT (Automatic Model Tuning) for tuning and model selection.
# NOTE(review): when this is True the deployment cell reads `hp_tuner`, which is
# not defined in any cell visible here — confirm a tuning cell exists before enabling.
use_amt = False


In [None]:
# Download the training source bundle (sourcedir) from S3 and unpack it locally.
import os
import shutil
from urllib.parse import urlparse

# exist_ok=True replaces the separate "does the directory exist" check.
os.makedirs('source', exist_ok=True)

# Derive bucket and key from the script URI itself instead of assuming the
# script lives in the same bucket as the training data.
parsed_source_uri = urlparse(train_source_uri)
sourcedir_bucket = parsed_source_uri.netloc
sourcedir_path = parsed_source_uri.path.lstrip('/')

boto3.client("s3").download_file(sourcedir_bucket, sourcedir_path, './source/sourcedir.tar.gz')
shutil.unpack_archive('./source/sourcedir.tar.gz', extract_dir='./source/')

In [None]:
# Print the training script from the local ./source directory with syntax highlighting.
!pygmentize ./source/transfer_learning.py

In [None]:
from sagemaker.estimator import Estimator
from sagemaker.utils import name_from_base
from sagemaker.tuner import HyperparameterTuner

# Base name for the fine-tuning job.
training_job_name = name_from_base(f"jumpstart-example-{model_id}-transfer-learning")

# Create the SageMaker Estimator instance.
ic_estimator = Estimator(
    # Container, script bundle and entry point
    image_uri=train_image_uri,
    source_dir=train_source_uri,
    entry_point="transfer_learning.py",
    # Starting model and hyperparameters
    model_uri=base_model_uri,
    hyperparameters=hyperparameters,
    # Role and compute settings
    role=aws_role,
    instance_type=training_instance_type,
    instance_count=1,
    max_run=360000,
    # Output and naming
    output_path=s3_output_location,
    base_job_name=training_job_name,
)

# Launch the SageMaker training job, passing the S3 path of the training data.
training_channels = {"training": training_dataset_s3_path}
ic_estimator.fit(training_channels, logs=True)

In [None]:
inference_instance_type = "ml.m5.xlarge"

# Docker container image URI for inference.
deploy_image_uri = image_uris.retrieve(
    model_id=model_id,
    model_version=model_version,
    image_scope="inference",
    instance_type=inference_instance_type,
    region=None,
    framework=None,
)

# Inference script URI.
deploy_source_uri = script_uris.retrieve(
    script_scope="inference",
    model_id=model_id,
    model_version=model_version,
)

endpoint_name = name_from_base(f"jumpstart-example-FT-{model_id}-")

# Deploy a SageMaker endpoint from the object trained in the previous step:
# the tuner when AMT is enabled, otherwise the estimator.
if use_amt:
    trained_source = hp_tuner
else:
    trained_source = ic_estimator

finetuned_predictor = trained_source.deploy(
    endpoint_name=endpoint_name,
    image_uri=deploy_image_uri,
    source_dir=deploy_source_uri,
    entry_point="inference.py",
    instance_type=inference_instance_type,
    initial_instance_count=1,
)

In [None]:
# Bucket and key prefix that download_from_s3 reads the sample images from.
s3_bucket = f"jumpstart-cache-prod-{aws_region}"
key_prefix = "training-datasets/tf_flowers"


def download_from_s3(images):
    """Download each listed image from S3 to a local file.

    Args:
        images: Mapping of local destination filename to the image's key,
            relative to ``key_prefix`` inside ``s3_bucket``.
    """
    # Create the client once rather than once per image.
    s3_client = boto3.client("s3")
    for filename, image_key in images.items():
        s3_client.download_file(s3_bucket, f"{key_prefix}/{image_key}", filename)


# Local filename -> S3 key (relative to key_prefix) of each sample image to download.
flower_images = {
    "img1.jpg": "roses/10503217854_e66a804309.jpg",
    "img2.jpg": "sunflowers/1008566138_6927679c8a.jpg",
}
download_from_s3(flower_images)

In [None]:
from IPython.display import HTML

# Send each downloaded image to the endpoint and render it with its predicted label.
for image_filename in flower_images:
    with open(image_filename, "rb") as file:
        img = file.read()

    query_response = finetuned_predictor.predict(
        img,
        {"ContentType": "application/x-image", "Accept": "application/json;verbose"},
    )
    model_predictions = json.loads(query_response)
    predicted_label = model_predictions["predicted_label"]

    image_html = (
        f'<img src={image_filename} alt={image_filename} align="left" style="width: 250px;"/>'
        f"<figcaption>Predicted Label: {predicted_label}</figcaption>"
    )
    display(HTML(image_html))

In [None]:
# Delete the SageMaker endpoint and the resources attached to it.
finetuned_predictor.delete_model()
finetuned_predictor.delete_endpoint()

## Incremental learning

In [None]:
# Identify the model trained in the previous step through the estimator's public
# API, instead of the private `_current_job_name` attribute and a hand-built S3 path.

last_training_job_name = ic_estimator.latest_training_job.name

# S3 location of the model artifact as reported for the latest training job.
last_trained_model_path = ic_estimator.model_data

print(last_trained_model_path)

In [None]:
# Output location and job name for the incremental training run.
incremental_train_output_prefix = "jumpstart-example-ic-incremental-training"
incremental_s3_output_location = "s3://{}/{}/output".format(
    output_bucket, incremental_train_output_prefix
)
incremental_training_job_name = name_from_base(f"jumpstart-example-{model_id}-incremental-training")

# Same training setup as before, but starting from the previously trained model.
incremental_train_estimator = Estimator(
    # Container, script bundle and entry point
    image_uri=train_image_uri,
    source_dir=train_source_uri,
    entry_point="transfer_learning.py",
    # Starting model and hyperparameters
    model_uri=last_trained_model_path,
    hyperparameters=hyperparameters,
    # Role and compute settings
    role=aws_role,
    instance_type=training_instance_type,
    instance_count=1,
    max_run=360000,
    # Output and naming
    output_path=incremental_s3_output_location,
    base_job_name=incremental_training_job_name,
)

incremental_training_channels = {"training": training_dataset_s3_path}
incremental_train_estimator.fit(incremental_training_channels, logs=True)