In [1]:
! pip install --upgrade sagemaker

Looking in indexes: https://pypi.org/simple, https://pip.repos.neuron.amazonaws.com
Collecting sagemaker
  Downloading sagemaker-2.126.0.tar.gz (654 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m654.9/654.9 KB[0m [31m10.9 MB/s[0m eta [36m0:00:00[0m00:01[0m
[?25h  Preparing metadata (setup.py) ... [?25ldone
Collecting botocore<1.30.0,>=1.29.35
  Downloading botocore-1.29.40-py3-none-any.whl (10.3 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m10.3/10.3 MB[0m [31m35.0 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
Building wheels for collected packages: sagemaker
  Building wheel for sagemaker (setup.py) ... [?25ldone
[?25h  Created wheel for sagemaker: filename=sagemaker-2.126.0-py2.py3-none-any.whl size=890086 sha256=d1d060c86d1c3c6342419b67ef3155d68375ede08aab88ccf2c6ffc7668d5b05
  Stored in directory: /home/ec2-user/.cache/pip/wheels/2f/4e/63/345e2f96c60d3f77a2b8be1182a430341092f763b4479dc578
Successfully built sagemaker
Installin

In [2]:
import sagemaker
from sagemaker import get_execution_role

# SageMaker session for this notebook and the default bucket it reports.
sess = sagemaker.Session()
bucket = sess.default_bucket()

# IAM role returned by get_execution_role(); passed to the estimator further down.
role = get_execution_role()

# Key prefix under which the training output location is built.
prefix = "my-catsdogs-fulltraining"

In [3]:
from sagemaker import image_uris

# Look up the container image URI of the "image-classification" framework
# for the region the session is bound to.
training_image = image_uris.retrieve(
    framework="image-classification",
    region=sess.boto_region_name,
)

# Show which image and region were resolved.
print(training_image)
print(sess.boto_region_name)

811284229777.dkr.ecr.us-east-1.amazonaws.com/image-classification:1
us-east-1


In [12]:
import os
import urllib.request
import boto3

# Key prefixes (inside `bucket`) for the two data channels.
s3_train_key = "nvirginia-image-classification-full-training/train"
s3_validation_key = "nvirginia-image-classification-full-training/validation"

# Full S3 URIs for each channel; the trailing slash is part of the value.
s3_train = f"s3://{bucket}/{s3_train_key}/"
s3_validation = f"s3://{bucket}/{s3_validation_key}/"

def download(url):
    """Download ``url`` into the current working directory unless it is already there.

    The local filename is the last ``/``-separated segment of ``url``. The
    data is first written to ``<filename>.part`` and only renamed to
    ``<filename>`` once the transfer has finished, so an interrupted download
    is never mistaken for a complete file on the next run.

    Args:
        url: Address of the file to fetch.

    Returns:
        The local filename (relative to the current working directory).

    Raises:
        ValueError: If ``url`` ends with ``/`` and therefore has no filename.
    """
    filename = url.split("/")[-1]
    if not filename:
        raise ValueError("Cannot derive a local filename from URL: {!r}".format(url))

    # An existing file is treated as an already completed download.
    if os.path.exists(filename):
        return filename

    partial = filename + ".part"
    try:
        urllib.request.urlretrieve(url, partial)
        # Atomic rename: the final name only ever refers to a complete file.
        os.replace(partial, filename)
    finally:
        # Only present here if the transfer or the rename failed part-way.
        if os.path.exists(partial):
            os.remove(partial)
    return filename


def upload_to_s3(channel, file):
    """Upload a local file to the notebook-level ``bucket`` under ``channel``.

    Args:
        channel: Key prefix inside ``bucket``. It is concatenated verbatim
            into the object key (``channel + "/" + file``), so pass a key
            prefix rather than a full ``s3://`` URI.
        file: Path of the local file to upload; it also forms the trailing
            part of the object key.
    """
    s3 = boto3.resource("s3")
    key = channel + "/" + file
    # The context manager closes the file handle even if the upload raises.
    with open(file, "rb") as data:
        s3.Bucket(bucket).put_object(Key=key, Body=data)

download("https://nvirginia-lien-cats-dogs-buckets.s3.amazonaws.com/images_to_classify/dataset_rec_train.rec")
download("https://nvirginia-lien-cats-dogs-buckets.s3.amazonaws.com/images_to_classify/dataset_rec_val.rec")
#upload_to_s3(s3_train , "dataset_rec_train.rec")
#upload_to_s3(s3_validation , "dataset_rec_val.rec")
!aws s3 cp dataset_rec_train.rec $s3_train --quiet
!aws s3 cp dataset_rec_val.rec $s3_validation --quiet


print(s3_train)
print(s3_validation)

s3://sagemaker-us-east-1-597051996741/nvirginia-image-classification-full-training/train/
s3://sagemaker-us-east-1-597051996741/nvirginia-image-classification-full-training/validation/


In [13]:
# NOTE(review): this flag is not read by any cell visible here — presumably it selects
# whether the model found by automatic model tuning (AMT) gets deployed later; confirm.
deploy_amt_model = True

In [15]:
import sagemaker
# Location that is handed to the estimator as its output path.
s3_output_location = f"s3://{bucket}/{prefix}/output"

# Input channels: both point at RecordIO data addressed by S3 prefix and
# use the same distribution setting.
train_data = sagemaker.inputs.TrainingInput(
    s3_data=s3_train,
    content_type="application/x-recordio",
    s3_data_type="S3Prefix",
    distribution="FullyReplicated",
)
validation_data = sagemaker.inputs.TrainingInput(
    s3_data=s3_validation,
    content_type="application/x-recordio",
    s3_data_type="S3Prefix",
    distribution="FullyReplicated",
)
data_channels = dict(train=train_data, validation=validation_data)

# Estimator running the previously resolved training image on a single instance.
ic = sagemaker.estimator.Estimator(
    image_uri=training_image,
    role=role,
    sagemaker_session=sess,
    instance_type="ml.p2.xlarge",
    instance_count=1,
    volume_size=50,
    max_run=360000,
    input_mode="File",
    output_path=s3_output_location,
)

# Fixed hyperparameters for this stand-alone training run.
ic.set_hyperparameters(
    num_layers=18,
    image_shape="3,224,224",
    num_classes=2,
    num_training_samples=1000,
    mini_batch_size=64,
    epochs=5,
    learning_rate=0.01,
    top_k=2,
    precision_dtype="float32",
)

# Start training and stream the job logs into the cell output.
ic.fit(inputs=data_channels, logs=True)

INFO:sagemaker:Creating training-job with name: image-classification-2022-12-30-13-29-13-383


2022-12-30 13:29:13 Starting - Starting the training job...
2022-12-30 13:29:39 Starting - Preparing the instances for training............
2022-12-30 13:31:29 Downloading - Downloading input data...
2022-12-30 13:31:59 Training - Downloading the training image...............
2022-12-30 13:34:20 Training - Training image download completed. Training in progress...[34mDocker entrypoint called with argument(s): train[0m
[34mRunning default environment configuration script[0m
[34mNvidia gpu devices, drivers and cuda toolkit versions (only available on hosts with GPU):[0m
[34mFri Dec 30 13:34:57 2022       [0m
[34m+-----------------------------------------------------------------------------+[0m
[34m| NVIDIA-SMI 470.57.02    Driver Version: 470.57.02    CUDA Version: 11.4     |[0m
[34m|-------------------------------+----------------------+----------------------+[0m
[34m| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |[0m
[34m| Fan  Temp  Perf

In [None]:
import time
from sagemaker.tuner import IntegerParameter, ContinuousParameter
from sagemaker.tuner import HyperparameterTuner

# Tuning budget.
# max_jobs: total number of training jobs the tuner may run; raising it trades
# extra training time for a wider search.
max_jobs = 6
# max_parallel_jobs: how many of those jobs run at once, bounded by account limits.
# If it equals max_jobs, the Bayesian search effectively becomes a random search.
max_parallel_jobs = 1

# Name the tuning job after the current UTC timestamp.
job_name = f"DEMO-ic-mul-{time.strftime('%Y-%m-%d-%H-%M-%S', time.gmtime())}"
print("Tuning job name: ", job_name)

# Search space. The tunable hyperparameters of the Image Classification algorithm
# are listed at https://docs.aws.amazon.com/sagemaker/latest/dg/IC-tuning.html
hyperparameter_ranges = {
    "beta_1": ContinuousParameter(1e-6, 0.999, scaling_type="Auto"),
    "beta_2": ContinuousParameter(1e-6, 0.999, scaling_type="Auto"),
    "eps": ContinuousParameter(1e-8, 1.0, scaling_type="Auto"),
    "gamma": ContinuousParameter(1e-8, 0.999, scaling_type="Auto"),
    "learning_rate": ContinuousParameter(1e-6, 0.5, scaling_type="Auto"),
    "mini_batch_size": IntegerParameter(8, 64, scaling_type="Auto"),
    "momentum": ContinuousParameter(0.0, 0.999, scaling_type="Auto"),
    "weight_decay": ContinuousParameter(0.0, 0.999, scaling_type="Auto"),
}

# Tuner that maximizes validation accuracy of the estimator configured earlier.
hp_tuner = HyperparameterTuner(
    estimator=ic,
    objective_metric_name="validation:accuracy",
    hyperparameter_ranges=hyperparameter_ranges,
    objective_type="Maximize",
    max_jobs=max_jobs,
    max_parallel_jobs=max_parallel_jobs,
)

# Launch the SageMaker tuning job on the same data channels used for training.
hp_tuner.fit(inputs=data_channels, job_name=job_name)

INFO:sagemaker:Creating hyperparameter tuning job with name: DEMO-ic-mul-2022-12-30-13-40-35


Tuning job name:  DEMO-ic-mul-2022-12-30-13-40-35
.....................................................................................................................................................................................