## Hyperparameter Tuning in SageMaker

In [2]:
!pip install torchvision --no-cache-dir

  from cryptography.utils import int_from_bytes
  from cryptography.utils import int_from_bytes
Collecting torchvision
  Downloading torchvision-0.11.1-cp37-cp37m-manylinux1_x86_64.whl (23.3 MB)
     |████████████████████████████████| 23.3 MB 24.1 MB/s            
[?25hCollecting torch==1.10.0
  Downloading torch-1.10.0-cp37-cp37m-manylinux1_x86_64.whl (881.9 MB)
     |████████████████████████████████| 881.9 MB 35.2 MB/s             | 490.5 MB 74.7 MB/s eta 0:00:06
Installing collected packages: torch, torchvision
Successfully installed torch-1.10.0 torchvision-0.11.1


In [3]:
import sagemaker
from sagemaker.tuner import (
    IntegerParameter,
    CategoricalParameter,
    ContinuousParameter,
    HyperparameterTuner,
)

# Execution role that is handed to the PyTorch estimator further down.
role = sagemaker.get_execution_role()

# Session object used for the S3 upload of the dataset.
sagemaker_session = sagemaker.Session()

# S3 destination for the dataset: the session's default bucket plus a key prefix.
prefix = "sagemaker/DEMO-pytorch-cifar"
bucket = sagemaker_session.default_bucket()

In [4]:
from torchvision.datasets import CIFAR10
from torchvision import transforms


# Directory (relative to the notebook) that receives the CIFAR-10 files.
local_dir = 'data'

# NOTE(review): confirm that torchvision's CIFAR10 actually consults `mirrors`
# when downloading; if it does not, this override has no effect.
CIFAR10.mirrors = ["https://sagemaker-sample-files.s3.amazonaws.com/datasets/image/CIFAR10/"]

# Only transform applied to the images: conversion to tensors.
to_tensor = transforms.Compose([transforms.ToTensor()])

# Fetch the dataset into `local_dir` (skipped when the files are already present).
# Left as a bare expression so the cell displays the dataset summary.
CIFAR10(local_dir, download=True, transform=to_tensor)

Files already downloaded and verified


Dataset CIFAR10
    Number of datapoints: 50000
    Root location: data
    Split: Train
    StandardTransform
Transform: Compose(
               ToTensor()
           )

In [5]:
# Upload the local dataset directory to S3 under the bucket/prefix chosen earlier.
# The returned S3 URI is later passed to the tuner as the "training" channel.
inputs = sagemaker_session.upload_data(
    path="data",
    bucket=bucket,
    key_prefix=prefix,
)
message = "input spec (in this case, just an S3 path): {}"
print(message.format(inputs))

input spec (in this case, just an S3 path): s3://sagemaker-us-east-1-678049007427/sagemaker/DEMO-pytorch-cifar


In [6]:
from sagemaker.pytorch import PyTorch

# Base estimator handed to the hyperparameter tuner: it runs the
# ./scripts/cifar.py entry point with PyTorch 1.8 / py36 on a single
# ml.m5.large instance, using the execution role obtained above.
estimator = PyTorch(
    entry_point="./scripts/cifar.py",
    framework_version="1.8",
    py_version='py36',
    instance_type="ml.m5.large",
    instance_count=1,
    role=role,
)

In [7]:
# Search space for the tuner, keyed by hyperparameter name.
learning_rate_range = ContinuousParameter(0.001, 0.1)
batch_size_choices = CategoricalParameter([32, 64, 128, 256, 512])
epoch_range = IntegerParameter(2, 4)

hyperparameter_ranges = {
    "lr": learning_rate_range,
    "batch-size": batch_size_choices,
    "epochs": epoch_range,
}

In [8]:
# Objective for the tuning job: minimize the metric named "average test loss".
objective_type = "Minimize"
objective_metric_name = "average test loss"

# One metric definition: its name matches the objective, and the regex's single
# capture group picks the numeric value out of a "Test set: Average loss: ..." line.
loss_metric = {
    "Name": objective_metric_name,
    "Regex": "Test set: Average loss: ([0-9\\.]+)",
}
metric_definitions = [loss_metric]

In [9]:
# Tuning job configuration: two training jobs in total, both allowed to run
# in parallel, optimizing the objective metric defined above.
tuner = HyperparameterTuner(
    estimator=estimator,
    objective_metric_name=objective_metric_name,
    objective_type=objective_type,
    hyperparameter_ranges=hyperparameter_ranges,
    metric_definitions=metric_definitions,
    max_jobs=2,
    max_parallel_jobs=2,
)

In [10]:
# Start the tuning job, passing the uploaded S3 location as the "training" channel.
training_channels = {"training": inputs}
tuner.fit(training_channels)

............................................................................!


In [11]:
# Deploy from the tuner to a single ml.t2.medium endpoint instance; the returned
# predictor is used to query the endpoint below.
predictor = tuner.deploy(
    instance_type="ml.t2.medium",
    initial_instance_count=1,
)


2021-12-03 22:46:46 Starting - Preparing the instances for training
2021-12-03 22:46:46 Downloading - Downloading input data
2021-12-03 22:46:46 Training - Training image download completed. Training in progress.
2021-12-03 22:46:46 Uploading - Uploading generated training model
2021-12-03 22:46:46 Completed - Training job completed
-----------!

In [12]:
#estimator = sagemaker.estimator.Estimator.attach("pytorch-training-211201-0222-004-56013d5a")  

#predictor = estimator.deploy(
#    initial_instance_count=1, 
#    instance_type='ml.t2.medium')

## Query the Endpoint

In [28]:
import gzip 
import numpy as np
import random
import os

# Pickled CIFAR-10 batch file (under the local dataset directory) from which a
# sample image is read to query the endpoint.
file = 'data/cifar-10-batches-py/data_batch_1'
def unpickle(file):
    """Load and return the object stored in the pickle file at ``file``.

    The file is opened in binary mode and unpickled with ``encoding='bytes'``.

    Note: ``pickle.load`` can execute arbitrary code, so only use this on
    files from a trusted source.
    """
    import pickle

    with open(file, mode='rb') as batch_file:
        return pickle.load(batch_file, encoding='bytes')

# Load the batch and take the first row of its b'data' entry, reshaped to
# (3, 32, 32) (presumably channels x height x width; confirm against the dataset layout).
batch = unpickle(file)
first_row = batch[b'data'][0]
data = np.reshape(first_row, (3, 32, 32))

In [29]:
# Add the leading batch axis and cast to float32 before sending to the endpoint.
# Reshaping to the full (1, 3, 32, 32) target, rather than calling expand_dims,
# keeps this cell idempotent: re-running it no longer stacks an extra axis onto
# `data` each time.
data = np.reshape(data, (1, 3, 32, 32)).astype(np.float32)
print(data.dtype)

float32


In [30]:
#from sagemaker.serializers import IdentitySerializer
#predictor.serializer = IdentitySerializer("image/png")
# .tobytes()?

# Send the single-image batch to the deployed endpoint and print what it returns
# (one row of ten values in the recorded output).
response = predictor.predict(data)
print(response)

[[ -42.51738739  -90.59563446 -109.33483887  -20.02997589 -164.67401123
     0.         -172.72790527  -20.93193054 -117.64822388 -168.83175659]]


### Cleanup

After you have finished with this exercise, remember to delete the prediction endpoint (for example, by calling `predictor.delete_endpoint()`) to release the instance associated with it.