## Hyperparameter Tuning in SageMaker

In [5]:
!pip install torchvision --no-cache-dir

  from cryptography.utils import int_from_bytes
  from cryptography.utils import int_from_bytes
Collecting torchvision
  Downloading torchvision-0.11.1-cp37-cp37m-manylinux1_x86_64.whl (23.3 MB)
     |████████████████████████████████| 23.3 MB 21.3 MB/s            
Collecting torch==1.10.0
  Downloading torch-1.10.0-cp37-cp37m-manylinux1_x86_64.whl (881.9 MB)
     |████████████████████████████████| 881.9 MB 85.8 MB/s             
Installing collected packages: torch, torchvision
Successfully installed torch-1.10.0 torchvision-0.11.1


In [8]:
import sagemaker
from sagemaker.tuner import (
    IntegerParameter,
    CategoricalParameter,
    ContinuousParameter,
    HyperparameterTuner,
)

sagemaker_session = sagemaker.Session()

bucket = sagemaker_session.default_bucket()
prefix = "sagemaker/DEMO-pytorch-cifar"

role = sagemaker.get_execution_role()

In [9]:
from torchvision.datasets import CIFAR10
from torchvision import transforms


local_dir = 'data'
CIFAR10.mirrors = ["https://sagemaker-sample-files.s3.amazonaws.com/datasets/image/CIFAR10/"]
CIFAR10(
    local_dir,
    download=True,
    transform=transforms.Compose(
        [transforms.ToTensor()]
    )
)

Files already downloaded and verified


Dataset CIFAR10
    Number of datapoints: 50000
    Root location: data
    Split: Train
    StandardTransform
Transform: Compose(
               ToTensor()
           )

In [10]:
# TODO: Upload the data to an S3 bucket. You can use the sagemaker_session object, boto3 or the AWS CLI
inputs = sagemaker_session.upload_data(path="data", bucket=bucket, key_prefix=prefix)
print("input spec (in this case, just an S3 path): {}".format(inputs))

input spec (in this case, just an S3 path): s3://sagemaker-us-east-1-678049007427/sagemaker/DEMO-pytorch-cifar


In [11]:
from sagemaker.pytorch import PyTorch

estimator = PyTorch(
    source_dir="./scripts",
    entry_point="cifar.py",
    role=role,
    py_version='py36',
    framework_version="1.8",
    instance_count=1,
    instance_type="ml.m5.large"
)

In [12]:
hyperparameter_ranges = {
    "lr": ContinuousParameter(0.001, 0.1),
    "batch-size": CategoricalParameter([32, 64, 128, 256, 512]),
    "epochs": IntegerParameter(2, 4)
}

In [13]:
objective_metric_name = "average test loss"
objective_type = "Minimize"
metric_definitions = [{"Name": "average test loss", "Regex": "Test set: Average loss: ([0-9\\.]+)"}]

In [14]:
tuner = HyperparameterTuner(
    estimator,
    objective_metric_name,
    hyperparameter_ranges,
    metric_definitions,
    max_jobs=2,
    max_parallel_jobs=2,
    objective_type=objective_type,
)

In [15]:
tuner.fit({"training": inputs})

......................................................................!


In [16]:
predictor = tuner.deploy(initial_instance_count=1, instance_type="ml.t2.medium")


2021-12-01 21:41:51 Starting - Preparing the instances for training
2021-12-01 21:41:51 Downloading - Downloading input data
2021-12-01 21:41:51 Training - Training image download completed. Training in progress.
2021-12-01 21:41:51 Uploading - Uploading generated training model
2021-12-01 21:41:51 Completed - Training job completed
-------------!

In [17]:
'''
estimator = sagemaker.estimator.Estimator.attach("pytorch-training-211201-0222-004-56013d5a")  

predictor = estimator.deploy(
    initial_instance_count=1, 
    instance_type='ml.t2.medium'
)
'''

SyntaxError: EOL while scanning string literal (<ipython-input-17-b07d256e50a7>, line 8)

## Query the Endpoint

In [59]:
import gzip 
import numpy as np
import random
import os

file = 'data/cifar-10-batches-py/data_batch_1'
def unpickle(file):
    import pickle
    with open(file, 'rb') as fo:
        dict = pickle.load(fo, encoding='bytes')
    return dict

data=unpickle(file)
data=np.reshape(data[b'data'][0], (3, 32, 32))

In [60]:
data = np.expand_dims(data, axis=0)

In [62]:
print(data.astype(float))

[[[[ 59.  43.  50. ... 158. 152. 148.]
   [ 16.   0.  18. ... 123. 119. 122.]
   [ 25.  16.  49. ... 118. 120. 109.]
   ...
   [208. 201. 198. ... 160.  56.  53.]
   [180. 173. 186. ... 184.  97.  83.]
   [177. 168. 179. ... 216. 151. 123.]]

  [[ 62.  46.  48. ... 132. 125. 124.]
   [ 20.   0.   8. ...  88.  83.  87.]
   [ 24.   7.  27. ...  84.  84.  73.]
   ...
   [170. 153. 161. ... 133.  31.  34.]
   [139. 123. 144. ... 148.  62.  53.]
   [144. 129. 142. ... 184. 118.  92.]]

  [[ 63.  45.  43. ... 108. 102. 103.]
   [ 20.   0.   0. ...  55.  50.  57.]
   [ 21.   0.   8. ...  50.  50.  42.]
   ...
   [ 96.  34.  26. ...  70.   7.  20.]
   [ 96.  42.  30. ...  94.  34.  34.]
   [116.  94.  87. ... 140.  84.  72.]]]]


In [57]:
#from sagemaker.serializers import IdentitySerializer
#predictor.serializer = IdentitySerializer("image/png")
# .tobytes()?

response = predictor.predict(data.astype(float))
print(response)

ModelError: An error occurred (ModelError) when calling the InvokeEndpoint operation: Received server error (500) from primary with message "expected scalar type Double but found Float
Traceback (most recent call last):
  File "/opt/conda/lib/python3.6/site-packages/sagemaker_inference/transformer.py", line 126, in transform
    result = self._transform_fn(self._model, input_data, content_type, accept)
  File "/opt/conda/lib/python3.6/site-packages/sagemaker_inference/transformer.py", line 216, in _default_transform_fn
    prediction = self._predict_fn(data, model)
  File "/opt/conda/lib/python3.6/site-packages/sagemaker_pytorch_serving_container/default_pytorch_inference_handler.py", line 125, in default_predict_fn
    output = model(input_data)
  File "/opt/conda/lib/python3.6/site-packages/torch/nn/modules/module.py", line 889, in _call_impl
    result = self.forward(*input, **kwargs)
  File "/opt/ml/model/code/cifar.py", line 33, in forward
    x = self.pool(F.relu(self.conv1(x)))
  File "/opt/conda/lib/python3.6/site-packages/torch/nn/modules/module.py", line 889, in _call_impl
    result = self.forward(*input, **kwargs)
  File "/opt/conda/lib/python3.6/site-packages/torch/nn/modules/conv.py", line 399, in forward
    return self._conv_forward(input, self.weight, self.bias)
  File "/opt/conda/lib/python3.6/site-packages/torch/nn/modules/conv.py", line 396, in _conv_forward
    self.padding, self.dilation, self.groups)
RuntimeError: expected scalar type Double but found Float
". See https://us-east-1.console.aws.amazon.com/cloudwatch/home?region=us-east-1#logEventViewer:group=/aws/sagemaker/Endpoints/pytorch-training-211201-2136-001-70b9da77 in account 678049007427 for more information.

### Cleanup

After you have finished with this exercise, remember to delete the prediction endpoint to release the instance associated with it

In [None]:
np.expand_dims(data, axis=1)

In [46]:
import torch
from torchvision import datasets, transforms
print(torch.tensor(data).to(torch.uint8))


tensor([[[[ 59,  43,  50,  ..., 158, 152, 148],
          [ 16,   0,  18,  ..., 123, 119, 122],
          [ 25,  16,  49,  ..., 118, 120, 109],
          ...,
          [208, 201, 198,  ..., 160,  56,  53],
          [180, 173, 186,  ..., 184,  97,  83],
          [177, 168, 179,  ..., 216, 151, 123]],

         [[ 62,  46,  48,  ..., 132, 125, 124],
          [ 20,   0,   8,  ...,  88,  83,  87],
          [ 24,   7,  27,  ...,  84,  84,  73],
          ...,
          [170, 153, 161,  ..., 133,  31,  34],
          [139, 123, 144,  ..., 148,  62,  53],
          [144, 129, 142,  ..., 184, 118,  92]],

         [[ 63,  45,  43,  ..., 108, 102, 103],
          [ 20,   0,   0,  ...,  55,  50,  57],
          [ 21,   0,   8,  ...,  50,  50,  42],
          ...,
          [ 96,  34,  26,  ...,  70,   7,  20],
          [ 96,  42,  30,  ...,  94,  34,  34],
          [116,  94,  87,  ..., 140,  84,  72]]]], dtype=torch.uint8)
