In [22]:
import sagemaker

# Execution role for this notebook; handed to the estimator further down.
role = sagemaker.get_execution_role()

# Session object, plus the S3 location (session default bucket + key prefix)
# under which this demo's data is uploaded.
sagemaker_session = sagemaker.Session()
bucket = sagemaker_session.default_bucket()
prefix = 'sagemaker/DEMO-pytorch-mnist'

### tf-with-sagemaker.ipynb

In [5]:
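# S3 path for training data.
# NOTE: this appears to be left over from a TensorFlow variant of the notebook;
# it is not used by the cells shown here, and `region` is not defined in them.
# training_data_uri = "s3://sagemaker-sample-data-{}/tensorflow/mnist".format(region)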

In [5]:
# List /opt/ml/model (oldest first) to check for a locally saved model.
# In the recorded run this path did not exist on the notebook instance.
!ls -ltr /opt/ml/model

ls: cannot access '/opt/ml/model': No such file or directory


In [3]:
!yes | pip uninstall torchvison
!pip install -qU torchvision

[0myes: standard output: Broken pipe
[0m

In [23]:
# Download the MNIST training split into ./data
from torchvision import transforms
from torchvision.datasets import MNIST

# Use the SageMaker sample-files bucket as the only download mirror.
MNIST.mirrors = ["https://sagemaker-sample-files.s3.amazonaws.com/datasets/image/MNIST/"]

# Convert each image to a tensor, then normalize with mean 0.1307 / std 0.3081.
mnist_transform = transforms.Compose(
    [transforms.ToTensor(), transforms.Normalize((0.1307,), (0.3081,))]
)

# Last expression of the cell, so the dataset summary is displayed.
MNIST(root='data', transform=mnist_transform, download=True)

Dataset MNIST
    Number of datapoints: 60000
    Root location: data
    Split: Train
    StandardTransform
Transform: Compose(
               ToTensor()
               Normalize(mean=(0.1307,), std=(0.3081,))
           )

In [24]:
# Upload the local 'data' directory to S3 under bucket/prefix; the returned
# S3 URI is what the training job receives as its input.
inputs = sagemaker_session.upload_data(
    path='data',
    bucket=bucket,
    key_prefix=prefix,
)
print(f'input spec (in this case, just an S3 path): {inputs}')

input spec (in this case, just an S3 path): s3://sagemaker-us-east-1-058199717680/sagemaker/DEMO-pytorch-mnist


In [None]:
# https://github.com/aws/amazon-sagemaker-examples/tree/main/sagemaker-python-sdk/pytorch_mnist

# 1) Download the mnist.py file from the path above.
# 2) In the function below, add download=True to the datasets.MNIST(...) call.

# def _get_train_data_loader(batch_size, training_dir, is_distributed, **kwargs):
#     logger.info("Get train data loader")
#     dataset = datasets.MNIST(
#         training_dir,
#         download=True,
#         train=True,
#         transform=transforms.Compose(
#             [transforms.ToTensor(), transforms.Normalize((0.1307,), (0.3081,))]
#         ),
#     )
#     train_sampler = (
#         torch.utils.data.distributed.DistributedSampler(dataset) if is_distributed else None
#     )
#     return torch.utils.data.DataLoader(
#         dataset,
#         batch_size=batch_size,
#         shuffle=train_sampler is None,
#         sampler=train_sampler,
#         **kwargs
#     )

In [None]:
# Print mnist.py (the training entry point passed to the estimator below)
# using the pygmentize command-line tool.
!pygmentize mnist.py

### Train and deploy with the PyTorch estimator class

In [31]:
from sagemaker.pytorch import PyTorch

# Values handed to the estimator as its `hyperparameters`.
training_hyperparameters = {
    'epochs': 1,
    'backend': 'gloo',
}

# Estimator that runs mnist.py on two ml.c5.2xlarge instances
# with framework version 1.8.0 on py3.
estimator = PyTorch(
    entry_point='mnist.py',
    role=role,
    framework_version='1.8.0',
    py_version='py3',
    instance_type='ml.c5.2xlarge',
    instance_count=2,
    hyperparameters=training_hyperparameters,
)

In [34]:
# Start the training job, exposing the uploaded S3 data as the 'training' channel.
estimator.fit(inputs={'training': inputs})

2022-07-11 00:39:19 Starting - Starting the training job...
2022-07-11 00:39:45 Starting - Preparing the instances for trainingProfilerReport-1657499959: InProgress
............
2022-07-11 00:41:32 Downloading - Downloading input data...
2022-07-11 00:42:17 Training - Training image download completed. Training in progress..[34mbash: cannot set terminal process group (-1): Inappropriate ioctl for device[0m
[34mbash: no job control in this shell[0m
[34m2022-07-11 00:42:19,520 sagemaker-training-toolkit INFO     Imported framework sagemaker_pytorch_container.training[0m
[34m2022-07-11 00:42:19,522 sagemaker-training-toolkit INFO     No GPUs detected (normal if no gpus installed)[0m
[34m2022-07-11 00:42:19,530 sagemaker_pytorch_container.training INFO     Block until all host DNS lookups succeed.[0m
[34m2022-07-11 00:42:19,539 sagemaker_pytorch_container.training INFO     Invoking user training script.[0m
[34m2022-07-11 00:42:19,892 sagemaker-training-toolkit INFO     No GPUs

In [35]:
# Deploy the trained model to an endpoint backed by a single ml.m4.xlarge
# instance; the returned predictor is used for inference below.
predictor = estimator.deploy(
    instance_type='ml.m4.xlarge',
    initial_instance_count=1,
)

---------!

In [36]:
# Show the raw MNIST files downloaded earlier (compressed and extracted copies).
!ls data/MNIST/raw

t10k-images-idx3-ubyte	   train-images-idx3-ubyte
t10k-images-idx3-ubyte.gz  train-images-idx3-ubyte.gz
t10k-labels-idx1-ubyte	   train-labels-idx1-ubyte
t10k-labels-idx1-ubyte.gz  train-labels-idx1-ubyte.gz


In [37]:
import gzip
import numpy as np
import random
import os

data_dir = 'data/MNIST/raw'

# Read the gzipped test-image file, skip its first 16 bytes (the file header,
# not pixel data) and view the remaining bytes as a stack of 28x28 float32 images.
with gzip.open(os.path.join(data_dir, "t10k-images-idx3-ubyte.gz"), "rb") as f:
    images = np.frombuffer(f.read(), np.uint8, offset=16).reshape(-1, 28, 28).astype(np.float32)

# Randomly select 16 distinct test images.
mask = random.sample(range(len(images)), 16)
# `np.int` was a deprecated alias of the builtin `int` (NumPy 1.20) and has been
# removed in later releases; the builtin yields the same integer dtype.
mask = np.array(mask, dtype=int)
data = images[mask]

Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  # This is added back by InteractiveShellApp.init_path()


In [38]:
# Insert a channel axis (axis=1) into the sampled image batch and send it
# to the deployed endpoint.
batch = np.expand_dims(data, axis=1)
response = predictor.predict(batch)
print("Raw prediction result:", response, "", sep="\n")

# Pair each digit label 0-9 with the corresponding score for the FIRST image only.
labeled_predictions = [
    (digit, score) for digit, score in zip(range(10), response[0])
]
print("Labeled predictions: ", labeled_predictions, "", sep="\n")

# Sort in place so that the highest-scoring label comes first.
labeled_predictions.sort(key=lambda pair: 1.0 - pair[1])
print(f"Most likely answer: {labeled_predictions[0]}")

Raw prediction result:
[[-6.33137085e+02 -5.47513794e+02 -5.14448608e+02 -3.97776886e+02
  -7.95506897e+01 -2.86841949e+02 -4.55482422e+02 -1.98626678e+02
  -1.57270432e+02  0.00000000e+00]
 [-8.59371245e-01 -6.94578125e+02 -5.09035156e+02 -2.29966110e+02
  -4.33279907e+02 -5.50655425e-01 -2.05621658e+02 -5.37763123e+02
  -1.65528900e+02 -3.66799896e+02]
 [-5.83351562e+02 -1.02440308e+03 -3.99042999e+02 -1.03943164e+03
  -5.84062317e+02 -6.58948547e+02  0.00000000e+00 -1.24485803e+03
  -7.06259644e+02 -9.26044067e+02]
 [-6.11113525e+02 -7.15336914e+02  0.00000000e+00 -3.54861389e+02
  -6.56982788e+02 -8.61264832e+02 -8.51146851e+02 -2.34446320e+02
  -4.10123444e+02 -3.79624329e+02]
 [-7.72983704e+02 -2.75706726e+02  0.00000000e+00 -1.92012451e+02
  -6.58118042e+02 -6.06058167e+02 -5.89607178e+02 -3.91817719e+02
  -3.29763153e+02 -4.93891022e+02]
 [-7.96360596e+02 -1.06872156e+03 -6.39077393e+02 -1.19329529e+03
  -5.89165161e+02 -5.96070679e+02  0.00000000e+00 -1.40121680e+03
  -8.45124

#### Reference 

https://sagemaker-examples.readthedocs.io/en/latest/sagemaker-python-sdk/pytorch_mnist/pytorch_mnist.html