In [1]:
import boto3
import sagemaker

In [2]:
sagemaker_session = sagemaker.Session()
role = sagemaker.get_execution_role()

bucket = sagemaker_session.default_bucket()

In [3]:
bucket

'sagemaker-us-east-1-322961843176'

### Upload training data to S3

In [4]:
data_dir = 'robot_reboot_data'
prefix = 'sagemaker/robot_reboot'
input_data = sagemaker_session.upload_data(path=data_dir, bucket=bucket, key_prefix=prefix)

In [5]:
sagemaker_session.upload_data(path='robot_reboot_model', bucket=bucket, key_prefix=prefix)

's3://sagemaker-us-east-1-322961843176/sagemaker/robot_reboot'

In [6]:
for obj in boto3.resource('s3').Bucket(bucket).objects.all():
    print(obj.key)

robot_reboot-2021-03-27-00-01-00-089/source/sourcedir.tar.gz
robot_reboot-2021-03-27-00-02-05-167/model.tar.gz
robot_reboot-2021-03-27-00-02-05-167/output.tar.gz
robot_reboot-2021-03-27-00-02-05-167/source/sourcedir.tar.gz
robot_reboot-2021-03-27-00-05-07-280/model.tar.gz
robot_reboot-2021-03-27-00-05-07-280/output.tar.gz
robot_reboot-2021-03-27-00-05-07-280/source/sourcedir.tar.gz
robot_reboot-2021-03-27-00-06-50-289/model.tar.gz
robot_reboot-2021-03-27-00-06-50-289/output.tar.gz
robot_reboot-2021-03-27-00-06-50-289/source/sourcedir.tar.gz
robot_reboot-2021-03-27-00-09-37-901/model.tar.gz
robot_reboot-2021-03-27-00-09-37-901/output.tar.gz
robot_reboot-2021-03-27-00-09-37-901/source/sourcedir.tar.gz
robot_reboot-2021-03-27-00-16-25-899/source/sourcedir.tar.gz
robot_reboot-2021-03-27-00-17-07-630/source/sourcedir.tar.gz
robot_reboot-2021-03-27-00-17-23-327/model.tar.gz
robot_reboot-2021-03-27-00-17-23-327/output.tar.gz
robot_reboot-2021-03-27-00-17-23-327/source/sourcedir.tar.gz
robot_r

In [7]:
!pygmentize src/ml/train.py

[34mimport[39;49;00m [04m[36margparse[39;49;00m
[34mimport[39;49;00m [04m[36mlogging[39;49;00m
[34mimport[39;49;00m [04m[36mos[39;49;00m

[34mimport[39;49;00m [04m[36mtensorflow[39;49;00m [34mas[39;49;00m [04m[36mtf[39;49;00m
[34mfrom[39;49;00m [04m[36mtensorflow[39;49;00m[04m[36m.[39;49;00m[04m[36mkeras[39;49;00m[04m[36m.[39;49;00m[04m[36mcallbacks[39;49;00m [34mimport[39;49;00m ModelCheckpoint
[34mfrom[39;49;00m [04m[36mmodel[39;49;00m [34mimport[39;49;00m get_model

logging.getLogger().setLevel(logging.INFO)

HEIGHT = [34m31[39;49;00m
WIDTH = [34m31[39;49;00m
DEPTH = [34m9[39;49;00m
NUM_CLASSES = [34m16[39;49;00m
SHUFFLE_BUFFER_SIZE = [34m100[39;49;00m


[34mdef[39;49;00m [32mget_filenames[39;49;00m(channel_name, channel):
    [34mif[39;49;00m channel_name [35min[39;49;00m [[33m'[39;49;00m[33mtrain[39;49;00m[33m'[39;49;00m, [33m'[39;49;00m[33mvalidation[39;49;00m[33m'[39;49;00m, [33m'

### Create estimator

In [8]:
!pip install --upgrade sagemaker

Collecting sagemaker
  Downloading sagemaker-2.32.0.tar.gz (405 kB)
[K     |████████████████████████████████| 405 kB 17.3 MB/s eta 0:00:01
Building wheels for collected packages: sagemaker
  Building wheel for sagemaker (setup.py) ... [?25ldone
[?25h  Created wheel for sagemaker: filename=sagemaker-2.32.0-py2.py3-none-any.whl size=570899 sha256=1b9eceab0b0ea5756527f6e9515d545846cd77b07da818371dde766ccedd6e77
  Stored in directory: /home/ec2-user/.cache/pip/wheels/69/97/97/3cc021580a00f6ec531125f88d49baf9ec4385afabe45ecf11
Successfully built sagemaker
Installing collected packages: sagemaker
  Attempting uninstall: sagemaker
    Found existing installation: sagemaker 2.31.0
    Uninstalling sagemaker-2.31.0:
      Successfully uninstalled sagemaker-2.31.0
Successfully installed sagemaker-2.32.0


In [15]:
from sagemaker.tensorflow import TensorFlow
estimator = TensorFlow(base_job_name='robot_reboot',
                      entry_point='train.py',
                      source_dir='src/ml',
                      role=role,
                      framework_version="2.2.0",
                      py_version='py37',
                      hyperparameters={
                          'epochs': 1,
                          'model': 's3://sagemaker-us-east-1-322961843176/sagemaker/robot_reboot/model_0/',
                          'train': 's3://sagemaker-us-east-1-322961843176/sagemaker/robot_reboot/train.tfrecords',
                          'validation': 's3://sagemaker-us-east-1-322961843176/sagemaker/robot_reboot/validation.tfrecords',
                          'eval': 's3://sagemaker-us-east-1-322961843176/sagemaker/robot_reboot/validation.tfrecords',
                          'model_version': 1
                      },
                      train_instance_count=1, 
                      train_instance_type='local')

train_instance_type has been renamed in sagemaker>=2.
See: https://sagemaker.readthedocs.io/en/stable/v2.html for details.
train_instance_count has been renamed in sagemaker>=2.
See: https://sagemaker.readthedocs.io/en/stable/v2.html for details.
train_instance_type has been renamed in sagemaker>=2.
See: https://sagemaker.readthedocs.io/en/stable/v2.html for details.


In [21]:
estimator.fit({'train': 's3://sagemaker-us-east-1-322961843176/sagemaker/robot_reboot/train.tfrecords',
              'validation': 's3://sagemaker-us-east-1-322961843176/sagemaker/robot_reboot/validation.tfrecords',
              'eval': 's3://sagemaker-us-east-1-322961843176/sagemaker/robot_reboot/validation.tfrecords'})

Creating 47km6iougl-algo-1-gedv4 ... 
Creating 47km6iougl-algo-1-gedv4 ... done
Attaching to 47km6iougl-algo-1-gedv4
[36m47km6iougl-algo-1-gedv4 |[0m 2021-03-29 23:00:45.615074: W tensorflow/core/profiler/internal/smprofiler_timeline.cc:425] Initializing the SageMaker Profiler.
[36m47km6iougl-algo-1-gedv4 |[0m 2021-03-29 23:00:45.615243: W tensorflow/core/profiler/internal/smprofiler_timeline.cc:106] SageMaker Profiler is not enabled. The timeline writer thread will not be started, future recorded events will be dropped.
[36m47km6iougl-algo-1-gedv4 |[0m 2021-03-29 23:00:45.638367: W tensorflow/core/profiler/internal/smprofiler_timeline.cc:425] Initializing the SageMaker Profiler.
[36m47km6iougl-algo-1-gedv4 |[0m 2021-03-29 23:00:47,611 sagemaker-training-toolkit INFO     Imported framework sagemaker_tensorflow_container.training
[36m47km6iougl-algo-1-gedv4 |[0m 2021-03-29 23:00:47,618 sagemaker-training-toolkit INFO     No GPUs detected (normal if no gpus installed)
[36m47km

RuntimeError: Failed to run: ['docker-compose', '-f', '/tmp/tmpsa2wg7oi/docker-compose.yaml', 'up', '--build', '--abort-on-container-exit'], Process exited with code: 1