In [None]:
#!/usr/bin/python
import os 
import sagemaker
import subprocess


# Define instance configurations 
sess = sagemaker.Session()
role = sagemaker.get_execution_role()
account = sess.boto_session.client('sts').get_caller_identity()['Account']
region = sess.boto_session.region_name

repo_name = 'key_value' # ECR repository
image_tag = 'chargrid' # ECR image tag
base_job_name = 'kyocera-hector' # SageMaker training prefix
# dockerfile = os.path.abspath('./new_dockerfile')

%env dockerfile Dockerfile
%env account {account}
%env region {region}
%env repo_name {repo_name}
%env image_tag {image_tag}

# print("Account: {0}".format(account))
# print("Region: {0}".format(region))
# print("Repo name: {0}".format(repo_name))
# print("Image tag: {0}".format(image_tag))
# print("Base job name: {0}".format(base_job_name))
# print("Docker file: {0}".format(dockerfile))

In [None]:
%%sh
# Make sure the ECR repository exists, then log the local Docker client in
# to the account's ECR registry.
# Reads $account, $region and $repo_name, which the configuration cell
# exports with %env.

# describe-repositories exits non-zero when the repository cannot be
# described (e.g. it does not exist yet); in that case try to create it.
if ! aws ecr describe-repositories --region "$region" --repository-names "$repo_name" > /dev/null 2>&1
then
   aws ecr create-repository --region "$region" --repository-name "$repo_name" > /dev/null
fi

# `aws ecr get-login` was removed in AWS CLI v2 and exposes the password on
# the docker command line; pipe the token through stdin instead.
aws ecr get-login-password --region "$region" \
   | docker login --username AWS --password-stdin "$account.dkr.ecr.$region.amazonaws.com"

In [None]:
# Build the Docker image and push it to the ECR repository.
# The $dockerfile, $image_tag, $account, $region and $repo_name references
# are the values exported with %env in the configuration cell.
# NOTE(review): the image is pushed under the ':latest' tag; $image_tag is
# only used as the local build tag.

# Build the image from the current directory using the configured Dockerfile.
!docker build -t $image_tag -f $dockerfile .
# Give the local image its fully qualified ECR name.
!docker tag $image_tag $account.dkr.ecr.$region.amazonaws.com/$repo_name:latest
# List local images so the new tag can be checked in the cell output.
!docker images
# Upload the image to ECR (requires the registry login from the previous cell).
!docker push $account.dkr.ecr.$region.amazonaws.com/$repo_name:latest

In [None]:

# S3 prefix holding the training data; every object under it is copied to
# each training instance (FullyReplicated).
s3_directory = 's3://prj-kyocera/research'
train_input_channel = sagemaker.session.s3_input(s3_directory, distribution='FullyReplicated',  s3_data_type='S3Prefix')

# S3 location where SageMaker stores the model artifacts.
# (A trailing space inside the URI was removed: it would have sent the
# artifacts to a prefix literally named 'output '.)
# NOTE(review): this prefix lives under s3_directory, so artifacts of earlier
# jobs would presumably be downloaded as part of the 'train' channel --
# confirm this is intended.
output_path = 's3://prj-kyocera/research/output'

# Fully qualified name of the training image pushed to ECR.
image_name  = '{}.dkr.ecr.{}.amazonaws.com/{}:latest'.format(account, region, repo_name)

# Instance type used for training
train_instance_type = 'ml.p3.2xlarge'
# train_instance_type = 'ml.p3.8xlarge'

# Size of the disk volume used to store the input data
storage_space = 30 # GB

# Maximum run time of the training job in seconds
# (days * hours per day * seconds per hour)
train_max_run = 1 * 24  * 3600

In [None]:

# Configure the SageMaker estimator around the custom ECR image and launch
# the training job on managed spot instances.
# NOTE(review): checkpoint_local_path is a relative path and no
# checkpoint_s3_uri is given; spot training normally needs an S3 checkpoint
# location to resume after an interruption -- confirm against the SDK docs.
estimator = sagemaker.estimator.Estimator(
                       image_name=image_name,
                       base_job_name=base_job_name,
                       role=role,
                       input_mode='File',
                       train_instance_count=1,
                       train_volume_size=storage_space,
                       train_instance_type=train_instance_type,
                       output_path=output_path,
                       train_max_run=train_max_run,
                       train_use_spot_instances=True,
                       # Upper bound in seconds (2 days) on the job's total
                       # time, including waiting for spot capacity.
                       train_max_wait=2 * 24  * 3600,
                       # The missing comma after this argument made the
                       # whole cell a SyntaxError.
                       checkpoint_local_path='weights',
                       sagemaker_session=sess)

# Start training; the data is exposed to the container as the 'train' channel.
estimator.fit({'train': train_input_channel})