In [1]:
import sagemaker
from sagemaker import get_execution_role
import boto3
import json

sess = sagemaker.Session()

role = get_execution_role()
print(role) # This is the role that SageMaker would use to leverage AWS resources (S3, CloudWatch) on your behalf

bucket = sess.default_bucket() # Replace with your own bucket name if needed
print(bucket)
prefix = 'sagemaker/DEMO-blazingtext-text8' #Replace with the prefix under which you want to store the data if needed

arn:aws:iam::830436844326:role/service-role/AmazonSageMaker-ExecutionRole-20190623T030189
sagemaker-us-east-1-830436844326


In [2]:
!gzip -d text8.gz -f

gzip: text8.gz: No such file or directory


In [3]:
train_channel = prefix + '/train'

sess.upload_data(path='text8', bucket=bucket, key_prefix=train_channel)

s3_train_data = 's3://{}/{}'.format(bucket, train_channel)

In [4]:
s3_output_location = 's3://{}/{}/output'.format(bucket, prefix)

In [5]:
region_name = boto3.Session().region_name

In [6]:
container = sagemaker.amazon.amazon_estimator.get_image_uri(region_name, "blazingtext", "latest")
print('Using SageMaker BlazingText container: {} ({})'.format(container, region_name))

Using SageMaker BlazingText container: 811284229777.dkr.ecr.us-east-1.amazonaws.com/blazingtext:latest (us-east-1)


In [7]:
bt_model = sagemaker.estimator.Estimator(container,
                                         role, 
                                         train_instance_count=2, 
                                         train_instance_type='ml.c4.2xlarge',
                                         train_volume_size = 5,
                                         train_max_run = 360000,
                                         input_mode= 'File',
                                         output_path=s3_output_location,
                                         sagemaker_session=sess)

In [8]:
bt_model.set_hyperparameters(mode="batch_skipgram",
                             epochs=5,
                             min_count=5,
                             sampling_threshold=0.0001,
                             learning_rate=0.05,
                             window_size=5,
                             vector_dim=100,
                             negative_samples=5,
                             batch_size=11, #  = (2*window_size + 1) (Preferred. Used only if mode is batch_skipgram)
                             evaluation=True,# Perform similarity evaluation on WS-353 dataset at the end of training
                             subwords=False) # Subword embedding learning is not supported by batch_skipgram

In [9]:
train_data = sagemaker.session.s3_input(s3_train_data, distribution='FullyReplicated', 
                        content_type='text/plain', s3_data_type='S3Prefix')
data_channels = {'train': train_data}

In [None]:
bt_model.fit(inputs=data_channels, logs=True)

2019-06-22 20:53:33 Starting - Starting the training job...
2019-06-22 20:53:34 Starting - Launching requested ML instances......
2019-06-22 20:54:36 Starting - Preparing the instances for training......
2019-06-22 20:55:55 Downloading - Downloading input data
2019-06-22 20:55:55 Training - Training image download completed. Training in progress.
[31mArguments: train[0m
[32mArguments: train[0m
[31mFound 10.32.0.4 for host algo-1[0m
[31mFound 10.40.0.3 for host algo-2[0m
[32mFound 10.32.0.4 for host algo-1[0m
[32mFound 10.40.0.3 for host algo-2[0m
[31m[06/22/2019 20:56:09 INFO 140642198013760] nvidia-smi took: 0.0251688957214 secs to identify 0 gpus[0m
[31m[06/22/2019 20:56:09 INFO 140642198013760] Running distributed CPU BlazingText training using batch_skipgram on 2 hosts.[0m
[31m[06/22/2019 20:56:09 INFO 140642198013760] Number of hosts: 2, master IP address: 10.32.0.4, host IP address: 10.32.0.4.[0m
[31m[06/22/2019 20:56:09 INFO 140642198013760] HTTP server starte

In [11]:
bt_endpoint = bt_model.deploy(initial_instance_count = 1,instance_type = 'ml.m4.xlarge')

ResourceLimitExceeded: An error occurred (ResourceLimitExceeded) when calling the CreateEndpoint operation: The account-level service limit 'ml.m4.xlarge for endpoint usage' is 0 Instances, with current utilization of 0 Instances and a request delta of 1 Instances. Please contact AWS support to request an increase for this limit.

In [12]:
bt_endpoint = bt_model.deploy(initial_instance_count = 1,instance_type = 'ml.t2.medium')

ResourceLimitExceeded: An error occurred (ResourceLimitExceeded) when calling the CreateEndpoint operation: The account-level service limit 'ml.m4.xlarge for endpoint usage' is 0 Instances, with current utilization of 0 Instances and a request delta of 1 Instances. Please contact AWS support to request an increase for this limit.