In [17]:
# Create the local scratch directory that the dataset files are downloaded into.
# -p makes the command succeed even when tmp/ already exists.
!mkdir -p tmp

In [14]:
# Base URL of the folder holding the synthetic dataset files; the individual
# file names are appended to it by the download cells.
path = (
    "https://raw.githubusercontent.com"
    "/PacktPublishing/Amazon-SageMaker-Cookbook"
    "/master/Chapter09/files"
)

In [15]:
# Download the training file into tmp/.
# --no-clobber keeps the cell idempotent: without it, re-running the cell makes
# wget keep the first copy and save the new one as synthetic.train.txt.1.
!wget --no-clobber -P tmp {path}/synthetic.train.txt

--2021-06-07 15:46:02--  https://raw.githubusercontent.com/PacktPublishing/Amazon-SageMaker-Cookbook/master/Chapter09/files/synthetic.train.txt
Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.111.133, 185.199.110.133, 185.199.109.133, ...
Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.111.133|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 154276 (151K) [text/plain]
Saving to: ‘tmp/synthetic.train.txt’


2021-06-07 15:46:02 (7.36 MB/s) - ‘tmp/synthetic.train.txt’ saved [154276/154276]



In [16]:
# Download the validation file into tmp/.
# --no-clobber keeps the cell idempotent: without it, re-running the cell makes
# wget keep the first copy and save the new one as synthetic.validation.txt.1.
!wget --no-clobber -P tmp {path}/synthetic.validation.txt

--2021-06-07 15:46:09--  https://raw.githubusercontent.com/PacktPublishing/Amazon-SageMaker-Cookbook/master/Chapter09/files/synthetic.validation.txt
Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.108.133, 185.199.109.133, 185.199.110.133, ...
Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.108.133|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 50653 (49K) [text/plain]
Saving to: ‘tmp/synthetic.validation.txt’


2021-06-07 15:46:09 (101 MB/s) - ‘tmp/synthetic.validation.txt’ saved [50653/50653]



In [6]:
# S3 bucket and key prefix used to build the dataset upload locations.
# NOTE(review): the bucket name is hard-coded; presumably it must be replaced
# with a bucket the executing role can write to — confirm before running.
s3_bucket, prefix = "sagemaker-cookbook-bucket", "chapter09"

In [8]:
# Build the S3 URIs for both dataset files from one shared template:
#   s3://<s3_bucket>/<prefix>/input/<file name>
s3_train_data, s3_validation_data = (
    's3://{}/{}/input/{}'.format(s3_bucket, prefix, filename)
    for filename in ("synthetic.train.txt", "synthetic.validation.txt")
)

In [18]:
# Copy the downloaded training file from tmp/ to the S3 URI held in s3_train_data.
!aws s3 cp tmp/synthetic.train.txt {s3_train_data}

upload: tmp/synthetic.train.txt to s3://sagemaker-cookbook-bucket/chapter09/input/synthetic.train.txt


In [19]:
# Copy the downloaded validation file from tmp/ to the S3 URI held in s3_validation_data.
!aws s3 cp tmp/synthetic.validation.txt {s3_validation_data}

upload: tmp/synthetic.validation.txt to s3://sagemaker-cookbook-bucket/chapter09/input/synthetic.validation.txt


In [32]:
import sagemaker
from sagemaker import Session

# SageMaker session object and the execution role that the estimator and model
# cells pass as `role=`.
session = Session()
role = sagemaker.get_execution_role()

In [33]:
from sagemaker.huggingface import HuggingFace

# Hyperparameters handed to the estimator. NOTE(review): how train.py consumes
# these keys is not visible in this notebook — confirm against the script.
hyperparameters = dict(
    epochs=1,
    train_batch_size=32,
    model_name='distilbert-base-uncased',
)

In [34]:
# Hugging Face training job definition; nothing is launched until .fit() is called.
estimator = HuggingFace(
    role=role,
    # Training code: ./scripts/train.py, configured through `hyperparameters`.
    source_dir='./scripts',
    entry_point='train.py',
    hyperparameters=hyperparameters,
    # Compute: one ml.p3.2xlarge instance.
    instance_count=1,
    instance_type='ml.p3.2xlarge',
    # Framework versions used for the training job.
    transformers_version='4.4',
    pytorch_version='1.6',
    py_version='py36',
)

In [35]:
from sagemaker.inputs import TrainingInput

# Wrap each S3 URI in a TrainingInput, then map them to the channel names
# ('train' and 'valid') that are passed to estimator.fit().
train_data, validation_data = (
    TrainingInput(s3_uri) for s3_uri in (s3_train_data, s3_validation_data)
)

data_channels = dict(train=train_data, valid=validation_data)

In [36]:
%%time

# Launch the training job on the two data channels; the job log streams into
# the cell output while it runs.
estimator.fit(inputs=data_channels)

2021-06-07 16:31:45 Starting - Starting the training job...
2021-06-07 16:32:09 Starting - Launching requested ML instancesProfilerReport-1623083504: InProgress
......
2021-06-07 16:33:10 Starting - Preparing the instances for training.........
2021-06-07 16:34:33 Downloading - Downloading input data...
2021-06-07 16:35:10 Training - Downloading the training image..................
2021-06-07 16:38:11 Training - Training image download completed. Training in progress.
bash: cannot set terminal process group (-1): Inappropriate ioctl for device
bash: no job control in this shell
2021-06-07 16:38:07,827 sagemaker-training-toolkit INFO     Imported framework sagemaker_pytorch_container.training
2021-06-07 16:38:07,850 sagemaker_pytorch_container.training INFO     Block until all host DNS lookups succeed.
2021-06-07 16:38:08,147 sagemaker_pytorch_container.training INFO     Invoking user training script.
2021-06-07 16:38:08,495 sagemaker-tra

In [37]:
from sagemaker.pytorch.model import PyTorchModel

# Model artifact reference taken from the fitted estimator.
model_data = estimator.model_data

# Describe the deployable model: the artifact above served by
# scripts/inference.py with PyTorch 1.6.0 on Python 3.
model = PyTorchModel(
    role=role,
    model_data=model_data,
    entry_point='inference.py',
    source_dir="scripts",
    py_version="py3",
    framework_version='1.6.0',
)

In [38]:
%%time

# Deploy the model to an endpoint on a single ml.m5.xlarge instance.
# The recorded run below took roughly eight minutes of wall time.
predictor = model.deploy(
    initial_instance_count=1,
    instance_type='ml.m5.xlarge',
)

---------------!CPU times: user 18.6 s, sys: 3.28 s, total: 21.9 s
Wall time: 7min 52s


In [39]:
from sagemaker.serializers import JSONSerializer
from sagemaker.deserializers import JSONDeserializer

# Send requests and read responses as JSON, so predict() can take a plain dict.
predictor.serializer, predictor.deserializer = JSONSerializer(), JSONDeserializer()

In [42]:
# Smoke test with a negative review; the recorded response is 'NEGATIVE'.
test_data = dict(text="This tastes bad. I hate this place.")

predictor.predict(test_data)

'NEGATIVE'

In [41]:
# Smoke test with a positive review; the recorded response is 'POSITIVE'.
test_data = dict(text="Very delicious. I would recommend this to my friends")

predictor.predict(test_data)

'POSITIVE'

In [43]:
# Tear down the endpoint together with its endpoint configuration so that
# nothing is left running after the demo.
predictor.delete_endpoint(delete_endpoint_config=True)