In [None]:
!pip install datasets[s3] "torch==1.11" "transformers==4.6.1" "sentencepiece==0.1.96" "sagemaker>=2.48"

In [3]:
!pip install -qq sagemaker-huggingface-inference-toolkit

In [9]:
from transformers import pipeline

summarizer = pipeline("summarization", model="t5-small", tokenizer="t5-small", framework="pt")
summarizer("Early in the pandemic, restaurants ditched physical menus and instead revived a long-sidelined technology, the quick response code. It seemed like a good idea at the time. As restaurants reopened from government-mandated Covid lockdowns, restaurant design experts advised them to clear their tables of high-touch items like salt, pepper and ketchup bottles. Even the physical menu had to go, and thus the QR code — which, when scanned, opens up a digital menu — came into vogue.", min_length=5, max_length=20)

Downloading:   0%|          | 0.00/1.20k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/242M [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/792k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/1.39M [00:00<?, ?B/s]

[{'summary_text': 'restaurant design experts advised them to clear their tables of high-touch items like salt, pepper and'}]

In [7]:
import sagemaker

sagemaker_session = sagemaker.Session()
role = sagemaker.get_execution_role()
session_bucket = sagemaker_session.default_bucket()

print(f"sagemaker role arn: {role}")
print(f"sagemaker bucket: {session_bucket}")
print(f"sagemaker session region: {sagemaker_session.boto_region_name}")

sagemaker role arn: arn:aws:iam::279578104300:role/service-role/AmazonSageMakerServiceCatalogProductsUseRole
sagemaker bucket: sagemaker-us-east-1-279578104300
sagemaker session region: us-east-1


In [8]:
import sagemaker.huggingface

In [4]:
from sagemaker.s3 import S3Downloader
import os

if ("wikihowAll.csv" not in os.listdir()):
    S3Downloader.download(s3_uri="s3://sagemaker-us-east-1-279578104300/yubaba/dataset/wikihow/all/wikihowAll.csv",
                      local_path=".",
                      sagemaker_session=sagemaker_session)
    print("csv downloaded")

csv downloaded


In [5]:
import yaml

with open("./configs/wikihow_t5.yaml", "r") as f:
        config = yaml.load(f, Loader=yaml.FullLoader)
config

{'dataset': {'name': 'wikihow', 'portion': 0.01, 'data_dir': '.'},
 'tokenizer': {'max_length': 1024},
 'model': {'name': 't5_base', 'checkpoint': None},
 'train': {'model_path': '',
  'checkpoint_path': '',
  'num_epochs': 1,
  'learning_rate': 0.0003,
  'weight_decay': 0.001,
  'eps': 1e-08,
  'batch_size': 2,
  'gradient_accum_steps': 8},
 'eval': None}

In [None]:
import torch

from datasets import load_dataset
from wrapper.wikihow import Wikihow
from transformers import T5Tokenizer

tokenizer = T5Tokenizer.from_pretrained('t5-small', model_max_length = config["tokenizer"]["max_length"])

def tokenize(batch):
    inputs = batch['text']
    inputs = inputs.strip().replace("\n","")
        
    labels = batch['headline']

    inputs = tokenizer.batch_encode_plus([inputs], truncation = True, padding = "max_length", return_tensors = "pt")
    targets = tokenizer.batch_encode_plus([labels], truncation = True, padding = "max_length", return_tensors = "pt")
        
    return {"source_ids": inputs["input_ids"].squeeze(), 
            "source_mask": inputs["attention_mask"].squeeze(),
            "target_ids": targets["input_ids"].squeeze(),
            "target_mask": targets["attention_mask"].squeeze()}

train_dataset, validation_dataset, test_dataset = load_dataset("wikihow", "all", data_dir=".", split=["train", "validation", "test"])
train_dataset = train_dataset.map(tokenize, batched=False)
validatation_dataset = validation_dataset.map(tokenize, batched=False)
test_dataset = test_dataset.map(tokenize, batched=False)
train_dataset.set_format('torch')
validation_dataset.set_format('torch')
test_dataset.set_format('torch')

(train_dataset, validation_dataset, test_dataset)

In [None]:
import botocore
from datasets.filesystems import S3FileSystem

s3 = S3FileSystem()
train_input_path = f's3://{session_bucket}/yubaba/dataset/train'
validation_input_path = f's3://{session_bucket}/yubaba/dataset/validation'
test_input_path = f's3://{session_bucket}/yubaba/dataset/test'

train_dataset.save_to_disk(train_input_path, fs=s3)
validation_dataset.save_to_disk(validation_input_path, fs=s3)
test_dataset.save_to_disk(test_input_path, fs=s3)

In [10]:
from sagemaker.pytorch import PyTorch
from sagemaker.huggingface import HuggingFace

train_input_path = f's3://{session_bucket}/yubaba/dataset/train'
validation_input_path = f's3://{session_bucket}/yubaba/dataset/validation'
test_input_path = f's3://{session_bucket}/yubaba/dataset/test'

hyperparameters = {
    "num_epochs": 1,
    "learning_rate": 0.0003,
    "weight_decay": 0.001,
    "eps": 0.00000001,
    "gradient_accum_steps": 8,
    "batch_size": 2,
    "model_name": "t5-small"
}

estimator = HuggingFace(entry_point="entry.py",
                    source_dir="./src",
                    instance_type='ml.p3.2xlarge',                    
                    instance_count=1,                    
                    role=role, 
                    transformers_version='4.17',
                    py_version="py38",
                    pytorch_version="1.10",
                    hyperparameters=hyperparameters)

In [None]:
import sagemaker
from sagemaker.huggingface import HuggingFace

# gets role for executing training job
role = sagemaker.get_execution_role()
hyperparameters = {
	'model_name_or_path':'t5-small',
	'output_dir':'/opt/ml/model'
	# add your remaining hyperparameters
	# more info here https://github.com/huggingface/transformers/tree/v4.17.0/examples/pytorch/seq2seq
}

# git configuration to download our fine-tuning script
git_config = {'repo': 'https://github.com/huggingface/transformers.git','branch': 'v4.17.0'}

# creates Hugging Face estimator
huggingface_estimator = HuggingFace(
	entry_point='run_summarization.py',
	source_dir='./examples/pytorch/seq2seq',
	instance_type='ml.p3.2xlarge',
	instance_count=1,
	role=role,
	git_config=git_config,
	transformers_version='4.17.0',
	pytorch_version='1.10.2',
	py_version='py38',
	hyperparameters = hyperparameters
)

# starting the train job
huggingface_estimator.fit()

In [15]:
estimator.fit({"train": train_input_path, "test": test_input_path})

2022-08-20 22:06:48 Starting - Starting the training job...
2022-08-20 22:07:16 Starting - Preparing the instances for trainingProfilerReport-1661033208: InProgress
.........
2022-08-20 22:08:41 Downloading - Downloading input data...............
2022-08-20 22:11:12 Training - Downloading the training image............
2022-08-20 22:13:08 Training - Training image download completed. Training in progress.[34mbash: cannot set terminal process group (-1): Inappropriate ioctl for device[0m
[34mbash: no job control in this shell[0m
[34m2022-08-20 22:13:11,723 sagemaker-training-toolkit INFO     Imported framework sagemaker_pytorch_container.training[0m
[34m2022-08-20 22:13:11,749 sagemaker_pytorch_container.training INFO     Block until all host DNS lookups succeed.[0m
[34m2022-08-20 22:13:11,755 sagemaker_pytorch_container.training INFO     Invoking user training script.[0m
[34m2022-08-20 22:13:12,235 sagemaker-training-toolkit INFO     Installing dependencies from requirements

In [16]:
predictor = estimator.deploy(initial_instance_count=1, instance_type="ml.m5.xlarge")

-----!

In [17]:
data = {
    "inputs": "Early in the pandemic, restaurants ditched physical menus and instead revived a long-sidelined technology, the quick response code. It seemed like a good idea at the time. As restaurants reopened from government-mandated Covid lockdowns, restaurant design experts advised them to clear their tables of high-touch items like salt, pepper and ketchup bottles. Even the physical menu had to go, and thus the QR code — which, when scanned, opens up a digital menu — came into vogue."
}
predictor.predict(data)

ModelError: An error occurred (ModelError) when calling the InvokeEndpoint operation: Received client error (400) from primary with message "{
  "code": 400,
  "type": "InternalServerException",
  "message": "(\"You need to define one of the following [\u0027audio-classification\u0027, \u0027automatic-speech-recognition\u0027, \u0027feature-extraction\u0027, \u0027text-classification\u0027, \u0027token-classification\u0027, \u0027question-answering\u0027, \u0027table-question-answering\u0027, \u0027fill-mask\u0027, \u0027summarization\u0027, \u0027translation\u0027, \u0027text2text-generation\u0027, \u0027text-generation\u0027, \u0027zero-shot-classification\u0027, \u0027zero-shot-image-classification\u0027, \u0027conversational\u0027, \u0027image-classification\u0027, \u0027image-segmentation\u0027, \u0027object-detection\u0027] as env \u0027HF_TASK\u0027.\", 403)"
}
". See https://us-east-1.console.aws.amazon.com/cloudwatch/home?region=us-east-1#logEventViewer:group=/aws/sagemaker/Endpoints/huggingface-pytorch-training-2022-08-20-22-18-16-181 in account 279578104300 for more information.