In [None]:
!pip install -U sagemaker

## Prepare the dataset

The input text must be in the first column and the target (output) text in the second column of the CSV.

In [40]:
import pandas as pd

# Input text ('modern') goes first, target text ('original') second.
column_order = ['modern', 'original']

# Read only the two needed columns, then select them in the required order.
data = pd.read_csv("Shakespear/all_shakespeare.csv", usecols=column_order).loc[:, column_order]
data

Unnamed: 0,modern,original
0,"Here comes my master, your brother.","Yonder comes my master, your brother."
1,"Go hide, Adam, and you’ll hear how he abuses me.","Go apart, Adam, and thou shalt hear how he wil..."
2,here?,"Now, sir, what make you here?"
3,Nothing. I’ve never been taught how to make an...,Nothing. I am not taught to make anything.
4,"Well, then, what are you messing up?","What mar you then, sir?"
...,...,...
11543,"The stuff you had at the Centaur, sir.","Your goods that lay at host, sir, in the Centaur."
11544,You have a fat friend at your master’s house: ...,There is a fat friend at your master’s house T...
11545,"After you, sir. You’re older than me.","Not I, sir. You are my elder."
11546,That’s a good point. How can we tell which of ...,That’s a question. How shall we try it?


In [41]:
# Write the two-column frame loaded above to a local CSV (no index column),
# ready to be uploaded as training data.
# The original referenced `data_1`, which is never defined in this notebook.
data.to_csv("Shakespeare_Dataset_Full.csv", index=False)

## Create Training Job

In [42]:
import boto3
# Push the prepared CSV to the S3 location used as training input.
s3_client = boto3.client('s3')
s3_client.upload_file(
    Filename="Shakespeare_Dataset_Full.csv",
    Bucket="blog-posts-artifacts",
    Key="paraphrasing/training-data/Shakespeare_Dataset_Full.csv",
)

In [46]:
import sagemaker
from sagemaker.huggingface import HuggingFace

# Name of the IAM role the training job runs under
role = 'YodaMaker'

# Arguments handed to the fine-tuning script through the estimator
hyperparameters = dict(
    model_name_or_path='t5-base',
    output_dir='/opt/ml/model',
    train_file='/opt/ml/input/data/train/Shakespeare_Dataset_Full.csv',
    source_prefix='paraphrase: ',
    learning_rate=1e-4,
    do_train=True,
    num_train_epochs=1,
    per_device_train_batch_size=4,
    save_strategy='no',
)

In [47]:
# Repository and ref from which the fine-tuning script is fetched
git_config = dict(
    repo='https://github.com/huggingface/transformers.git',
    branch='v4.17.0',
)

# Build the Hugging Face estimator that describes the training job
huggingface_estimator = HuggingFace(
    # Which script to run, and where it lives inside the git repository
    git_config=git_config,
    source_dir='./examples/pytorch/summarization',
    entry_point='run_summarization.py',
    hyperparameters=hyperparameters,
    # Framework versions for the training environment
    transformers_version='4.17.0',
    pytorch_version='1.10.2',
    py_version='py38',
    # Compute and permissions
    role=role,
    instance_type='ml.g4dn.xlarge',
    instance_count=1,
    # S3 locations for the job's outputs and uploaded code
    output_path='s3://blog-posts-artifacts/paraphrasing/model-artifacts/',
    code_location='s3://blog-posts-artifacts/paraphrasing/training-checkpoints/',
    tags=[{'Key': 'owner', 'Value': 'ali@datachef.co'}],
)

In [48]:
# Kick off the training job on the uploaded CSV (submitted with wait=False)
train_channels = {
    'train': 's3://blog-posts-artifacts/paraphrasing/training-data/Shakespeare_Dataset_Full.csv',
}
huggingface_estimator.fit(train_channels, wait=False)

## Deploy the trained model

In [49]:
from sagemaker.huggingface import HuggingFaceModel
import sagemaker

# Name of the IAM role used when creating the endpoint
role = "YodaMaker"

# S3 location of the packaged, trained model.
# NOTE(review): the path appears to embed the name of one specific training job,
# so it likely needs updating after a new training run — confirm.
model_uri = (
    "s3://blog-posts-artifacts/paraphrasing/model-artifacts/"
    "huggingface-pytorch-training-2022-05-11-09-33-42-249/output/model.tar.gz"
)

# Wrap the trained artifact in a Hugging Face model object
huggingface_model = HuggingFaceModel(
    role=role,
    model_data=model_uri,
    transformers_version='4.17.0',
    pytorch_version='1.10.2',
    py_version='py38',
)

# Deploy the model to a SageMaker inference endpoint and keep a handle to it
predictor = huggingface_model.deploy(
    instance_type='ml.m5.2xlarge',  # instance type
    initial_instance_count=1,  # number of instances
    tags=[{'Key': 'owner', 'Value': 'ali@datachef.co'}],
)

-----!

In [58]:
# Shakespeare-style paraphrase: request ten sampled rewrites of one sentence
payload = {
    "inputs": "paraphrase: The ultimate test of your knowledge is your capacity to convey it to another.",
    "parameters": {"do_sample": True, "num_return_sequences": 10},
}
predictor.predict(payload)

[{'generated_text': 'The top of your wisdom is thou ability of enactment.'},
 {'generated_text': 'You are then the end in measure of your knowledge, your capacity to have it communicated to'},
 {'generated_text': 'The ultimate point of your knowledge is your capacity to convey it to his.'},
 {'generated_text': "Final proof of your knowledge is your capacity to do it t' other."},
 {'generated_text': "Your knowledge is the test, the end, that 'falsely test of "},
 {'generated_text': 'The test of your knowledge is thy ability to be carried to another.'},
 {'generated_text': 'A truly honest test of your knowledge is your capacity to convey it to those who do not have'},
 {'generated_text': 'The chief test of your knowledge is your rigueur to tell it to another.'},
 {'generated_text': 'You must prove in your knowledge, to communicate it.'},
 {'generated_text': 'The absolute test of thy knowledge is to convey to another.'}]

In [64]:
# Clean up: delete the inference endpoint behind `predictor` once experimentation is done
predictor.delete_endpoint()