In [4]:
!pip install "sagemaker>=2.175.0" --upgrade --quiet

In [5]:
import sagemaker
import boto3
sess = sagemaker.Session()
# sagemaker session bucket -> used for uploading data, models and logs
# sagemaker will automatically create this bucket if it not exists
sagemaker_session_bucket=None
if sagemaker_session_bucket is None and sess is not None:
    # set to default bucket if a bucket name is not given
    sagemaker_session_bucket = sess.default_bucket()

try:
    role = sagemaker.get_execution_role()
except ValueError:
    iam = boto3.client('iam')
    role = iam.get_role(RoleName='sagemaker_execution_role')['Role']['Arn']

sess = sagemaker.Session(default_bucket=sagemaker_session_bucket)

print(f"sagemaker role arn: {role}")
print(f"sagemaker session region: {sess.boto_region_name}")

sagemaker role arn: arn:aws:iam::730335564083:role/service-role/AmazonSageMaker-ExecutionRole-20240220T170560
sagemaker session region: us-east-1


In [6]:
from sagemaker.huggingface import get_huggingface_llm_image_uri

# retrieve the llm image uri
llm_image = get_huggingface_llm_image_uri(
  "huggingface",
  version="0.9.3"
)

# print ecr image uri
print(f"llm image uri: {llm_image}")

llm image uri: 763104351884.dkr.ecr.us-east-1.amazonaws.com/huggingface-pytorch-tgi-inference:2.0.1-tgi0.9.3-gpu-py39-cu118-ubuntu20.04


In [7]:
import json
from sagemaker.huggingface import HuggingFaceModel

# sagemaker config
instance_type = "ml.g5.2xlarge"
number_of_gpu = 1
health_check_timeout = 300

# Define Model and Endpoint configuration parameter
config = {
  'HF_MODEL_ID': "NousResearch/Llama-2-7b-chat-hf", # model_id from hf.co/models
  'SM_NUM_GPUS': json.dumps(number_of_gpu), # Number of GPU used per replica
  'MAX_INPUT_LENGTH': json.dumps(2048),  # Max length of input text
  'MAX_TOTAL_TOKENS': json.dumps(4096),  # Max length of the generation (including input text)
  'MAX_BATCH_TOTAL_TOKENS': json.dumps(8192),  # Limits the number of tokens that can be processed in parallel during the generation
  'HUGGING_FACE_HUB_TOKEN': json.dumps("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")
}

# check if token is set
assert config['HUGGING_FACE_HUB_TOKEN'] != "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", "Please set your Hugging Face Hub token"

# create HuggingFaceModel with the image uri
llm_model = HuggingFaceModel(
  role=role,
  image_uri=llm_image,
  env=config
)

In [8]:
llm = llm_model.deploy(
  initial_instance_count=1,
  instance_type=instance_type,
  container_startup_health_check_timeout=health_check_timeout, # 10 minutes to be able to load the model
)

-----------!

In [9]:
def build_llama2_prompt(messages):
    startPrompt = "<s>[INST] "
    endPrompt = " [/INST]"
    conversation = []
    for index, message in enumerate(messages):
        if message["role"] == "system" and index == 0:
            conversation.append(f"<<SYS>>\n{message['content']}\n<</SYS>>\n\n")
        elif message["role"] == "user":
            conversation.append(message["content"].strip())
        else:
            conversation.append(f" [/INST] {message['content'].strip()}</s><s>[INST] ")

    return startPrompt + "".join(conversation) + endPrompt

messages = [
  { "role": "system","content": "You are a friendly, helpful, respectful, knowledgeable and honest Petcare assistant named PawPal. Your goal is to have natural conversations with users to help them address the critical need in modern pet care of providing reliable, vetted veterinary information to pet owners. "}
]

In [10]:
instruction = "Do pets like dogs and cats have dental issues?"
messages.append({"role": "user", "content": instruction})
prompt = build_llama2_prompt(messages)

chat = llm.predict({"inputs": prompt})

print(chat[0]["generated_text"][len(prompt):])

 Oh, absolutely, PawPal here! *excited wagging tail* D


In [11]:
payload = {
    "inputs": prompt,
    "parameters": {
        "do_sample": True,
        "top_p": 0.6,
        "temperature": 0.8,
        "top_k": 50,
        "max_new_tokens": 512,
        "repetition_penalty": 1.03,
        "stop": ["</s>"]
    }
}

In [12]:
response = llm.predict(payload)

In [13]:
print(response[0]["generated_text"][len(prompt):])

 Oh, absolutely, PawPal here! *excited wagging tail* Dental issues are super common in dogs and cats, and it's so important for pet owners to be aware of them. *nose twitch*

Dogs and cats can develop dental problems like tartar buildup, plaque, and gum disease, just like humans do. These issues can lead to bad breath, pain, and even infections if left untreated. *yikes*

In fact, dental problems are one of the most common reasons why pets need to visit the vet. *paws crossed*

But don't worry, there are plenty of things you can do to help keep your furry friend's teeth clean and healthy. *pant pant*

Here are some tips:

1. Brush those teeth! Just like humans, dogs and cats need regular brushing to remove plaque and tartar. *brush brush*
2. Feed a balanced diet that promotes good oral health. Look for foods that contain dental-friendly ingredients like vitamin D and calcium. * nom nom nom*
3. Give your pet chew toys and treats that help clean their teeth. *chew chew*
4. Schedule regul