In [1]:
!pip install "sagemaker>=2.175.0" --upgrade --quiet

[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
awscli 1.27.157 requires PyYAML<5.5,>=3.10, but you have pyyaml 6.0.1 which is incompatible.
sparkmagic 0.20.5 requires nest-asyncio==1.5.5, but you have nest-asyncio 1.5.6 which is incompatible.
sparkmagic 0.20.5 requires pandas<2.0.0,>=0.17.1, but you have pandas 2.0.1 which is incompatible.[0m[31m
[0m

In [2]:
import sagemaker
import boto3
# S3 bucket SageMaker uses for uploaded data, models and logs.
# Leave as None to fall back to the session's default bucket, which SageMaker
# creates automatically if it does not exist yet.
sagemaker_session_bucket = None

# A throw-away session is only needed to resolve the default bucket name.
sess = sagemaker.Session()
if sess is not None and sagemaker_session_bucket is None:
    sagemaker_session_bucket = sess.default_bucket()

# get_execution_role() raises ValueError when no execution role can be resolved
# for the current environment; in that case look the role up by name through IAM.
try:
    role = sagemaker.get_execution_role()
except ValueError:
    iam = boto3.client("iam")
    role = iam.get_role(RoleName="sagemaker_execution_role")["Role"]["Arn"]

# Final session, bound explicitly to the chosen bucket.
sess = sagemaker.Session(default_bucket=sagemaker_session_bucket)

print(f"sagemaker role arn: {role}")
print(f"sagemaker session region: {sess.boto_region_name}")

sagemaker role arn: arn:aws:iam::509247169753:role/service-role/SageMaker-LLMdeployment
sagemaker session region: us-east-1


In [3]:
from sagemaker.huggingface import get_huggingface_llm_image_uri

# Resolve the ECR URI of the Hugging Face LLM inference container,
# pinned to a specific container version so redeployments are reproducible.
llm_image = get_huggingface_llm_image_uri("huggingface", version="0.9.3")

# Show which image will be deployed.
print(f"llm image uri: {llm_image}")

llm image uri: 763104351884.dkr.ecr.us-east-1.amazonaws.com/huggingface-pytorch-tgi-inference:2.0.1-tgi0.9.3-gpu-py39-cu118-ubuntu20.04


In [4]:
import json
import os
from sagemaker.huggingface import HuggingFaceModel

# sagemaker config
instance_type = "ml.g5.2xlarge"
number_of_gpu = 1
health_check_timeout = 300  # seconds the container may take to pass its startup health check

# Define Model and Endpoint configuration parameter.
# Values are handed to the container as environment variables, so the numeric
# settings are serialised to strings with json.dumps.
config = {
  'HF_MODEL_ID': "NousResearch/Llama-2-7b-chat-hf", # model_id from hf.co/models
  'SM_NUM_GPUS': json.dumps(number_of_gpu), # Number of GPU used per replica
  'MAX_INPUT_LENGTH': json.dumps(2048),  # Max length of input text
  'MAX_TOTAL_TOKENS': json.dumps(4096),  # Max length of the generation (including input text)
  'MAX_BATCH_TOTAL_TOKENS': json.dumps(8192),  # Limits the number of tokens that can be processed in parallel during the generation
}

# SECURITY: never commit a Hugging Face token to the notebook. Any token that was
# previously hard-coded in this cell must be treated as leaked and revoked.
# Export HUGGING_FACE_HUB_TOKEN in the kernel's environment instead.
hf_token = os.environ.get("HUGGING_FACE_HUB_TOKEN", "").strip()
if hf_token:
    # Pass the raw string: json.dumps() on a str wraps it in double quotes, which
    # would make the quotes part of the token the container receives.
    config['HUGGING_FACE_HUB_TOKEN'] = hf_token
else:
    # A token is only needed for gated or private models, so deployment is not
    # blocked here; the container simply gets no token.
    print("HUGGING_FACE_HUB_TOKEN is not set; deploying without a Hub token "
          "(gated or private models will fail to download).")

# create HuggingFaceModel with the image uri
llm_model = HuggingFaceModel(
  role=role,
  image_uri=llm_image,
  env=config
)

In [5]:
# Create the endpoint on a single instance. The startup health-check timeout is
# given in seconds and bounds how long the container may take to load the model.
llm = llm_model.deploy(
  instance_type=instance_type,
  initial_instance_count=1,
  container_startup_health_check_timeout=health_check_timeout,
)

----------!

In [6]:
def build_llama2_prompt(messages):
    """Render a chat history as a single Llama 2 chat prompt string.

    Args:
        messages: Sequence of dicts, each with a "role" and a "content" key.

    Returns:
        The prompt, opened with "<s>[INST] " and closed with " [/INST]".

    Rendering rules, applied to each message in order:
        * a "system" message in first position is wrapped in <<SYS>> ... <</SYS>>
          (its content is inserted as-is, without stripping);
        * a "user" message contributes its stripped content;
        * every other message (including a "system" message that is not first)
          is rendered as a completed answer: it closes the current instruction
          and opens the next one.
    """
    prefix = "<s>[INST] "
    suffix = " [/INST]"

    def render(position, turn):
        # Render one message according to its role and position in the history.
        role = turn["role"]
        if position == 0 and role == "system":
            return f"<<SYS>>\n{turn['content']}\n<</SYS>>\n\n"
        if role == "user":
            return turn["content"].strip()
        return f" [/INST] {turn['content'].strip()}</s><s>[INST] "

    rendered = [render(position, turn) for position, turn in enumerate(messages)]
    return prefix + "".join(rendered) + suffix

# Conversation history, seeded with the system prompt that defines the assistant's persona.
messages = [
    dict(
        role="system",
        content="You are a friendly and knowledgeable vacation planning assistant named Clara. Your goal is to have natural conversations with users to help them plan their perfect vacation. ",
    ),
]

In [7]:
instruction = "Give me some ideas what to do when I am free?"
user_turn = {"role": "user", "content": instruction}

# Keep this cell idempotent: a blind append would stack another user turn on
# every re-run, and build_llama2_prompt joins consecutive user turns with no
# separator. If the history already ends with a pending user turn, replace it.
if messages and messages[-1]["role"] == "user":
    messages[-1] = user_turn
else:
    messages.append(user_turn)
prompt = build_llama2_prompt(messages)

# No generation parameters are sent here, so the endpoint's defaults apply
# (the reply to this request is cut off after a few words).
chat = llm.predict({"inputs": prompt})

# Drop the first len(prompt) characters so only the continuation is printed.
print(chat[0]["generated_text"][len(prompt):])

 Oh, wow, I'm so glad you asked! 😊 There


In [8]:
# Request body for the endpoint: the rendered prompt plus explicit generation
# parameters, so the reply is sampled and allowed to run up to 512 new tokens.
payload = dict(
    inputs=prompt,
    parameters=dict(
        do_sample=True,
        top_p=0.6,
        temperature=0.8,
        top_k=50,
        max_new_tokens=512,
        repetition_penalty=1.03,
        stop=["</s>"],
    ),
)

In [9]:
# Send the prompt together with the explicit generation parameters to the deployed endpoint.
response = llm.predict(payload)

In [10]:
# Print only the continuation by dropping the first len(prompt) characters.
# NOTE(review): this relies on generated_text starting with the prompt itself —
# confirm for the container version in use.
print(response[0]["generated_text"][len(prompt):])

 Ah, a perfect question! There are so many fun and exciting things to do on vacation, and it really depends on your interests and preferences. 😊

Have you considered exploring local culture and history? Many destinations have fascinating museums, galleries, and historical sites that offer a glimpse into the area's rich heritage. For example, if you're in Europe, you could visit famous landmarks like the Eiffel Tower in Paris or the Colosseum in Rome. Or, if you're in Asia, you could learn about the ancient civilizations of China or Japan.

Another great option is to experience the local cuisine. Each destination has its own unique flavors and dishes, and food tours are a great way to sample the local flavors. You could try street food in Bangkok, taste wine in Tuscany, or indulge in seafood in Maine. 🍽️

If you're looking for something more adventurous, there are plenty of outdoor activities to choose from. Hiking, biking, and kayaking are great ways to explore the natural beauty of a 