### Requirements


In [None]:
!pip install sagemaker python-dotenv --quiet

[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/844.7 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [91m━━━━━━━━━━━━━━━━━━━━━━━━[0m[90m╺[0m[90m━━━━━━━━━━━━━[0m [32m542.7/844.7 kB[0m [31m17.5 MB/s[0m eta [36m0:00:01[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m844.7/844.7 kB[0m [31m17.2 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m135.9/135.9 kB[0m [31m14.0 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m79.8/79.8 kB[0m [31m9.1 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m11.0/11.0 MB[0m [31m74.4 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m79.8/79.8 kB[0m [31m9.3 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━

In [None]:
import sagemaker
import boto3

import os
from dotenv import load_dotenv

### AWS Configuration

In [None]:
# Load variables from a local .env file into os.environ. The AWS credentials
# read from os.environ later in this notebook are expected to come from there.
load_dotenv()

True

  ### Secret keys

In [None]:
# Region used for every AWS client/session created in this notebook; it is
# also exported as the process-wide default region.
REGION_NAME = "ap-south-1"
os.environ["AWS_DEFAULT_REGION"] = REGION_NAME

# Name of the IAM role whose ARN is looked up further down.
ROLE_NAME = 'Sagemaker-ExecutionRole'

# Keyword arguments shared by the boto3 client and session constructors.
# The credentials are taken from the environment, never hard-coded here.
auth_arguments = dict(
    aws_access_key_id=os.environ["aws_access_key_id"],
    aws_secret_access_key=os.environ["aws_secret_access_key"],
    region_name=REGION_NAME,
)

Look up the ARN of the SageMaker execution [IAM role](https://docs.aws.amazon.com/sagemaker/latest/dg/sagemaker-roles.html) and create the SageMaker session.

In [None]:
# Resolve the execution role's ARN from its name via the IAM API.
iam = boto3.client('iam', **auth_arguments)
role_description = iam.get_role(RoleName=ROLE_NAME)
role = role_description['Role']['Arn']

# SageMaker session backed by a boto3 session that carries the same
# credentials and region as the IAM client above.
boto_session = boto3.Session(**auth_arguments)
session = sagemaker.Session(boto_session)

### Deployment

In [None]:
from sagemaker.huggingface import get_huggingface_llm_image_uri

# image uri
# Pass the notebook's SageMaker session so the container image is resolved for
# the same region the model is deployed in (REGION_NAME). Without it the SDK
# falls back to a default session, which can point at a different region's
# ECR registry than the one the endpoint is created in.
llm_image = get_huggingface_llm_image_uri("huggingface", session=session)

print(f"image uri: {llm_image}")

image uri: 763104351884.dkr.ecr.us-east-1.amazonaws.com/huggingface-pytorch-tgi-inference:2.0.0-tgi0.8.2-gpu-py39-cu118-ubuntu20.04


In [None]:
from sagemaker.huggingface import HuggingFaceModel

# Environment handed to the container; HF_MODEL_ID names the Hugging Face Hub
# model to serve (Falcon 7b).
hub = dict(HF_MODEL_ID='tiiuae/falcon-7b')

# Model definition: container image + environment + execution role, bound to
# the SageMaker session created earlier in the notebook.
huggingface_model = HuggingFaceModel(
    image_uri=llm_image,
    env=hub,
    role=role,
    sagemaker_session=session,
)

In [None]:
# deploy model to SageMaker
predictor = huggingface_model.deploy(
	initial_instance_count=1, # number of instances
	instance_type='ml.g5.2xlarge', #'ml.g5.4xlarge'
 	container_startup_health_check_timeout=300
)

--------------!

### Model inference

In [None]:
# define prompt
prompt = """You are the most advanced AI assistant on the planet, called Falcon.

User: How can we set up Kubernetes cluster on AWS? Think step by step.
Falcon:"""

# hyperparameters for llm
request = {
  "inputs": prompt,
  "parameters": {
    "do_sample": True,
    "top_p": 0.9,
    "temperature": 0.7,
    "max_new_tokens": 512,
    "stop": ["\nUser:","<|endoftext|>","</s>"]
  }
}

# request to endpoint
response = predictor.predict(request)

# model response
assistant = response[0]["generated_text"][len(prompt):]

In [None]:
# Show the completion extracted from the endpoint response.
print(assistant)

In [None]:
# DELETE ENDPOINT to avoid unnecessary expenses
# The model is deleted first; `finally` guarantees the endpoint deletion is
# still attempted if deleting the model raises, so a running endpoint is never
# left behind because of an earlier cleanup error.
try:
    predictor.delete_model()
finally:
    predictor.delete_endpoint()