In [None]:
%pip install sagemaker U

In [None]:
import sagemaker
import boto3
sess = sagemaker.Session()

# SageMaker session bucket -> used for uploading data, models and logs.
# Leave this as None to fall back to the session's default bucket; the
# original note here says the bucket is created if it does not exist yet.
sagemaker_session_bucket = None
if sagemaker_session_bucket is None and sess is not None:
    # No bucket name was given, so use the session's default bucket.
    sagemaker_session_bucket = sess.default_bucket()

# Role management: prefer the execution role SageMaker reports for this
# environment. If that lookup raises ValueError (presumably when running
# outside SageMaker -- confirm), fetch the ARN of the IAM role named
# 'sagemaker_execution_role' instead.
try:
    role = sagemaker.get_execution_role()
except ValueError:
    iam = boto3.client("iam")
    role = iam.get_role(RoleName='sagemaker_execution_role')['Role']['Arn']

# Session bound explicitly to the bucket chosen above.
session = sagemaker.Session(default_bucket=sagemaker_session_bucket)
print(f'sagemaker role arn:{role}')
# Fixed label typo: this previously printed "aremaker session region".
print(f'sagemaker session region :{sess.boto_region_name}')


In [None]:
import sagemaker
import boto3
from sagemaker.huggingface import HuggingFaceModel


# Settings for the model pulled from the Hugging Face Hub
# (browse models at https://huggingface.co/models).
# This dict is handed to the model below as its `env` argument.
hub = dict(
    HF_MODEL_ID='distilbert/distilbert-base-uncased-distilled-squad',
    HF_TASK='question-answering',
)

# Wrap the Hub model in a SageMaker Hugging Face model object.
# `role` is the execution-role ARN resolved earlier in the notebook.
huggingface_model = HuggingFaceModel(
    role=role,
    env=hub,
    transformers_version='4.37.0',
    pytorch_version='2.1.0',
    py_version='py310',
)

# Deploy the model to a SageMaker inference endpoint:
# one instance of the ml.m5.xlarge instance type.
predictor = huggingface_model.deploy(
    instance_type='ml.m5.xlarge',
    initial_instance_count=1,
)

# Send a sample question/context pair; the result is the cell's output.
predictor.predict(
    dict(
        inputs=dict(
            question="What is my name?",
            context="My name is Clara and I live in Berkeley.",
        ),
    )
)


# Check the endpoint over HTTPS once authentication has been set up

In [None]:
import requests
import json
import boto3
from requests.auth import HTTPBasicAuth

# Invoke the SageMaker runtime endpoint directly over HTTPS.
#
# The runtime API does not accept HTTP Basic auth: every request must be
# signed with AWS Signature Version 4. botocore (installed together with
# boto3) provides the signer, so it is imported here next to its only use.
from botocore.auth import SigV4Auth
from botocore.awsrequest import AWSRequest

# Region and endpoint name that make up the invocation URL.
region = "ap-southeast-2"
endpoint_name = "huggingface-pytorch-inference-2024-08-19-12-48-28-089"
url = f"https://runtime.sagemaker.{region}.amazonaws.com/endpoints/{endpoint_name}/invocations"

# Resolve AWS credentials through boto3's normal lookup chain.
session = boto3.Session()
credentials = session.get_credentials()
if credentials is None:
    raise RuntimeError("No AWS credentials found; configure them before calling the endpoint.")

# Example payload.
# NOTE(review): assumes this endpoint serves the question-answering model,
# which expects {"inputs": {"question": ..., "context": ...}} -- adjust the
# payload if the endpoint hosts a different task.
data = {
    "inputs": {
        "question": "What is my name?",
        "context": "My name is Clara and I live in Berkeley.",
    }
}

# Convert the data to JSON format
payload = json.dumps(data)
headers = {"Content-Type": "application/json"}

# Sign the exact request that will be sent. Frozen credentials give a
# consistent access key / secret key / session token triple, and the signer
# adds the Authorization, X-Amz-Date and (if present) X-Amz-Security-Token headers.
aws_request = AWSRequest(method="POST", url=url, data=payload, headers=headers)
auth = SigV4Auth(credentials.get_frozen_credentials(), "sagemaker", region)
auth.add_auth(aws_request)

# Send a POST request to the endpoint with the signed headers.
# The timeout keeps the cell from hanging forever on a stalled connection.
response = requests.post(
    url,
    data=payload,
    headers=dict(aws_request.headers.items()),
    timeout=60,
)

# Check the response
if response.status_code == 200:
    print("Success:", response.json())
else:
    print(f"Failed with status code: {response.status_code}")
    print("Response:", response.text)
