In [None]:
import sagemaker
import boto3

# --- Deployment configuration -------------------------------------------------
# AWS account that owns the IAM role used for deployment.
# NOTE(review): "<ACCOUNT_ID>" looks like a placeholder -- substitute the real
# account id before running, otherwise the role ARN below is not usable.
account_id = "<ACCOUNT_ID>"

# S3 bucket the SageMaker session uses for uploading data, models and logs.
# SageMaker will create this bucket automatically if it does not exist.
# Set a name here to override; leaving it as None selects the project bucket.
sagemaker_session_bucket = None
if sagemaker_session_bucket is None:
    sagemaker_session_bucket = "ecomragdev"

# IAM role handed to SageMaker when the model is created in the next cell.
role = f"arn:aws:iam::{account_id}:role/SmDeploy"

# Build the session exactly once, already bound to the chosen bucket
# (no throwaway Session is needed just to decide on the bucket name).
sess = sagemaker.Session(default_bucket=sagemaker_session_bucket)

print(f"sagemaker role arn: {role}")
print(f"sagemaker bucket: {sess.default_bucket()}")
print(f"sagemaker session region: {sess.boto_region_name}")

In [None]:
from sagemaker.huggingface.model import HuggingFaceModel
from sagemaker.serverless import ServerlessInferenceConfig


# Model artifact (model weights plus inference script) to deploy.
# Alternative artifact kept for reference:
#   s3://ecomragdev/models/vit-base-patch16-384.tar.gz
s3_location = "s3://ecomragdev/models/clip-vit-base-patch32.tar.gz"

# Describe the Hugging Face model to SageMaker.
huggingface_model = HuggingFaceModel(
    name="ecomrag-img-embed",
    model_data=s3_location,       # path to your model and script
    role=role,                    # IAM role with permissions to create an Endpoint
    transformers_version="4.26",  # transformers version used
    pytorch_version="1.13",       # pytorch version used
    py_version="py39",            # python version used
    env={
        # NOTE(review): the artifact is a CLIP checkpoint and the model is named
        # "img-embed", which does not obviously match the 'image-classification'
        # task. Presumably the archive ships its own inference script that
        # overrides the default pipeline -- confirm, or adjust HF_TASK.
        "HF_TASK": "image-classification",
    },
)

# Serverless endpoint sizing: memory per instance and the maximum number of
# concurrent invocations.
serverless_config = ServerlessInferenceConfig(
    memory_size_in_mb=3072,
    max_concurrency=4,
)

# Keep the returned predictor so later cells can invoke the endpoint
# (predictor.predict) or tear it down (predictor.delete_endpoint) without
# having to look the endpoint up by name.
predictor = huggingface_model.deploy(serverless_inference_config=serverless_config)
print(f"sagemaker endpoint name: {predictor.endpoint_name}")