In [2]:
import sagemaker, boto3, json
from sagemaker.session import Session

# One default SageMaker session is enough for the whole notebook.
sagemaker_session = Session()
# IAM role ARN of the caller; read later by deploy_sagemaker_model as `aws_role`.
aws_role = sagemaker_session.get_caller_identity_arn()
aws_region = boto3.Session().region_name
# `sess` used to be a second, separately constructed default session; keep the
# name available but point it at the session created above.
sess = sagemaker_session

from sagemaker import image_uris, model_uris, script_uris
from sagemaker.model import Model
from sagemaker.predictor import Predictor
from sagemaker.utils import name_from_base

# model_version="*" fetches the latest version of the model.
infer_model_id = "huggingface-eqa-distilbert-base-uncased"
infer_model_version = "*"

# Plain string literal: the previous f-string had no placeholders.
endpoint_name = name_from_base("insight-bert")
inference_instance_type = "ml.p2.xlarge"


In [6]:
def deploy_sagemaker_model(model_id, model_version, endpoint_name, inference_instance_type=None):
    """Deploy a pre-trained model to a SageMaker endpoint and return its predictor.

    Looks up the inference container image, the inference script and the model
    artifact for ``model_id``/``model_version``, wraps them in a ``Model`` and
    deploys that model on a single instance.

    Args:
        model_id: Identifier of the model to deploy.
        model_version: Version of the model; "*" fetches the latest version.
        endpoint_name: Name used for both the SageMaker model and the endpoint.
        inference_instance_type: Instance type for the endpoint. Falls back to
            "ml.p2.xlarge" when None.

    Returns:
        The object returned by ``Model.deploy``. Because ``predictor_cls`` is
        set to ``Predictor``, this is expected to be a ``Predictor`` bound to
        the new endpoint.

    Note:
        Reads the module-level ``aws_role`` as the execution role, so the
        setup cell that defines it must have been run first.
    """
    if inference_instance_type is None:
        inference_instance_type = "ml.p2.xlarge"

    # Retrieve the inference docker container uri.
    deploy_image_uri = image_uris.retrieve(
        region=None,
        framework=None,
        image_scope="inference",
        model_id=model_id,
        model_version=model_version,
        instance_type=inference_instance_type,
    )
    # Retrieve the inference script uri.
    deploy_source_uri = script_uris.retrieve(
        model_id=model_id, model_version=model_version, script_scope="inference"
    )
    # Retrieve the base model uri.
    base_model_uri = model_uris.retrieve(
        model_id=model_id, model_version=model_version, model_scope="inference"
    )
    # Create the SageMaker model instance. The Predictor class must be passed
    # when deploying through the Model class so that inference can be run
    # through the sagemaker API.
    model = Model(
        image_uri=deploy_image_uri,
        source_dir=deploy_source_uri,
        model_data=base_model_uri,
        entry_point="inference.py",
        role=aws_role,
        predictor_cls=Predictor,
        name=endpoint_name,
    )
    # Deploy the model and hand the predictor back to the caller; previously it
    # was assigned to a local and discarded, leaving no handle to the endpoint.
    base_model_predictor = model.deploy(
        initial_instance_count=1,
        instance_type=inference_instance_type,
        endpoint_name=endpoint_name,
    )
    return base_model_predictor

In [3]:
# Create the "insight-bert" endpoint (extractive question answering model).
deploy_sagemaker_model(
    model_id="huggingface-eqa-distilbert-base-uncased",
    model_version="*",
    endpoint_name="insight-bert",
    inference_instance_type="ml.p2.xlarge",
)

--------!

In [None]:
# Create the "insight-summary" endpoint. Note that this rebinds the
# notebook-level `endpoint_name` set in the configuration cell.
endpoint_name = "insight-summary"
deploy_sagemaker_model(
    model_id="huggingface-text2text-flan-t5-large",
    model_version="*",
    endpoint_name=endpoint_name,
    inference_instance_type="ml.p2.xlarge",
)