In [5]:
import boto3
import sagemaker
from time import gmtime, strftime

from sagemaker.model import ModelPackage
from sagemaker.serverless import ServerlessInferenceConfig
from sagemaker.serializers import CSVSerializer
from sagemaker.deserializers import JSONDeserializer

# ------------------------------------------------------------------
# 0. Basic setup
# ------------------------------------------------------------------
sess = sagemaker.Session()
role = sagemaker.get_execution_role()
region = boto3.Session().region_name
bucket = sess.default_bucket()

sm = boto3.client("sagemaker", region_name=region)
autoscale = boto3.client("application-autoscaling", region_name=region)

print("Region:", region)
print("Bucket:", bucket)
print("Role:", role)

# ‡πÇ‡∏´‡∏•‡∏î model_package_arn ‡∏ó‡∏µ‡πà‡πÑ‡∏î‡πâ‡∏à‡∏≤‡∏Å step register model
%store -r model_package_arn
print("Model package ARN:", model_package_arn)

Region: us-east-1
Bucket: sagemaker-us-east-1-423623839320
Role: arn:aws:iam::423623839320:role/service-role/SageMaker-ExecutionRole-20250705T232334
Model package ARN: arn:aws:sagemaker:us-east-1:423623839320:model-package/retail-demand-model-group/6


In [6]:
# ------------------------------------------------------------------
# 1. Guard: verify the model package really is Approved before deploying
# ------------------------------------------------------------------
mp_desc = sm.describe_model_package(ModelPackageName=model_package_arn)

# Pull out the fields we report on; .get() yields None for a missing key.
model_pkg_group = mp_desc.get("ModelPackageGroupName")
model_pkg_version = mp_desc.get("ModelPackageVersion")
approval_status = mp_desc.get("ModelApprovalStatus")

for _label, _value in (
    ("ModelPackageGroupName:", model_pkg_group),
    ("ModelPackageVersion  :", model_pkg_version),
    ("ModelApprovalStatus  :", approval_status),
):
    print(_label, _value)

if approval_status == "Approved":
    print("‚úÖ Model package is Approved. Proceeding to deployment.\n")
else:
    # Stop the notebook here: anything other than "Approved" must not be deployed.
    raise RuntimeError(
        f"‚ùå Model package {model_package_arn} is not Approved (status={approval_status}). "
        "Please approve it in SageMaker Model Registry UI before deploying."
    )


ModelPackageGroupName: retail-demand-model-group
ModelPackageVersion  : 6
ModelApprovalStatus  : Approved
‚úÖ Model package is Approved. Proceeding to deployment.



In [11]:

# ------------------------------------------------------------------
# 2. Choose the deploy mode
#    "realtime"    -> standard (instance-based) endpoint
#    "autoscaling" -> standard endpoint + Application Auto Scaling
#    "serverless"  -> serverless endpoint
# ------------------------------------------------------------------
DEPLOY_MODE = "serverless"   # or "realtime" or "autoscaling"

# Fail fast on a typo, before an endpoint name or ModelPackage is built
# from an unsupported mode.
VALID_DEPLOY_MODES = ("realtime", "autoscaling", "serverless")
if DEPLOY_MODE not in VALID_DEPLOY_MODES:
    raise ValueError(f"Unknown DEPLOY_MODE: {DEPLOY_MODE}")

# A UTC timestamp suffix gives every run of this cell a fresh endpoint name.
timestamp = strftime("%Y%m%d-%H%M%S", gmtime())
endpoint_name = f"retail-demand-xgb-{DEPLOY_MODE}-{timestamp}"
print("Endpoint name:", endpoint_name)

# Build the ModelPackage object from the registered model's ARN.
deploy_model = ModelPackage(
    role=role,
    model_package_arn=model_package_arn,
    sagemaker_session=sess,
)

# ------------------------------------------------------------------
# 3. Helper for auto scaling
# ------------------------------------------------------------------
def configure_autoscaling_for_endpoint(
    endpoint_name: str,
    min_capacity: int = 1,
    max_capacity: int = 3,
    target_invocations_per_instance: float = 5.0,
    role_arn: str = None,
    variant_name: str = "AllTraffic",
    scale_out_cooldown: int = 60,
    scale_in_cooldown: int = 300,
    autoscaling_client=None,
):
    """
    Configure Application Auto Scaling for a SageMaker endpoint variant.

    Registers the variant's instance count as a scalable target bounded by
    [min_capacity, max_capacity], then attaches a target-tracking policy on
    the predefined metric SageMakerVariantInvocationsPerInstance.

    Args:
        endpoint_name: Name of the endpoint to scale.
        min_capacity: Lower bound for the instance count.
        max_capacity: Upper bound for the instance count.
        target_invocations_per_instance: Target value for the tracked metric.
        role_arn: Optional IAM role ARN. It is forwarded as ``RoleARN`` only
            when given; Application Auto Scaling documents this parameter as
            required only for services without service-linked-role support.
        variant_name: Production variant to scale (default "AllTraffic").
        scale_out_cooldown: Seconds to wait after a scale-out activity.
        scale_in_cooldown: Seconds to wait after a scale-in activity.
        autoscaling_client: Client exposing ``register_scalable_target`` and
            ``put_scaling_policy``. Defaults to the notebook-level
            ``autoscale`` client.

    Raises:
        ValueError: If ``endpoint_name`` is empty, the capacity bounds are
            inconsistent, or the target value is not positive. Raised before
            any API call so no partial configuration is left behind.
    """
    if not endpoint_name:
        raise ValueError("endpoint_name must be a non-empty string.")
    if min_capacity < 0 or max_capacity < min_capacity:
        raise ValueError(
            f"Invalid capacity bounds: min_capacity={min_capacity}, "
            f"max_capacity={max_capacity}."
        )
    if target_invocations_per_instance <= 0:
        raise ValueError("target_invocations_per_instance must be positive.")

    # Resolve the client lazily so an injected client never touches the global.
    client = autoscaling_client if autoscaling_client is not None else autoscale

    resource_id = f"endpoint/{endpoint_name}/variant/{variant_name}"
    scalable_dimension = "sagemaker:variant:DesiredInstanceCount"
    print("[AS] Register scalable target:", resource_id)

    register_kwargs = {
        "ServiceNamespace": "sagemaker",
        "ResourceId": resource_id,
        "ScalableDimension": scalable_dimension,
        "MinCapacity": min_capacity,
        "MaxCapacity": max_capacity,
        "SuspendedState": {
            "DynamicScalingInSuspended": False,
            "DynamicScalingOutSuspended": False,
            "ScheduledScalingSuspended": False,
        },
    }
    # Only send RoleARN when the caller supplied one.
    if role_arn is not None:
        register_kwargs["RoleARN"] = role_arn

    client.register_scalable_target(**register_kwargs)

    policy_name = f"{endpoint_name}-invocations-auto-scaling"
    print("[AS] Put scaling policy:", policy_name)

    client.put_scaling_policy(
        PolicyName=policy_name,
        ServiceNamespace="sagemaker",
        ResourceId=resource_id,
        ScalableDimension=scalable_dimension,
        PolicyType="TargetTrackingScaling",
        TargetTrackingScalingPolicyConfiguration={
            "TargetValue": target_invocations_per_instance,
            "PredefinedMetricSpecification": {
                "PredefinedMetricType": "SageMakerVariantInvocationsPerInstance",
            },
            "ScaleOutCooldown": scale_out_cooldown,
            "ScaleInCooldown": scale_in_cooldown,
        },
    )

    print("[AS] ‚úÖ Auto scaling configured for endpoint:", endpoint_name)




Endpoint name: retail-demand-xgb-serverless-20251206-135312


In [12]:
# ------------------------------------------------------------------
# 4. Deploy according to the selected mode
# ------------------------------------------------------------------
predictor = None

# Reject anything that is not one of the three supported modes up front.
if DEPLOY_MODE not in ("realtime", "autoscaling", "serverless"):
    raise ValueError(f"Unknown DEPLOY_MODE: {DEPLOY_MODE}")

if DEPLOY_MODE == "serverless":
    # --- Mode 3: serverless endpoint ---
    print("[DEPLOY] Deploying serverless endpoint from ModelPackage")

    serverless_config = ServerlessInferenceConfig(
        memory_size_in_mb=4096,  # 1024-6144
        max_concurrency=5,
    )

    # NOTE(review): confirm that `logs` is actually honoured by deploy().
    predictor = deploy_model.deploy(
        endpoint_name=endpoint_name,
        serverless_inference_config=serverless_config,
        serializer=CSVSerializer(),
        deserializer=JSONDeserializer(),
        wait=True,
        logs=True,
    )

    print("[DEPLOY] ‚úÖ Serverless endpoint deployed:", endpoint_name)

else:
    # --- Modes 1 + 2: realtime (instance-based) endpoint ---
    print(f"[DEPLOY] Deploying realtime endpoint from ModelPackage (mode={DEPLOY_MODE})")

    predictor = deploy_model.deploy(
        initial_instance_count=1,
        instance_type="ml.m5.xlarge",   # adjust to fit budget/latency needs
        endpoint_name=endpoint_name,
        serializer=CSVSerializer(),
        deserializer=JSONDeserializer(),
    )

    print("[DEPLOY] ‚úÖ Realtime endpoint deployed:", endpoint_name)

    # Mode 2 only: attach Application Auto Scaling to the fresh endpoint.
    if DEPLOY_MODE == "autoscaling":
        configure_autoscaling_for_endpoint(
            endpoint_name=endpoint_name,
            min_capacity=1,
            max_capacity=3,
            target_invocations_per_instance=5.0,
            role_arn=role,
        )

print("\nüéâ Deployed endpoint:", endpoint_name)


[DEPLOY] Deploying serverless endpoint from ModelPackage


----![DEPLOY] ‚úÖ Serverless endpoint deployed: retail-demand-xgb-serverless-20251206-135312

üéâ Deployed endpoint: retail-demand-xgb-serverless-20251206-135312
