In [1]:
import boto3
import sagemaker
from sagemaker.xgboost import XGBoost
from sagemaker.serverless import ServerlessInferenceConfig
from time import gmtime, strftime

# ------------------------------------------------------------------
# 0. Basic setup
# ------------------------------------------------------------------
# SageMaker session, execution role and default bucket used by later cells.
sess = sagemaker.Session()
role = sagemaker.get_execution_role()
bucket = sess.default_bucket()

# Region of the default boto3 session; both low-level clients are pinned to it.
region = boto3.Session().region_name
sm, autoscale = (
    boto3.client(service_name, region_name=region)
    for service_name in ("sagemaker", "application-autoscaling")
)

# Echo the resolved environment so the run is self-describing.
print("Region:", region)
print("Bucket:", bucket)
print("Role:", role)



sagemaker.config INFO - Not applying SDK defaults from location: /etc/xdg/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /home/sagemaker-user/.config/sagemaker/config.yaml
Region: us-east-1
Bucket: sagemaker-us-east-1-423623839320
Role: arn:aws:iam::423623839320:role/service-role/SageMaker-ExecutionRole-20250705T232334


In [3]:
# Load the name of the training job we trained earlier.
# `%store -r` restores a variable persisted with IPython's `%store` magic;
# presumably it was stored by the training notebook — run that first if the
# name is missing.
%store -r training_job_name
print("Training job name:", training_job_name)


Training job name: sagemaker-xgboost-2025-12-03-08-55-12-951


In [6]:
# Attach to the existing training job by name to obtain an estimator object;
# the deploy cell below calls `xgb_estimator.deploy(...)` on it.
xgb_estimator = XGBoost.attach(training_job_name=training_job_name)

2025-12-03 09:00:17 Starting - Preparing the instances for training
2025-12-03 09:00:17 Downloading - Downloading the training image
2025-12-03 09:00:17 Training - Training image download completed. Training in progress.
2025-12-03 09:00:17 Uploading - Uploading generated training model
  import pkg_resources
[2025-12-03 08:57:07.910 ip-10-0-79-58.ec2.internal:7 INFO utils.py:28] RULE_JOB_STOP_SIGNAL_FILENAME: None
[2025-12-03 08:57:07.987 ip-10-0-79-58.ec2.internal:7 INFO profiler_config_parser.py:111] User has disabled profiler.
[2025-12-03:08:57:08:INFO] Imported framework sagemaker_xgboost_container.training
[2025-12-03:08:57:08:INFO] No GPUs detected (normal if no gpus installed)
[2025-12-03:08:57:08:INFO] Invoking user training script.
[2025-12-03:08:57:08:INFO] Module training does not provide a setup.py. 
Generating setup.py
[2025-12-03:08:57:08:INFO] Generating setup.cfg
[2025-12-03:08:57:08:INFO] Generating MANIFEST.in
[2025-12-03:08:57:08:INFO] Installing module with the fol

In [7]:
# ------------------------------------------------------------------
# 1. Choose the deployment mode
#    "realtime"    -> regular endpoint
#    "autoscaling" -> regular endpoint + Application Auto Scaling
#    "serverless"  -> serverless endpoint
# ------------------------------------------------------------------
DEPLOY_MODE = "realtime"  # may be changed to "autoscaling" or "serverless"

# The endpoint name is "<prefix>-<mode>-<UTC timestamp>"; the timestamp has
# one-second resolution.
timestamp = strftime("%Y%m%d-%H%M%S", gmtime())
endpoint_name = "-".join(("retail-demand-xgb", DEPLOY_MODE, timestamp))
print("Endpoint name:", endpoint_name)


Endpoint name: retail-demand-xgb-realtime-20251204-073323


In [8]:
# ------------------------------------------------------------------
# 2. Autoscaling helper
# ------------------------------------------------------------------
def configure_autoscaling_for_endpoint(
    endpoint_name: str,
    min_capacity: int = 1,
    max_capacity: int = 2,
    target_invocations_per_instance: float = 2.0,
    role_arn: str = None,
    variant_name: str = "AllTraffic",
    scale_out_cooldown: int = 60,
    scale_in_cooldown: int = 300,
):
    """
    Configure Application Auto Scaling so an endpoint variant scales between
    [min_capacity, max_capacity] instances, tracking the predefined metric
    SageMakerVariantInvocationsPerInstance.

    Uses the module-level ``autoscale`` client (boto3
    "application-autoscaling").

    Args:
        endpoint_name: Name of the deployed endpoint.
        min_capacity: Lower bound for the variant's instance count.
        max_capacity: Upper bound for the variant's instance count.
        target_invocations_per_instance: Target value for the tracked metric.
        role_arn: Optional IAM role ARN forwarded as ``RoleARN``. When omitted,
            ``RoleARN`` is not sent; per the Application Auto Scaling API docs
            a service-linked role is used for services that support one.
        variant_name: Production variant to scale (default "AllTraffic").
        scale_out_cooldown: Seconds to wait after a scale-out activity.
        scale_in_cooldown: Seconds to wait after a scale-in activity.

    Raises:
        ValueError: If ``endpoint_name`` or ``variant_name`` is empty, the
            capacity bounds are inconsistent, or the target value is not
            positive. Raised before any AWS call is made.
    """
    # Validate locally so bad input fails fast instead of as a remote API error.
    if not endpoint_name:
        raise ValueError("endpoint_name must be a non-empty string.")
    if not variant_name:
        raise ValueError("variant_name must be a non-empty string.")
    if min_capacity < 0:
        raise ValueError(f"min_capacity must be >= 0, got {min_capacity}.")
    if max_capacity < min_capacity:
        raise ValueError(
            f"max_capacity ({max_capacity}) must be >= min_capacity ({min_capacity})."
        )
    if target_invocations_per_instance <= 0:
        raise ValueError(
            "target_invocations_per_instance must be > 0, "
            f"got {target_invocations_per_instance}."
        )

    scalable_dimension = "sagemaker:variant:DesiredInstanceCount"
    resource_id = f"endpoint/{endpoint_name}/variant/{variant_name}"
    print("[AS] Register scalable target:", resource_id)

    target_request = {
        "ServiceNamespace": "sagemaker",
        "ResourceId": resource_id,
        "ScalableDimension": scalable_dimension,
        "MinCapacity": min_capacity,
        "MaxCapacity": max_capacity,
        "SuspendedState": {
            "DynamicScalingInSuspended": False,
            "DynamicScalingOutSuspended": False,
            "ScheduledScalingSuspended": False,
        },
    }
    # Only forward RoleARN when the caller supplied one.
    if role_arn is not None:
        target_request["RoleARN"] = role_arn
    autoscale.register_scalable_target(**target_request)

    policy_name = f"{endpoint_name}-invocations-auto-scaling"
    print("[AS] Put scaling policy:", policy_name)

    autoscale.put_scaling_policy(
        PolicyName=policy_name,
        ServiceNamespace="sagemaker",
        ResourceId=resource_id,
        ScalableDimension=scalable_dimension,
        PolicyType="TargetTrackingScaling",
        TargetTrackingScalingPolicyConfiguration={
            "TargetValue": target_invocations_per_instance,
            "PredefinedMetricSpecification": {
                "PredefinedMetricType": "SageMakerVariantInvocationsPerInstance",
            },
            "ScaleOutCooldown": scale_out_cooldown,
            "ScaleInCooldown": scale_in_cooldown,
        },
    )

    print("[AS] Auto scaling configured for endpoint:", endpoint_name)



In [9]:
# ------------------------------------------------------------------
# 3. Deploy according to the selected mode
# ------------------------------------------------------------------
from sagemaker.serializers import CSVSerializer
from sagemaker.deserializers import JSONDeserializer

predictor = None

# Guard clause: an unrecognised mode fails here, before anything is deployed.
if DEPLOY_MODE not in ("realtime", "autoscaling", "serverless"):
    raise ValueError(f"Unknown DEPLOY_MODE: {DEPLOY_MODE}")

if DEPLOY_MODE == "serverless":
    # --- Mode 3: serverless endpoint ---
    print("[DEPLOY] Deploying serverless endpoint")

    serverless_config = ServerlessInferenceConfig(
        memory_size_in_mb=4096,  # adjustable, 1024–6144
        max_concurrency=5,       # concurrent requests
    )
    predictor = xgb_estimator.deploy(
        endpoint_name=endpoint_name,
        serverless_inference_config=serverless_config,
        serializer=CSVSerializer(),
        deserializer=JSONDeserializer(),
    )

    print("[DEPLOY] Serverless endpoint deployed:", endpoint_name)

else:
    # --- Modes 1 + 2: realtime endpoint (instance-based) ---
    print(f"[DEPLOY] Deploying realtime endpoint (mode={DEPLOY_MODE})")

    predictor = xgb_estimator.deploy(
        endpoint_name=endpoint_name,
        instance_type="ml.m5.xlarge",
        initial_instance_count=1,
        serializer=CSVSerializer(),
        deserializer=JSONDeserializer(),
    )

    print("[DEPLOY] Realtime endpoint deployed:", endpoint_name)

    # Autoscaling mode layers a scaling policy on top of the realtime endpoint.
    if DEPLOY_MODE == "autoscaling":
        configure_autoscaling_for_endpoint(
            endpoint_name=endpoint_name,
            min_capacity=1,
            max_capacity=2,
            target_invocations_per_instance=2.0,
            role_arn=role,
        )

print("Deployed endpoint:", endpoint_name)


[DEPLOY] Deploying realtime endpoint (mode=realtime)
----------------------------------------------*

Please check the troubleshooting guide for common errors: https://docs.aws.amazon.com/sagemaker/latest/dg/sagemaker-python-sdk-troubleshooting.html#sagemaker-python-sdk-troubleshooting-create-endpoint
