# Endpoint Deployment

In [1]:
import json
import tarfile
from time import sleep

import boto3
import numpy as np
import sagemaker
from sagemaker.pytorch import PyTorchModel

sagemaker.config INFO - Not applying SDK defaults from location: /etc/xdg/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /home/sagemaker-user/.config/sagemaker/config.yaml


In [2]:
# Session setup: SageMaker session, execution role, and the S3 client
sess = sagemaker.Session()
role = sagemaker.get_execution_role()
s3 = boto3.client("s3")

# Region reported by the SageMaker session
region = sess.boto_region_name

# Default bucket of the SageMaker session; the model archive is uploaded here
bucket_name = sess.default_bucket()
# Key-style path for the model checkpoint (not referenced by the cells shown here)
model_path = "airdata/model.pt"

In [3]:
# Define model artifact file name
tar_model_path = "model.tar.gz"

# Package the local checkpoint best_model_03.pt into a gzipped tarball,
# storing it inside the archive under the name model.pt.
# (tarfile is imported with the other modules in the first cell.)
with tarfile.open(tar_model_path, "w:gz") as tar:
    tar.add("best_model_03.pt", arcname="model.pt")


In [4]:
# Push the archive to the session bucket and remember its S3 URI
model_archive_key = "airdata/model.tar.gz"
s3.upload_file(Filename=tar_model_path, Bucket=bucket_name, Key=model_archive_key)
s3_model_uri = "s3://{}/{}".format(bucket_name, model_archive_key)

print("Model uploaded to:", s3_model_uri)

Model uploaded to: s3://sagemaker-us-east-1-768099485759/airdata/model.tar.gz


In [5]:
# Describe the model to SageMaker: the uploaded archive, the execution role,
# and the inference.py entry point taken from the "code" source directory.
model_settings = {
    "model_data": s3_model_uri,
    "role": role,
    "entry_point": "inference.py",
    "source_dir": "code",
    "framework_version": "1.12",
    "py_version": "py38",
}
pytorch_model = PyTorchModel(**model_settings)

In [6]:
# Deploy the model to an endpoint backed by a single ml.m5.large instance
predictor = pytorch_model.deploy(
    initial_instance_count=1,
    instance_type="ml.m5.large",
)

# Report the name of the endpoint that was just created
print("\nSageMaker endpoint deployed at:", predictor.endpoint_name)

------!
SageMaker endpoint deployed at: pytorch-inference-2025-02-19-17-18-00-634


In [7]:
# SageMaker runtime client, used below to invoke the deployed endpoint
runtime_client = boto3.client(service_name="sagemaker-runtime")

In [8]:
def _to_json_compatible(obj):
    """Fallback encoder for ``json.dumps``: turn array-like objects into lists.

    ``json.dumps`` calls this only for objects it cannot serialize itself.
    Anything exposing ``tolist()`` (e.g. NumPy arrays and scalars) is converted
    to built-in Python types; everything else is rejected with ``TypeError``,
    mirroring the standard encoder.
    """
    if hasattr(obj, "tolist"):
        return obj.tolist()
    raise TypeError(f"Object of type {type(obj).__name__} is not JSON serializable")


def query_endpoint(endpoint_name, input_data):
    """Send ``input_data`` to an endpoint and return the decoded JSON reply.

    Uses the module-level ``runtime_client`` to perform the call and prints
    the decoded response before returning it.

    Args:
        endpoint_name: Name of the endpoint to invoke.
        input_data: Payload placed under the ``"data"`` key of the JSON
            request body. May consist of built-in JSON types or of objects
            with a ``tolist()`` method, such as NumPy arrays.

    Returns:
        The response body parsed from JSON.

    Raises:
        TypeError: If ``input_data`` contains a value that cannot be
            converted to JSON.
    """
    # Serialize up front so array inputs are converted instead of crashing json.dumps
    payload = json.dumps({"data": input_data}, default=_to_json_compatible)
    response = runtime_client.invoke_endpoint(
        EndpointName=endpoint_name,
        ContentType="application/json",
        Body=payload,
    )
    result = json.loads(response["Body"].read().decode())
    print("Prediction response:", result)
    return result

## Load in production data to simulate queries to the endpoint

In [None]:
# Open the production dataset stored in the local .npz archive
production_data = np.load("production_data.npz")
# "X" holds the input features, "y" the labels (kept in case they are needed)
prod_X, prod_y = production_data["X"], production_data["y"]

## Simulate Production

In [2]:
# Replay the production samples against the endpoint, one request per interval
BATCH_SIZE = 1  # samples per request; raise this to send larger batches
SEND_INTERVAL_SECONDS = 60  # pause between consecutive requests

for start in range(0, len(prod_X), BATCH_SIZE):
    # Slice (rather than index) so the leading batch dimension is kept
    current_batch = prod_X[start : start + BATCH_SIZE]

    # query_endpoint takes the endpoint name first, and the payload has to be
    # JSON-serializable, so send a nested list instead of the raw ndarray
    result = query_endpoint(predictor.endpoint_name, current_batch.tolist())

    # Wait before sending the next batch; nothing to wait for after the last one
    if start + BATCH_SIZE < len(prod_X):
        sleep(SEND_INTERVAL_SECONDS)

KeyboardInterrupt: 