# Endpoint Deployment

In [29]:
import json
import boto3
import sagemaker
from sagemaker.pytorch import PyTorchModel
import numpy as np
from time import sleep

In [30]:
# Shared AWS handles used by the cells below: a SageMaker session, the
# notebook's execution role, and a plain boto3 S3 client for uploads.
sess = sagemaker.Session()
role = sagemaker.get_execution_role()
s3 = boto3.client("s3")

# Region and default bucket are both taken from the SageMaker session.
region = sess.boto_region_name
bucket_name = sess.default_bucket()

# NOTE(review): not referenced by any other cell visible here — confirm it is still needed.
model_path = "airdata/model.pt"

In [31]:
import tarfile
import shutil
import os
# Destination of the gzip-compressed archive.
tar_model_path = "models/model.tar.gz"

# Each entry is (file on disk, name it gets inside the archive).
artifact_members = [
    ("models/best_model_02.pt", "model.pt"),
    ("models/scaler.pkl", "scaler.pkl"),
]

# Write both files into a single model.tar.gz.
with tarfile.open(tar_model_path, "w:gz") as archive:
    for local_file, member_name in artifact_members:
        archive.add(local_file, arcname=member_name)


In [32]:
# Upload the packaged archive to the session's default bucket.
# The object key is defined once so that the upload destination and the
# URI reported below cannot drift apart.
model_key = "airdata/model.tar.gz"
s3.upload_file(tar_model_path, bucket_name, model_key)
s3_model_uri = f"s3://{bucket_name}/{model_key}"

print("Model uploaded to:", s3_model_uri)

Model uploaded to: s3://sagemaker-us-east-1-790237383528/airdata/model.tar.gz


In [33]:
# Describe the model for SageMaker: where the archive lives, which role to
# use, and which script in `code/` serves as the inference entry point.
model_settings = {
    "model_data": s3_model_uri,
    "role": role,
    "entry_point": "inference.py",
    "source_dir": "code",
    "framework_version": "1.12",
    "py_version": "py38",
}
pytorch_model = PyTorchModel(**model_settings)

In [34]:
# Deploy the model to an endpoint backed by a single ml.m5.large instance.
# NOTE(review): no cell visible here deletes the endpoint afterwards —
# confirm it is cleaned up (e.g. predictor.delete_endpoint()) once testing is done.
predictor = pytorch_model.deploy(
    initial_instance_count=1,
    instance_type="ml.m5.large",
)

print("\nSageMaker endpoint deployed at:", predictor.endpoint_name)

------!
SageMaker endpoint deployed at: pytorch-inference-2025-02-23-22-51-34-321


In [35]:
# Client for the SageMaker runtime API; query_endpoint() uses it to invoke the endpoint.
runtime_client = boto3.client(service_name="sagemaker-runtime")

In [36]:
def query_endpoint(endpoint_name, input_data):
    """Send one JSON request to a SageMaker endpoint and return the parsed reply.

    Uses the module-level ``runtime_client`` to invoke the endpoint, and
    prints the parsed response before returning it.

    Args:
        endpoint_name: Name of the endpoint to invoke.
        input_data: JSON-serializable payload; it is sent wrapped as
            ``{"data": input_data}``.

    Returns:
        The response body decoded and parsed from JSON.
    """
    request_body = json.dumps({"data": input_data})
    response = runtime_client.invoke_endpoint(
        EndpointName=endpoint_name,
        ContentType="application/json",
        Body=request_body,
    )

    # The response body is a stream: read it once, then decode and parse.
    raw_reply = response["Body"].read()
    result = json.loads(raw_reply.decode())

    print("Prediction response:", result)
    return result

## Load in production data to simulate queries to the endpoint

In [37]:
import pandas as pd

# Read the production CSV and keep only its "value" column.
production_data = pd.read_csv("data/production_data.csv")
value_column = production_data["value"].to_numpy()

# Reshape to a single-column 2-D array so each row holds one sample.
prod_X = value_column.reshape(-1, 1)

## Simulate Production

In [38]:
# Replay the production samples against the endpoint, one request every 60 seconds.
endpoint_name = predictor.endpoint_name
num_samples = len(prod_X)

for i, row in enumerate(prod_X):
    # Named `sample` rather than `input` so the builtin input() is not
    # shadowed for the rest of the notebook session.
    sample = row.tolist()

    # Send one sample to the endpoint for inference
    result = query_endpoint(endpoint_name, sample)

    # Wait before sending the next sample; after the last one there is
    # nothing left to send, so skip the final 60-second pause.
    if i < num_samples - 1:
        sleep(60)

Prediction response: {'prediction': 6.392635344971632}
Prediction response: {'prediction': 4.80802678583743}


KeyboardInterrupt: 