# Endpoint Deployment

In [1]:
import json
import boto3
import sagemaker
from sagemaker.pytorch import PyTorchModel
from datetime import datetime, timedelta
import pandas as pd
import OpenAQ

sagemaker.config INFO - Not applying SDK defaults from location: /etc/xdg/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /home/sagemaker-user/.config/sagemaker/config.yaml


In [2]:
# Shared AWS handles for the rest of the notebook: SageMaker session,
# execution role, region, and an S3 client.
sess = sagemaker.Session()
role = sagemaker.get_execution_role()
region = sess.boto_region_name
s3 = boto3.client("s3")

# Artifacts go into the session's default bucket.
bucket_name = sess.default_bucket()
# NOTE(review): model_path is not referenced by the cells shown here —
# presumably the S3 key of the raw checkpoint; confirm it is still needed.
model_path = "airdata/model.pt"

In [3]:
import tarfile

# Local file name of the gzip-compressed archive that gets uploaded later
tar_model_path = "model.tar.gz"

# Pack the local checkpoint best_model_02.pt into the archive, stored under
# the member name model.pt
with tarfile.open(name=tar_model_path, mode="w:gz") as tar:
    tar.add(name="best_model_02.pt", arcname="model.pt")


In [4]:
# Push the archive to S3 and remember its URI for the model definition.
# The object key is named once so the upload target and the URI cannot drift apart.
model_key = "airdata/model.tar.gz"
s3.upload_file(Filename=tar_model_path, Bucket=bucket_name, Key=model_key)
s3_model_uri = f"s3://{bucket_name}/{model_key}"

print("Model uploaded to:", s3_model_uri)

Model uploaded to: s3://sagemaker-us-east-1-768099485759/airdata/model.tar.gz


In [5]:
# Define the SageMaker PyTorch model (the endpoint itself is created by the
# separate deploy() cell). The inference handler is code/inference.py.
model_settings = {
    "model_data": s3_model_uri,
    "role": role,
    "entry_point": "inference.py",
    "source_dir": "code",
    "framework_version": "1.12",
    "py_version": "py38",
}
pytorch_model = PyTorchModel(**model_settings)

In [6]:
# Deploy the model defined above to a single ml.m5.large instance
predictor = pytorch_model.deploy(
    initial_instance_count=1,
    instance_type="ml.m5.large",
)

print("\nSageMaker endpoint deployed at:", predictor.endpoint_name)

------!
SageMaker endpoint deployed at: pytorch-inference-2025-02-19-17-18-00-634


In [7]:
# Client for the SageMaker runtime API; query_endpoint() uses it to invoke the endpoint
runtime_client = boto3.client(service_name="sagemaker-runtime")

In [8]:
def query_endpoint(endpoint_name, input_data):
    """Invoke a SageMaker endpoint with `input_data` and return the parsed reply.

    The payload is sent as JSON in the form ``{"data": input_data}`` through the
    module-level ``runtime_client``. The response body is read, decoded, and
    parsed as JSON; the parsed result is printed and then returned.

    Args:
        endpoint_name: Name of the endpoint to invoke.
        input_data: JSON-serializable value placed under the "data" key.

    Returns:
        The JSON-decoded response body.
    """
    payload = json.dumps({"data": input_data})
    reply = runtime_client.invoke_endpoint(
        EndpointName=endpoint_name,
        ContentType="application/json",
        Body=payload,
    )
    raw_body = reply["Body"].read()
    result = json.loads(raw_body.decode())
    print("Prediction response:", result)
    return result

## Load in production data to simulate queries to the endpoint

In [1]:
import pandas as pd

# Read the recorded production measurements used to simulate endpoint traffic
data = pd.read_csv(filepath_or_buffer="data/production_data.csv")

# Preview the first five rows
data.head(5)

Unnamed: 0,measurement_id,sensor_id,location_id,location,latitude,longitude,epoch,duration,parameter,value,units
0,2601,2000855,947312,Canyon ES (2795),34.03213,-118.51198,2022-02-20 11:06:44,0 days 00:03:00,pm25,0.248365,µg/m³
1,2602,2000855,947312,Canyon ES (2795),34.03213,-118.51198,2022-02-20 11:12:25,0 days 00:03:00,pm25,0.001906,µg/m³
2,2603,2000855,947312,Canyon ES (2795),34.03213,-118.51198,2022-02-20 11:18:06,0 days 00:03:00,pm25,0.133733,µg/m³
3,2604,2000855,947312,Canyon ES (2795),34.03213,-118.51198,2022-02-20 11:23:47,0 days 00:03:00,pm25,-0.170043,µg/m³
4,2605,2000855,947312,Canyon ES (2795),34.03213,-118.51198,2022-02-20 11:29:28,0 days 00:03:00,pm25,-0.129921,µg/m³


## Simulate Production

In [2]:
from time import sleep

# Replay the production measurements against the live endpoint, one request
# per row, pausing for each row's recorded duration between requests.
# NOTE: this cell relies on `predictor` and `query_endpoint` from the
# deployment cells; after a kernel restart those cells must be re-run first.
for _, row in data.iterrows():
    # The measurement column is named "value" (see data.head()); "val" does
    # not exist and raises KeyError.
    query_endpoint(predictor.endpoint_name, [row["value"]])
    # total_seconds() is the full length of the duration; Timedelta.seconds is
    # only the seconds component and silently drops whole days.
    sleep(pd.to_timedelta(row["duration"]).total_seconds())

KeyboardInterrupt: 