In [3]:
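# Uncomment to install the dependencies into the running kernel's environment
# (%pip targets the active kernel, whereas !pip may install into a different Python):
# %pip install sagemaker torch boto3 scikit-learn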

In [12]:
import sagemaker
from sagemaker import image_uris
import boto3
import os
import time
import json
from pathlib import Path
import boto3
import json
import os
import joblib
import pickle
import tarfile
import sagemaker
from sagemaker.estimator import Estimator
import time
from time import gmtime, strftime
import subprocess

# --- SageMaker session and AWS clients shared by the rest of the notebook ---
role = sagemaker.get_execution_role()  # execution role for the endpoint
sess = sagemaker.session.Session()  # sagemaker session for interacting with different AWS APIs
bucket = sess.default_bucket()  # bucket to house artifacts
# Use the public accessor rather than the private `_region_name` attribute,
# which is an implementation detail of the SDK and may change between releases.
region = sess.boto_region_name
account_id = sess.account_id()
# S3 key prefix under which the model archive is stored.
s3_model_prefix = "djl-sme-sklearn-regression"

# Low-level boto3 clients: S3 for artifact upload, SageMaker for control-plane
# calls, and SageMaker Runtime for invoking the endpoint.
s3_client = boto3.client("s3")
sm_client = boto3.client("sagemaker")
smr_client = boto3.client("sagemaker-runtime")

sagemaker.config INFO - Not applying SDK defaults from location: /etc/xdg/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /home/ec2-user/.config/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /etc/xdg/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /home/ec2-user/.config/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /etc/xdg/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /home/ec2-user/.config/sagemaker/config.yaml


In [4]:
import torch
import torch.nn as nn
from sklearn.model_selection import train_test_split
import numpy as np

# Synthetic regression problem: y = 2x + 1 plus a small amount of Gaussian noise.
# The NumPy seed is fixed so the generated samples are the same on every run.
np.random.seed(0)
X = np.random.rand(100, 1)
noise = 0.1 * np.random.randn(100, 1)
y = 2 * X + 1 + noise

# Hold out 20% of the samples; random_state pins the shuffle.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# The training split is converted to float32 tensors for PyTorch.
X_train_tensor, y_train_tensor = (
    torch.from_numpy(split).float() for split in (X_train, y_train)
)

# Define a simple Linear Regression model
class LinearRegressionModel(nn.Module):
    """Single-feature linear regressor: one input, one output, one affine layer."""

    def __init__(self):
        super().__init__()
        # The attribute name `linear` determines the state_dict keys
        # ("linear.weight", "linear.bias"), so it must not be renamed.
        self.linear = nn.Linear(in_features=1, out_features=1)

    def forward(self, x):
        """Apply the affine layer to `x` and return the result."""
        prediction = self.linear(x)
        return prediction

# Seed PyTorch before the model is built: nn.Linear draws its initial weights
# from the global torch RNG, so without this the trained parameters (and the
# saved artifact) differ on every kernel restart even though NumPy is seeded.
torch.manual_seed(0)

# Instantiate the model
model = LinearRegressionModel()

# Define loss and optimizer
criterion = nn.MSELoss()
optimizer = torch.optim.SGD(model.parameters(), lr=0.1)

# Train the model with full-batch gradient descent.
num_epochs = 500
model.train()  # training mode only needs to be set once, not on every epoch
for epoch in range(num_epochs):
    optimizer.zero_grad()  # clear gradients accumulated by the previous step
    outputs = model(X_train_tensor)
    loss = criterion(outputs, y_train_tensor)
    loss.backward()
    optimizer.step()

# Save only the learned parameters (state_dict); the class definition is needed
# again at load time to rebuild the module.
model_filename = "model.pth"
torch.save(model.state_dict(), model_filename)

In [6]:
# Rebuild the module and restore the saved parameters into it.
loaded_model = LinearRegressionModel()
state_dict = torch.load(model_filename)
loaded_model.load_state_dict(state_dict)
loaded_model.eval()

# Sample inference on a single one-feature row.
payload = torch.tensor([[0.5]]).float()
with torch.no_grad():  # no gradients are needed for a forward-only check
    res = loaded_model(payload).numpy().tolist()[0]
print(res)

[2.0196571350097656]


In [7]:
%%writefile model.py
#!/usr/bin/env python

import logging
import os
import torch
import torch.nn as nn
from djl_python import Input
from djl_python import Output


class LinearRegressionModel(nn.Module):
    """Simple 1D linear regressor: a single affine layer with one input and one output."""

    def __init__(self):
        super().__init__()
        # The attribute name `linear` fixes the state_dict keys that
        # load_state_dict() matches against, so it must stay as is.
        self.linear = nn.Linear(in_features=1, out_features=1)

    def forward(self, x):
        """Apply the affine layer to `x` and return the result."""
        prediction = self.linear(x)
        return prediction


class TorchRegressorService(object):
    """Request handler that serves the PyTorch linear regressor.

    The weights are loaded once by ``initialize`` and reused by every
    ``inference`` call.
    """

    def __init__(self):
        self.initialized = False
        # Populated by initialize(); declared here so the attribute always exists.
        self.model = None

    def initialize(self, properties: dict):
        """
        Initialize model: load from model.pth

        Args:
            properties: Properties forwarded by the serving runtime. If it
                contains a ``model_dir`` entry the artifact is looked up in
                that directory; otherwise the current working directory is
                used, which matches the previous behavior.

        Raises:
            ValueError: If model.pth is not found.
        """
        # Resolve the artifact explicitly instead of depending on whatever the
        # process working directory happens to be.
        model_dir = (properties or {}).get("model_dir") or "."
        model_path = os.path.join(model_dir, "model.pth")
        if not os.path.exists(model_path):
            raise ValueError("Expecting a model.pth artifact for PyTorch Model Loading")

        logging.info("Loading model weights from %s", model_path)
        model = LinearRegressionModel()
        # map_location keeps loading working on CPU-only hosts.
        model.load_state_dict(torch.load(model_path, map_location=torch.device('cpu')))
        model.eval()

        # Publish the model only once it is fully loaded.
        self.model = model
        self.initialized = True

    def inference(self, inputs):
        """
        Perform inference on the input data

        Args:
            inputs: Request object exposing ``get_as_json()``. The JSON body is
                converted directly to a float32 tensor (e.g. ``[[0.5]]``).

        Returns:
            An ``Output`` holding the first element of the prediction as JSON,
            or an ``Output`` flagged as an error if anything fails.
        """
        try:
            data = inputs.get_as_json()
            logging.debug("Received payload of type %s: %s", type(data).__name__, data)

            # Convert input data to tensor
            tensor_input = torch.tensor(data, dtype=torch.float32)

            # Predict
            with torch.no_grad():
                prediction = self.model(tensor_input)
                # NOTE: only the first element of the prediction is returned;
                # any further rows in the request are dropped.
                prediction = prediction.numpy().tolist()[0]

            # Prepare output
            outputs = Output()
            outputs.add_as_json(prediction)
        except Exception as e:
            # Report the failure to the caller instead of crashing the worker.
            logging.exception("Inference failed")
            outputs = Output().error(str(e))

        return outputs


_service = TorchRegressorService()


def handle(inputs: Input):
    """
    Default handle function.

    Lazily initializes the module-level service on first use, then either
    returns None for an empty request or delegates to the service's
    inference method.
    """
    if not _service.initialized:
        _service.initialize(inputs.get_properties())

    # An empty request has no payload to score, so nothing is returned for it.
    return None if inputs.is_empty() else _service.inference(inputs)

Writing model.py


In [8]:
%%writefile serving.properties
engine=Python

Writing serving.properties


In [10]:
# Build the model archive: trained weights + inference handler + serving config.
# The stdlib tarfile module is used instead of spawning `tar`, whose exit status
# was never checked: a missing artifact now raises immediately instead of
# silently producing an incomplete archive that only fails at deployment time.
# If the handler needs extra packages, add a requirements.txt to the list below.
import tarfile

archive_members = ["model.pth", "model.py", "serving.properties"]
with tarfile.open("model.tar.gz", "w:gz") as archive:
    for member in archive_members:
        archive.add(member)

In [13]:
# Push the packaged model archive to the session bucket under the model prefix.
model_archive_key = f"{s3_model_prefix}/model.tar.gz"
with open("model.tar.gz", "rb") as archive_file:
    s3_client.upload_fileobj(archive_file, bucket, model_archive_key)

In [14]:
# Full S3 URI of the uploaded archive; displayed as the cell output.
sme_artifacts = f"s3://{bucket}/{s3_model_prefix}/model.tar.gz"
sme_artifacts

's3://sagemaker-us-east-1-474422712127/djl-sme-sklearn-regression/model.tar.gz'

In [15]:
# DJL inference container (CPU image). The registry host is built from the
# session's region so the notebook is not tied to us-east-1; in us-east-1 this
# yields exactly the same URI as before.
# NOTE(review): 763104351884 is the registry account from the original URI; some
# regions may publish the image under a different account or domain — confirm
# for your region and replace the URI if needed.
inference_image_uri = f"763104351884.dkr.ecr.{region}.amazonaws.com/djl-inference:0.29.0-cpu-full"

In [16]:
#Step 1: Model Creation
# A UTC timestamp suffix keeps the model name unique across runs.
model_timestamp = strftime("%Y-%m-%d-%H-%M-%S", gmtime())
sme_model_name = f"pt-djl-sme{model_timestamp}"
print(f"Model name: {sme_model_name}")

# Container definition: serving image plus the S3 location of the model archive.
primary_container = {
    "Image": inference_image_uri,
    "Mode": "SingleModel",
    "ModelDataUrl": sme_artifacts,
}
create_model_response = sm_client.create_model(
    ModelName=sme_model_name,
    ExecutionRoleArn=role,
    PrimaryContainer=primary_container,
)
model_arn = create_model_response["ModelArn"]

print("Created Model: " + model_arn)

Model name: pt-djl-sme2025-04-27-20-56-20
Created Model: arn:aws:sagemaker:us-east-1:474422712127:model/pt-djl-sme2025-04-27-20-56-20


In [17]:
#Step 2: EPC Creation
# A UTC timestamp suffix keeps the endpoint configuration name unique across runs.
epc_timestamp = strftime("%Y-%m-%d-%H-%M-%S", gmtime())
sme_epc_name = f"pt-djl-sme-epc{epc_timestamp}"

# Single production variant: one ml.c5.xlarge instance hosting the model created above.
production_variant = {
    "VariantName": "sklearnvariant",
    "ModelName": sme_model_name,
    "InstanceType": "ml.c5.xlarge",
    "InitialInstanceCount": 1,
}
endpoint_config_response = sm_client.create_endpoint_config(
    EndpointConfigName=sme_epc_name,
    ProductionVariants=[production_variant],
)
print(f"Endpoint Configuration Arn: {endpoint_config_response['EndpointConfigArn']}")

Endpoint Configuration Arn: arn:aws:sagemaker:us-east-1:474422712127:endpoint-config/pt-djl-sme-epc2025-04-27-20-56-33
