In [None]:
# --- Model Registry metadata ---
# Framework that produced the model artifact, and the framework release.
framework, framework_version = "PYTORCH", "2.5.1"

# Domain / task pair recorded on the model package.
ml_domain, ml_task = "NATURAL_LANGUAGE_PROCESSING", "CLASSIFICATION"

# Sent as NearestModelName when the model package version is registered.
model = "distilbert-base-uncased"

In [None]:
import os
import boto3
import dotenv
# Load the settings stored in the local .env file before reading them below.
dotenv.load_dotenv('.env')

region = "us-east-1"
S3_KEY = "sagemaker"

# Each of these is None when the corresponding variable is not set.
BUCKET_NAME = os.environ.get("AWS_BUCKET")
AWS_ROLE_ARN = os.environ.get("AWS_ROLE_ARN")
aws_access_key_id = os.environ.get("AWS_ACCESS_KEY_ID")
aws_secret_access_key = os.environ.get("AWS_SECRET_ACCESS_KEY")

# SageMaker client built with the credentials read above.
sagemaker_client = boto3.client(
    service_name='sagemaker',
    aws_access_key_id=aws_access_key_id,
    aws_secret_access_key=aws_secret_access_key,
    region_name=region,
)

In [None]:
from sagemaker import image_uris

# Resolve the PyTorch inference container image URI for a CPU instance type.
# The version is taken from `framework_version` so the image always matches the
# release recorded in the notebook configuration, rather than relying on a
# second hard-coded copy of the same string that could drift.
cpu_inference_image = image_uris.retrieve(
    framework="pytorch",
    region=region,
    version=framework_version,
    py_version="py311",
    instance_type="ml.t2.large",
    image_scope="inference",
)
print(cpu_inference_image)

In [None]:
from sagemaker import image_uris

# Resolve the PyTorch inference container image URI for a GPU instance type.
# The version is taken from `framework_version` so the image always matches the
# release recorded in the notebook configuration, rather than relying on a
# second hard-coded copy of the same string that could drift.
gpu_inference_image = image_uris.retrieve(
    framework="pytorch",
    region=region,
    version=framework_version,
    py_version="py311",
    instance_type="ml.p5.48xlarge",
    image_scope="inference",
)
print(gpu_inference_image)

In [None]:
from datetime import datetime

# The group name carries a timestamp, so every run produces a distinct name.
current_date = datetime.now()
string_date = f"{current_date:%Y-%m-%d-%H-%M-%S}"
model_package_group_name = f"intent-pytorch-inference-{string_date}"
print(model_package_group_name)

# Create the model package group that the package version is registered under.
model_pacakge_group_response = sagemaker_client.create_model_package_group(
    ModelPackageGroupDescription="Intent classify models",
    ModelPackageGroupName=model_package_group_name,
)

print(model_pacakge_group_response)

In [None]:
# import json
# with open('/Users/huynd/ipvm-intent-classify/deploy/intent/payload/test3.json', 'w') as f:
#     json.dump({"text": "give percentages of cloud dvr and hybrid in pysical security market in last 5 years list percentages for each year"}, f)

In [None]:
# import os
# import tarfile

# def compress_model(model_path):
#     dir_name = os.path.dirname(model_path)
#     file_name = os.path.basename(model_path)
#     archive_file_name = f"{file_name}.tar.gz"
#     with tarfile.open(os.path.join(dir_name, archive_file_name), 'w:gz') as tar:
#         tar.add(os.path.join(model_path, "test1.json"), arcname="test1.json")
#         tar.add(os.path.join(model_path, "test2.json"), arcname="test2.json")
#         tar.add(os.path.join(model_path, "test3.json"), arcname="test3.json")

#     return os.path.join(dir_name, archive_file_name)

# compress_model("/Users/huynd/ipvm-intent-classify/deploy/intent/payload")

In [None]:
import sagemaker
# Local archive paths referenced by the disabled upload step below.
payload_archive_name = "/Users/huynd/ipvm-intent-classify/payload.tar.gz"
model_archive_name = "/Users/huynd/ipvm-intent-classify/deploy/intent/model.tar.gz"

# Upload step (currently disabled):
# sample_payload_url = sagemaker.Session().upload_data(
#     payload_archive_name, bucket=BUCKET_NAME, key_prefix=S3_KEY
# )
# model_url = sagemaker.Session().upload_data(
#     model_archive_name, bucket=BUCKET_NAME, key_prefix=S3_KEY
# )

# Fixed S3 locations used in place of the upload results.
model_url = "s3://ipvm-chatbot/sagemaker/model.tar.gz"
sample_payload_url = "s3://ipvm-chatbot/sagemaker/payload.tar.gz"

In [None]:
# Container definition: GPU image, model archive and entry-point settings.
distilbert_container = {
    "ContainerHostname": "pytorch-distilbert",
    "Image": gpu_inference_image,
    "ModelDataUrl": model_url,
    "Framework": framework,
    "NearestModelName": model,
    "Environment": {
        "SAGEMAKER_CONTAINER_LOG_LEVEL": "20",
        "SAGEMAKER_PROGRAM": "inference.py",
        "SAGEMAKER_REGION": region,
        "SAGEMAKER_SUBMIT_DIRECTORY": model_url,
    },
}

# Instance types declared as supported for real-time inference.
realtime_instance_types = [
    "ml.inf1.xlarge",
    "ml.inf1.2xlarge",
    "ml.g4dn.xlarge",
    "ml.g4dn.2xlarge",
    "ml.g4dn.4xlarge",
    "ml.p3.2xlarge",
]

# Register a model package version in the group, with the sample payload attached.
model_package_version_response = sagemaker_client.create_model_package(
    ModelPackageGroupName=model_package_group_name,
    ModelPackageDescription="PyTorch DistilBERT Inference Recommender",
    Domain=ml_domain,
    Task=ml_task,
    SamplePayloadUrl=sample_payload_url,
    InferenceSpecification={
        "Containers": [distilbert_container],
        "SupportedRealtimeInferenceInstanceTypes": realtime_instance_types,
        "SupportedContentTypes": ["application/json"],
        "SupportedResponseMIMETypes": [],
    },
)

print(model_package_version_response)

In [None]:
# Show the role ARN read from the environment (None when AWS_ROLE_ARN is unset).
AWS_ROLE_ARN

In [None]:
import boto3

# Second SageMaker client; no explicit credentials are passed here.
client = boto3.client("sagemaker", region_name=region)

# Timestamped job name for the recommender job.
default_job = f"pytorch-basic-recommender-job-{datetime.now():%Y-%m-%d-%H-%M-%S}"

# Start a "Default" recommendations job against the registered package version.
default_response = client.create_inference_recommendations_job(
    JobName=default_job,
    JobType="Default",
    JobDescription="PyTorch Inference Basic Recommender Job",
    RoleArn=AWS_ROLE_ARN,
    InputConfig={"ModelPackageVersionArn": model_package_version_response["ModelPackageArn"]},
)

print(default_response)

In [None]:
# Print the recommender job name so it can be looked up later.
print(default_job)

In [None]:
%%time
import time

# Poll the recommender job until it leaves every in-flight state.
# A freshly created job can still be PENDING, so waiting on IN_PROGRESS alone
# may end the loop before the job has even started.
IN_FLIGHT_JOB_STATES = {"PENDING", "IN_PROGRESS", "STOPPING"}
JOB_POLL_SECONDS = 15

describe_inf_recommender_response = client.describe_inference_recommendations_job(JobName=str(default_job))
while describe_inf_recommender_response["Status"] in IN_FLIGHT_JOB_STATES:
    print(describe_inf_recommender_response["Status"])
    # Sleep before the next poll so no time is wasted once the job has finished.
    time.sleep(JOB_POLL_SECONDS)
    describe_inf_recommender_response = client.describe_inference_recommendations_job(JobName=str(default_job))

print(f'Inference recommender completed job with status: {describe_inf_recommender_response["Status"]}')
if describe_inf_recommender_response["Status"] == "FAILED":
    # Surface the service-reported reason (if present) next to the status.
    print(f'Failure reason: {describe_inf_recommender_response.get("FailureReason")}')

In [None]:
# Display the recommender job name.
default_job

In [None]:
# Re-fetch the job description and display the full response.
describe_inf_recommender_response = client.describe_inference_recommendations_job(JobName=default_job)
describe_inf_recommender_response

In [None]:
import pandas as pd
job_name = str(default_job)

# Fetch the current job description so the table reflects the latest state.
describe_inf_recommender_response = client.describe_inference_recommendations_job(JobName=job_name)

# The response may carry no recommendations (e.g. the job has not completed);
# fall back to an empty list instead of raising KeyError.
recommendations = describe_inf_recommender_response.get("InferenceRecommendations", [])

# Flatten each recommendation: endpoint settings, model settings and metrics
# are merged into the columns of a single row.
data = [
    {**x["EndpointConfiguration"], **x["ModelConfiguration"], **x["Metrics"]}
    for x in recommendations
]

# errors="ignore" keeps the cell working when there are no rows (and therefore
# no "VariantName" column). Building a new frame instead of dropping in place
# keeps the cell safe to re-run.
df = pd.DataFrame(data).drop(columns="VariantName", errors="ignore")
pd.set_option("display.max_colwidth", 400)
df.head()

In [None]:
# Display the raw recommendation entries from the last job description.
describe_inf_recommender_response["InferenceRecommendations"]

In [None]:
# Fetch and print the individual steps recorded for the recommender job.
list_job_steps_response = client.list_inference_recommendations_job_steps(JobName=f"{default_job}")
print(list_job_steps_response)

In [None]:
# Persist the recommendation table. index=False keeps the positional index out
# of the file, so reading it back does not add a spurious "Unnamed: 0" column.
df.to_csv("gpu_test.csv", index=False)

In [None]:
# Reload the saved GPU recommendation table from disk and display it.
df = pd.read_csv("gpu_test.csv")
df

In [None]:
# Load the CPU recommendation table and display it.
# NOTE(review): this rebinds `df`, replacing the frame loaded from gpu_test.csv,
# and no cell in this notebook writes cpu_test.csv — presumably it comes from an
# earlier run; confirm this is the table the instance type should be picked from.
df = pd.read_csv("cpu_test.csv")
df

In [None]:
# Pick the instance type of the recommendation with the lowest hourly cost.
# Fail loudly on an empty table so a meaningless value is never passed on to
# the endpoint configuration.
if df.empty:
    raise ValueError("No inference recommendations available to choose an instance type from")

cheapest_recommendation = df.sort_values(by=["CostPerHour"]).iloc[0]
instance_type = str(cheapest_recommendation["InstanceType"]).strip()
instance_type

In [None]:
# ARN returned when the model package version was created.
model_package_arn = model_package_version_response["ModelPackageArn"]
print(f"ModelPackage Version ARN : {model_package_arn}")

In [None]:
# List the model package versions registered in this run's package group
# and display the raw response.
list_model_packages_response = client.list_model_packages(
    ModelPackageGroupName=model_package_group_name
)
list_model_packages_response

In [None]:
# Take the ARN of the first package summary in the listing.
package_summaries = list_model_packages_response["ModelPackageSummaryList"]
model_version_arn = package_summaries[0]["ModelPackageArn"]
print(model_version_arn)

In [None]:
# Display the full description of the selected model package version.
client.describe_model_package(ModelPackageName=model_version_arn)

In [None]:
# Mark the registered package version as approved.
model_package_update_input_dict = dict(
    ModelPackageArn=model_package_arn,
    ModelApprovalStatus="Approved",
)
model_package_update_response = client.update_model_package(**model_package_update_input_dict)
model_package_update_response

In [None]:
# Timestamped model name, so every run produces a distinct name.
model_name = f"pytorch-distilbert-intent-{datetime.now():%Y-%m-%d-%H-%M-%S}"
print(f"Model name : {model_name}")

In [None]:
# The model's primary container is defined by the model package version ARN.
primary_container = dict(ModelPackageName=model_version_arn)

In [None]:
# Display the model package version ARN used for the primary container.
model_version_arn

In [None]:
# Create the SageMaker model from the package-backed primary container.
create_model_respose = client.create_model(
    ModelName=model_name,
    PrimaryContainer=primary_container,
    ExecutionRoleArn=AWS_ROLE_ARN,
)

print(f"Model arn : {create_model_respose['ModelArn']}")

In [None]:
# Timestamped endpoint-config name, so every run produces a distinct name.
endpoint_config_name = f"pytorch-distilbert-intent-endpoint-config-{datetime.now():%Y-%m-%d-%H-%M-%S}"

# One variant on a single instance of the selected type, carrying all traffic.
all_traffic_variant = {
    "VariantName": "AllTrafficVariant",
    "ModelName": model_name,
    "InitialInstanceCount": 1,
    "InstanceType": instance_type,
    "InitialVariantWeight": 1,
}

endpoint_config_response = client.create_endpoint_config(
    EndpointConfigName=endpoint_config_name,
    ProductionVariants=[all_traffic_variant],
)

endpoint_config_response

In [None]:
# Unlike the other resources in this notebook, the endpoint name is fixed
# (it carries no timestamp).
endpoint_name = "intent-model-endpoint"

# Create the endpoint from the endpoint configuration and show the response.
create_endpoint_response = client.create_endpoint(
    EndpointConfigName=endpoint_config_name,
    EndpointName=endpoint_name,
)

create_endpoint_response

In [None]:
%%time

# Wait for the endpoint to leave the "Creating" state.
ENDPOINT_POLL_SECONDS = 15

describe_endpoint_response = client.describe_endpoint(EndpointName=endpoint_name)

while describe_endpoint_response["EndpointStatus"] == "Creating":
    print(describe_endpoint_response["EndpointStatus"])
    # Sleep before re-polling so the loop exits as soon as creation has ended,
    # instead of waiting one extra interval after the final status.
    time.sleep(ENDPOINT_POLL_SECONDS)
    describe_endpoint_response = client.describe_endpoint(EndpointName=endpoint_name)

print(describe_endpoint_response["EndpointStatus"])
if describe_endpoint_response["EndpointStatus"] != "InService":
    # Make an unsuccessful creation visible before the invocation step.
    print(f'Endpoint is not in service: {describe_endpoint_response.get("FailureReason")}')

describe_endpoint_response

In [None]:
import time
import json
# Runtime client used to invoke the deployed endpoint. The region is pinned to
# the same `region` as every other client in this notebook; without it the
# client depends on ambient AWS configuration and may target another region
# (or fail because no region is configured).
runtime = boto3.client("sagemaker-runtime", region_name=region)

# Send one JSON request and time the round trip.
input_data = {"text": "Tell me about Evolv"}
payload = json.dumps(input_data)
start = time.time()
response = runtime.invoke_endpoint(
    EndpointName=endpoint_name,
    ContentType='application/json',
    Body=payload
)
print(f"took {time.time() - start}s")

# The response body is a stream; read and decode it before parsing as JSON.
result = json.loads(response['Body'].read().decode())
result