### Assignment 4

### Following the first couple steps of the lab

In [110]:
# Import necessary libraries - Running the lab
import os
import boto3
import sagemaker
import pandas as pd
import numpy as np
import re
from time import sleep

# Libraies related to model group
import time
from sagemaker import get_execution_role, session
from sagemaker.model_card import (
    ModelCard,
    ModelOverview,
    ObjectiveFunction,
    Function,
    TrainingDetails,
    IntendedUses,
    BusinessDetails,
    EvaluationJob,
    AdditionalInformation,
    Metric,
    MetricGroup,
    ModelCardStatusEnum,
    ObjectiveFunctionEnum,
    FacetEnum,
    RiskRatingEnum,
    MetricTypeEnum,
    EvaluationMetricTypeEnum,
)

In [3]:
# Helper functions
def get_csv_output_from_s3(s3uri, batch_file):
    file_name = "{}.out".format(batch_file)
    match = re.match("s3://([^/]+)/(.*)", "{}/{}".format(s3uri, file_name))
    output_bucket, output_prefix = match.group(1), match.group(2)
    s3.download_file(output_bucket, output_prefix, file_name)
    return pd.read_csv(file_name, sep=",", header=None)

In [4]:
# Define the variables that we need - will be using the same code in the lab demo for the dataset
role = sagemaker.get_execution_role()
sess = sagemaker.Session()
region = sess.boto_region_name

bucket = sess.default_bucket()
prefix = "DEMO-breast-cancer-prediction-xgboost-highlevel"

### Getting the data

In [5]:
# Initialize s3
s3 = boto3.client("s3")

filename = "wdbc.csv"
s3.download_file(
    f"sagemaker-example-files-prod-{region}", "datasets/tabular/breast_cancer/wdbc.csv", filename
)
data = pd.read_csv(filename, header=None)

# specify columns extracted from wbdc.names
data.columns = [
    "id",
    "diagnosis",
    "radius_mean",
    "texture_mean",
    "perimeter_mean",
    "area_mean",
    "smoothness_mean",
    "compactness_mean",
    "concavity_mean",
    "concave points_mean",
    "symmetry_mean",
    "fractal_dimension_mean",
    "radius_se",
    "texture_se",
    "perimeter_se",
    "area_se",
    "smoothness_se",
    "compactness_se",
    "concavity_se",
    "concave points_se",
    "symmetry_se",
    "fractal_dimension_se",
    "radius_worst",
    "texture_worst",
    "perimeter_worst",
    "area_worst",
    "smoothness_worst",
    "compactness_worst",
    "concavity_worst",
    "concave points_worst",
    "symmetry_worst",
    "fractal_dimension_worst",
]

# save the data
# data.to_csv("data/assignment_data.csv", sep=",", index=False)

data.sample(8)

Unnamed: 0,id,diagnosis,radius_mean,texture_mean,perimeter_mean,area_mean,smoothness_mean,compactness_mean,concavity_mean,concave points_mean,...,radius_worst,texture_worst,perimeter_worst,area_worst,smoothness_worst,compactness_worst,concavity_worst,concave points_worst,symmetry_worst,fractal_dimension_worst
522,91789,B,11.26,19.83,71.3,388.1,0.08511,0.04413,0.005067,0.005664,...,11.93,26.43,76.38,435.9,0.1108,0.07723,0.02533,0.02832,0.2557,0.07613
314,894047,B,8.597,18.6,54.09,221.2,0.1074,0.05847,0.0,0.0,...,8.952,22.44,56.65,240.1,0.1347,0.07767,0.0,0.0,0.3142,0.08116
63,859196,B,9.173,13.86,59.2,260.9,0.07721,0.08751,0.05988,0.0218,...,10.01,19.23,65.59,310.1,0.09836,0.1678,0.1397,0.05087,0.3282,0.0849
553,924342,B,9.333,21.94,59.01,264.0,0.0924,0.05605,0.03996,0.01282,...,9.845,25.05,62.86,295.8,0.1103,0.08298,0.07993,0.02564,0.2435,0.07393
133,867387,B,15.71,13.93,102.0,761.7,0.09462,0.09462,0.07135,0.05933,...,17.5,19.25,114.3,922.8,0.1223,0.1949,0.1709,0.1374,0.2723,0.07071
269,8910720,B,10.71,20.39,69.5,344.9,0.1082,0.1289,0.08448,0.02867,...,11.69,25.21,76.51,410.4,0.1335,0.255,0.2534,0.086,0.2605,0.08701
519,917080,B,12.75,16.7,82.51,493.8,0.1125,0.1117,0.0388,0.02995,...,14.45,21.74,93.63,624.1,0.1475,0.1979,0.1423,0.08045,0.3071,0.08557
490,91376701,B,12.25,22.44,78.18,466.5,0.08192,0.052,0.01714,0.01261,...,14.17,31.99,92.74,622.9,0.1256,0.1804,0.123,0.06335,0.31,0.08203


In [6]:
# Replace M/B Diagnosis with bool values
data["diagnosis"] = data["diagnosis"].apply(lambda x: ((x == "M")) + 0)
data.sample(8)

Unnamed: 0,id,diagnosis,radius_mean,texture_mean,perimeter_mean,area_mean,smoothness_mean,compactness_mean,concavity_mean,concave points_mean,...,radius_worst,texture_worst,perimeter_worst,area_worst,smoothness_worst,compactness_worst,concavity_worst,concave points_worst,symmetry_worst,fractal_dimension_worst
557,925236,0,9.423,27.88,59.26,271.3,0.08123,0.04971,0.0,0.0,...,10.49,34.24,66.5,330.6,0.1073,0.07158,0.0,0.0,0.2475,0.06969
11,84610002,1,15.78,17.89,103.6,781.0,0.0971,0.1292,0.09954,0.06606,...,20.42,27.28,136.5,1299.0,0.1396,0.5609,0.3965,0.181,0.3792,0.1048
473,9113846,0,12.27,29.97,77.42,465.4,0.07699,0.03398,0.0,0.0,...,13.45,38.05,85.08,558.9,0.09422,0.05213,0.0,0.0,0.2409,0.06743
433,908445,1,18.82,21.97,123.7,1110.0,0.1018,0.1389,0.1594,0.08744,...,22.66,30.93,145.3,1603.0,0.139,0.3463,0.3912,0.1708,0.3007,0.08314
121,86517,1,18.66,17.12,121.4,1077.0,0.1054,0.11,0.1457,0.08665,...,22.25,24.9,145.4,1549.0,0.1503,0.2291,0.3272,0.1674,0.2894,0.08456
241,883539,0,12.42,15.04,78.61,476.5,0.07926,0.03393,0.01053,0.01108,...,13.2,20.37,83.85,543.4,0.1037,0.07776,0.06243,0.04052,0.2901,0.06783
139,868871,0,11.28,13.39,73.0,384.8,0.1164,0.1136,0.04635,0.04796,...,11.92,15.77,76.53,434.0,0.1367,0.1822,0.08669,0.08611,0.2102,0.06784
262,888570,1,17.29,22.13,114.4,947.8,0.08999,0.1273,0.09697,0.07507,...,20.39,27.24,137.9,1295.0,0.1134,0.2867,0.2298,0.1528,0.3067,0.07484


In [7]:
# data split in three sets, training, validation and batch inference
rand_split = np.random.rand(len(data))
train_list = rand_split < 0.8
val_list = (rand_split >= 0.8) & (rand_split < 0.9)
batch_list = rand_split >= 0.9

data_train = data[train_list].drop(["id"], axis=1)
data_val = data[val_list].drop(["id"], axis=1)
data_batch = data[batch_list].drop(["diagnosis"], axis=1)
data_batch_noID = data_batch.drop(["id"], axis=1)

In [8]:
train_file = "train_data.csv"
data_train.to_csv(train_file, index=False, header=False)
sess.upload_data(train_file, key_prefix="{}/train".format(prefix))

validation_file = "validation_data.csv"
data_val.to_csv(validation_file, index=False, header=False)
sess.upload_data(validation_file, key_prefix="{}/validation".format(prefix))

batch_file = "batch_data.csv"
data_batch.to_csv(batch_file, index=False, header=False)
sess.upload_data(batch_file, key_prefix="{}/batch".format(prefix))

batch_file_noID = "batch_data_noID.csv"
data_batch_noID.to_csv(batch_file_noID, index=False, header=False)
sess.upload_data(batch_file_noID, key_prefix="{}/batch".format(prefix))

's3://sagemaker-us-east-1-004608622582/DEMO-breast-cancer-prediction-xgboost-highlevel/batch/batch_data_noID.csv'

### Training and creating the model

In [9]:
%%time
from time import gmtime, strftime

job_name = "xgb-" + strftime("%Y-%m-%d-%H-%M-%S", gmtime())
output_location = "s3://{}/{}/output/{}".format(bucket, prefix, job_name)
image = sagemaker.image_uris.retrieve(
    framework="xgboost", region=boto3.Session().region_name, version="1.7-1"
)

sm_estimator = sagemaker.estimator.Estimator(
    image,
    role,
    instance_count=1,
    instance_type="ml.m5.xlarge",
    volume_size=50,
    input_mode="File",
    output_path=output_location,
    sagemaker_session=sess,
)

sm_estimator.set_hyperparameters(
    objective="binary:logistic",
    max_depth=5,
    eta=0.2,
    gamma=4,
    min_child_weight=6,
    subsample=0.8,
    verbosity=0,
    num_round=100,
)

train_data = sagemaker.inputs.TrainingInput(
    "s3://{}/{}/train".format(bucket, prefix),
    distribution="FullyReplicated",
    content_type="text/csv",
    s3_data_type="S3Prefix",
)
validation_data = sagemaker.inputs.TrainingInput(
    "s3://{}/{}/validation".format(bucket, prefix),
    distribution="FullyReplicated",
    content_type="text/csv",
    s3_data_type="S3Prefix",
)
data_channels = {"train": train_data, "validation": validation_data}

# Start training by calling the fit method in the estimator
sm_estimator.fit(inputs=data_channels, job_name=job_name, logs=True)

INFO:sagemaker:Creating training-job with name: xgb-2024-05-31-23-53-31


2024-05-31 23:53:31 Starting - Starting the training job...
2024-05-31 23:53:47 Starting - Preparing the instances for training...
2024-05-31 23:54:11 Downloading - Downloading input data...
2024-05-31 23:54:36 Downloading - Downloading the training image...
2024-05-31 23:55:32 Training - Training image download completed. Training in progress.
2024-05-31 23:55:32 Uploading - Uploading generated training model.[34m[2024-05-31 23:55:27.555 ip-10-0-202-27.ec2.internal:7 INFO utils.py:28] RULE_JOB_STOP_SIGNAL_FILENAME: None[0m
[34m[2024-05-31 23:55:27.576 ip-10-0-202-27.ec2.internal:7 INFO profiler_config_parser.py:111] User has disabled profiler.[0m
[34m[2024-05-31:23:55:27:INFO] Imported framework sagemaker_xgboost_container.training[0m
[34m[2024-05-31:23:55:27:INFO] Failed to parse hyperparameter objective value binary:logistic to Json.[0m
[34mReturning the value itself[0m
[34m[2024-05-31:23:55:27:INFO] No GPUs detected (normal if no gpus installed)[0m
[34m[2024-05-31:23:5

### Create Batch Transform

In [10]:
%%time

# Create transformer
sm_transformer = sm_estimator.transformer(1, "ml.m4.xlarge")

# start a transform job
input_location = "s3://{}/{}/batch/{}".format(
    bucket, prefix, batch_file_noID
)  # Use input data without ID column

sm_transformer.transform(input_location, content_type="text/csv", split_type="Line")

sm_transformer.wait()

INFO:sagemaker:Creating model with name: sagemaker-xgboost-2024-05-31-23-56-13-241
INFO:sagemaker:Creating transform job with name: sagemaker-xgboost-2024-05-31-23-56-13-876


..........................................[34m[2024-06-01:00:03:19:INFO] No GPUs detected (normal if no gpus installed)[0m
[34m[2024-06-01:00:03:19:INFO] No GPUs detected (normal if no gpus installed)[0m
[34m[2024-06-01:00:03:19:INFO] nginx config: [0m
[34mworker_processes auto;[0m
[34mdaemon off;[0m
[34mpid /tmp/nginx.pid;[0m
[34merror_log  /dev/stderr;[0m
[34mworker_rlimit_nofile 4096;[0m
[34mevents {
  worker_connections 2048;[0m
[34m}[0m
[34mhttp {
  include /etc/nginx/mime.types;
  default_type application/octet-stream;
  access_log /dev/stdout combined;
  upstream gunicorn {
    server unix:/tmp/gunicorn.sock;
  }
  server {
    listen 8080 deferred;
    client_max_body_size 0;
    keepalive_timeout 3;
    location ~ ^/(ping|invocations|execution-parameters) {
      proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
      proxy_set_header Host $http_host;
      proxy_redirect off;
      proxy_read_timeout 60s;
      proxy_pass http://gunicorn;
    }

In [11]:
# Grabbing output - display first 8 values
output_df = get_csv_output_from_s3(sm_transformer.output_path, batch_file_noID)
output_df.head(8)

Unnamed: 0,0
0,0.976714
1,0.707197
2,0.993038
3,0.060286
4,0.841149
5,0.968204
6,0.031473
7,0.00906


In [12]:
# Join input with prediction results

# Content_type / accept and split_type / assemble_with are required to use IO joining feature
sm_transformer.assemble_with = "Line"
sm_transformer.accept = "text/csv"

# Start a transform job
input_location = "s3://{}/{}/batch/{}".format(
    bucket, prefix, batch_file
)  

# Use input data with ID column cause InputFilter will filter it out
sm_transformer.transform(
    input_location,
    split_type="Line",
    content_type="text/csv",
    input_filter="$[1:]",
    join_source="Input",
)

sm_transformer.wait()

INFO:sagemaker:Creating transform job with name: sagemaker-xgboost-2024-06-01-00-04-18-898


............................................[34m[2024-06-01:00:11:37:INFO] No GPUs detected (normal if no gpus installed)[0m
[34m[2024-06-01:00:11:37:INFO] No GPUs detected (normal if no gpus installed)[0m
[34m[2024-06-01:00:11:37:INFO] nginx config: [0m
[34mworker_processes auto;[0m
[34mdaemon off;[0m
[34mpid /tmp/nginx.pid;[0m
[34merror_log  /dev/stderr;[0m
[34mworker_rlimit_nofile 4096;[0m
[34mevents {
  worker_connections 2048;[0m
[34m}[0m
[35m[2024-06-01:00:11:37:INFO] No GPUs detected (normal if no gpus installed)[0m
[35m[2024-06-01:00:11:37:INFO] No GPUs detected (normal if no gpus installed)[0m
[35m[2024-06-01:00:11:37:INFO] nginx config: [0m
[35mworker_processes auto;[0m
[35mdaemon off;[0m
[35mpid /tmp/nginx.pid;[0m
[35merror_log  /dev/stderr;[0m
[35mworker_rlimit_nofile 4096;[0m
[35mevents {
  worker_connections 2048;[0m
[35m}[0m
[34mhttp {
  include /etc/nginx/mime.types;
  default_type application/octet-stream;
  access_log /dev/stdo

In [13]:
# Showcase outputs from the batch
output_df = get_csv_output_from_s3(sm_transformer.output_path, batch_file)
output_df.head(8)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,22,23,24,25,26,27,28,29,30,31
0,84458202,13.71,20.83,90.2,577.9,0.1189,0.1645,0.09366,0.05985,0.2196,...,28.14,110.6,897.0,0.1654,0.3682,0.2678,0.1556,0.3196,0.1151,0.976714
1,845636,16.02,23.24,102.7,797.8,0.08206,0.06669,0.03299,0.03323,0.1528,...,33.88,123.8,1150.0,0.1181,0.1551,0.1459,0.09975,0.2948,0.08452,0.707197
2,854002,19.27,26.47,127.9,1162.0,0.09401,0.1719,0.1657,0.07593,0.1853,...,30.9,161.4,1813.0,0.1509,0.659,0.6091,0.1785,0.3672,0.1123,0.993038
3,855167,13.44,21.58,86.18,563.0,0.08162,0.06031,0.0311,0.02031,0.1784,...,30.25,102.5,787.9,0.1094,0.2043,0.2085,0.1112,0.2994,0.07146,0.060286
4,85638502,13.17,21.81,85.42,531.5,0.09714,0.1047,0.08259,0.05252,0.1746,...,29.89,105.5,740.7,0.1503,0.3904,0.3728,0.1607,0.3693,0.09618,0.841149
5,857010,18.65,17.6,123.7,1076.0,0.1099,0.1686,0.1974,0.1009,0.1907,...,21.32,150.6,1567.0,0.1679,0.509,0.7345,0.2378,0.3799,0.09185,0.968204
6,857343,11.76,21.6,74.72,427.9,0.08637,0.04966,0.01657,0.01115,0.1495,...,25.72,82.98,516.5,0.1085,0.08615,0.05523,0.03715,0.2433,0.06563,0.031473
7,857810,13.05,19.31,82.61,527.2,0.0806,0.03789,0.000692,0.004167,0.1819,...,22.25,90.24,624.1,0.1021,0.06191,0.001845,0.01111,0.2439,0.06289,0.00906


In [14]:
# Update output filter to showcase only the ID and prediction results
sm_transformer.transform(
    input_location,
    split_type="Line",
    content_type="text/csv",
    input_filter="$[1:]",
    join_source="Input",
    output_filter="$[0,-1]",
)
sm_transformer.wait()

INFO:sagemaker:Creating transform job with name: sagemaker-xgboost-2024-06-01-00-12-23-876


.............................................
[34m[2024-06-01:00:19:55:INFO] No GPUs detected (normal if no gpus installed)[0m
[34m[2024-06-01:00:19:55:INFO] No GPUs detected (normal if no gpus installed)[0m
[34m[2024-06-01:00:19:55:INFO] nginx config: [0m
[34mworker_processes auto;[0m
[34mdaemon off;[0m
[34mpid /tmp/nginx.pid;[0m
[34merror_log  /dev/stderr;[0m
[34mworker_rlimit_nofile 4096;[0m
[34mevents {
  worker_connections 2048;[0m
[34m}[0m
[34mhttp {
  include /etc/nginx/mime.types;
  default_type application/octet-stream;
  access_log /dev/stdout combined;
  upstream gunicorn {
    server unix:/tmp/gunicorn.sock;
  }
  server {
    listen 8080 deferred;
    client_max_body_size 0;
    keepalive_timeout 3;
    location ~ ^/(ping|invocations|execution-parameters) {
      proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
      proxy_set_header Host $http_host;
      proxy_redirect off;
      proxy_read_timeout 60s;
      proxy_pass http://gunicorn;
 

In [15]:
# Inspect new ouptut for the transformer
output_df = get_csv_output_from_s3(sm_transformer.output_path, batch_file)
output_df.head(8)

Unnamed: 0,0,1
0,84458202,0.976714
1,845636,0.707197
2,854002,0.993038
3,855167,0.060286
4,85638502,0.841149
5,857010,0.968204
6,857343,0.031473
7,857810,0.00906


### Upload the Sagemaker Model created during our training job to the Sagemaker Model Registry

In [16]:
sagemaker = boto3.client("sagemaker")

model_name = job_name
print(model_name)


info = sagemaker.describe_training_job(TrainingJobName=model_name)
model_data = info["ModelArtifacts"]["S3ModelArtifacts"]

primary_container = {"Image": image, "ModelDataUrl": model_data}

# Save our model to the Sagemaker Model Registry
create_model_response = sagemaker.create_model(
    ModelName=model_name, ExecutionRoleArn=role, PrimaryContainer=primary_container
)

print(create_model_response["ModelArn"])

xgb-2024-05-31-23-53-31
arn:aws:sagemaker:us-east-1:004608622582:model/xgb-2024-05-31-23-53-31


### Create Endpont Configuration

In [17]:
# Create Endpoint Configuration


# Create an endpoint config name. Here we create one based on the date  
# so it we can search endpoints based on creation time.
endpoint_config_name = 'lab4-1-endpoint-config' + strftime("%Y-%m-%d-%H-%M-%S", gmtime())                            
                            
instance_type = 'ml.m5.xlarge'

endpoint_config_response = sagemaker.create_endpoint_config(
    EndpointConfigName=endpoint_config_name, # You will specify this name in a CreateEndpoint request.
    # List of ProductionVariant objects, one for each model that you want to host at this endpoint.
    ProductionVariants=[
        {
            "VariantName": "variant1", # The name of the production variant.
            "ModelName": model_name, 
            "InstanceType": instance_type, # Specify the compute instance type.
            "InitialInstanceCount": 1 # Number of instances to launch initially.
        }
    ]
)

print(f"Created EndpointConfig: {endpoint_config_response['EndpointConfigArn']}")

Created EndpointConfig: arn:aws:sagemaker:us-east-1:004608622582:endpoint-config/lab4-1-endpoint-config2024-06-01-00-20-29


In [18]:
# Deploy our model to real-time endpoint

endpoint_name = 'lab4-1-endpoint' + strftime("%Y-%m-%d-%H-%M-%S", gmtime())                            


create_endpoint_response = sagemaker.create_endpoint(
                                            EndpointName=endpoint_name, 
                                            EndpointConfigName=endpoint_config_name) 

In [19]:
# Wait for endpoint to spin up

sagemaker.describe_endpoint(EndpointName=endpoint_name)

while True:
    print("Getting Job Status")
    res = sagemaker.describe_endpoint(EndpointName=endpoint_name)
    state = res["EndpointStatus"]
    
    if state == "InService":
        print("Endpoint in Service")
        break
    elif state == "Creating":
        print("Endpoint still creating...")
        sleep(60)
    else:
        print("Endpoint Creation Error - Check Sagemaker Console")
        break

Getting Job Status
Endpoint still creating...
Getting Job Status
Endpoint still creating...
Getting Job Status
Endpoint still creating...
Getting Job Status
Endpoint still creating...
Getting Job Status
Endpoint in Service


In [20]:
# Invoke Endpoint

sagemaker_runtime = boto3.client("sagemaker-runtime", region_name=region)

response = sagemaker_runtime.invoke_endpoint(
                            EndpointName=endpoint_name,
                            ContentType='text/csv',
                            Body=data_batch_noID.to_csv(header=None, index=False).strip('\n').split('\n')[0]
                            )
print(response['Body'].read().decode('utf-8'))

0.9767142534255981



In [21]:
# Checkout out the response
response

{'ResponseMetadata': {'RequestId': '73129524-76b5-43d7-b2f1-757d074f3bd7',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'x-amzn-requestid': '73129524-76b5-43d7-b2f1-757d074f3bd7',
   'x-amzn-invoked-production-variant': 'variant1',
   'date': 'Sat, 01 Jun 2024 00:24:30 GMT',
   'content-type': 'text/csv; charset=utf-8',
   'content-length': '19',
   'connection': 'keep-alive'},
  'RetryAttempts': 0},
 'ContentType': 'text/csv; charset=utf-8',
 'InvokedProductionVariant': 'variant1',
 'Body': <botocore.response.StreamingBody at 0x7f371978d6f0>}

In [22]:
# Delete Endpoint
#sagemaker.delete_endpoint(EndpointName=endpoint_name)

### Part 1
#### Setup Group Model

In [88]:
# Notes
# region = region
# role = role

# Create sm client
sm_client = boto3.client('sagemaker', region_name=region)

In [89]:
# Create Model Group
model_package_group_name = "breast-cancer-group-name-detector-" + str(round(time.time()))
model_package_group_input_dict = {
 "ModelPackageGroupName" : model_package_group_name,
 "ModelPackageGroupDescription" : "Sample model package group"
}

create_model_package_group_response = sm_client.create_model_package_group(**model_package_group_input_dict)
print('ModelPackageGroup Arn : {}'.format(create_model_package_group_response['ModelPackageGroupArn']))

ModelPackageGroup Arn : arn:aws:sagemaker:us-east-1:004608622582:model-package-group/breast-cancer-group-name-detector-1717207799


In [90]:
# Running the describe_model_package_group
response = sm_client.describe_model_package_group(
    ModelPackageGroupName=model_package_group_name
)

print(response)

{'ModelPackageGroupName': 'breast-cancer-group-name-detector-1717207799', 'ModelPackageGroupArn': 'arn:aws:sagemaker:us-east-1:004608622582:model-package-group/breast-cancer-group-name-detector-1717207799', 'ModelPackageGroupDescription': 'Sample model package group', 'CreationTime': datetime.datetime(2024, 6, 1, 2, 9, 59, 60000, tzinfo=tzlocal()), 'CreatedBy': {'UserProfileArn': 'arn:aws:sagemaker:us-east-1:004608622582:user-profile/d-ot3x26nvt9y2/pthai', 'UserProfileName': 'pthai', 'DomainId': 'd-ot3x26nvt9y2', 'IamIdentity': {'Arn': 'arn:aws:sts::004608622582:assumed-role/LabRole/SageMaker', 'PrincipalId': 'AROAQCEVR773FGX7Y4SZW:SageMaker'}}, 'ModelPackageGroupStatus': 'Completed', 'ResponseMetadata': {'RequestId': '67055d90-500b-4482-ae4d-5aa088fdeeb4', 'HTTPStatusCode': 200, 'HTTPHeaders': {'x-amzn-requestid': '67055d90-500b-4482-ae4d-5aa088fdeeb4', 'content-type': 'application/x-amz-json-1.1', 'content-length': '623', 'date': 'Sat, 01 Jun 2024 02:10:05 GMT'}, 'RetryAttempts': 0}}

In [95]:
sm_client.list_model_packages(ModelPackageGroupName="breast-cancer-group-name-detector-1717207799")

{'ModelPackageSummaryList': [],
 'ResponseMetadata': {'RequestId': 'dbff337a-8236-4d0a-8171-14af14e9f3bb',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'x-amzn-requestid': 'dbff337a-8236-4d0a-8171-14af14e9f3bb',
   'content-type': 'application/x-amz-json-1.1',
   'content-length': '30',
   'date': 'Sat, 01 Jun 2024 02:15:24 GMT'},
  'RetryAttempts': 0}}

### Part 2
#### Set Up Model Package

In [96]:
# Specify model source
model_url = "s3://sagemaker-us-east-1-004608622582/DEMO-breast-cancer-prediction-xgboost-highlevel/output/xgb-2024-05-31-19-31-38/xgb-2024-05-31-19-31-38/output/model.tar.gz"
image_uri = "683313688378.dkr.ecr.us-east-1.amazonaws.com/sagemaker-xgboost:1.7-1"

modelpackage_inference_specification =  {
    "InferenceSpecification": {
        "Containers": [
            {
                "Image": image_uri,
                "ModelDataUrl": model_url
            }
        ],
        "SupportedTransformInstanceTypes": [
            'ml.m4.xlarge'
        ],
        "SupportedRealtimeInferenceInstanceTypes": [
            'ml.m4.xlarge'
        ],
        "SupportedContentTypes": ["text/csv"],
        "SupportedResponseMIMETypes": ["text/csv"],
    }
}
modelpackage_inference_specification

{'InferenceSpecification': {'Containers': [{'Image': '683313688378.dkr.ecr.us-east-1.amazonaws.com/sagemaker-xgboost:1.7-1',
    'ModelDataUrl': 's3://sagemaker-us-east-1-004608622582/DEMO-breast-cancer-prediction-xgboost-highlevel/output/xgb-2024-05-31-19-31-38/xgb-2024-05-31-19-31-38/output/model.tar.gz'}],
  'SupportedTransformInstanceTypes': ['ml.m4.xlarge'],
  'SupportedRealtimeInferenceInstanceTypes': ['ml.m4.xlarge'],
  'SupportedContentTypes': ['text/csv'],
  'SupportedResponseMIMETypes': ['text/csv']}}

In [97]:
# Create model package input dictionary
model_package_name = "breast-cancer-model-name-detector-" + str(round(time.time()))
create_model_package_input_dict = { 
    "ModelPackageGroupName" : model_package_group_name,
    "ModelPackageDescription" : "Model to detect breast cancer",
    "ModelApprovalStatus" : "PendingManualApproval"
}
create_model_package_input_dict.update(modelpackage_inference_specification)

In [98]:
create_model_package_input_dict

{'ModelPackageGroupName': 'breast-cancer-group-name-detector-1717207799',
 'ModelPackageDescription': 'Model to detect breast cancer',
 'ModelApprovalStatus': 'PendingManualApproval',
 'InferenceSpecification': {'Containers': [{'Image': '683313688378.dkr.ecr.us-east-1.amazonaws.com/sagemaker-xgboost:1.7-1',
    'ModelDataUrl': 's3://sagemaker-us-east-1-004608622582/DEMO-breast-cancer-prediction-xgboost-highlevel/output/xgb-2024-05-31-19-31-38/xgb-2024-05-31-19-31-38/output/model.tar.gz'}],
  'SupportedTransformInstanceTypes': ['ml.m4.xlarge'],
  'SupportedRealtimeInferenceInstanceTypes': ['ml.m4.xlarge'],
  'SupportedContentTypes': ['text/csv'],
  'SupportedResponseMIMETypes': ['text/csv']}}

In [99]:
# Create model package
create_model_package_response = sm_client.create_model_package(**create_model_package_input_dict)
model_package_arn = create_model_package_response["ModelPackageArn"]
print('ModelPackage Version ARN : {}'.format(model_package_arn))

ModelPackage Version ARN : arn:aws:sagemaker:us-east-1:004608622582:model-package/breast-cancer-group-name-detector-1717207799/1


In [100]:
# Running the describe_model_package_group 
response = sm_client.describe_model_package(
    ModelPackageName="arn:aws:sagemaker:us-east-1:004608622582:model-package/breast-cancer-group-name-detector-1717207799/1"
)

print(response)

{'ModelPackageGroupName': 'breast-cancer-group-name-detector-1717207799', 'ModelPackageVersion': 1, 'ModelPackageArn': 'arn:aws:sagemaker:us-east-1:004608622582:model-package/breast-cancer-group-name-detector-1717207799/1', 'ModelPackageDescription': 'Model to detect breast cancer', 'CreationTime': datetime.datetime(2024, 6, 1, 2, 20, 40, 742000, tzinfo=tzlocal()), 'InferenceSpecification': {'Containers': [{'Image': '683313688378.dkr.ecr.us-east-1.amazonaws.com/sagemaker-xgboost:1.7-1', 'ImageDigest': 'sha256:cf81520a3b695293022793e292cf8bc3732b79231a6ebe1fb308086f6163a875', 'ModelDataUrl': 's3://sagemaker-us-east-1-004608622582/DEMO-breast-cancer-prediction-xgboost-highlevel/output/xgb-2024-05-31-19-31-38/xgb-2024-05-31-19-31-38/output/model.tar.gz'}], 'SupportedTransformInstanceTypes': ['ml.m4.xlarge'], 'SupportedRealtimeInferenceInstanceTypes': ['ml.m4.xlarge'], 'SupportedContentTypes': ['text/csv'], 'SupportedResponseMIMETypes': ['text/csv']}, 'ModelPackageStatus': 'Completed', 'Mo

### Part 3 
#### Creating the Model Card

In [101]:
# Create the model card based on our ARN
mp_details = ModelPackage.from_model_package_arn(
    model_package_arn="arn:aws:sagemaker:us-east-1:004608622582:model-package/breast-cancer-group-name-detector-1717207799/1",
    sagemaker_session=sess,
)

In [102]:
print('model pacakge', mp_details.__dict__)

model pacakge {'model_package_arn': 'arn:aws:sagemaker:us-east-1:004608622582:model-package/breast-cancer-group-name-detector-1717207799/1', 'model_package_description': 'Model to detect breast cancer', '_model_package_status': <ModelPackageStatusEnum.COMPLETED: 'Completed'>, '_model_approval_status': <ModelApprovalStatusEnum.PENDING_MANUAL_APPROVAL: 'PendingManualApproval'>, 'approval_description': None, 'model_package_group_name': 'breast-cancer-group-name-detector-1717207799', 'model_package_name': None, 'model_package_version': 1, 'domain': None, 'task': None, '_created_by': <sagemaker.model_card.model_card.ModelPackageCreator object at 0x7f3719a2b790>, '_source_algorithms': [], '_inference_specification': <sagemaker.model_card.model_card.InferenceSpecification object at 0x7f3718811900>, '_model_metrics': None}


In [109]:
# Create the model card
model_card_name = model_package_group_name
my_card = ModelCard(
    name=model_card_name,
    sagemaker_session = sess,
    model_package_details = mp_details
)

INFO:sagemaker.model_card.model_card:Evaluation details auto-discovery was unsuccessful. ModelMetrics was not found in the given model package. Please create one from scratch with EvaluationJob.


In [None]:
# Other Option

In [112]:
model_name="xgb-2024-05-31-23-53-31"

In [113]:
# Create the model
model_overview = ModelOverview.from_model_name(
    model_name=model_name,
    sagemaker_session=sess,
    model_description="Breast Cancer Identification",
    problem_type="Classification",
    algorithm_type="CNNs",
    model_creator="Assignment4",
    model_owner="Assignment4",
)
print(f"Model id: {model_overview.model_id}")
print(f"Model training images: {model_overview.inference_environment.container_image}")
print(f"Model: {model_overview.model_artifact}")

Model id: arn:aws:sagemaker:us-east-1:004608622582:model/xgb-2024-05-31-23-53-31
Model training images: ['683313688378.dkr.ecr.us-east-1.amazonaws.com/sagemaker-xgboost:1.7-1']
Model: ['s3://sagemaker-us-east-1-004608622582/DEMO-breast-cancer-prediction-xgboost-highlevel/output/xgb-2024-05-31-23-53-31/xgb-2024-05-31-23-53-31/output/model.tar.gz']


In [116]:
# Auto collect training details
objective_function = ObjectiveFunction(
    function=Function(
        function=ObjectiveFunctionEnum.MINIMIZE,
        facet=FacetEnum.LOSS,
    ),
    notes="This is an example objective function.",
)
training_details = TrainingDetails.from_model_overview(
    model_overview=model_overview,
    sagemaker_session=sess,
    objective_function=objective_function,
    training_observations="Add model training observations here.",
)
print(f"Training job id: {training_details.training_job_details.training_arn}")
print(
    f"Training image: {training_details.training_job_details.training_environment.container_image}"
)
print("Training Metrics: ")
print(
    [
        {"name": i.name, "value": i.value}
        for i in training_details.training_job_details.training_metrics
    ]
)

Training job id: arn:aws:sagemaker:us-east-1:004608622582:training-job/xgb-2024-05-31-23-53-31
Training image: ['683313688378.dkr.ecr.us-east-1.amazonaws.com/sagemaker-xgboost:1.7-1']
Training Metrics: 
[{'name': 'validation:logloss', 'value': 0.09588000178337097}, {'name': 'train:logloss', 'value': 0.07900000363588333}]


In [117]:
# Collect Evaluation Data
manual_metric_group = MetricGroup(
    name="binary classification metrics",
    metric_data=[Metric(name="accuracy", type=MetricTypeEnum.NUMBER, value=0.5)],
)
example_evaluation_job = EvaluationJob(
    name="Example evaluation job",
    evaluation_observation="Evaluation observations.",
    datasets=["s3://path/to/evaluation/data"],
    metric_groups=[manual_metric_group],
)
evaluation_details = [example_evaluation_job]

In [120]:
# More Details
intended_uses = IntendedUses(
    purpose_of_model="Predict Breaset Cancer",
    intended_uses="Not used except this test.",
    factors_affecting_model_efficiency="No.",
    risk_rating=RiskRatingEnum.LOW,
    explanations_for_risk_rating="Just an example.",
)
business_details = BusinessDetails(
    business_problem="Fighting Cancer",
    business_stakeholders="Patients, Doctors, Hosptital, Family, Friends",
    line_of_business="Health Care",
)
additional_information = AdditionalInformation(
    ethical_considerations="TBD",
    caveats_and_recommendations="Needs some EDA",
    custom_details={"custom details1": "details value"},
)

In [126]:
# Model Card Parameters
model_card_name = "breast-cancer-model-card"
my_card = ModelCard(
    name=model_card_name,
    status=ModelCardStatusEnum.DRAFT,
    model_overview=model_overview,
    training_details=training_details,
    intended_uses=intended_uses,
    business_details=business_details,
    evaluation_details=evaluation_details,
    additional_information=additional_information,
    sagemaker_session=sess,
)


In [127]:
# Create Model Card
my_card.create()
print(f"Model card {my_card.name} is successfully created with id {my_card.arn}")

INFO:sagemaker.model_card.model_card:Creating model card with name: breast-cancer-model-card


Model card breast-cancer-model-card is successfully created with id arn:aws:sagemaker:us-east-1:004608622582:model-card/breast-cancer-model-card


In [129]:
# Descrive Model Card
response = sm_client.describe_model_card(
    ModelCardName=model_card_name
)
print(response)

{'ModelCardArn': 'arn:aws:sagemaker:us-east-1:004608622582:model-card/breast-cancer-model-card', 'ModelCardName': 'breast-cancer-model-card', 'ModelCardVersion': 1, 'Content': '{"model_overview": {"model_id": "arn:aws:sagemaker:us-east-1:004608622582:model/xgb-2024-05-31-23-53-31", "model_name": "xgb-2024-05-31-23-53-31", "model_description": "Breast Cancer Identification", "problem_type": "Classification", "algorithm_type": "CNNs", "model_creator": "Assignment4", "model_owner": "Assignment4", "model_artifact": ["s3://sagemaker-us-east-1-004608622582/DEMO-breast-cancer-prediction-xgboost-highlevel/output/xgb-2024-05-31-23-53-31/xgb-2024-05-31-23-53-31/output/model.tar.gz"], "inference_environment": {"container_image": ["683313688378.dkr.ecr.us-east-1.amazonaws.com/sagemaker-xgboost:1.7-1"]}}, "intended_uses": {"purpose_of_model": "Predict Breaset Cancer", "intended_uses": "Not used except this test.", "factors_affecting_model_efficiency": "No.", "risk_rating": "Low", "explanations_for_

In [None]:
a

In [130]:
%%html

<p><b>Shutting down your kernel for this notebook to release resources.</b></p>
<button class="sm-command-button" data-commandlinker-command="kernelmenu:shutdown" style="display:none;">Shutdown Kernel</button>
        
<script>
try {
    els = document.getElementsByClassName("sm-command-button");
    els[0].click();
}
catch(err) {
    // NoOp
}    
</script>