In [10]:
import boto3, time, json

# --- Fixed deployment settings (tied to one specific AWS account and region) ---
s3_bucket_name = "sagemaker-us-east-1-474916309046"
s3_prefix = "fpe-pii"
# Despite the name, this value is a container image URI (registry/repo:tag), not an ARN.
ecr_image_arn = "474916309046.dkr.ecr.us-east-1.amazonaws.com/fpe-pii:latest"
sagemaker_role_arn = "arn:aws:iam::474916309046:role/service-role/AmazonSageMaker-ExecutionRole-20201211T145559"

# --- AWS session and clients ---
# One session (created with no explicit arguments) backs the SageMaker and STS clients.
sess = boto3.Session()
sm = sess.client("sagemaker")

# Region and account id of the active session.
# NOTE(review): neither value is used by the cells visible in this notebook;
# the bucket name and ARNs above are hardcoded instead - confirm this is intended.
region = sess.region_name
account = sess.client("sts").get_caller_identity().get("Account")

## Create SageMaker Model

In [7]:
# Describe the SageMaker model: a name, the S3 location of the packaged model
# artifact, and the container definition that pairs the image with the artifact.
model_file_name = "mdv5a"
sm_model_name = f"fpe-pii-{model_file_name}"

# The artifact is addressed as <bucket>/<prefix>/model/<model_file_name>.tar.gz.
model_data = f"s3://{s3_bucket_name}/{s3_prefix}/model/{model_file_name}.tar.gz"

# Container definition in the shape used as `PrimaryContainer` by the
# (currently disabled) create_model call.
container = dict(Image=ecr_image_arn, ModelDataUrl=model_data)

In [None]:
# Registers the model described by `sm_model_name`, `sagemaker_role_arn` and
# `container` with SageMaker and prints the resulting model ARN.
# The call is disabled: running this cell as-is does nothing.
# NOTE(review): presumably disabled because the model was already created in
# this account - confirm before uncommenting.
# create_model_response = sm.create_model(
#     ModelName=sm_model_name,
#     ExecutionRoleArn=sagemaker_role_arn,
#     PrimaryContainer=container
# )
# print(create_model_response["ModelArn"])

## Create Batch Job

In [4]:
# Alternative dataset (atherton), kept for reference:
# batch_input = f"s3://{s3_bucket_name}/{s3_prefix}/imagesets/atherton/img/"
# batch_output = f"s3://{s3_bucket_name}/{s3_prefix}/imagesets/atherton/pii/"

# Active dataset (parkers-brook): S3 prefix read by the transform job and
# S3 prefix its results are written to.
batch_input, batch_output = (
    f"s3://{s3_bucket_name}/fpe/data/parkers-brook/images/",
    f"s3://{s3_bucket_name}/fpe/data/parkers-brook/pii/",
)

# Display both locations as the cell output.
batch_input, batch_output

('s3://sagemaker-us-east-1-474916309046/fpe/data/parkers-brook/images/',
 's3://sagemaker-us-east-1-474916309046/fpe/data/parkers-brook/pii/')

In [8]:
# Job name carries a UTC timestamp (second resolution), so each run of this
# cell produces a new name.
batch_job_name = "fpe-pii-batch-" + time.strftime("%Y%m%d%H%M%S", time.gmtime())

# Keyword arguments for sm.create_transform_job, built in the same key order
# as the API request so the displayed dict reads top-down.
batch_request = dict(
    TransformJobName=batch_job_name,
    ModelName=sm_model_name,
    # NOTE(review): no SplitType is set on the input; confirm that
    # "MultiRecord" has the intended effect for this dataset.
    BatchStrategy="MultiRecord",
    TransformInput=dict(
        # Every object under the `batch_input` prefix is an input to the job.
        DataSource=dict(
            S3DataSource=dict(S3DataType="S3Prefix", S3Uri=batch_input),
        ),
        # The string "None" (not Python's None) is the value sent to the API.
        CompressionType="None",
    ),
    TransformOutput=dict(S3OutputPath=batch_output, AssembleWith="Line"),
    # Four ml.m5.large instances are requested for the job.
    TransformResources=dict(InstanceType="ml.m5.large", InstanceCount=4),
    # Each invocation may take up to 3600 s and is retried at most once.
    ModelClientConfig=dict(
        InvocationsTimeoutInSeconds=3600,
        InvocationsMaxRetries=1,
    ),
)

# Show the assembled request for review before submitting it.
batch_request

{'TransformJobName': 'fpe-pii-batch-20230331131956',
 'ModelName': 'fpe-pii-mdv5a',
 'BatchStrategy': 'MultiRecord',
 'TransformInput': {'DataSource': {'S3DataSource': {'S3DataType': 'S3Prefix',
    'S3Uri': 's3://sagemaker-us-east-1-474916309046/fpe/data/parkers-brook/images/'}},
  'CompressionType': 'None'},
 'TransformOutput': {'S3OutputPath': 's3://sagemaker-us-east-1-474916309046/fpe/data/parkers-brook/pii/',
  'AssembleWith': 'Line'},
 'TransformResources': {'InstanceType': 'ml.m5.large', 'InstanceCount': 4},
 'ModelClientConfig': {'InvocationsTimeoutInSeconds': 3600,
  'InvocationsMaxRetries': 1}}

In [9]:
# Submit the transform job described by `batch_request`; the response
# (job ARN plus response metadata) is shown as the cell output.
# NOTE(review): the response only acknowledges the request - presumably the job
# must finish before the "Process Output" cells are run; confirm.
transform_job_response = sm.create_transform_job(**batch_request)
transform_job_response

{'TransformJobArn': 'arn:aws:sagemaker:us-east-1:474916309046:transform-job/fpe-pii-batch-20230331131956',
 'ResponseMetadata': {'RequestId': 'c7ca2f74-da87-4cf6-8d6b-df40b53ace33',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'x-amzn-requestid': 'c7ca2f74-da87-4cf6-8d6b-df40b53ace33',
   'content-type': 'application/x-amz-json-1.1',
   'content-length': '105',
   'date': 'Fri, 31 Mar 2023 13:19:59 GMT'},
  'RetryAttempts': 0}}

## Process Output

In [19]:
lambda_client = boto3.client('lambda')

# Derive the Lambda's S3 locations from the transform job's output path instead
# of repeating the bucket and folder literals, so switching datasets in the
# batch cell cannot leave this cell pointing at a stale location.
bucket_uri = f"s3://{s3_bucket_name}/"
if not batch_output.startswith(bucket_uri):
    raise ValueError(f"batch_output {batch_output!r} is not inside {bucket_uri!r}")

# Key prefix (no scheme or bucket) where the transform job writes its results,
# e.g. "fpe/data/parkers-brook/pii/".
pii_folder = batch_output[len(bucket_uri):]

# Define the input for the Lambda function.
# NOTE(review): the key names are whatever the `fpe-pii-output` Lambda expects;
# its handler is not part of this notebook - confirm against the function code.
input_data = {
  'bucketName': s3_bucket_name,
  'folderName': pii_folder,
  # Sibling object of the output folder: "<...>/pii/" -> "<...>/pii-output.json".
  'outputFileName': pii_folder.rstrip("/") + "-output.json",
}

# Convert the input to a JSON string and display it for review
payload = json.dumps(input_data)
payload

'{"bucketName": "sagemaker-us-east-1-474916309046", "folderName": "fpe/data/parkers-brook/pii/", "outputFileName": "fpe/data/parkers-brook/pii-output.json"}'

In [29]:
%%time
# Invoke the Lambda function
response = lambda_client.invoke(
  FunctionName='fpe-pii-output',
  InvocationType='Event',
  Payload=payload
)

# Print the response from the Lambda function
print(response)

{'ResponseMetadata': {'RequestId': '7c2587b0-5aae-490b-b78e-c68bc06a3218', 'HTTPStatusCode': 202, 'HTTPHeaders': {'date': 'Fri, 31 Mar 2023 14:50:03 GMT', 'content-length': '0', 'connection': 'keep-alive', 'x-amzn-requestid': '7c2587b0-5aae-490b-b78e-c68bc06a3218', 'x-amzn-remapped-content-length': '0', 'x-amzn-trace-id': 'root=1-6426f31b-0bfc2e4264f86f4c04077377;sampled=0'}, 'RetryAttempts': 0}, 'StatusCode': 202, 'Payload': <botocore.response.StreamingBody object at 0x116c24df0>}
CPU times: user 8.12 ms, sys: 2.25 ms, total: 10.4 ms
Wall time: 228 ms
