## Test in SageMaker

In [1]:
!pip install -r ./requirements.txt

Collecting diffusers~=0.21.4 (from -r ./requirements.txt (line 1))
  Using cached diffusers-0.21.4-py3-none-any.whl.metadata (18 kB)
Collecting torch~=2.1.0 (from -r ./requirements.txt (line 2))
  Using cached torch-2.1.2-cp310-cp310-manylinux1_x86_64.whl.metadata (25 kB)
Collecting omegaconf~=2.3.0 (from -r ./requirements.txt (line 4))
  Using cached omegaconf-2.3.0-py3-none-any.whl.metadata (3.9 kB)
Collecting accelerate~=0.23.0 (from -r ./requirements.txt (line 5))
  Using cached accelerate-0.23.0-py3-none-any.whl.metadata (18 kB)
Collecting transformers~=4.34.0 (from -r ./requirements.txt (line 6))
  Using cached transformers-4.34.1-py3-none-any.whl.metadata (121 kB)
Collecting einops (from -r ./requirements.txt (line 8))
  Using cached einops-0.8.0-py3-none-any.whl.metadata (12 kB)
Collecting gradio (from -r ./requirements.txt (line 10))
  Using cached gradio-4.39.0-py3-none-any.whl.metadata (15 kB)
Collecting gradio_imageslider (from -r ./requirements.txt (line 11))
  Using cache

In [12]:
from diffusers import AutoencoderKL
from codes.pipeline_demofusion_sdxl import DemoFusionSDXLPipeline
import torch, gc
from torchvision import transforms
from PIL import Image
import os
import json
import boto3

PREVIEW_EXT = ["png", "jpeg", "jpg"]
GEOTIFF_EXT = ["tiff", "tif", "geotiff"]

def load_and_process_image(pil_image):
    """Turn a PIL image into a normalized, half-precision, single-item batch.

    The image is resized to 1024x1024, converted to a tensor, normalized with
    mean 0.5 and std 0.5 on each of three channels, given a leading batch
    axis of size one and finally cast with ``.half()``.

    Args:
        pil_image: Image to convert. The three-value mean/std means three
            channels are expected (presumably RGB; the caller in this file
            converts to "RGB" before calling).

    Returns:
        The tensor produced by the transform chain after ``unsqueeze(0).half()``.
    """
    half = [0.5, 0.5, 0.5]
    steps = [
        transforms.Resize((1024, 1024)),
        transforms.ToTensor(),
        transforms.Normalize(mean=half, std=half),
    ]
    pipeline = transforms.Compose(steps)
    batch = pipeline(pil_image).unsqueeze(0)
    return batch.half()


def pad_image(image):
    """Pad a PIL image to a square canvas, centring it along the shorter axis.

    Args:
        image: PIL image of any size.

    Returns:
        ``image`` itself when it is already square; otherwise a new image of
        the same mode whose side equals the longer edge, filled with zeros
        (black) and with ``image`` pasted in the middle of the shorter axis.
    """
    w, h = image.size
    if w == h:
        return image

    side = max(w, h)
    # A 3-tuple fill is only valid for modes with at least three bands.
    # Single- and two-band modes (e.g. "L", "LA") reject it, so fall back to
    # the integer 0 there instead of raising.
    fill = (0, 0, 0) if len(image.getbands()) >= 3 else 0
    new_image = Image.new(image.mode, (side, side), fill)
    # Exactly one of the two offsets is non-zero: the one on the shorter axis.
    new_image.paste(image, ((side - w) // 2, (side - h) // 2))
    return new_image

def generate_images(prompt, negative_prompt, height, width, num_inference_steps, guidance_scale, cosine_scale_1, cosine_scale_2, cosine_scale_3, sigma, view_batch_size, stride, seed, image_path):
    """Run the DemoFusion SDXL pipeline on one input image and save the results.

    The input is padded to a square, resized to 1024x1024, converted to RGB
    and passed to the pipeline as ``image_lr``. Every image the pipeline
    returns is cropped back to the aspect ratio of the input (undoing the
    padding) and written to ``./tmp/image_<i>.png``.

    Args:
        prompt, negative_prompt: Forwarded to the pipeline unchanged.
        height, width: Requested output size; cast to ``int``.
        num_inference_steps, view_batch_size, stride: Cast to ``int`` and forwarded.
        guidance_scale, cosine_scale_1, cosine_scale_2, cosine_scale_3, sigma:
            Forwarded to the pipeline unchanged.
        seed: Cast to ``int`` and used to seed a CUDA ``torch.Generator``.
        image_path: Path of the input image on local disk.

    Returns:
        List of the saved file paths, in the order the pipeline returned the images.
    """
    # Close the file handle as soon as the pixel data has been copied by
    # resize(); Image.open alone keeps the file open.
    with Image.open(image_path) as input_image:
        w, h = input_image.size
        padded_image = pad_image(input_image).resize((1024, 1024)).convert("RGB")
    image_lr = load_and_process_image(padded_image).to('cuda')

    vae = None
    pipe = None
    try:
        vae = AutoencoderKL.from_pretrained("madebyollin/sdxl-vae-fp16-fix", torch_dtype=torch.float16)
        pipe = DemoFusionSDXLPipeline.from_pretrained("stabilityai/stable-diffusion-xl-base-1.0", vae=vae, torch_dtype=torch.float16)
        pipe = pipe.to("cuda")
        generator = torch.Generator(device='cuda').manual_seed(int(seed))
        images = pipe(prompt, negative_prompt=negative_prompt, generator=generator,
                      height=int(height), width=int(width), view_batch_size=int(view_batch_size), stride=int(stride),
                      num_inference_steps=int(num_inference_steps), guidance_scale=guidance_scale,
                      cosine_scale_1=cosine_scale_1, cosine_scale_2=cosine_scale_2, cosine_scale_3=cosine_scale_3, sigma=sigma,
                      multi_decoder=True, show_image=False, lowvram=False, image_lr=image_lr
                     )
    finally:
        # Drop every local reference to GPU-resident objects before emptying
        # the cache, and do it even when loading or inference raised.
        pipe = None
        vae = None
        image_lr = None
        gc.collect()
        torch.cuda.empty_cache()

    # Do not rely on the caller having created the output directory.
    os.makedirs("./tmp", exist_ok=True)

    images_path = []
    for i, image in enumerate(images):
        out_path = './tmp/image_' + str(i) + '.png'
        images_path.append(out_path)
        # PIL reports size as (width, height).
        out_w, out_h = image.size
        if w < h:
            # Portrait input: padding was added left and right.
            content_w = int(w * (out_w / h))
            edge = (out_w - content_w) // 2
            cropped_image = image.crop((edge, 0, out_w - edge, out_h))
        elif w > h:
            # Landscape input: padding was added above and below.
            content_h = int(h * (out_h / w))
            edge = (out_h - content_h) // 2
            cropped_image = image.crop((0, edge, out_w, out_h - edge))
        else:
            cropped_image = image
        cropped_image.save(out_path)
    return images_path


def download_from_s3(file_path, bucket, key, region):
    """Fetch ``s3://<bucket>/<key>`` into ``file_path`` unless that path already exists.

    Args:
        file_path: Local destination path; if it exists nothing is downloaded.
        bucket: Name of the S3 bucket.
        key: Object key inside the bucket.
        region: AWS region used to create the S3 client.

    Returns:
        None. Progress is reported with ``print``.
    """
    if not os.path.exists(file_path):
        client = boto3.client("s3", region_name=region)
        print(f"Downloading s3://{bucket}/{key} to {file_path}...")
        client.download_file(bucket, key, file_path)
        print("S3 download successful! \n")
    else:
        # An existing local path is treated as a cache hit.
        print(f"{file_path} exists already")


def upload_to_s3(file_path, bucket, key, region):
    """Upload a local image file to S3 with a Content-Type derived from its extension.

    Args:
        file_path: Local file to upload.
        bucket: Destination bucket name.
        key: Destination object key.
        region: AWS region used to create the S3 client.

    Returns:
        None. Progress is reported with ``print``.
    """
    s3 = boto3.client("s3", region_name=region)
    # Compare case-insensitively so "IMG.PNG" / "scan.JPG" get the right type
    # instead of falling through to the TIFF default.
    extension = file_path.split(".")[-1].lower()
    if extension == "png":
        content_type = "image/png"
    elif extension in ("jpeg", "jpg"):
        content_type = "image/jpeg"
    else:
        # Everything else is labelled TIFF.
        content_type = "image/tiff"
    print("Uploading to s3://{}/{}...".format(bucket, key))
    s3.upload_file(file_path, bucket, key, ExtraArgs={"ContentType": content_type})
    print("S3 upload successful! \n")


def process_input(data):
    """Parse a request, download its input image and run image generation.

    Args:
        data: The JSON request, given as a ``str``, as ``bytes``/``bytearray``,
            or as an object whose ``read()`` returns UTF-8 encoded bytes.

    Returns:
        Tuple ``(images_path, model_input)``: the list returned by
        ``generate_images`` and the parsed request dictionary.

    Raises:
        KeyError: If a required field is missing from the request.
    """
    # exist_ok avoids the check-then-create race of isdir() + mkdir().
    os.makedirs("./tmp", exist_ok=True)

    # json.loads accepts str and bytes directly; only stream-like bodies need read().
    if isinstance(data, (str, bytes, bytearray)):
        model_input = json.loads(data)
    else:
        model_input = json.loads(data.read().decode("utf-8"))

    print("Body:", model_input)

    # Listed in the positional order generate_images expects.
    generation_fields = (
        "prompt",
        "negative_prompt",
        "height",
        "width",
        "num_inference_steps",
        "guidance_scale",
        "cosine_scale_1",
        "cosine_scale_2",
        "cosine_scale_3",
        "sigma",
        "view_batch_size",
        "stride",
        "seed",
    )
    generation_args = [model_input[name] for name in generation_fields]
    bucket = model_input["bucket"]
    key = model_input["key"]
    region = model_input["region"]

    # NOTE(review): the local name is only the key's basename and
    # download_from_s3 skips existing paths, so two keys sharing a basename
    # would reuse the first download — confirm this caching is intended.
    filename = key.split("/")[-1]
    local_path = "./tmp/" + filename
    download_from_s3(local_path, bucket, key, region)

    images_path = generate_images(*generation_args, local_path)

    return images_path, model_input


def process_output(model_input, images_path):
    """Upload every generated image to S3 and describe where each one landed.

    Args:
        model_input: Parsed request; its ``"bucket"`` and ``"region"`` entries
            select the upload destination.
        images_path: Local paths of the images to upload.

    Returns:
        ``{"predictions": [...]}`` with one ``{"image_s3_path": {...}}`` entry
        per image, holding the bucket, region and ``results/<file name>`` key.
    """
    bucket, region = model_input["bucket"], model_input["region"]
    predictions = []
    for local_file in images_path:
        # The object key is the file's basename placed under "results/".
        key = 'results/' + local_file.split("/")[-1]
        upload_to_s3(local_file, bucket, key, region)
        location = {"bucket": bucket, "region": region, "key": key}
        predictions.append({"image_s3_path": location})
    return {"predictions": predictions}



def handler(data, context):
    """Serve one request end to end and return the response as a JSON string.

    ``data`` is passed unchanged to ``process_input``; ``context`` is accepted
    but not used here. The request JSON must provide:

        prompt, negative_prompt, width, height, num_inference_steps,
        guidance_scale, cosine_scale_1, cosine_scale_2, cosine_scale_3,
        sigma, view_batch_size, stride, seed, bucket, region, key

    where ``bucket``/``key``/``region`` locate the input image in S3.

    Returns:
        The dictionary built by ``process_output``, serialized with
        ``json.dumps(..., indent=2)``.
    """
    generated_paths, request = process_input(data)
    return json.dumps(process_output(request, generated_paths), indent=2)


In [5]:
# Print the GPU, driver and CUDA versions visible to this notebook instance.
!nvidia-smi

Tue Jul 30 20:15:13 2024       
+---------------------------------------------------------------------------------------+
| NVIDIA-SMI 535.183.01             Driver Version: 535.183.01   CUDA Version: 12.2     |
|-----------------------------------------+----------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |         Memory-Usage | GPU-Util  Compute M. |
|                                         |                      |               MIG M. |
|   0  NVIDIA L4                      Off | 00000000:31:00.0 Off |                    0 |
| N/A   36C    P8              11W /  72W |      0MiB / 23034MiB |      0%      Default |
|                                         |                      |                  N/A |
+-----------------------------------------+----------------------+----------------------+
                                                                    

In [13]:
# Sample request containing every field process_input reads; bucket/key/region
# point at the input image in S3.
payload = { 
    "prompt": "a satellite image",
    "negative_prompt": "blurry, ugly, duplicate, poorly drawn, deformed, mosaic",
    "width": 2048,
    "height": 2048,
    "num_inference_steps": 50,
    "guidance_scale": 7.5,
    "cosine_scale_1": 3,
    "cosine_scale_2": 1,
    "cosine_scale_3": 1,
    "sigma": 0.8,
    "view_batch_size": 16,
    "stride": 64,
    "seed": 2013,
    "bucket": "test-aws-mybucket",
    "region": "us-west-2",
    "key": "data/sample.png",
}

# Call the handler in-process with the payload as a JSON string; the context argument is unused.
print(handler(json.dumps(payload), None))

Body: {'prompt': 'a satellite image', 'negative_prompt': 'blurry, ugly, duplicate, poorly drawn, deformed, mosaic', 'width': 2048, 'height': 2048, 'num_inference_steps': 50, 'guidance_scale': 7.5, 'cosine_scale_1': 3, 'cosine_scale_2': 1, 'cosine_scale_3': 1, 'sigma': 0.8, 'view_batch_size': 16, 'stride': 64, 'seed': 2013, 'bucket': 'test-aws-mybucket', 'region': 'us-west-2', 'key': 'data/sample.png'}
./tmp/sample.png exists already


Loading pipeline components...:   0%|          | 0/7 [00:00<?, ?it/s]

### Encoding Real Image ###
### Phase 1 Denoising ###


  0%|          | 0/50 [00:00<?, ?it/s]

### Phase 1 Decoding ###


  0%|          | 0/16 [00:00<?, ?it/s]

### Phase 2 Denoising ###


  0%|          | 0/50 [00:00<?, ?it/s]

### Phase 2 Decoding ###


  0%|          | 0/64 [00:00<?, ?it/s]

Uploading to s3://test-aws-mybucket/results/image_0.png...
S3 upload successful! 

Uploading to s3://test-aws-mybucket/results/image_1.png...
S3 upload successful! 

Uploading to s3://test-aws-mybucket/results/image_2.png...
S3 upload successful! 

{
  "predictions": [
    {
      "image_s3_path": {
        "bucket": "test-aws-mybucket",
        "region": "us-west-2",
        "key": "results/image_0.png"
      }
    },
    {
      "image_s3_path": {
        "bucket": "test-aws-mybucket",
        "region": "us-west-2",
        "key": "results/image_1.png"
      }
    },
    {
      "image_s3_path": {
        "bucket": "test-aws-mybucket",
        "region": "us-west-2",
        "key": "results/image_2.png"
      }
    }
  ]
}


## Inference Endpoint Deployment

In [2]:
import boto3
import json


# Specify your AWS Region
aws_region='us-west-2'

# Create a low-level SageMaker service client.
sagemaker_client = boto3.client('sagemaker', region_name=aws_region)

# Role to give SageMaker permission to access AWS services.
sagemaker_role= "arn:aws:iam::573944535954:role/Super_Resolution"

# Container image in ECR that is registered as the model's primary container.
ecr_image = "573944535954.dkr.ecr.us-west-2.amazonaws.com/super-resolution:latest"
# Instance type requested for the endpoint's production variant.
instance_type = "ml.g5.xlarge"


In [4]:
from time import gmtime, strftime

# A UTC timestamp suffix gives every run of this cell a distinct model name.
model_name = f"demofusion-{strftime('%Y-%m-%d-%H-%M-%S', gmtime())}"

# Register the ECR image as a SageMaker model under the execution role.
create_model_response = sagemaker_client.create_model(
    ModelName=model_name,
    ExecutionRoleArn=sagemaker_role,
    PrimaryContainer={"Image": ecr_image},
)

ClientError: An error occurred (ValidationException) when calling the CreateModel operation: Cannot create already existing model "arn:aws:sagemaker:us-west-2:573944535954:model/superresolution-demofusion".

In [12]:
# The name of the endpoint configuration associated with this endpoint.
endpoint_config_name = "superresolution-demofusion-config"

# One production variant serving model_name, plus an AsyncInferenceConfig so
# the endpoint is created as an asynchronous endpoint.
create_endpoint_config_response = sagemaker_client.create_endpoint_config(
    EndpointConfigName=endpoint_config_name,
    ProductionVariants=[
        {
            "VariantName": "AllTraffic", # The name of the production variant.
            "ModelName": model_name, 
            "InstanceType": instance_type, # Specify the compute instance type.
            "InitialInstanceCount": 1 # Number of instances to launch initially.
        }
    ],
    AsyncInferenceConfig={
        "OutputConfig": {
            # Location to upload response outputs when no location is provided in the request.
            "S3OutputPath": "s3://test-aws-mybucket/results/"
            },        
        }
)

In [13]:
# The name of the endpoint. The name must be unique within an AWS Region in your AWS account.
endpoint_name = 'super-resolution-demofusion' 

create_endpoint_response = sagemaker_client.create_endpoint(
                                            EndpointName=endpoint_name, 
                                            EndpointConfigName=endpoint_config_name) 

# Report the status once right after the create call.
resp = sagemaker_client.describe_endpoint(EndpointName=endpoint_name)
status = resp['EndpointStatus']
print("Endpoint Status: " + status)

# Block this cell until the 'endpoint_in_service' waiter returns.
print('Waiting for {} endpoint to be in service...'.format(endpoint_name))
waiter = sagemaker_client.get_waiter('endpoint_in_service')
waiter.wait(EndpointName=endpoint_name)

In [16]:
# Create a low-level client representing Amazon SageMaker Runtime
sagemaker_runtime = boto3.client("sagemaker-runtime", region_name=aws_region)

# S3 location of the JSON request payload; an asynchronous invocation takes
# its input from S3 rather than from the request body.
input_location = "s3://test-aws-mybucket/payload.json"


# Queue the request on the asynchronous endpoint; the returned response
# describes the queued invocation, not the inference result itself.
response = sagemaker_runtime.invoke_endpoint_async(
                            EndpointName=endpoint_name, 
                            ContentType='application/json',
                            InputLocation=input_location,
                            InvocationTimeoutSeconds=3600)

ValidationError: An error occurred (ValidationError) when calling the InvokeEndpointAsync operation: Endpoint super-resolution-demofusion of account 573944535954 not found.

In [None]:
# Clean up: remove the endpoint, then its configuration, then the model.
sagemaker_client.delete_endpoint(EndpointName=endpoint_name)
sagemaker_client.delete_endpoint_config(EndpointConfigName=endpoint_config_name)
sagemaker_client.delete_model(ModelName=model_name)

## Inference Endpoint Deployment (Version 2: SageMaker Python SDK)

In [20]:
import boto3
import json
import sagemaker
from sagemaker import Model
from sagemaker.async_inference import AsyncInferenceConfig

# SageMaker Python SDK session used by Model.deploy below.
sess = sagemaker.Session()

# Specify your AWS Region
aws_region='us-west-2'

# Role to give SageMaker permission to access AWS services.
sagemaker_role= "arn:aws:iam::573944535954:role/Super_Resolution"

# Container image, model name, instance type and endpoint name for this deployment.
ecr_image = "573944535954.dkr.ecr.us-west-2.amazonaws.com/super-resolution:latest"
model_name = "superresolution-demofusion"
instance_type = "ml.g5.xlarge"
endpoint_name = 'super-resolution-demofusion' 

In [4]:
# Specify the name of your endpoint
endpoint_name='super-resolution-demofusion'

# Create a low-level SageMaker service client.
sagemaker_client = boto3.client('sagemaker', region_name=aws_region)

# Delete endpoint
sagemaker_client.delete_endpoint(EndpointName=endpoint_name)

# NOTE(review): the endpoint name is passed as the configuration name here —
# presumably because the SDK deploy names the config after the endpoint; confirm.
delete_config_response = sagemaker_client.delete_endpoint_config(
    EndpointConfigName=endpoint_name
)



In [21]:

# Successful and failed asynchronous results are both written under the same S3 prefix.
async_config = AsyncInferenceConfig(
    output_path="s3://test-aws-mybucket/results/",
    failure_path="s3://test-aws-mybucket/results/"
)

# NOTE(review): source_dir is an absolute path that looks like a location
# inside the container — confirm it exists where this notebook runs.
estimator = Model(
    name=model_name,
    image_uri=ecr_image,
    role=sagemaker_role,
    source_dir="/opt/ml/code",
    entry_point="inference.py",
    sagemaker_session=sess
)

# Deploy one instance as an asynchronous endpoint; the container gets 600
# seconds to pass its startup health check.
predictor = estimator.deploy(
    1, 
    instance_type, 
    endpoint_name=endpoint_name, 
    async_inference_config=async_config,
    container_startup_health_check_timeout= 600,
)

Using already existing model: superresolution-demofusion


---------------------------------*

UnexpectedStatusException: Error hosting endpoint super-resolution-demofusion: Failed. Reason: The primary container for production variant AllTraffic did not pass the ping health check. Please check CloudWatch logs for this endpoint.. Try changing the instance type or reference the troubleshooting page https://docs.aws.amazon.com/sagemaker/latest/dg/async-inference-troubleshooting.html

In [None]:
# The endpoint in this section is deployed with an AsyncInferenceConfig.
# Asynchronous endpoints are invoked with InvokeEndpointAsync, which reads the
# request from S3 and writes the result to S3, so the payload is staged first.
sm_client = sess.sagemaker_runtime_client

payload = { 
    "prompt": "a satellite image",
    "negative_prompt": "blurry, ugly, duplicate, poorly drawn, deformed, mosaic",
    "width": 2048,
    "height": 2048,
    "num_inference_steps": 50,
    "guidance_scale": 7.5,
    "cosine_scale_1": 3,
    "cosine_scale_2": 1,
    "cosine_scale_3": 1,
    "sigma": 0.8,
    "view_batch_size": 16,
    "stride": 64,
    "seed": 2013,
    "bucket": "test-aws-mybucket",
    "region": "us-west-2",
    "key": "data/sample.png",
}

# Stage the request body in the same bucket that holds the input image.
payload_bucket = payload["bucket"]
payload_key = "payload.json"
boto3.client("s3", region_name=aws_region).put_object(
    Bucket=payload_bucket,
    Key=payload_key,
    Body=json.dumps(payload).encode("utf-8"),
    ContentType="application/json",
)

response = sm_client.invoke_endpoint_async(
    EndpointName=endpoint_name,
    ContentType="application/json",
    InputLocation="s3://{}/{}".format(payload_bucket, payload_key),
)

# The call returns as soon as the request is queued; the result appears later
# at the reported output location.
print("RESULT will be written to:", response["OutputLocation"])

## Inference local endpoint deployment

In [None]:
!pip install pprint boto3 sagemaker json

In [None]:
import pprint
import boto3

# Pretty-printer for inspecting responses.
pp = pprint.PrettyPrinter(indent=1)

# Low-level SageMaker client using the default region/credentials.
sm_boto3 = boto3.client("sagemaker")

# Region and account id of the current credentials.
region = boto3.Session().region_name
account_id = boto3.client("sts").get_caller_identity()["Account"]

# Inference container image to run locally.
image_uri_inference = "573944535954.dkr.ecr.us-west-2.amazonaws.com/super-resolution:latest"

In [None]:
import sagemaker
from sagemaker.local import LocalSession

# "local_gpu" asks the SDK to run the container on this machine with GPU access.
instance_type_local = "local_gpu"

session_local = LocalSession()
# The SDK looks up the flag as "local.local_code", so it has to sit under the
# "local" key; keyed by the instance-type string it is silently ignored.
session_local.config = {"local": {"local_code": True}}
print(type(session_local))

from sagemaker import get_execution_role

# IAM role attached to this notebook environment.
role = get_execution_role()

In [None]:
from sagemaker import Model


# Model backed by the inference image and bound to the local session, so
# deploy() below goes through local mode.
# NOTE(review): source_dir is an absolute path that looks like a location
# inside the container — confirm it exists where this notebook runs.
estimator = Model(
    image_uri=image_uri_inference,
    role=role,
    source_dir="/opt/ml/code",
    entry_point="inference.py", # this argument is used to override internal container entrypoint, if needed!
    sagemaker_session=session_local,  # local session
    #                   predictor_cls=None,
    #                   env=None,
    #                   name=None,
    #                   vpc_config=None,
    #                   enable_network_isolation=False,
    #                   model_kms_key=None,
    #                   image_config=None,
    #                   code_location=None,
    #                   container_log_level=20,
    #                   dependencies=None,
    #                   git_config=None
)

# Deploy one instance using the local instance type.
predictor = estimator.deploy(1, instance_type_local)

In [None]:
import json

sagemaker_session = LocalSession()
# The flag must sit under the "local" key (the SDK reads "local.local_code");
# keyed by the instance-type string it is silently ignored.
sagemaker_session.config = {"local": {"local_code": True}}

# Runtime client of the local session, used to invoke the locally served endpoint.
sm_client = sagemaker_session.sagemaker_runtime_client


In [None]:
# Sample request: generation settings plus the S3 location (bucket/key/region)
# of the input image.
payload = { 
    "prompt": "a satellite image",
    "negative_prompt": "blurry, ugly, duplicate, poorly drawn, deformed, mosaic",
    "width": 2048,
    "height": 2048,
    "num_inference_steps": 50,
    "guidance_scale": 7.5,
    "cosine_scale_1": 3,
    "cosine_scale_2": 1,
    "cosine_scale_3": 1,
    "sigma": 0.8,
    "view_batch_size": 16,
    "stride": 64,
    "seed": 2013,
    "bucket": "test-aws-mybucket",
    "region": "us-west-2",
    "key": "data/sample.png",
}


# Synchronous invocation through the local session's runtime client.
response = sm_client.invoke_endpoint(
    EndpointName="local-endpoint",
    ContentType="application/json",
    Body=json.dumps(payload),
)

# The response body is a stream; read and decode it to show the JSON result.
r = response["Body"]
print("RESULT r.read().decode():", r.read().decode())