In [None]:
# Report disk usage per mounted filesystem in human-readable units (-h).
!df -h

In [28]:
!pip install torch==2.0.1 --quiet
!pip install diffusers==0.30.0 --quiet
!pip install transformers==4.44.0 --quiet
!pip install accelerate==0.30.0 --quiet
!pip install sentencepiece==0.2.0 --quiet
!pip install peft==0.7.0 --quiet

In [None]:
import sagemaker
import boto3
# Bucket used for uploading data, models and logs. SageMaker creates it
# automatically if it does not exist yet. Leave as None to use the default.
sagemaker_session_bucket = None

sess = sagemaker.Session()
if sess is not None and sagemaker_session_bucket is None:
    # no bucket name given -> fall back to the session's default bucket
    sagemaker_session_bucket = sess.default_bucket()

# Resolve the execution role; when get_execution_role() raises ValueError,
# look the role up by its name through IAM instead.
try:
    role = sagemaker.get_execution_role()
except ValueError:
    iam = boto3.client("iam")
    role_record = iam.get_role(RoleName="sagemaker_execution_role")
    role = role_record["Role"]["Arn"]

# Re-create the session bound to the selected bucket.
sess = sagemaker.Session(default_bucket=sagemaker_session_bucket)

print(f"sagemaker role arn: {role}")
print(f"sagemaker bucket: {sess.default_bucket()}")
print(f"sagemaker session region: {sess.boto_region_name}")

In [30]:
from distutils.dir_util import copy_tree
from pathlib import Path

# Hugging Face Hub id of the checkpoint that is exported below.
HF_MODEL_ID = "stabilityai/stable-diffusion-xl-base-1.0"

# Local staging directory for the model artifacts; reused if it already exists.
model_tar = Path("Model-SDXL")
model_tar.mkdir(exist_ok=True)

In [None]:
from diffusers import DiffusionPipeline
import torch

# Load the pipeline identified by HF_MODEL_ID with float16 weights.
load_options = {"torch_dtype": torch.float16}
pipe = DiffusionPipeline.from_pretrained(HF_MODEL_ID, **load_options)

In [35]:
# Write the loaded pipeline into the local model directory (model_tar).
pipe.save_pretrained(model_tar)

In [None]:
!mkdir code

In [None]:
%%writefile code/requirements.txt
# Pinned packages for the inference code that is packaged with the model.
# NOTE(review): presumably installed by the serving container at start-up - confirm.
# NOTE(review): torch is pinned to 2.0.1 while the model definition requests
# pytorch_version="2.1.0" - confirm that replacing the container's torch is intended.
torch==2.0.1
diffusers==0.30.0
transformers==4.41.2
accelerate==0.30.0
sentencepiece==0.2.0
peft==0.7.0

In [None]:
%%writefile code/inference.py
import base64
import torch
from io import BytesIO
from diffusers import DiffusionPipeline


def model_fn(model_dir, context=None):
    """Load the diffusion pipeline and attach the fine-tuned LoRA weights.

    Args:
        model_dir: Directory containing the pipeline saved with
            ``save_pretrained``.
        context: Optional serving context; accepted for interface
            compatibility and not used here.

    Returns:
        The ``DiffusionPipeline`` in float16, moved to CUDA, with the LoRA
        weights from ``pytorch_lora_weights.safetensors`` loaded.
    """
    import os  # local import: the module header does not import os

    pipe = DiffusionPipeline.from_pretrained(model_dir, torch_dtype=torch.float16)
    pipe = pipe.to("cuda")

    # A bare relative path is resolved against the serving process's working
    # directory, which is not necessarily model_dir. Prefer the adapter that
    # ships inside the model directory and only fall back to the original
    # relative location when it is not bundled there.
    bundled_lora = os.path.join(model_dir, "lora-trained-xl")
    lora_source = bundled_lora if os.path.isdir(bundled_lora) else "lora-trained-xl/"
    pipe.load_lora_weights(
        lora_source,
        weight_name="pytorch_lora_weights.safetensors",
    )

    return pipe


def predict_fn(data, pipe):
    """Generate images for a request and return them base64-encoded.

    Args:
        data: Request dict. ``"inputs"`` holds the prompt (when the key is
            absent the dict itself is passed as the prompt). Optional keys
            ``"num_inference_steps"``, ``"guidance_scale"`` and
            ``"num_images_per_prompt"`` tune the generation. The recognised
            keys are popped from ``data``.
        pipe: Callable pipeline whose result exposes an ``"images"`` entry
            of objects with a ``save(buffer, format=...)`` method.

    Returns:
        ``{"generated_images": [...]}`` with one base64 string (JPEG bytes)
        per generated image.
    """
    prompt = data.pop("inputs", data)

    # NOTE(review): defaults of 1 step / 1.5 guidance look tuned for a
    # distilled model - confirm they suit the deployed checkpoint.
    generation_kwargs = {
        "num_inference_steps": data.pop("num_inference_steps", 1),
        "guidance_scale": data.pop("guidance_scale", 1.5),
        "num_images_per_prompt": data.pop("num_images_per_prompt", 1),
    }
    generated_images = pipe(prompt, **generation_kwargs)["images"]

    def _as_base64_jpeg(image):
        # Serialize to an in-memory JPEG, then base64-encode to text.
        stream = BytesIO()
        image.save(stream, format="JPEG")
        return base64.b64encode(stream.getvalue()).decode()

    return {"generated_images": [_as_base64_jpeg(img) for img in generated_images]}

In [None]:
# Copy code/ into the model directory (as <model dir>/code).
# shutil.copytree replaces distutils' copy_tree, which is deprecated and
# removed from the standard library in Python 3.12; dirs_exist_ok=True keeps
# the cell re-runnable when the target directory already exists.
import shutil

shutil.copytree("code/", str(model_tar.joinpath("code")), dirs_exist_ok=True)

In [None]:
import tarfile
import os
from pathlib import Path


# Directory whose contents are packed into the archive below.
model_tar = Path("Model-SDXL")

# helper to create the model.tar.gz
def compress(tar_dir=None, output_file="model.tar.gz"):
    """Pack the contents of ``tar_dir`` into a gzip-compressed tarball.

    Each top-level entry of ``tar_dir`` is stored at the root of the archive
    (directories are added recursively), and its name is printed as it is
    added.

    Args:
        tar_dir: Directory whose contents are archived.
        output_file: Path of the archive to write; a relative path is
            resolved against the current working directory.

    Raises:
        TypeError: If ``tar_dir`` is ``None`` (or not path-like).
        OSError: If ``tar_dir`` cannot be listed or the archive cannot be
            written.
    """
    source_dir = os.fspath(tar_dir)  # rejects None with TypeError

    # List the directory before creating the archive so that an invalid
    # tar_dir fails without leaving an empty tarball behind. Sorting gives a
    # reproducible member order.
    entries = sorted(os.listdir(source_dir))

    # Build source paths explicitly instead of os.chdir(): the working
    # directory is never changed, so an error while archiving cannot leave
    # the notebook stranded inside the model directory.
    with tarfile.open(output_file, "w:gz") as tar:
        for item in entries:
            print(item)
            tar.add(os.path.join(source_dir, item), arcname=item)

# Archive the model directory using compress()'s default output name, model.tar.gz.
compress(str(model_tar))

In [None]:
import sagemaker
from sagemaker.s3 import S3Uploader
sess = sagemaker.Session()

# Upload model.tar.gz to the sdxl-finetuned prefix of the session's default bucket.
upload_destination = f"s3://{sess.default_bucket()}/sdxl-finetuned"
s3_model_uri = S3Uploader.upload(
    local_path="model.tar.gz",
    desired_s3_uri=upload_destination,
)

print(f"model uploaded to: {s3_model_uri}")

In [42]:
import sagemaker
from sagemaker.huggingface.model import HuggingFaceModel


# Create the Hugging Face model object that points at the uploaded archive.
# Reuse the `role` resolved in the session-setup cell: it already includes the
# IAM fallback, whereas calling sagemaker.get_execution_role() again here
# would raise ValueError in exactly the case that fallback handles.
huggingface_model = HuggingFaceModel(
    model_data=s3_model_uri,
    role=role,
    transformers_version="4.37.0",
    pytorch_version="2.1.0",
    py_version="py310",
)

In [None]:
# Deploy the model to an endpoint backed by one ml.g5.2xlarge instance.
deploy_options = {
    "initial_instance_count": 1,
    "instance_type": "ml.g5.2xlarge",
}
predictor = huggingface_model.deploy(**deploy_options)

In [44]:
from PIL import Image
from io import BytesIO
from IPython.display import display
import base64
import matplotlib.pyplot as plt

# helper decoder
def decode_base64_image(image_string):
    """Decode a base64-encoded image and open it with PIL.

    Args:
        image_string: Base64 text (or bytes) of the encoded image file.

    Returns:
        The object returned by ``Image.open`` for the decoded bytes.
    """
    raw_bytes = base64.b64decode(image_string)
    return Image.open(BytesIO(raw_bytes))

# display PIL images as grid
def display_images(images=None, columns=3, width=100, height=100):
    """Show images in a grid with ``columns`` images per row.

    Args:
        images: Sequence of images accepted by ``plt.imshow``. ``None`` or an
            empty sequence draws nothing.
        columns: Number of images per row.
        width: Figure width passed to ``figsize``.
        height: Figure height passed to ``figsize``.
    """
    if images is None or len(images) == 0:
        # Nothing to draw; avoid creating an empty figure or failing on None.
        return

    # Ceiling division: exactly as many rows as needed. The previous
    # int(n / columns + 1) reserved an extra empty row whenever n was a
    # multiple of columns.
    rows = -(-len(images) // columns)

    plt.figure(figsize=(width, height))
    for i, image in enumerate(images):
        plt.subplot(rows, columns, i + 1)
        plt.axis("off")
        plt.imshow(image)

In [None]:
prompt = "a photo of sks dog in a bucket"

# Ask the endpoint for three images of the prompt with num_inference_steps=50.
request_payload = {
    "inputs": prompt,
    "num_images_per_prompt": 3,
    "num_inference_steps": 50,
}
response = predictor.predict(data=request_payload)

# Turn each base64 string in the response back into an image.
decoded_images = list(map(decode_base64_image, response["generated_images"]))

# Show the generated images as a grid.
display_images(decoded_images)

In [48]:
# Clean up: delete the model first, then the endpoint.
predictor.delete_model()
predictor.delete_endpoint()