CLI and notebook examples for text to image diffusion models (#3154)
* CLI and notebook example for dreambooth finetuning

* notebook update pipeline

* notebook

---------

Co-authored-by: gaurav <rajguru@microsoft.com>
Co-authored-by: grajguru <grajguru@microsoft.com>
3 people committed May 7, 2024
1 parent f8fe695 commit 673c005
Showing 6 changed files with 439 additions and 13 deletions.
51 changes: 51 additions & 0 deletions base64_to_jpeg.py
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.

# Read base64 encoded image from txt file and convert it to image file

import argparse
import re
import json
import io
import base64
from PIL import Image


INPUT_PROMPT_COLUMN = "prompt"
OUTPUT_IMAGE_COLUMN = "generated_image"


def base64_str_to_image(response_file: str):
    """
    Read the response file from the online endpoint, extract the base64-encoded images, and save them as image files.

    :param response_file: Path to the JSON file holding the response received from the endpoint.
    :type response_file: str
    :return: None
    """
    with open(response_file) as f:
        json_str = json.load(f)

    # The response is a JSON-encoded string that itself contains the list of rows.
    json_obj = json.loads(json_str)
    for i, obj in enumerate(json_obj):
        generated_image = obj[OUTPUT_IMAGE_COLUMN]
        img = Image.open(io.BytesIO(base64.b64decode(generated_image)))
        text_prompt = ""
        if INPUT_PROMPT_COLUMN in obj:
            text_prompt = obj[INPUT_PROMPT_COLUMN].strip()
        # Build a safe file name from the prompt, capped at 50 characters.
        text_prompt = f"Img_{i}_" + re.sub(r"[^a-zA-Z0-9 ]+", "", text_prompt)
        text_prompt = text_prompt[:50]
        img.save(text_prompt + ".jpg", "JPEG")


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--response_file",
        type=str,
        default="generated_image.json",
        help="File having image response from endpoint.",
    )
    args, unknown = parser.parse_known_args()

    base64_str_to_image(args.response_file)
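
For reference, a minimal round-trip sketch of the response shape this script assumes: the file holds a JSON-encoded string that itself decodes to a list of rows with "prompt" and "generated_image" fields. The tiny white image and file name below are stand-ins, not part of the sample.

import base64
import io
import json

from PIL import Image

# Build a placeholder JPEG in memory to stand in for a generated image.
buf = io.BytesIO()
Image.new("RGB", (8, 8), "white").save(buf, format="JPEG")
rows = [{"prompt": "a photo of sks dog in a bucket",
         "generated_image": base64.b64encode(buf.getvalue()).decode("utf-8")}]
with open("generated_image.json", "w") as f:
    json.dump(json.dumps(rows), f)  # double-encoded, matching the json.load + json.loads pair above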
10 changes: 10 additions & 0 deletions cli/foundation-models/system/finetune/text-to-image/deploy.yaml
$schema: https://azuremlschemas.azureedge.net/latest/managedOnlineDeployment.schema.json
instance_type: Standard_NC6s_v3
instance_count: 1
liveness_probe:
  initial_delay: 180
  period: 180
  failure_threshold: 49
  timeout: 299
request_settings:
  request_timeout_ms: 90000
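
The probe delays and the 90000 ms request timeout are presumably this generous because loading a diffusion model into the container and serving a multi-image generation request can each take minutes on a single GPU; tighter defaults would mark the deployment unhealthy or time out valid requests.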
167 changes: 167 additions & 0 deletions
#!/bin/bash
set -x

# script inputs
registry_name="azureml"
subscription_id="<SUBSCRIPTION_ID>"
resource_group_name="<RESOURCE_GROUP>"
workspace_name="<WORKSPACE_NAME>"

cluster_name="sample-finetune-cluster-gpu"

# If the above compute cluster does not exist, it is created with the following VM size
cluster_sku="Standard_NC6s_v3"

# This is the number of GPUs in a single node of the selected 'vm_size' compute.
# Setting this to less than the number of GPUs will result in underutilized GPUs, taking longer to train.
# Setting this to more than the number of GPUs will result in an error.
gpus_per_node=1
# This is the number of nodes in cluster
instance_count=1

# HuggingFace model
huggingface_model_name="runwayml/stable-diffusion-v1-5"
# This is the foundation model for finetuning from azureml system registry
aml_registry_model_name="runwayml-stable-diffusion-v1-5"
model_label="latest"

version=$(date +%s)
finetuned_huggingface_model_name="runwayml-stable-diffusion-v1-5-dog-text-to-image"
huggingface_endpoint_name="text-to-image-dog-$version"
deployment_name="text2img-dog-mlflow-deploy"
deployment_sku="Standard_NC6s_v3"
request_file="request.json"
response_file="generated_image.json"

# finetuning job parameters
finetuning_pipeline_component="diffusers_text_to_image_dreambooth_pipeline"
# Training settings
process_count_per_instance=$gpus_per_node # set to the number of GPUs available in the compute
instance_count=$instance_count

# 1. Install dependencies
pip install azure-ai-ml==1.8.0
pip install azure-identity==1.13.0

# 2. Setup pre-requisites
az account set -s $subscription_id
workspace_info="--resource-group $resource_group_name --workspace-name $workspace_name"

# Check if the compute cluster $cluster_name exists, else create it
if az ml compute show --name $cluster_name $workspace_info
then
    echo "Compute cluster $cluster_name already exists"
else
    echo "Creating compute cluster $cluster_name"
    az ml compute create --name $cluster_name --type amlcompute --min-instances 0 --max-instances $instance_count --size $cluster_sku $workspace_info || {
        echo "Failed to create compute cluster $cluster_name"
        exit 1
    }
fi

# Check if the finetuning pipeline component exists
if ! az ml component show --name $finetuning_pipeline_component --label latest --registry-name $registry_name
then
    echo "Finetuning pipeline component $finetuning_pipeline_component does not exist"
    exit 1
fi

# 3. Check if the model exists in the registry
# Note: confirm that `az ml model show` works for registries outside the tenant (i.e., the system registry)
if ! az ml model show --name $aml_registry_model_name --label $model_label --registry-name $registry_name
then
    echo "Model $aml_registry_model_name:$model_label does not exist in registry $registry_name"
    exit 1
fi

# Get the latest model version
model_version=$(az ml model show --name $aml_registry_model_name --label $model_label --registry-name $registry_name --query version --output tsv)

# 4. Prepare data
# Git clone the DOG dataset
dataset_dir="dog-example"
dataset_url="https://datasets-server.huggingface.co/rows?dataset=diffusers%2Fdog-example&config=default&split=train&offset=0&length=100"
python prepare_data.py --url $dataset_url --dataset_dir $dataset_dir
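
# Note: the URL above is the HuggingFace datasets-server "rows" API; prepare_data.py
# (shipped alongside this script) is expected to download the images it references
# into $dataset_dir, which the directory check below verifies.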

# Check if the training data exists
if [ ! -d $dataset_dir ]; then
    echo "Training data directory $dataset_dir does not exist"
    exit 1
fi

# 5. Submit finetuning job using pipeline.yaml for a stable diffusion model

# To fine-tune a model directly from the HuggingFace hub, pass inputs.model_name instead of inputs.mlflow_model_path.path, e.g.:
# inputs.model_name=$huggingface_model_name

parent_job_name=$( az ml job create --file "./diffusers-dreambooth-dog-text-to-image.yaml" $workspace_info --query name -o tsv \
    --set jobs.huggingface_diffusers_model_finetune_job.component="azureml://registries/$registry_name/components/$finetuning_pipeline_component/labels/latest" \
    inputs.mlflow_model_path.path="azureml://registries/$registry_name/models/$aml_registry_model_name/versions/$model_version" \
    inputs.compute_model_import=$cluster_name inputs.process_count_per_instance=$process_count_per_instance \
    inputs.instance_data_dir.path=$dataset_dir \
    inputs.instance_count=$instance_count inputs.compute_finetune=$cluster_name) || {
    echo "Failed to submit finetuning job"
    exit 1
}

az ml job stream --name $parent_job_name $workspace_info || {
    echo "job stream failed"; exit 1;
}

# 6. Register the fine-tuned model in the workspace from the training job output
az ml model create --name $finetuned_huggingface_model_name --version $version --type mlflow_model \
    --path azureml://jobs/$parent_job_name/outputs/trained_model $workspace_info || {
    echo "model create in workspace failed"; exit 1;
}

# 7. Deploy the fine-tuned model to an endpoint
# Create online endpoint
az ml online-endpoint create --name $huggingface_endpoint_name $workspace_info || {
    echo "endpoint create failed"; exit 1;
}

# Deploy the registered model to the endpoint in the workspace
az ml online-deployment create --file ./deploy.yaml --name=$deployment_name $workspace_info --set \
    endpoint_name=$huggingface_endpoint_name model=azureml:$finetuned_huggingface_model_name:$version \
    instance_type=$deployment_sku || {
    echo "deployment create failed"; exit 1;
}

az ml online-endpoint update $workspace_info --name=$huggingface_endpoint_name --traffic="$deployment_name=100" || {
    echo "Failed to set all traffic to the new deployment"
    exit 1
}

# 8. Try a sample scoring request on the deployed model

read -r -d '' request_json << EOM
{
    "input_data": {"columns": ["prompt"], "index": [0], "data": ["a photo of sks dog in a bucket"]},
    "params": {
        "height": 512,
        "width": 512,
        "num_inference_steps": 50,
        "guidance_scale": 7.5,
        "negative_prompt": ["blurry; three legs"],
        "num_images_per_prompt": 2
    }
}
EOM

echo "$request_json" > $request_file

az ml online-endpoint invoke --name $huggingface_endpoint_name --request-file $request_file $workspace_info -o json > $response_file || {
    echo "endpoint invoke failed"; exit 1;
}

python base64_to_jpeg.py --response_file $response_file


# 9. Delete the endpoint
az ml online-endpoint delete --name $huggingface_endpoint_name $workspace_info --yes || {
    echo "endpoint delete failed"; exit 1;
}

# 10. Delete the request data file
rm $request_file
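
For notebook users, the same scoring call can be made with the azure-ai-ml SDK. A minimal sketch, assuming the placeholder subscription, resource group, workspace, and endpoint names below are filled in:

import json

from azure.ai.ml import MLClient
from azure.identity import DefaultAzureCredential

ml_client = MLClient(
    DefaultAzureCredential(),
    subscription_id="<SUBSCRIPTION_ID>",
    resource_group_name="<RESOURCE_GROUP>",
    workspace_name="<WORKSPACE_NAME>",
)

# invoke() returns the endpoint response as a string of JSON rows.
raw = ml_client.online_endpoints.invoke(
    endpoint_name="<ENDPOINT_NAME>",  # e.g. the text-to-image-dog-<version> endpoint above
    request_file="request.json",
)

# Wrap the string once more so base64_to_jpeg.py's json.load + json.loads pair matches.
with open("generated_image.json", "w") as f:
    json.dump(raw, f)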
118 changes: 118 additions & 0 deletions diffusers-dreambooth-dog-text-to-image.yaml
$schema: https://azuremlschemas.azureedge.net/latest/pipelineJob.schema.json
type: pipeline

experiment_name: AzureML-Train-Finetune-MultiModal-TextToImage-DreamBooth-Samples

inputs:
  # Model - specify the foundation model available in the azureml system registry
  mlflow_model_path:
    path: azureml://registries/azureml/models/stabilityai-stable-diffusion-2-1/labels/latest
    type: mlflow_model
  # model_name: stabilityai/stable-diffusion-2-1

  # Compute
  compute_model_import: sample-model-import-cluster
  compute_finetune: sample-finetune-cluster-gpu
  process_count_per_instance: 1
  instance_count: 1

  # Dataset
  instance_data_dir:
    path: ./dog-example
    type: uri_folder

outputs:
  # Map the output of the finetuning job to the output of the pipeline job so that the
  # fine-tuned model can easily be registered. Registration is required to deploy the
  # model to an online or batch endpoint.
  trained_model:
    type: mlflow_model

settings:
  force_rerun: true
  continue_on_step_failure: false
  default_compute: azureml:sample-finetune-cluster-gpu

jobs:
  huggingface_diffusers_model_finetune_job:
    type: pipeline
    component: azureml:diffusers_text_to_image_dreambooth_pipeline:latest
    inputs:
      # Compute
      compute_model_import: ${{parent.inputs.compute_model_import}}
      compute_finetune: ${{parent.inputs.compute_finetune}}
      instance_data_dir: ${{parent.inputs.instance_data_dir}}

      process_count_per_instance: ${{parent.inputs.process_count_per_instance}}
      instance_count: ${{parent.inputs.instance_count}}

      # Model import args
      download_from_source: False # True for downloading a model directly from HuggingFace
      model_family: HuggingFaceImage
      # Specify model_name instead of mlflow_model to use a model from the HuggingFace hub
      mlflow_model: ${{parent.inputs.mlflow_model_path}}
      # model_name: ${{parent.inputs.model_name}}

      # Instance prompt
      task_name: stable-diffusion-text-to-image
      instance_prompt: "\"A photo of a sks dog\""
      resolution: 512

      # Prior preservation loss
      with_prior_preservation: True
      # Class prompt - a prompt without the unique identifier, used to generate "class images" for prior preservation
      class_prompt: "\"a photo of dog\"" # Note that the inner double quotes must be escaped.
      num_class_images: 100 # Number of images to generate with the class prompt for prior preservation.
      # class_data_dir: None # Datastore URI of an existing uri_folder with class images, if you have one; the training job generates any additional images so that num_class_images are present in class_data_dir at training time.
      prior_generation_precision: fp32
      prior_loss_weight: 1.0
      sample_batch_size: 2 # Batch size for generating class images.

      # LoRA parameters
      # LoRA reduces the number of trainable parameters by learning pairs of rank-decomposition matrices while freezing the original weights. This vastly reduces the storage requirement for large models adapted to specific tasks, and enables efficient task-switching during deployment without introducing inference latency. LoRA also outperforms several other adaptation methods, including adapter, prefix-tuning, and full fine-tuning.
      apply_lora: True
      # lora_alpha: 128
      # lora_r: 16
      # lora_dropout: 0.0
      # tokenizer_max_length: 77

      # Text encoder
      pre_compute_text_embeddings: True
      train_text_encoder: False
      # text_encoder_type: CLIPTextModel
      # text_encoder_name: openai/clip-vit-base-patch32 # HuggingFace id of the text encoder.
      # text_encoder_use_attention_mask: False

      # UNet related
      # class_labels_conditioning: timesteps

      # Noise scheduler
      noise_scheduler_name: DDPMScheduler # Optional; defaults to the base model's scheduler. If the following scheduler-related parameters are not provided, they are taken from the model's scheduler config.
      # noise_scheduler_num_train_timesteps: 1000
      # noise_scheduler_variance_type: fixed_small
      # noise_scheduler_prediction_type: epsilon
      # noise_scheduler_timestep_spacing: leading
      # extra_noise_scheduler_args: "clip_sample_range=1.0; clip_sample=True" # Optional additional arguments passed to the noise scheduler, as semicolon-separated key-value pairs enclosed in double quotes.
      # offset_noise: False

      # Training related
      num_validation_images: 3 # Number of images to generate using instance_prompt. Images are stored in the output/checkpoint-* directories. Note that this increases the training time.
      number_of_workers: 3
      number_of_epochs: 15
      max_steps: -1
      training_batch_size: 3
      auto_find_batch_size: False
      learning_rate: 1e-4 # A lower learning rate is recommended when not fine-tuning with LoRA.
      # learning_rate_scheduler: warmup_linear
      # warmup_steps: 0
      # optimizer: adamw_hf
      # weight_decay: 0.0
      # gradient_accumulation_step: 1
      # max_grad_norm: 1.0
      precision: 32
      random_seed: 42
      logging_strategy: epoch
      # logging_steps: 500 # Number of update steps between two logs if logging_strategy='steps'.
      save_total_limit: -1 # Limit the number of saved checkpoints if you run into disk-space issues.
      save_as_mlflow_model: True

    outputs:
      mlflow_model_folder: ${{parent.outputs.trained_model}}
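
For intuition on with_prior_preservation and prior_loss_weight above, here is an illustrative PyTorch sketch of how DreamBooth combines the two loss terms. This is not the component's actual code, and the half-and-half batch layout is an assumption of the sketch:

import torch
import torch.nn.functional as F

def dreambooth_loss(model_pred: torch.Tensor, target: torch.Tensor,
                    prior_loss_weight: float = 1.0) -> torch.Tensor:
    # Assume each batch is assembled as [instance examples | class examples].
    pred_instance, pred_class = torch.chunk(model_pred, 2, dim=0)
    target_instance, target_class = torch.chunk(target, 2, dim=0)
    instance_loss = F.mse_loss(pred_instance, target_instance)  # learn the "sks dog" subject
    prior_loss = F.mse_loss(pred_class, target_class)           # preserve the generic "dog" class
    return instance_loss + prior_loss_weight * prior_loss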
