CLI and notebook examples for text to image diffusion models (#3154)
* CLI and notebook example for dreambooth finetuning

* notebook update pipeline

* notebook

---------

Co-authored-by: gaurav <rajguru@microsoft.com>
Co-authored-by: grajguru <grajguru@microsoft.com>
3 people committed May 7, 2024
1 parent f8fe695 commit 673c005
Showing 6 changed files with 439 additions and 13 deletions.
51 changes: 51 additions & 0 deletions base64_to_jpeg.py
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.

# Read base64 encoded image from txt file and convert it to image file

import argparse
import re
import json
import io
import base64
from PIL import Image


INPUT_PROMPT_COLUMN = "prompt"
OUTPUT_IMAGE_COLUMN = "generated_image"


def base64_str_to_image(response_file: str):
    """
    Read the response file from the online endpoint, extract the base64-encoded images, and save them as image files.

    :param response_file: Path to the JSON file holding the response received from the endpoint.
    :type response_file: str
    :return: None
    """
    with open(response_file) as f:
        json_str = json.load(f)

    # The response is a JSON-encoded string that itself contains the list of rows.
    json_obj = json.loads(json_str)
    for i, obj in enumerate(json_obj):
        generated_image = obj[OUTPUT_IMAGE_COLUMN]
        img = Image.open(io.BytesIO(base64.b64decode(generated_image)))
        text_prompt = ""
        if INPUT_PROMPT_COLUMN in obj:
            text_prompt = obj[INPUT_PROMPT_COLUMN].strip()
        # Build a safe file name from the prompt, capped at 50 characters.
        text_prompt = f"Img_{i}_" + re.sub(r"[^a-zA-Z0-9 ]+", "", text_prompt)
        text_prompt = text_prompt[:50]
        img.save(text_prompt + ".jpg", "JPEG")


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--response_file",
        type=str,
        default="generated_image.json",
        help="File having image response from endpoint.",
    )
    args, unknown = parser.parse_known_args()

    base64_str_to_image(args.response_file)
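
For reference, a minimal round-trip sketch of the response shape this script assumes: the file holds a JSON-encoded string that itself decodes to a list of rows with "prompt" and "generated_image" fields. The tiny white image and file name below are stand-ins, not part of the sample.

import base64
import io
import json

from PIL import Image

# Build a placeholder JPEG in memory to stand in for a generated image.
buf = io.BytesIO()
Image.new("RGB", (8, 8), "white").save(buf, format="JPEG")
rows = [{"prompt": "a photo of sks dog in a bucket",
         "generated_image": base64.b64encode(buf.getvalue()).decode("utf-8")}]
with open("generated_image.json", "w") as f:
    json.dump(json.dumps(rows), f)  # double-encoded, matching the json.load + json.loads pair above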
10 changes: 10 additions & 0 deletions cli/foundation-models/system/finetune/text-to-image/deploy.yaml
$schema: https://azuremlschemas.azureedge.net/latest/managedOnlineDeployment.schema.json
instance_type: Standard_NC6s_v3
instance_count: 1
liveness_probe:
  initial_delay: 180
  period: 180
  failure_threshold: 49
  timeout: 299
request_settings:
  request_timeout_ms: 90000
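
The probe delays and the 90000 ms request timeout are presumably this generous because loading a diffusion model into the container and serving a multi-image generation request can each take minutes on a single GPU; tighter defaults would mark the deployment unhealthy or time out valid requests.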
167 changes: 167 additions & 0 deletions
#!/bin/bash
set -x

# script inputs
registry_name="azureml"
subscription_id="<SUBSCRIPTION_ID>"
resource_group_name="<RESOURCE_GROUP>"
workspace_name="<WORKSPACE_NAME>"

cluster_name="sample-finetune-cluster-gpu"

# If the above compute cluster does not exist, it is created with the following VM size
cluster_sku="Standard_NC6s_v3"

# This is the number of GPUs in a single node of the selected 'vm_size' compute.
# Setting this to less than the number of GPUs will result in underutilized GPUs, taking longer to train.
# Setting this to more than the number of GPUs will result in an error.
gpus_per_node=1
# This is the number of nodes in cluster
instance_count=1

# HuggingFace model
huggingface_model_name="runwayml/stable-diffusion-v1-5"
# This is the foundation model for finetuning from azureml system registry
aml_registry_model_name="runwayml-stable-diffusion-v1-5"
model_label="latest"

version=$(date +%s)
finetuned_huggingface_model_name="runwayml-stable-diffusion-v1-5-dog-text-to-image"
huggingface_endpoint_name="text-to-image-dog-$version"
deployment_name="text2img-dog-mlflow-deploy"
deployment_sku="Standard_NC6s_v3"
request_file="request.json"
response_file="generated_image.json"

# finetuning job parameters
finetuning_pipeline_component="diffusers_text_to_image_dreambooth_pipeline"
# Training settings
process_count_per_instance=$gpus_per_node # set to the number of GPUs available in the compute
instance_count=$instance_count

# 1. Install dependencies
pip install azure-ai-ml==1.8.0
pip install azure-identity==1.13.0

# 2. Setup pre-requisites
az account set -s $subscription_id
workspace_info="--resource-group $resource_group_name --workspace-name $workspace_name"

# Check if the compute cluster $cluster_name exists, else create it
if az ml compute show --name $cluster_name $workspace_info
then
    echo "Compute cluster $cluster_name already exists"
else
    echo "Creating compute cluster $cluster_name"
    az ml compute create --name $cluster_name --type amlcompute --min-instances 0 --max-instances $instance_count --size $cluster_sku $workspace_info || {
        echo "Failed to create compute cluster $cluster_name"
        exit 1
    }
fi

# Check if the finetuning pipeline component exists
if ! az ml component show --name $finetuning_pipeline_component --label latest --registry-name $registry_name
then
    echo "Finetuning pipeline component $finetuning_pipeline_component does not exist"
    exit 1
fi

# 3. Check if the model exists in the registry
# Note: confirm that `az ml model show` works for registries outside the tenant (i.e., the system registry)
if ! az ml model show --name $aml_registry_model_name --label $model_label --registry-name $registry_name
then
    echo "Model $aml_registry_model_name:$model_label does not exist in registry $registry_name"
    exit 1
fi

# Get the latest model version
model_version=$(az ml model show --name $aml_registry_model_name --label $model_label --registry-name $registry_name --query version --output tsv)

# 4. Prepare data
# Git clone the DOG dataset
dataset_dir="dog-example"
dataset_url="https://datasets-server.huggingface.co/rows?dataset=diffusers%2Fdog-example&config=default&split=train&offset=0&length=100"
python prepare_data.py --url $dataset_url --dataset_dir $dataset_dir
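
# Note: the URL above is the HuggingFace datasets-server "rows" API; prepare_data.py
# (shipped alongside this script) is expected to download the images it references
# into $dataset_dir, which the directory check below verifies.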

# Check if the training data exists
if [ ! -d $dataset_dir ]; then
    echo "Training data directory $dataset_dir does not exist"
    exit 1
fi

# 5. Submit finetuning job using pipeline.yaml for a stable diffusion model

# To fine-tune a model directly from the HuggingFace hub, pass inputs.model_name instead of inputs.mlflow_model_path.path, e.g.:
# inputs.model_name=$huggingface_model_name

parent_job_name=$( az ml job create --file "./diffusers-dreambooth-dog-text-to-image.yaml" $workspace_info --query name -o tsv \
    --set jobs.huggingface_diffusers_model_finetune_job.component="azureml://registries/$registry_name/components/$finetuning_pipeline_component/labels/latest" \
    inputs.mlflow_model_path.path="azureml://registries/$registry_name/models/$aml_registry_model_name/versions/$model_version" \
    inputs.compute_model_import=$cluster_name inputs.process_count_per_instance=$process_count_per_instance \
    inputs.instance_data_dir.path=$dataset_dir \
    inputs.instance_count=$instance_count inputs.compute_finetune=$cluster_name) || {
    echo "Failed to submit finetuning job"
    exit 1
}

az ml job stream --name $parent_job_name $workspace_info || {
    echo "job stream failed"; exit 1;
}

# 6. Register the fine-tuned model in the workspace from the training job output
az ml model create --name $finetuned_huggingface_model_name --version $version --type mlflow_model \
    --path azureml://jobs/$parent_job_name/outputs/trained_model $workspace_info || {
    echo "model create in workspace failed"; exit 1;
}

# 7. Deploy the fine-tuned model to an endpoint
# Create online endpoint
az ml online-endpoint create --name $huggingface_endpoint_name $workspace_info || {
    echo "endpoint create failed"; exit 1;
}

# Deploy the registered model to the endpoint in the workspace
az ml online-deployment create --file ./deploy.yaml --name=$deployment_name $workspace_info --set \
    endpoint_name=$huggingface_endpoint_name model=azureml:$finetuned_huggingface_model_name:$version \
    instance_type=$deployment_sku || {
    echo "deployment create failed"; exit 1;
}

az ml online-endpoint update $workspace_info --name=$huggingface_endpoint_name --traffic="$deployment_name=100" || {
    echo "Failed to set all traffic to the new deployment"
    exit 1
}

# 8. Try a sample scoring request on the deployed model

read -r -d '' request_json << EOM
{
    "input_data": {"columns": ["prompt"], "index": [0], "data": ["a photo of sks dog in a bucket"]},
    "params": {
        "height": 512,
        "width": 512,
        "num_inference_steps": 50,
        "guidance_scale": 7.5,
        "negative_prompt": ["blurry; three legs"],
        "num_images_per_prompt": 2
    }
}
EOM

echo "$request_json" > $request_file

az ml online-endpoint invoke --name $huggingface_endpoint_name --request-file $request_file $workspace_info -o json > $response_file || {
    echo "endpoint invoke failed"; exit 1;
}

python base64_to_jpeg.py --response_file $response_file


# 9. Delete the endpoint
az ml online-endpoint delete --name $huggingface_endpoint_name $workspace_info --yes || {
    echo "endpoint delete failed"; exit 1;
}

# 10. Delete the request data file
rm $request_file
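
For notebook users, the same scoring call can be made with the azure-ai-ml SDK. A minimal sketch, assuming the placeholder subscription, resource group, workspace, and endpoint names below are filled in:

import json

from azure.ai.ml import MLClient
from azure.identity import DefaultAzureCredential

ml_client = MLClient(
    DefaultAzureCredential(),
    subscription_id="<SUBSCRIPTION_ID>",
    resource_group_name="<RESOURCE_GROUP>",
    workspace_name="<WORKSPACE_NAME>",
)

# invoke() returns the endpoint response as a string of JSON rows.
raw = ml_client.online_endpoints.invoke(
    endpoint_name="<ENDPOINT_NAME>",  # e.g. the text-to-image-dog-<version> endpoint above
    request_file="request.json",
)

# Wrap the string once more so base64_to_jpeg.py's json.load + json.loads pair matches.
with open("generated_image.json", "w") as f:
    json.dump(raw, f)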
118 changes: 118 additions & 0 deletions diffusers-dreambooth-dog-text-to-image.yaml
$schema: https://azuremlschemas.azureedge.net/latest/pipelineJob.schema.json
type: pipeline

experiment_name: AzureML-Train-Finetune-MultiModal-TextToImage-DreamBooth-Samples

inputs:
  # Model - specify the foundation model available in the azureml system registry
  mlflow_model_path:
    path: azureml://registries/azureml/models/stabilityai-stable-diffusion-2-1/labels/latest
    type: mlflow_model
  # model_name: stabilityai/stable-diffusion-2-1

  # Compute
  compute_model_import: sample-model-import-cluster
  compute_finetune: sample-finetune-cluster-gpu
  process_count_per_instance: 1
  instance_count: 1

  # Dataset
  instance_data_dir:
    path: ./dog-example
    type: uri_folder

outputs:
  # Map the output of the finetuning job to the output of the pipeline job so that the
  # fine-tuned model can easily be registered. Registration is required to deploy the
  # model to an online or batch endpoint.
  trained_model:
    type: mlflow_model

settings:
  force_rerun: true
  continue_on_step_failure: false
  default_compute: azureml:sample-finetune-cluster-gpu

jobs:
  huggingface_diffusers_model_finetune_job:
    type: pipeline
    component: azureml:diffusers_text_to_image_dreambooth_pipeline:latest
    inputs:
      # Compute
      compute_model_import: ${{parent.inputs.compute_model_import}}
      compute_finetune: ${{parent.inputs.compute_finetune}}
      instance_data_dir: ${{parent.inputs.instance_data_dir}}

      process_count_per_instance: ${{parent.inputs.process_count_per_instance}}
      instance_count: ${{parent.inputs.instance_count}}

      # Model import args
      download_from_source: False # True for downloading a model directly from HuggingFace
      model_family: HuggingFaceImage
      # Specify model_name instead of mlflow_model to use a model from the HuggingFace hub
      mlflow_model: ${{parent.inputs.mlflow_model_path}}
      # model_name: ${{parent.inputs.model_name}}

      # Instance prompt
      task_name: stable-diffusion-text-to-image
      instance_prompt: "\"A photo of a sks dog\""
      resolution: 512

      # Prior preservation loss
      with_prior_preservation: True
      # Class prompt - a prompt without the unique identifier, used to generate "class images" for prior preservation
      class_prompt: "\"a photo of dog\"" # Note that the inner double quotes must be escaped.
      num_class_images: 100 # Number of images to generate with the class prompt for prior preservation.
      # class_data_dir: None # Datastore URI of an existing uri_folder with class images, if you have one; the training job generates any additional images so that num_class_images are present in class_data_dir at training time.
      prior_generation_precision: fp32
      prior_loss_weight: 1.0
      sample_batch_size: 2 # Batch size for generating class images.

      # LoRA parameters
      # LoRA reduces the number of trainable parameters by learning pairs of rank-decomposition matrices while freezing the original weights. This vastly reduces the storage requirement for large models adapted to specific tasks, and enables efficient task-switching during deployment without introducing inference latency. LoRA also outperforms several other adaptation methods, including adapter, prefix-tuning, and full fine-tuning.
      apply_lora: True
      # lora_alpha: 128
      # lora_r: 16
      # lora_dropout: 0.0
      # tokenizer_max_length: 77

      # Text encoder
      pre_compute_text_embeddings: True
      train_text_encoder: False
      # text_encoder_type: CLIPTextModel
      # text_encoder_name: openai/clip-vit-base-patch32 # HuggingFace id of the text encoder.
      # text_encoder_use_attention_mask: False

      # UNet related
      # class_labels_conditioning: timesteps

      # Noise scheduler
      noise_scheduler_name: DDPMScheduler # Optional; defaults to the base model's scheduler. If the following scheduler-related parameters are not provided, they are taken from the model's scheduler config.
      # noise_scheduler_num_train_timesteps: 1000
      # noise_scheduler_variance_type: fixed_small
      # noise_scheduler_prediction_type: epsilon
      # noise_scheduler_timestep_spacing: leading
      # extra_noise_scheduler_args: "clip_sample_range=1.0; clip_sample=True" # Optional additional arguments passed to the noise scheduler, as semicolon-separated key-value pairs enclosed in double quotes.
      # offset_noise: False

      # Training related
      num_validation_images: 3 # Number of images to generate using instance_prompt. Images are stored in the output/checkpoint-* directories. Note that this increases the training time.
      number_of_workers: 3
      number_of_epochs: 15
      max_steps: -1
      training_batch_size: 3
      auto_find_batch_size: False
      learning_rate: 1e-4 # A lower learning rate is recommended when not fine-tuning with LoRA.
      # learning_rate_scheduler: warmup_linear
      # warmup_steps: 0
      # optimizer: adamw_hf
      # weight_decay: 0.0
      # gradient_accumulation_step: 1
      # max_grad_norm: 1.0
      precision: 32
      random_seed: 42
      logging_strategy: epoch
      # logging_steps: 500 # Number of update steps between two logs if logging_strategy='steps'.
      save_total_limit: -1 # Limit the number of saved checkpoints if you run into disk-space issues.
      save_as_mlflow_model: True

    outputs:
      mlflow_model_folder: ${{parent.outputs.trained_model}}
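
For intuition on with_prior_preservation and prior_loss_weight above, here is an illustrative PyTorch sketch of how DreamBooth combines the two loss terms. This is not the component's actual code, and the half-and-half batch layout is an assumption of the sketch:

import torch
import torch.nn.functional as F

def dreambooth_loss(model_pred: torch.Tensor, target: torch.Tensor,
                    prior_loss_weight: float = 1.0) -> torch.Tensor:
    # Assume each batch is assembled as [instance examples | class examples].
    pred_instance, pred_class = torch.chunk(model_pred, 2, dim=0)
    target_instance, target_class = torch.chunk(target, 2, dim=0)
    instance_loss = F.mse_loss(pred_instance, target_instance)  # learn the "sks dog" subject
    prior_loss = F.mse_loss(pred_class, target_class)           # preserve the generic "dog" class
    return instance_loss + prior_loss_weight * prior_loss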
