In [None]:
# Copyright 2026 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Vertex AI Model Garden - Flux with ComfyUI

<table><tbody><tr>
  <td style="text-align: center">
    <a href="https://console.cloud.google.com/vertex-ai/notebooks/deploy-notebook?download_url=https://raw.githubusercontent.com/GoogleCloudPlatform/vertex-ai-samples/main/notebooks/community/model_garden/model_garden_pytorch_flux_comfyui_workbench.ipynb">
      <img alt="Workbench logo" src="https://lh3.googleusercontent.com/UiNooY4LUgW_oTvpsNhPpQzsstV5W8F7rYgxgGBD85cWJoLmrOzhVs_ksK_vgx40SHs7jCqkTkCk=e14-rj-sc0xffffff-h130-w32" width="32px"><br> Run in Workbench
    </a>
  </td>
  <td style="text-align: center">
    <a href="https://console.cloud.google.com/vertex-ai/colab/import/https:%2F%2Fraw.githubusercontent.com%2FGoogleCloudPlatform%2Fvertex-ai-samples%2Fmain%2Fnotebooks%2Fcommunity%2Fmodel_garden%2Fmodel_garden_pytorch_flux_comfyui_workbench.ipynb">
      <img alt="Google Cloud Colab Enterprise logo" src="https://lh3.googleusercontent.com/JmcxdQi-qOpctIvWKgPtrzZdJJK-J3sWE1RsfjZNwshCFgE_9fULcNpuXYTilIR2hjwN" width="32px"><br> Run in Colab Enterprise
    </a>
  </td>
  <td style="text-align: center">
    <a href="https://github.com/GoogleCloudPlatform/vertex-ai-samples/blob/main/notebooks/community/model_garden/model_garden_pytorch_flux_comfyui_workbench.ipynb">
      <img alt="GitHub logo" src="https://github.githubassets.com/assets/GitHub-Mark-ea2971cee799.png" width="32px"><br> View on GitHub
    </a>
  </td>
</tr></tbody></table>

## Overview

This notebook demonstrates a powerful, end-to-end workflow for generative AI on Google Cloud. It covers both **deploying** a high-performance model and **using** it in a popular creative tool.

We will focus on the [FLUX.1 [schnell]](https://huggingface.co/black-forest-labs/FLUX.1-schnell) model, a fast, distilled text-to-image model.

### Objectives
This tutorial is split into two main parts that work together:

1.  **Part 1: Deploy a FLUX Model to a Vertex AI Endpoint**
    * You will upload the pre-trained FLUX model to the **Vertex AI Model Registry**.
    * You will then deploy the model to a scalable **Vertex AI Endpoint** for real-time online prediction.

2.  **Part 2: Run ComfyUI with a Custom Vertex AI Node**
    * You will run a custom-built Docker container that launches the **ComfyUI** web interface.
    * This ComfyUI instance includes a special **`VertexAIEndpointNode`** that allows you to connect your workflow directly to the endpoint you created in Part 1.

### File a bug

File a bug on [GitHub](https://github.com/GoogleCloudPlatform/vertex-ai-samples/issues/new) if you encounter any issue with the notebook.

### Costs

This tutorial uses billable components of Google Cloud:

* Vertex AI
* Cloud Storage

Learn about [Vertex AI pricing](https://cloud.google.com/vertex-ai/pricing), [Cloud Storage pricing](https://cloud.google.com/storage/pricing), and use the [Pricing Calculator](https://cloud.google.com/products/calculator/) to generate a cost estimate based on your projected usage.

## Run the notebook

In [None]:
# @title Setup Google Cloud project

# @markdown 1. [Make sure that billing is enabled for your project](https://cloud.google.com/billing/docs/how-to/modify-project).

# @markdown 2. **[Optional]** Set region. If not set, the region will be set automatically according to Colab Enterprise environment.

REGION = ""  # @param {type:"string"}

# @markdown 3. If you want to run predictions with A100 80GB or H100 GPUs, we recommend using the regions listed below. **NOTE:** Make sure you have associated quota in selected regions. Click the links to see your current quota for each GPU type: [Nvidia A100 80GB](https://console.cloud.google.com/iam-admin/quotas?metric=aiplatform.googleapis.com%2Fcustom_model_serving_nvidia_a100_80gb_gpus), [Nvidia H100 80GB](https://console.cloud.google.com/iam-admin/quotas?metric=aiplatform.googleapis.com%2Fcustom_model_serving_nvidia_h100_gpus). You can request for quota following the instructions at ["Request a higher quota"](https://cloud.google.com/docs/quota/view-manage#requesting_higher_quota).

# @markdown | Machine Type | Accelerator Type | Recommended Regions |
# @markdown | ----------- | ----------- | ----------- |
# @markdown | a2-ultragpu-1g | 1 NVIDIA_A100_80GB | us-central1, us-east4, europe-west4, asia-southeast1, us-east4 |
# @markdown | a3-highgpu-2g | 2 NVIDIA_H100_80GB | us-west1, asia-southeast1, europe-west4 |
# @markdown | a3-highgpu-4g | 4 NVIDIA_H100_80GB | us-west1, asia-southeast1, europe-west4 |
# @markdown | a3-highgpu-8g | 8 NVIDIA_H100_80GB | us-central1, europe-west4, us-west1, asia-southeast1 |

# Upgrade Vertex AI SDK.
! pip3 install --upgrade --quiet 'google-cloud-aiplatform==1.103.0'

import importlib
import os

from google.cloud import aiplatform

if os.environ.get("VERTEX_PRODUCT") != "COLAB_ENTERPRISE":
    ! pip install --upgrade tensorflow
! git clone https://github.com/GoogleCloudPlatform/vertex-ai-samples.git

common_util = importlib.import_module(
    "vertex-ai-samples.notebooks.community.model_garden.docker_source_codes.notebook_util.common_util"
)

models, endpoints = {}, {}
LABEL = "xdit_gpu"


# Get the default cloud project id.
PROJECT_ID = os.environ["GOOGLE_CLOUD_PROJECT"]

# Get the default region for launching jobs.
if not REGION:
    REGION = os.environ["GOOGLE_CLOUD_REGION"]

# Initialize Vertex AI API.
print("Initializing Vertex AI API.")
aiplatform.init(project=PROJECT_ID, location=REGION)

! gcloud config set project $PROJECT_ID
import vertexai

vertexai.init(
    project=PROJECT_ID,
    location=REGION,
)

In [None]:
# @title Deploy Flux endpoints
# @markdown Note: If you already have Flux endpoints deployed, you can skip this section.

In [None]:
# @title Set the model parameters

# Publisher model reference used by the Model Garden SDK deployment.
base_model_name = "flux.1-schnell"
PUBLISHER_MODEL_NAME = (
    f"publishers/black-forest-labs/models/flux1-schnell@{base_model_name}"
)

# Model location and task handed to the serving container by the customized
# deployment.
MODEL_ID = "gs://vertex-model-garden-restricted-us/black-forest-labs/FLUX.1-schnell"
TASK = "text-to-image"

accelerator_type = "NVIDIA_TESLA_A100"  # @param ["NVIDIA_TESLA_A100", "NVIDIA_A100_80GB", "NVIDIA_H100_80GB"]

# Each supported accelerator maps to (machine type, accelerator count).
_ACCELERATOR_SPECS = {
    "NVIDIA_TESLA_A100": ("a2-highgpu-1g", 1),
    "NVIDIA_A100_80GB": ("a2-ultragpu-1g", 1),
    "NVIDIA_H100_80GB": ("a3-highgpu-2g", 2),
}

# A single accelerator is the baseline; the lookup below overrides it for
# machine types that ship with more than one GPU.
accelerator_count = 1
_selected_spec = _ACCELERATOR_SPECS.get(accelerator_type)
if _selected_spec is None:
    raise ValueError(f"Unsupported accelerator type: {accelerator_type}")
machine_type, accelerator_count = _selected_spec

In [None]:
# @title [Option 1] Deploy with Model Garden SDK

# @markdown Set `use_dedicated_endpoint` to False if you don't want to use [dedicated endpoint](https://cloud.google.com/vertex-ai/docs/general/deployment#create-dedicated-endpoint).
use_dedicated_endpoint = True  # @param {type:"boolean"}
# @markdown Deploy with Gen AI model-centric SDK. This section uploads the prebuilt model to Model Registry and deploys it to a Vertex AI Endpoint. It takes 15 minutes to 1 hour to finish depending on the size of the model. See [use open models with Vertex AI](https://cloud.google.com/vertex-ai/generative-ai/docs/open-models/use-open-models) for documentation on other use cases.
from vertexai import model_garden

# Resolve the publisher model, then deploy it with the machine configuration
# chosen in the "Set the model parameters" cell.
model = model_garden.OpenModel(PUBLISHER_MODEL_NAME)
endpoint = model.deploy(
    accept_eula=True,  # Accept the End User License Agreement (EULA) on the model card before deploy. Otherwise, the deployment will be forbidden.
    use_dedicated_endpoint=use_dedicated_endpoint,
    accelerator_count=accelerator_count,
    accelerator_type=accelerator_type,
    machine_type=machine_type,
)

# Register the endpoint under LABEL so the clean-up cell can delete it later.
endpoints[LABEL] = endpoint

In [None]:
# @title [Option 2] Deploy with customized configs

# @markdown This section uploads the [black-forest-labs/FLUX.1-schnell](https://huggingface.co/black-forest-labs/FLUX.1-schnell) model to Model Registry and deploys it to an Endpoint using the accelerator selected in the "Set the model parameters" cell.

# @markdown The deployment takes ~15 minutes to finish.

# @markdown Set `use_dedicated_endpoint` to False if you don't want to use [dedicated endpoint](https://cloud.google.com/vertex-ai/docs/general/deployment#create-dedicated-endpoint).
use_dedicated_endpoint = True  # @param {type:"boolean"}

# The pre-built serving docker image. It contains serving scripts and models.
# It is used as the serving container image when the model is uploaded.
SERVE_DOCKER_URI = "us-docker.pkg.dev/deeplearning-platform-release/vertex-model-garden/xdit-serve.cu125.0-2.ubuntu2204.py310"


def deploy_model(
    model_id: str,
    task: str,
    machine_type: str,
    accelerator_type: str,
    accelerator_count: int,
    use_dedicated_endpoint: bool,
):
    """Create a Vertex AI Endpoint and deploy the specified model to the endpoint.

    The accelerator quota is checked first, then an endpoint is created, the
    model is uploaded with the xDiT serving container, and finally the model
    is deployed to the endpoint.

    Args:
        model_id: Model identifier. It is passed to the serving container as
            `MODEL_ID` and also used as the display name of the uploaded model
            (with an `-endpoint` suffix for the endpoint).
        task: Task name passed to the serving container as `TASK`.
        machine_type: Machine type used for the deployment.
        accelerator_type: Accelerator type used for the quota check and the
            deployment.
        accelerator_count: Number of accelerators. Also passed to the serving
            container as `N_GPUS`.
        use_dedicated_endpoint: Whether the endpoint is created with
            `dedicated_endpoint_enabled` set.

    Returns:
        A `(model, endpoint)` tuple holding the uploaded model and the
        endpoint it was deployed to.
    """
    common_util.check_quota(
        project_id=PROJECT_ID,
        region=REGION,
        accelerator_type=accelerator_type,
        accelerator_count=accelerator_count,
        is_for_training=False,
    )

    model_name = model_id

    endpoint = aiplatform.Endpoint.create(
        display_name=f"{model_name}-endpoint",
        dedicated_endpoint_enabled=use_dedicated_endpoint,
    )
    serving_env = {
        "MODEL_ID": model_id,
        "TASK": task,
        "DEPLOY_SOURCE": "notebook",
    }

    # xDiT serving parameters
    serving_env["USE_TORCH_COMPILE"] = "true"
    # Environment variable values are strings; convert explicitly so every
    # entry in `serving_env` has the same type.
    serving_env["N_GPUS"] = str(accelerator_count)
    if accelerator_count == 2:
        serving_env["RING_DEGREE"] = "2"

    model = aiplatform.Model.upload(
        display_name=model_name,
        serving_container_image_uri=SERVE_DOCKER_URI,
        serving_container_ports=[7080],
        serving_container_predict_route="/predict",
        serving_container_health_route="/health",
        serving_container_environment_variables=serving_env,
        model_garden_source_model_name="publishers/black-forest-labs/models/flux1-schnell",
    )

    model.deploy(
        endpoint=endpoint,
        machine_type=machine_type,
        accelerator_type=accelerator_type,
        accelerator_count=accelerator_count,
        deploy_request_timeout=1800,
        system_labels={
            # Name of this notebook, matching the file name in the header links.
            "NOTEBOOK_NAME": "model_garden_pytorch_flux_comfyui_workbench.ipynb",
            "NOTEBOOK_ENVIRONMENT": common_util.get_deploy_source(),
        },
    )
    return model, endpoint


# Deploy with the settings from the parameter cells and register both
# resources under LABEL so the clean-up cell can delete them later.
deploy_args = dict(
    model_id=MODEL_ID,
    task=TASK,
    machine_type=machine_type,
    accelerator_type=accelerator_type,
    accelerator_count=accelerator_count,
    use_dedicated_endpoint=use_dedicated_endpoint,
)
models[LABEL], endpoints[LABEL] = deploy_model(**deploy_args)

print("endpoint_name:", endpoints[LABEL].name)

In [None]:
# @title ðŸš€ Run ComfyUI
# @markdown Run this cell to start the ComfyUI server.
# @markdown
# @markdown * It will create `comfyui_output` and `comfyui_input` folders in your `/home/jupyter/` directory.
# @markdown * **All generated outputs will be saved to `/home/jupyter/comfyui_output`.**
# @markdown * **Note:** The above folder paths can be changed in the **Configuration** section below.
# @markdown * Click the link that appears at the end to open the UI.

# @markdown ---
# @markdown **Configuration:**
# @markdown **[Optional]** Set region. If not set, the region will be set automatically set to us-central1

REGION = "us-central1"  # @param {type:"string"}
COMFY_UI_URL = "us-docker.pkg.dev/deeplearning-platform-release/vertex-model-garden/comfyui.cu125.0-1.ubuntu2204.py310"
PORT = 7080  # @param {type:"integer"}
# @markdown ---

import os

import requests

OUTPUT_DIR = "/home/jupyter/comfyui_output"  # @param {type:"string"}
INPUT_DIR = "/home/jupyter/comfyui_input"  # @param {type:"string"}

!mkdir -p {OUTPUT_DIR}
!mkdir -p {INPUT_DIR}

print(f"Pulling the ComfyUI image: {COMFY_UI_URL}...")
!docker pull {COMFY_UI_URL}
PROJECT_ID = os.environ["GOOGLE_CLOUD_PROJECT"]

if not PROJECT_ID or not REGION:
    print(
        "WARNING: Could not automatically find GOOGLE_PROJECT_ID or GOOGLE_CLOUD_REGION."
    )
    print("Please set them manually if your Vertex AI nodes fail.")

docker_command = f"""
docker run -d --rm --gpus all \
    -p {PORT}:8080 \
    -e GOOGLE_PROJECT_ID={PROJECT_ID} \
    -e GOOGLE_CLOUD_REGION={REGION} \
    -v {OUTPUT_DIR}:/app/output \
    -v {INPUT_DIR}:/app/input \
    {COMFY_UI_URL}
"""

print("Starting ComfyUI container...")
!{docker_command}

proxy_url = None
proxy_port = PORT
try:
    headers = {"Metadata-Flavor": "Google"}
    url = "http://metadata.google.internal/computeMetadata/v1/instance/attributes/proxy-url"
    response = requests.get(url, headers=headers, timeout=5)
    response.raise_for_status()
    proxy_url = response.text.strip()
except requests.exceptions.RequestException:
    pass

if proxy_url:
    comfyui_url = f"https://{proxy_url}/proxy/{proxy_port}/?authuser=0"
else:
    comfyui_url = "Error: Could not retrieve PROXY_URL from metadata."

message = "Click the link below to open the interface in a new tab:"

print("====================================================================")
print("âœ… ComfyUI is starting up...This process may take up to 2 minutes")
print(message)
print("")
print(f"  >> {comfyui_url} <<")
print("")
print("====================================================================")

In [None]:
# @title Clean up resources
# @markdown To kill and remove the ComfyUI docker image from the runtime, check `stop_comfyui`.
stop_comfyui = True  # @param {type:"boolean"}
# @markdown  Delete the experiment models and endpoints to recycle the resources
# @markdown  and avoid unnecessary continuous charges that may incur.

# Undeploy model and delete endpoint.
for endpoint in endpoints.values():
    endpoint.delete(force=True)

# Delete models.
for model in models.values():
    model.delete()

if stop_comfyui:
    print("Stopping and removing ComfyUI container and image...")
    COMFY_UI_URL = "us-docker.pkg.dev/deeplearning-platform-release/vertex-model-garden/comfyui.cu125.0-1.ubuntu2204.py310"
    !CONTAINER_IDS=$(docker ps -q -f ancestor={COMFY_UI_URL}) && if [ ! -z "$CONTAINER_IDS" ]; then docker stop $CONTAINER_IDS; fi
    !docker rmi {COMFY_UI_URL}