In [None]:
# Copyright 2024 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Vertex AI Model Garden - Hugging Face PyTorch Local Inference

<table align="left">
  <td style="text-align: center">
    <a href="https://console.cloud.google.com/vertex-ai/colab/import/https:%2F%2Fraw.githubusercontent.com%2FGoogleCloudPlatform%2Fvertex-ai-samples%2Fmain%2Fnotebooks%2Fcommunity%2Fmodel_garden%2Fmodel_garden_pytorch_huggingface_local_inference.ipynb">
      <img alt="Google Cloud Colab Enterprise logo" src="https://lh3.googleusercontent.com/JmcxdQi-qOpctIvWKgPtrzZdJJK-J3sWE1RsfjZNwshCFgE_9fULcNpuXYTilIR2hjwN" width="32px"><br> Run in Colab Enterprise
    </a>
  </td>
  <td>
    <a href="https://github.com/GoogleCloudPlatform/vertex-ai-samples/blob/main/notebooks/community/model_garden/model_garden_pytorch_huggingface_local_inference.ipynb">
      <img src="https://cloud.google.com/ml-engine/images/github-logo-32px.png" alt="GitHub logo">
      View on GitHub
    </a>
  </td>
</table>

## Overview

This notebook demonstrates how to run a Hugging Face inference pipeline locally in a Colab notebook.

### Objective

- Run inference in a local pipeline.

### Costs

This tutorial uses billable components of Google Cloud:

* Vertex AI
* Colab Enterprise

Learn about [Vertex AI pricing](https://cloud.google.com/vertex-ai/pricing) and [Colab Enterprise pricing](https://cloud.google.com/colab/pricing), and use the [Pricing Calculator](https://cloud.google.com/products/calculator/) to generate a cost estimate based on your projected usage.

# Run the examples

In [None]:
# @title Check if the Colab VM has GPU

# @markdown **Important:** This notebook requires a GPU runtime to function correctly.
# @markdown The default Colab runtime does not have a GPU and will not work. Follow instructions below to create a GPU runtime:
# @markdown   1. [Create a runtime template](https://cloud.google.com/vertex-ai/docs/colab/create-runtime-template#create)
# @markdown   2. [Create a runtime](https://cloud.google.com/vertex-ai/docs/colab/create-runtime#create) \
# @markdown
# @markdown Once you have created a GPU runtime, you can use this notebook to run local inference.

import subprocess

# Probe the NVIDIA driver by running `nvidia-smi` (its output appears in the cell).
# When the binary is missing or not executable, `subprocess.run` raises OSError
# instead of returning an exit code, so that case is treated as a failed probe
# too and the user gets the same actionable error rather than a raw traceback.
try:
    gpu_probe_failed = subprocess.run(["nvidia-smi"]).returncode != 0
except OSError:
    gpu_probe_failed = True

if gpu_probe_failed:
    raise RuntimeError(
        "Cannot communicate with GPU. Make sure you are using a GPU Colab runtime. "
        "Go to the Runtimes menu and select/create a runtime with GPUs."
    )

In [None]:
# @title Prepare the virtual environment

# @markdown Run this section to install required packages for the virtual environment.

! pip install --upgrade pip
! pip install diffusers~=0.30.0

In [None]:
# @title Run inference for `text-to-image` task

# @markdown Text-to-image generates an image from a text description, which is also known as a `prompt`.

# @markdown This example runs [black-forest-labs/FLUX.1-schnell](https://huggingface.co/black-forest-labs/FLUX.1-schnell) model with Diffusers [FluxPipeline](https://huggingface.co/docs/diffusers/main/en/api/pipelines/flux). Note that this model requires at least 24GB GPU memory. `g2-standard-24` machine type is recommended.
import torch
from diffusers import FluxPipeline

model_id = "black-forest-labs/FLUX.1-schnell"

# Cap every visible GPU at 20GB and let `device_map="balanced"` spread the
# pipeline components across them. The mapping is built from the GPUs actually
# attached to the runtime, so no non-existent device index is referenced; with
# two GPUs this is exactly {0: "20GB", 1: "20GB"}.
# NOTE(review): the 20GB cap presumably leaves headroom on 24GB GPUs — confirm.
max_memory_per_gpu = "20GB"
max_memory = {
    gpu_index: max_memory_per_gpu for gpu_index in range(torch.cuda.device_count())
}

pipe = FluxPipeline.from_pretrained(
    model_id,
    torch_dtype=torch.bfloat16,
    device_map="balanced",
    max_memory=max_memory,
)

prompt = "A cat holding a sign that says hello world"  # @param {type:"string"}
# Run 4 inference steps with guidance_scale=0.0 and keep the first returned image.
image = pipe(prompt, num_inference_steps=4, guidance_scale=0.0).images[0]

display(image)