In [None]:
# Copyright 2025 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Gemini 2.5 Flash Image Generation in Vertex AI

<table align="left">
  <td style="text-align: center">
    <a href="https://colab.research.google.com/github/GoogleCloudPlatform/generative-ai/blob/main/gemini/getting-started/intro_gemini_2_5_image_gen.ipynb">
      <img width="32px" src="https://www.gstatic.com/pantheon/images/bigquery/welcome_page/colab-logo.svg" alt="Google Colaboratory logo"><br> Open in Colab
    </a>
  </td>
  <td style="text-align: center">
    <a href="https://console.cloud.google.com/vertex-ai/colab/import/https:%2F%2Fraw.githubusercontent.com%2FGoogleCloudPlatform%2Fgenerative-ai%2Fmain%2Fgemini%2Fgetting-started%2Fintro_gemini_2_5_image_gen.ipynb">
      <img width="32px" src="https://lh3.googleusercontent.com/JmcxdQi-qOpctIvWKgPtrzZdJJK-J3sWE1RsfjZNwshCFgE_9fULcNpuXYTilIR2hjwN" alt="Google Cloud Colab Enterprise logo"><br> Open in Colab Enterprise
    </a>
  </td>
  <td style="text-align: center">
    <a href="https://console.cloud.google.com/vertex-ai/workbench/deploy-notebook?download_url=https://raw.githubusercontent.com/GoogleCloudPlatform/generative-ai/main/gemini/getting-started/intro_gemini_2_5_image_gen.ipynb">
      <img src="https://www.gstatic.com/images/branding/gcpiconscolors/vertexai/v1/32px.svg" alt="Vertex AI logo"><br> Open in Vertex AI Workbench
    </a>
  </td>
  <td style="text-align: center">
    <a href="https://github.com/GoogleCloudPlatform/generative-ai/blob/main/gemini/getting-started/intro_gemini_2_5_image_gen.ipynb">
      <img width="32px" src="https://www.svgrepo.com/download/217753/github.svg" alt="GitHub logo"><br> View on GitHub
    </a>
  </td>
</table>

<div style="clear: both;"></div>

<b>Share to:</b>

<a href="https://www.linkedin.com/sharing/share-offsite/?url=https%3A//github.com/GoogleCloudPlatform/generative-ai/blob/main/gemini/getting-started/intro_gemini_2_5_image_gen.ipynb" target="_blank">
  <img width="20px" src="https://upload.wikimedia.org/wikipedia/commons/8/81/LinkedIn_icon.svg" alt="LinkedIn logo">
</a>

<a href="https://bsky.app/intent/compose?text=https%3A//github.com/GoogleCloudPlatform/generative-ai/blob/main/gemini/getting-started/intro_gemini_2_5_image_gen.ipynb" target="_blank">
  <img width="20px" src="https://upload.wikimedia.org/wikipedia/commons/7/7a/Bluesky_Logo.svg" alt="Bluesky logo">
</a>

<a href="https://twitter.com/intent/tweet?url=https%3A//github.com/GoogleCloudPlatform/generative-ai/blob/main/gemini/getting-started/intro_gemini_2_5_image_gen.ipynb" target="_blank">
  <img width="20px" src="https://upload.wikimedia.org/wikipedia/commons/5/5a/X_icon_2.svg" alt="X logo">
</a>

<a href="https://reddit.com/submit?url=https%3A//github.com/GoogleCloudPlatform/generative-ai/blob/main/gemini/getting-started/intro_gemini_2_5_image_gen.ipynb" target="_blank">
  <img width="20px" src="https://redditinc.com/hubfs/Reddit%20Inc/Brand/Reddit_Logo.png" alt="Reddit logo">
</a>

<a href="https://www.facebook.com/sharer/sharer.php?u=https%3A//github.com/GoogleCloudPlatform/generative-ai/blob/main/gemini/getting-started/intro_gemini_2_5_image_gen.ipynb" target="_blank">
  <img width="20px" src="https://upload.wikimedia.org/wikipedia/commons/5/51/Facebook_f_logo_%282019%29.svg" alt="Facebook logo">
</a>

| Authors |
| --- |
| [Katie Nguyen](https://github.com/katiemn) |

## Overview

Gemini 2.5 Flash Image is a powerful, generalist multimodal model that offers state-of-the-art image generation and conversational image editing capabilities. This enables you to converse with Gemini and create or edit images with interwoven text.

In this tutorial, you'll learn how to use Gemini 2.5 Flash Image in Vertex AI using the Google Gen AI SDK to try out the following scenarios:
  - Image generation:
    - Text-to-image generation
    - Interleaved image and text sequences
  - Image editing:
    - Image-to-image with subject customization and style transfer
    - Multi-turn image editing with localization
    - Editing with multiple reference images

## Get started

### Install Google Gen AI SDK for Python

In [None]:
# Install (or upgrade) the Google Gen AI SDK in the kernel's environment; --quiet trims pip's output.
%pip install --upgrade --quiet google-genai

### Authenticate your notebook environment (Colab only)

If you are running this notebook on Google Colab, run the following cell to authenticate your environment.

In [2]:
import sys

# The Colab runtime pre-loads the `google.colab` package, so its presence in
# sys.modules is used as the "am I on Colab?" signal. Elsewhere this cell is a no-op.
running_in_colab = "google.colab" in sys.modules

if running_in_colab:
    # Imported lazily: the module only exists on Colab.
    from google.colab import auth

    auth.authenticate_user()

### Import libraries

In [3]:
from io import BytesIO

from IPython.display import Image, Markdown, display
from PIL import Image as PIL_Image
from google import genai
from google.genai.types import GenerateContentConfig, Part
import matplotlib.image as img
import matplotlib.pyplot as plt
import requests

### Set Google Cloud project information and create client

To get started using Vertex AI, you must have an existing Google Cloud project and [enable the Vertex AI API](https://console.cloud.google.com/flows/enableapi?apiid=aiplatform.googleapis.com).

Learn more about [setting up a project and a development environment](https://cloud.google.com/vertex-ai/docs/start/cloud-environment).

In [4]:
import os

PROJECT_ID = "[your-project-id]"  # @param {type: "string", placeholder: "[your-project-id]", isTemplate: true}
if not PROJECT_ID or PROJECT_ID == "[your-project-id]":
    # Fall back to the environment when the form field was left at its placeholder.
    # Do NOT wrap this in str(): str(None) is the truthy string "None", which would
    # be sent as the project ID and only fail later with a confusing API error.
    PROJECT_ID = os.environ.get("GOOGLE_CLOUD_PROJECT")

if not PROJECT_ID:
    raise ValueError(
        "No Google Cloud project configured. Set PROJECT_ID in this cell or "
        "export the GOOGLE_CLOUD_PROJECT environment variable."
    )

LOCATION = "global"

# Client used by every request in this notebook, routed through Vertex AI.
client = genai.Client(vertexai=True, project=PROJECT_ID, location=LOCATION)

### Load the image model


In [5]:
# Model identifier passed as `model=` to every generate_content / chat call in this notebook.
# NOTE(review): this is a "-preview" model ID — confirm it is still being served before relying on it.
MODEL_ID = "gemini-2.5-flash-image-preview"

## Image generation

First, you'll send text prompts to Gemini 2.5 Flash Image describing the images you'd like to generate.


### Text to image

In the cell below, you'll call the `generate_content` method and pass in the following arguments:

  - `model`: The ID of the model you want to use.
  - `contents`: This is your prompt, in this case, a text only user message describing the image to be generated.
  - `config`: A config for specifying content settings.
    - `response_modalities`: To generate an image, you must include `IMAGE` in the `response_modalities` list. Note that `IMAGE` cannot be the only value specified; it must be accompanied by `TEXT`. If you omit `IMAGE` from the list, you will only receive a text response.
    - `candidate_count`: The number of candidates to generate.
    - `safety_settings`: A list of safety settings, configured with the following fields:
      - `method`: HARM_BLOCK_METHOD_UNSPECIFIED, SEVERITY, PROBABILITY
      - `category`: HARM_CATEGORY_UNSPECIFIED, HARM_CATEGORY_HATE_SPEECH, HARM_CATEGORY_DANGEROUS_CONTENT, HARM_CATEGORY_HARASSMENT, HARM_CATEGORY_SEXUALLY_EXPLICIT, HARM_CATEGORY_CIVIC_INTEGRITY
      - `threshold`: HARM_BLOCK_THRESHOLD_UNSPECIFIED, BLOCK_LOW_AND_ABOVE, BLOCK_MEDIUM_AND_ABOVE, BLOCK_ONLY_HIGH, BLOCK_NONE, OFF

All generated images include a [SynthID watermark](https://deepmind.google/technologies/synthid/), which can be verified via the Media Studio in [Vertex AI Studio](https://cloud.google.com/generative-ai-studio?hl=en).

In [None]:
# Ask the model for a single candidate that may contain both text and image parts.
response = client.models.generate_content(
    model=MODEL_ID,
    contents="a cartoon infographic on flying sneakers",
    config=GenerateContentConfig(
        response_modalities=["TEXT", "IMAGE"],
        candidate_count=1,
        # One safety setting = one entry carrying method, category and threshold
        # together. Splitting them across three entries yields three incomplete
        # settings, none of which ties the threshold to the category.
        safety_settings=[
            {
                "method": "PROBABILITY",
                "category": "HARM_CATEGORY_DANGEROUS_CONTENT",
                "threshold": "BLOCK_MEDIUM_AND_ABOVE",
            },
        ],
    ),
)

# Render each returned part in order: text as Markdown, image bytes inline.
for part in response.candidates[0].content.parts:
    if part.text:
        display(Markdown(part.text))
    if part.inline_data:
        display(Image(data=part.inline_data.data, width=400))

### Text to image and text

In addition to generating images, Gemini can also create interleaved sequences of images and text.

For example, you could ask the model to generate a recipe for banana bread with images showing different stages of the cooking process. Or, you could ask the model to generate images of different wildflowers with accompanying titles and descriptions.

Let's try out the interleaved text and image functionality by prompting Gemini 2.5 Flash Image to create a tutorial for assembling a peanut butter and jelly sandwich.

You'll notice that in the prompt we ask the model to generate both text and images for each step. This encourages the model to create interleaved text and images.

In [None]:
# Request a step-by-step tutorial; the prompt explicitly asks for text AND an image
# per step so the response interleaves both kinds of parts.
response = client.models.generate_content(
    model=MODEL_ID,
    contents="Create a tutorial explaining how to make a peanut butter and jelly sandwich in three easy steps. For each step, provide a title with the number of the step, an explanation, and also generate an image to illustrate the content. Label each image with the step number but no other words.",
    config=GenerateContentConfig(
        response_modalities=["TEXT", "IMAGE"],
        # One safety setting = one entry carrying method, category and threshold
        # together. Splitting them across three entries yields three incomplete
        # settings, none of which ties the threshold to the category.
        safety_settings=[
            {
                "method": "PROBABILITY",
                "category": "HARM_CATEGORY_DANGEROUS_CONTENT",
                "threshold": "BLOCK_MEDIUM_AND_ABOVE",
            },
        ],
    ),
)

# Render each returned part in order: text as Markdown, image bytes inline.
for part in response.candidates[0].content.parts:
    if part.text:
        display(Markdown(part.text))
    if part.inline_data:
        display(Image(data=part.inline_data.data, width=400))

## Image editing

Gemini 2.5 Flash Image can generate image-to-image outputs from multiple reference images. This is useful for tasks like ensuring character consistency, generating logos, transferring styles, and inserting or removing objects.

### Subject customization

Let's try out a subject customization example by asking Gemini 2.5 Flash Image to create an image of this dog in both a pencil sketch and watercolor style.


#### Download the dog image

The following example uses an image from Cloud Storage. If you prefer to use a different image, you can either change the URL in the `wget` command or, if you have a local file, update the `subject_image` variable in the subsequent step.

In [None]:
# Download the sample dog photo from Cloud Storage into the current working directory.
!wget https://storage.googleapis.com/cloud-samples-data/generative-ai/image/dog-1.jpg

In [None]:
subject_image = "dog-1.jpg"  # @param {type: 'string'}

# Preview the reference photo on a single tall canvas with the axes hidden.
fig, axis = plt.subplots(nrows=1, ncols=1, figsize=(6, 12))
subject_pixels = img.imread(subject_image)
axis.imshow(subject_pixels)
axis.set_axis_off()
plt.show()

#### Send the request

Since you're including data from a local image in the request, you'll need to add a `Part.from_bytes` entry to the request `contents`.

In [None]:
# Read the local reference image as raw bytes so it can be attached to the request.
with open(subject_image, "rb") as f:
    image = f.read()

response = client.models.generate_content(
    model=MODEL_ID,
    contents=[
        # Reference image first, followed by the instruction that refers to it.
        Part.from_bytes(
            data=image,
            mime_type="image/jpeg",
        ),
        "Create a pencil sketch image of this dog wearing a cowboy hat in a western-themed setting. Generate another image of this dog in a watercolor style floating down a river on a paddle board.",
    ],
    config=GenerateContentConfig(
        response_modalities=["TEXT", "IMAGE"],
        candidate_count=1,
        # One safety setting = one entry carrying method, category and threshold
        # together. Splitting them across three entries yields three incomplete
        # settings, none of which ties the threshold to the category.
        safety_settings=[
            {
                "method": "PROBABILITY",
                "category": "HARM_CATEGORY_DANGEROUS_CONTENT",
                "threshold": "BLOCK_MEDIUM_AND_ABOVE",
            },
        ],
    ),
)

# Render each returned part in order: text as Markdown, image bytes inline.
for part in response.candidates[0].content.parts:
    if part.text:
        display(Markdown(part.text))
    if part.inline_data:
        display(Image(data=part.inline_data.data, width=400))

### Style transfer

In this next example, you'll use the style from a living room to reimagine a kitchen in the same style.

#### Download the living room image

Again, the following example uses an image from Cloud Storage. If you prefer to use a different image, you can either change the URL in the `wget` command or, if you have a local file, update the `style_image` variable in the subsequent step.

In [None]:
# Download the sample living room picture from Cloud Storage into the current working directory.
!wget https://storage.googleapis.com/cloud-samples-data/generative-ai/image/living-room.png

In [None]:
style_image = "living-room.png"  # @param {type: 'string'}

# Preview the style reference on a single tall canvas with the axes hidden.
fig, axis = plt.subplots(nrows=1, ncols=1, figsize=(6, 12))
style_pixels = img.imread(style_image)
axis.imshow(style_pixels)
axis.set_axis_off()
plt.show()

#### Send the request

In [None]:
# Read the local style reference as raw bytes so it can be attached to the request.
with open(style_image, "rb") as f:
    image = f.read()

response = client.models.generate_content(
    model=MODEL_ID,
    contents=[
        # Reference image first, followed by the instruction that refers to it.
        Part.from_bytes(
            data=image,
            mime_type="image/png",
        ),
        "Using the concepts, colors, and themes from this living room generate a kitchen and dining room with the same aesthetic.",
    ],
    config=GenerateContentConfig(
        response_modalities=["TEXT", "IMAGE"],
        candidate_count=1,
        # One safety setting = one entry carrying method, category and threshold
        # together. Splitting them across three entries yields three incomplete
        # settings, none of which ties the threshold to the category.
        safety_settings=[
            {
                "method": "PROBABILITY",
                "category": "HARM_CATEGORY_DANGEROUS_CONTENT",
                "threshold": "BLOCK_MEDIUM_AND_ABOVE",
            },
        ],
    ),
)

# Render each returned part in order: text as Markdown, image bytes inline.
for part in response.candidates[0].content.parts:
    if part.text:
        display(Markdown(part.text))
    if part.inline_data:
        display(Image(data=part.inline_data.data, width=400))

### Multi-turn image editing

In this next section, you supply a starting image and iteratively alter certain aspects of the image by chatting with Gemini 2.5 Flash Image.


Rather than using local images in the next example, you'll use images stored in Google Cloud Storage. Run the next step to view the starting image of a perfume bottle. If you'd like to use a different image from Cloud Storage, replace the `perfume_url` and `perfume_uri` below.

In [None]:
# The same object addressed two ways: the HTTPS URL is used to preview the image
# here, the gs:// URI is what gets passed to the model in the next step.
perfume_url = (
    "https://storage.googleapis.com/cloud-samples-data/generative-ai/image/perfume.jpg"
)
perfume_uri = "gs://cloud-samples-data/generative-ai/image/perfume.jpg"

# Download before plotting. The timeout keeps a stalled connection from hanging the
# cell, and raise_for_status() surfaces HTTP errors directly instead of handing an
# error page to Pillow.
perfume_response = requests.get(perfume_url, timeout=30)
perfume_response.raise_for_status()
perfume_image = PIL_Image.open(BytesIO(perfume_response.content))

# Display the image in the left panel of a two-panel figure; the right panel stays
# empty and both panels have their axes hidden.
fig, axes = plt.subplots(1, 2, figsize=(12, 6))
axes[0].imshow(perfume_image)
for ax in axes:
    ax.axis("off")
plt.show()

#### Start a chat

In this next step, you'll start a chat so you can continuously edit your images by talking with Gemini. Since you're now using reference images stored in Cloud Storage, you'll use `Part.from_uri` in the `message` content.

In [None]:
# Open a chat session so follow-up edits are sent through the same conversation.
chat = client.chats.create(model=MODEL_ID)

# First turn: the Cloud Storage image plus the edit instruction.
response = chat.send_message(
    message=[
        Part.from_uri(file_uri=perfume_uri, mime_type="image/jpeg"),
        "change the perfume color to a light purple",
    ],
    config=GenerateContentConfig(response_modalities=["TEXT", "IMAGE"]),
)

# `data` is read by the next cell. It starts as the source URI and is replaced by
# the bytes of the last image part in the response, if the response has one.
data = perfume_uri
for part in response.candidates[0].content.parts:
    if part.text:
        display(Markdown(part.text))
    if part.inline_data:
        display(Image(data=part.inline_data.data, width=400))
        data = part.inline_data.data

Now, you'll include the previous image data in a new message in the existing chat, along with a new text prompt, to update the previously generated image. This time, you'll ask Gemini to write a word on the perfume bottle, and since Gemini is able to handle different languages, let's ask for the word in French.

In [None]:
# `data` was set by the previous cell: the bytes of the image Gemini returned, or,
# if that response contained no image, the original Cloud Storage URI (a str).
# Build the Part that matches whichever one it is.
if isinstance(data, bytes):
    # Detect the real format instead of assuming JPEG; fall back to the previous
    # hard-coded "image/jpeg" when Pillow cannot identify the bytes.
    try:
        detected_format = PIL_Image.open(BytesIO(data)).format
    except OSError:
        detected_format = None
    previous_image_part = Part.from_bytes(
        data=data,
        mime_type=PIL_Image.MIME.get(detected_format, "image/jpeg"),
    )
else:
    # Part.from_bytes cannot take a URI string; reference the stored object instead.
    previous_image_part = Part.from_uri(
        file_uri=data,
        mime_type="image/jpeg",
    )

response = chat.send_message(
    message=[
        previous_image_part,
        "inscribe the word flowers in French on the perfume bottle in a delicate white cursive font",
    ],
    config=GenerateContentConfig(
        response_modalities=["TEXT", "IMAGE"],
    ),
)

# Render each returned part in order: text as Markdown, image bytes inline.
for part in response.candidates[0].content.parts:
    if part.text:
        display(Markdown(part.text))
    if part.inline_data:
        display(Image(data=part.inline_data.data, width=400))

### Multiple reference images

When editing images with Gemini 2.5 Flash Image, you can also supply multiple input images to create new ones. In this next example, you'll prompt Gemini with two images: one of a woman and one of a suitcase. You'll then ask Gemini to combine the objects from these images in order to create a new one. You'll also ask Gemini to supply text to accompany the image.


Run the following cell to visualize the starting images stored in Cloud Storage.

In [None]:
person_url = (
    "https://storage.googleapis.com/cloud-samples-data/generative-ai/image/woman.jpg"
)
suitcase_url = (
    "https://storage.googleapis.com/cloud-samples-data/generative-ai/image/suitcase.png"
)

# Download both previews before plotting. The timeout keeps a stalled connection
# from hanging the cell, and raise_for_status() surfaces HTTP errors directly
# instead of handing an error page to Pillow.
person_response = requests.get(person_url, timeout=30)
person_response.raise_for_status()
person_image = PIL_Image.open(BytesIO(person_response.content))

suitcase_response = requests.get(suitcase_url, timeout=30)
suitcase_response.raise_for_status()
suitcase_image = PIL_Image.open(BytesIO(suitcase_response.content))

# Show the two reference images side by side with the axes hidden.
fig, axes = plt.subplots(1, 2, figsize=(12, 6))
axes[0].imshow(person_image)
axes[1].imshow(suitcase_image)
for ax in axes:
    ax.axis("off")
plt.show()

Now, you'll send the request. The only difference from previous image editing calls is that you'll supply one `Part.from_uri` instance per reference image.

In [None]:
response = client.models.generate_content(
    model=MODEL_ID,
    contents=[
        # One Part per reference image, followed by the instruction that uses them.
        Part.from_uri(
            file_uri="gs://cloud-samples-data/generative-ai/image/suitcase.png",
            mime_type="image/png",
        ),
        Part.from_uri(
            file_uri="gs://cloud-samples-data/generative-ai/image/woman.jpg",
            mime_type="image/jpeg",
        ),
        "Generate an image of the woman pulling the suitcase in an airport. Separately, write a short caption for this image that would be suitable for a social media post.",
    ],
    config=GenerateContentConfig(
        response_modalities=["TEXT", "IMAGE"],
        candidate_count=1,
        # One safety setting = one entry carrying method, category and threshold
        # together. Splitting them across three entries yields three incomplete
        # settings, none of which ties the threshold to the category.
        safety_settings=[
            {
                "method": "PROBABILITY",
                "category": "HARM_CATEGORY_DANGEROUS_CONTENT",
                "threshold": "BLOCK_MEDIUM_AND_ABOVE",
            },
        ],
    ),
)


# Render each returned part in order: text as Markdown, image bytes inline.
for part in response.candidates[0].content.parts:
    if part.text:
        display(Markdown(part.text))
    if part.inline_data:
        display(Image(data=part.inline_data.data, width=400))